@@ -21,13 +21,13 @@ User API
========
The idle page tracking API is located at ``/sys/kernel/mm/page_idle``.
-Currently, it consists of the only read-write file,
-``/sys/kernel/mm/page_idle/bitmap``.
+Currently, it consists of two read-write files,
+``/sys/kernel/mm/page_idle/bitmap`` and ``/sys/kernel/mm/page_idle/lock``.
-The file implements a bitmap where each bit corresponds to a memory page. The
-bitmap is represented by an array of 8-byte integers, and the page at PFN #i is
-mapped to bit #i%64 of array element #i/64, byte order is native. When a bit is
-set, the corresponding page is idle.
+The ``bitmap`` file implements a bitmap where each bit corresponds to a memory
+page. The bitmap is represented by an array of 8-byte integers, and the page at
+PFN #i is mapped to bit #i%64 of array element #i/64, byte order is native.
+When a bit is set, the corresponding page is idle.
A page is considered idle if it has not been accessed since it was marked idle
(for more details on what "accessed" actually means see the :ref:`Implementation
@@ -74,6 +74,16 @@ See :ref:`Documentation/admin-guide/mm/pagemap.rst <pagemap>` for more
information about ``/proc/pid/pagemap``, ``/proc/kpageflags``, and
``/proc/kpagecgroup``.
+The ``lock`` file is for avoidance of interference from concurrent users. If
+the content of the ``lock`` file is ``1``, it means the ``bitmap`` file is
+currently being used by someone. While the content of the ``lock`` file is
+``1``, writing ``1`` to the file fails. Therefore, users should first
+successfully write ``1`` to the ``lock`` file before starting to use the
+``bitmap`` file, and write ``0`` to the ``lock`` file after they have finished
+using the ``bitmap`` file. If a user writes the ``bitmap`` file while the
+``lock`` is ``0``, the write fails. Meanwhile, reads of the ``bitmap`` file
+succeed regardless of the ``lock`` status.
+
.. _impl_details:
Implementation Details
@@ -16,6 +16,8 @@
#define BITMAP_CHUNK_SIZE sizeof(u64)
#define BITMAP_CHUNK_BITS (BITMAP_CHUNK_SIZE * BITS_PER_BYTE)
+/*
+ * Lock controlled from user space through the "lock" sysfs attribute.
+ * page_idle_bitmap_write() returns -EPERM unless this mutex is locked.
+ */
+static DEFINE_MUTEX(page_idle_lock);
+
/*
* Idle page tracking only considers user memory pages, for other types of
* pages the idle flag is always unset and an attempt to set it is silently
@@ -169,6 +171,9 @@ static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj,
unsigned long pfn, end_pfn;
int bit;
+ if (!mutex_is_locked(&page_idle_lock))
+ return -EPERM;
+
if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE)
return -EINVAL;
@@ -197,17 +202,52 @@ static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj,
return (char *)in - buf;
}
+/*
+ * Read handler of the "lock" sysfs attribute.
+ *
+ * Writes the result of mutex_is_locked(&page_idle_lock) into @buf as a
+ * decimal integer followed by a newline and returns the number of
+ * characters written.
+ */
+static ssize_t page_idle_lock_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	int locked = mutex_is_locked(&page_idle_lock);
+
+	return sprintf(buf, "%d\n", locked);
+}
+
+/*
+ * Write handler of the "lock" sysfs attribute.
+ *
+ * @buf is parsed with kstrtobool().  A true value tries to take
+ * page_idle_lock and fails with -EBUSY if it is already held; a false value
+ * releases it.  Returns @count on success or a negative errno.
+ */
+static ssize_t page_idle_lock_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	bool do_lock;
+	int ret;
+
+	ret = kstrtobool(buf, &do_lock);
+	if (ret < 0)
+		return ret;
+
+	if (do_lock) {
+		if (!mutex_trylock(&page_idle_lock))
+			return -EBUSY;
+	} else if (mutex_is_locked(&page_idle_lock)) {
+		/*
+		 * Only unlock a mutex that is actually held: calling
+		 * mutex_unlock() on an unlocked mutex is not allowed, and user
+		 * space can write "0" at any time.  Writing "0" while the lock
+		 * is already free is treated as a successful no-op.
+		 *
+		 * NOTE(review): the check and the unlock are not atomic, and
+		 * the unlocking task may differ from the task that locked.
+		 * Kernel mutexes expect the owner to unlock, so a plain flag
+		 * (e.g. an atomic bit) may be a better fit -- confirm.
+		 */
+		mutex_unlock(&page_idle_lock);
+	}
+
+	return count;
+}
+
static struct bin_attribute page_idle_bitmap_attr =
__BIN_ATTR(bitmap, 0600,
page_idle_bitmap_read, page_idle_bitmap_write, 0);
+/* "lock" attribute: mode 0600, served by page_idle_lock_show()/_store(). */
+static struct kobj_attribute page_idle_lock_attr =
+ __ATTR(lock, 0600, page_idle_lock_show, page_idle_lock_store);
+
static struct bin_attribute *page_idle_bin_attrs[] = {
&page_idle_bitmap_attr,
NULL,
};
+/* NULL-terminated list of the non-binary attributes of the group. */
+static struct attribute *page_idle_lock_attrs[] = {
+ &page_idle_lock_attr.attr,
+ NULL,
+};
+
static const struct attribute_group page_idle_attr_group = {
.bin_attrs = page_idle_bin_attrs,
+ .attrs = page_idle_lock_attrs,
.name = "page_idle",
};