@@ -65,6 +65,8 @@ struct prune_ops {
struct prune_table *table, bool pruned);
};
+int prune_init(void);
+void prune_fini(void);
struct prune *prune_create(unsigned int mask_len, const struct prune_ops *ops,
void *priv);
void prune_destroy(struct prune *prune);
@@ -9,10 +9,24 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/list.h>
+#include <linux/rculist.h>
#include <linux/rhashtable.h>
#include <linux/prune.h>
#include <linux/jhash.h>
#include <linux/bitmap.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+
+/*
+ * dentry for debugsfs purpose
+ */
+static struct dentry *dentry;
+/*
+ * The list of all allocated prune objects
+ */
+static struct list_head prune_object_list;
/*
* struct prune_table_entry - hold the item configuration and its prune vector.
@@ -42,6 +56,7 @@ struct prune_intersec_table_list_elem {
/*
* struct prune_intersec_table - Represent the intersection of two prune tables.
* @entry_ht: Hash-table of prune_intersec_table_entry.
+ * @items_list: A walker for items_ht.
* @neigh_table: A pointer to the table which this table intersects.
* @list: Node of table's intersec_table_list.
* @mask: Intersection mask of the the two tables.
@@ -51,6 +66,7 @@ struct prune_intersec_table_list_elem {
*/
struct prune_intersec_table {
struct rhashtable entry_ht;
+ struct list_head items_list;
struct prune_table *neigh_table;
struct list_head list;
unsigned long mask[0];
@@ -72,6 +88,7 @@ struct prune_intersec_table {
* @neigh_table_elem_list: Contain list of entries in the neighbor table
* pointed by the neigh_table field in
* prune_intersec_table struct.
+ * @list: Node of items_list in struct prune_intersec_table.
*
* The structure is used to find the intersection of new prune item with the
* current one, and check whether the new item should be pruned or not.
@@ -82,6 +99,7 @@ struct prune_intersec_table_entry {
struct prune_intersec_table *intersec_table;
struct list_head intersec_table_elem_list;
struct list_head neigh_table_elem_list;
+ struct list_head list;
};
/*
@@ -116,6 +134,7 @@ struct prune_table {
* @priv: User data.
* @mask_len: The mask len in bits which this prune object
* support.
+ * @list: Node in prune_object_list.
*/
struct prune {
const struct prune_ops *ops;
@@ -123,6 +142,211 @@ struct prune {
struct list_head table_list;
void *priv;
unsigned int mask_len;
+ struct list_head list;
+};
+
+static void prune_vector_items_dump(struct seq_file *seq,
+ struct prune_table_entry *entry)
+{
+ struct prune_vector_item *vector_item;
+
+ seq_puts(seq, "\t\titem prune vector:\n");
+ list_for_each_entry_rcu(vector_item, &entry->prune_vector, list) {
+ seq_printf(seq, "\t\t\ttable: %p\t is pruned: %s\n",
+ vector_item->table->priv,
+ vector_item->pruned ? "Yes" : "No");
+ }
+}
+
+static void prune_table_items_dump(struct seq_file *seq,
+ struct prune_table *table)
+{
+ struct prune_table_entry *entry;
+ char *buf;
+
+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+ list_for_each_entry_rcu(entry, &table->entry_list, list) {
+ seq_puts(seq, "\t\titem info\n");
+ seq_puts(seq, "\t\t---------\n");
+ if (buf) {
+ bitmap_print_to_pagebuf(false, buf, entry->item.key,
+ table->prune->mask_len);
+ seq_printf(seq, "\t\titem key: %s\n", buf);
+ }
+ seq_printf(seq, "\t\titem prio: %u\n", entry->item.priority);
+ seq_printf(seq, "\t\titem priv: %p\n", entry->item.priv);
+
+ prune_vector_items_dump(seq, entry);
+ }
+ kfree(buf);
+}
+
+static void
+prune_intersec_item_list_dump(struct seq_file *seq,
+ struct prune_intersec_table *intersec_table)
+{
+ struct prune_intersec_table_entry *intersec_entry;
+ struct prune_intersec_table_list_elem *intersec_item;
+ struct prune_intersec_table_list_elem *neigh_item;
+ unsigned int mask_len = intersec_table->neigh_table->prune->mask_len;
+ char *buf;
+
+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+ list_for_each_entry_rcu(intersec_entry, &intersec_table->items_list,
+ list) {
+ seq_puts(seq, "\t\t\tintersec entry info\n");
+ seq_puts(seq, "\t\t\t-------------------\n");
+ if (buf) {
+ bitmap_print_to_pagebuf(false, buf,
+ intersec_entry->ht_key,
+ mask_len);
+ seq_printf(seq, "\t\t\t\t mask: %s\n",
+ buf);
+ }
+ seq_puts(seq, "\t\t\t\t intersec element info:\n");
+ list_for_each_entry_rcu(intersec_item,
+ &intersec_entry->intersec_table_elem_list,
+ list) {
+ seq_printf(seq, "\t\t\t\t\tentry prio: %lu\n",
+ *intersec_item->entry->item.key);
+ seq_printf(seq, "\t\t\t\t\tentry prio: %u\n\n",
+ intersec_item->entry->item.priority);
+ }
+ seq_puts(seq, "\t\t\t\t neigh element info:\n");
+ list_for_each_entry_rcu(neigh_item,
+ &intersec_entry->neigh_table_elem_list,
+ list) {
+ if (buf) {
+ bitmap_print_to_pagebuf(false, buf,
+ neigh_item->entry->item.key,
+ mask_len);
+ seq_printf(seq, "\t\titem key: %s\n", buf);
+ }
+ seq_printf(seq, "\t\t\t\t\tentry prio: %u\n\n",
+ neigh_item->entry->item.priority);
+ }
+ }
+ kfree(buf);
+}
+
+static void prune_intersec_tables_dump(struct seq_file *seq,
+ struct prune_table *table)
+{
+ struct prune_intersec_table *intersec_table;
+
+ list_for_each_entry_rcu(intersec_table, &table->intersec_table_list,
+ list) {
+ seq_puts(seq, "\t\tintersec table info\n");
+ seq_puts(seq, "\t\t-------------------\n");
+ seq_printf(seq, "\t\ttable mask: %lu\n", *intersec_table->mask);
+ seq_printf(seq, "\t\tniegh table priv: %p\n",
+ intersec_table->neigh_table->priv);
+ prune_intersec_item_list_dump(seq, intersec_table);
+ }
+}
+
+static void prune_table_dump(struct seq_file *seq, struct prune *prune)
+{
+ struct prune_table *table;
+ unsigned int table_cnt = 0;
+ char *buf;
+
+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+ list_for_each_entry_rcu(table, &prune->table_list, list) {
+ seq_puts(seq, "\ttable info\n");
+ seq_puts(seq, "\t----------\n");
+ seq_printf(seq, "\ttable id: %p\n", table);
+ seq_printf(seq, "\ttable priv: %p\n", table->priv);
+ if (buf) {
+ bitmap_print_to_pagebuf(false, buf, table->mask,
+ table->prune->mask_len);
+ seq_printf(seq, "\ttable mask: %s\n", buf);
+ }
+ prune_table_items_dump(seq, table);
+ seq_puts(seq, "\n");
+ prune_intersec_tables_dump(seq, table);
+ seq_puts(seq, "\n\n");
+ table_cnt++;
+ }
+ seq_printf(seq, "Number of tables in prune object: %d\n", table_cnt);
+
+ kfree(buf);
+}
+
+static void prune_obj_dump(struct seq_file *seq, struct prune *prune)
+{
+ seq_puts(seq, "prune\n");
+ seq_puts(seq, "-----\n");
+ seq_printf(seq, "prune priv: %p\n", prune->priv);
+ prune_table_dump(seq, prune);
+ seq_puts(seq, "\n\n");
+}
+
+static void *prune_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct prune *prune;
+ loff_t n = *pos;
+
+ rcu_read_lock();
+ if (!list_empty(&prune_object_list)) {
+ if (n-- == 0) {
+ prune = list_first_entry(&prune_object_list,
+ typeof(*prune),
+ list);
+ return prune;
+ }
+ }
+ return NULL;
+}
+
+static void *prune_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct prune *prev_prune_obj = v;
+ struct prune *next_prune_obj = NULL;
+
+ ++(*pos);
+
+ next_prune_obj = list_next_entry(prev_prune_obj, list);
+ if (&next_prune_obj->list == &prune_object_list)
+ return NULL;
+ else
+ return next_prune_obj;
+}
+
+static void prune_seq_stop(struct seq_file *seq, void *v)
+{
+ rcu_read_unlock(); /*unlock the rcu_lock taken at start */
+}
+
+static int prune_seq_show(struct seq_file *seq, void *v)
+{
+ struct prune *prune = v;
+
+ prune_obj_dump(seq, prune);
+ return 0;
+}
+
+static const struct seq_operations prune_seq_ops = {
+ .start = prune_seq_start,
+ .next = prune_seq_next,
+ .stop = prune_seq_stop,
+ .show = prune_seq_show,
+};
+
+static int prune_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &prune_seq_ops);
+}
+
+static const struct file_operations prune_fops = {
+ .owner = THIS_MODULE,
+ .open = prune_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
};
static unsigned int
@@ -179,7 +403,7 @@ static int prune_table_prune_vector_init(struct prune *prune,
{
struct prune_table *table;
- INIT_LIST_HEAD(&entry->prune_vector);
+ INIT_LIST_HEAD_RCU(&entry->prune_vector);
list_for_each_entry(table, &prune->table_list, list) {
struct prune_vector_item *vector_item;
@@ -191,7 +415,7 @@ static int prune_table_prune_vector_init(struct prune *prune,
}
vector_item->pruned = true;
vector_item->table = table;
- list_add_tail(&vector_item->list, &entry->prune_vector);
+ list_add_tail_rcu(&vector_item->list, &entry->prune_vector);
}
return 0;
}
@@ -204,7 +428,8 @@ static void prune_table_prune_vector_fini(struct prune_table_entry *entry)
struct prune_vector_item *vector_item;
vector_item = list_entry(pos, typeof(*vector_item), list);
- list_del(&vector_item->list);
+ list_del_rcu(&vector_item->list);
+ synchronize_rcu();
kfree(vector_item);
}
}
@@ -578,7 +803,9 @@ prune_table_entry_create(struct prune *prune, struct prune_table *table,
prune_table_entry_fini(entry);
return ERR_PTR(err);
}
- list_add_tail(&entry->list, &table->entry_list);
+
+ list_add_tail_rcu(&entry->list, &table->entry_list);
+
entry->item.table = table;
return entry;
@@ -586,7 +813,8 @@ prune_table_entry_create(struct prune *prune, struct prune_table *table,
static void prune_table_entry_destroy(struct prune_table_entry *entry)
{
- list_del(&entry->list);
+ list_del_rcu(&entry->list);
+ synchronize_rcu();
prune_table_entry_fini(entry);
}
@@ -611,8 +839,8 @@ prune_intersec_entry_create(unsigned long *intersec_table_mask,
bitmap_and(intersec_entry->ht_key, intersec_table_mask, item_key,
mask_len);
- INIT_LIST_HEAD(&intersec_entry->intersec_table_elem_list);
- INIT_LIST_HEAD(&intersec_entry->neigh_table_elem_list);
+ INIT_LIST_HEAD_RCU(&intersec_entry->intersec_table_elem_list);
+ INIT_LIST_HEAD_RCU(&intersec_entry->neigh_table_elem_list);
return intersec_entry;
@@ -644,11 +872,11 @@ prune_intersec_item_create(struct prune_table_entry *entry,
intersec_item->entry = entry;
if (!neigh)
- list_add_tail(&intersec_item->list,
- &intersec_entry->intersec_table_elem_list);
+ list_add_tail_rcu(&intersec_item->list,
+ &intersec_entry->intersec_table_elem_list);
else
- list_add_tail(&intersec_item->list,
- &intersec_entry->neigh_table_elem_list);
+ list_add_tail_rcu(&intersec_item->list,
+ &intersec_entry->neigh_table_elem_list);
return intersec_item;
}
@@ -656,7 +884,8 @@ prune_intersec_item_create(struct prune_table_entry *entry,
static void
prune_intersec_item_destroy(struct prune_intersec_table_list_elem *intersec_item)
{
- list_del(&intersec_item->list);
+ list_del_rcu(&intersec_item->list);
+ synchronize_rcu();
kfree(intersec_item);
}
@@ -669,6 +898,7 @@ static int prune_intersec_item_add(struct prune_intersec_table *intersec_table,
struct prune_intersec_table_entry *intersec_entry;
struct rhashtable_params rht_params;
unsigned long *ht_key;
+ unsigned long *mask;
bool found = true;
int err = 0;
@@ -676,6 +906,7 @@ static int prune_intersec_item_add(struct prune_intersec_table *intersec_table,
if (!ht_key)
return -ENOMEM;
+ mask = intersec_table->mask;
bitmap_and(ht_key, intersec_table->mask, entry->item.key, mask_len);
rht_params = prune_intersec_table_entry_rht_params(intersec_table);
@@ -686,7 +917,7 @@ static int prune_intersec_item_add(struct prune_intersec_table *intersec_table,
if (!intersec_entry) {
/* create new intersec entry since there ins't already one */
- intersec_entry = prune_intersec_entry_create(intersec_table->mask,
+ intersec_entry = prune_intersec_entry_create(mask,
entry->item.key,
intersec_table,
mask_len);
@@ -706,6 +937,8 @@ static int prune_intersec_item_add(struct prune_intersec_table *intersec_table,
rht_params);
if (err)
goto err_rhashtable_insert;
+ list_add_tail_rcu(&intersec_entry->list,
+ &intersec_table->items_list);
}
return 0;
@@ -772,6 +1005,8 @@ prune_intersec_item_del(struct prune_intersec_table *intersec_table,
rhashtable_remove_fast(&intersec_table->entry_ht,
&intersec_entry->ht_node,
rht_params);
+ list_del_rcu(&intersec_entry->list);
+ synchronize_rcu();
prune_intersec_entry_destroy(intersec_entry);
}
kfree(ht_key);
@@ -801,7 +1036,9 @@ prune_intersec_table_create(struct prune *prune,
if (err)
goto err_rhashtable_init;
- list_add(&intersec_table->list, &table->intersec_table_list);
+ list_add_tail_rcu(&intersec_table->list, &table->intersec_table_list);
+
+ INIT_LIST_HEAD_RCU(&intersec_table->items_list);
return intersec_table;
@@ -827,6 +1064,8 @@ static void prune_intersec_rht_free(void *ptr, void *arg)
neigh_item = list_entry(pos, typeof(*neigh_item), list);
prune_intersec_item_destroy(neigh_item);
}
+ list_del_rcu(&rht_node->list);
+ synchronize_rcu();
kfree(rht_node->ht_key);
kfree(rht_node);
}
@@ -837,7 +1076,8 @@ prune_intersec_table_destroy(struct prune_intersec_table *intersec_table)
rhashtable_free_and_destroy(&intersec_table->entry_ht,
prune_intersec_rht_free,
NULL);
- list_del(&intersec_table->list);
+ list_del_rcu(&intersec_table->list);
+ synchronize_rcu();
kfree(intersec_table);
}
@@ -916,6 +1156,7 @@ static int prune_intersec_table_add(struct prune *prune,
true);
if (err)
goto err_prune_intersec_table_items_commit_new;
+
return 0;
err_prune_intersec_table_items_commit_new:
@@ -1090,7 +1331,7 @@ static int prune_table_item_vector_add(struct prune_table *table,
}
vector_item->pruned = true;
vector_item->table = new_table;
- list_add_tail(&vector_item->list, &entry->prune_vector);
+ list_add_tail_rcu(&vector_item->list, &entry->prune_vector);
}
return 0;
}
@@ -1173,8 +1414,8 @@ static struct prune_table *prune_table_init(struct prune *prune,
bitmap_copy(table->mask, mask, prune->mask_len);
table->priv = priv;
table->prune = prune;
- INIT_LIST_HEAD(&table->entry_list);
- INIT_LIST_HEAD(&table->intersec_table_list);
+ INIT_LIST_HEAD_RCU(&table->entry_list);
+ INIT_LIST_HEAD_RCU(&table->intersec_table_list);
return table;
}
@@ -1212,10 +1453,11 @@ struct prune *prune_create(unsigned int mask_len, const struct prune_ops *ops,
prune->priv = priv;
prune->mask_len = mask_len;
- INIT_LIST_HEAD(&prune->table_list);
+ INIT_LIST_HEAD_RCU(&prune->table_list);
prune->ops = ops;
prune_intersec_tables_ht_params_init(prune, mask_len);
+ list_add_tail_rcu(&prune->list, &prune_object_list);
return prune;
}
EXPORT_SYMBOL(prune_create);
@@ -1228,6 +1470,8 @@ void prune_destroy(struct prune *prune)
{
WARN_ON(!list_empty(&prune->table_list));
+ list_del_rcu(&prune->list);
+ synchronize_rcu();
kfree(prune);
}
EXPORT_SYMBOL(prune_destroy);
@@ -1267,7 +1511,7 @@ struct prune_table *prune_table_create(struct prune *prune, unsigned long *mask,
if (err)
goto err_table_build;
- list_add_tail(&table->list, &prune->table_list);
+ list_add_tail_rcu(&table->list, &prune->table_list);
return table;
@@ -1298,7 +1542,8 @@ int prune_table_destroy(struct prune_table *table)
prune_item_prune_table_del(table->prune, table);
- list_del(&table->list);
+ list_del_rcu(&table->list);
+ synchronize_rcu();
prune_table_fini(table);
@@ -1440,6 +1685,42 @@ struct list_head *prune_item_prune_vector(struct prune_item *item)
}
EXPORT_SYMBOL(prune_item_prune_vector);
+static struct dentry *prune_debugfs_create(void)
+{
+ dentry = debugfs_create_file("prune_debug", 0644, NULL, NULL,
+ &prune_fops);
+ if (!dentry)
+ pr_warn("Failed to create the debugfs prune file\n");
+
+ return dentry;
+}
+
+static void prune_debugfs_destroy(struct dentry *dentry)
+{
+ debugfs_remove(dentry);
+}
+
+int prune_init(void)
+{
+ INIT_LIST_HEAD_RCU(&prune_object_list);
+#ifdef CONFIG_DEBUG_FS
+ dentry = prune_debugfs_create();
+ if (!dentry)
+ return -EINVAL;
+
+#endif
+ return 0;
+}
+EXPORT_SYMBOL(prune_init);
+
+void prune_fini(void)
+{
+#ifdef CONFIG_DEBUG_FS
+ prune_debugfs_destroy(dentry);
+#endif
+}
+EXPORT_SYMBOL(prune_fini);
+
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Tal Bar <talb@mellanox.com>");
MODULE_DESCRIPTION("Prune lookup table manager");
@@ -1199,11 +1199,13 @@ static int test_prune(void)
static int __init test_prune_init(void)
{
- return test_prune();
+ prune_init();
+ return test_prune();
}
static void __exit test_prune_exit(void)
{
+ prune_fini();
}
module_init(test_prune_init);
This patch provides a debugfs /sys/kernel/debug/prune_debug to represent the prune object state so admin can investigate why for some entries there is latency due to needed lookups. In order to check the current status user should: "cat /sys/kernel/debug/prune_debug" An example output: prune ----- prune priv: (null) number of table in prune object: 2 table info ---------- table id: 0000000021cafe9f table priv: (null) table mask: 16777215 item info --------- item key: 12599424 item prio: 7 item priv: (null) item prune vector: table: (null) is pruned: Yes table: 00000000db28e393 is pruned: No intersec table info ------------------- table mask: 16776960 niegh table priv: 00000000db28e393 intersec entry info ------------------- mask: 12599296 intersec element info: entry prio: 12599424 entry prio: 7 neigh element info: entry key: 549470208 entry prio: 5 entry key: 2696953856 entry prio: 10 table info ---------- table id: 0000000041acfd40 table priv: 00000000db28e393 table mask: 4294967040 item info --------- item key: 549470208 item prio: 5 item priv: (null) item prune vector: table: (null) is pruned: Yes table: 00000000db28e393 is pruned: Yes item info --------- item key: 2696953856 item prio: 10 item priv: (null) item prune vector: table: (null) is pruned: No table: 00000000db28e393 is pruned: Yes intersec table info ------------------- table mask: 16776960 niegh table priv: (null) intersec entry info ------------------- mask: 12599296 intersec element info: entry prio: 549470208 entry prio: 5 entry prio: 2696953856 entry prio: 10 neigh element info: entry key: 12599424 entry prio: 7 Signed-off-by: Tal Bar <talb@mellanox.com> --- include/linux/prune.h | 2 + lib/prune.c | 323 ++++++++++++++++++++++++++++++++++++++++++++++---- lib/test_prune.c | 4 +- 3 files changed, 307 insertions(+), 22 deletions(-)