diff --git a/include/linux/walk_tables.h b/include/linux/walk_tables.h new file mode 100644 index 000000000000..398be60e854a --- /dev/null +++ b/include/linux/walk_tables.h @@ -0,0 +1,88 @@ +/* + * Copyright 2014 Linus Torvalds + * + * This code is distrubuted under the GPLv2 + */ +#ifndef __LINUX_WALK_TABLE_H +#define __LINUX_WALK_TABLE_H + +struct tree_entry; +struct tree_walker_definition; + +/* + * The 'tree_level' data only describes one particular level + * of the tree. The upper levels are totally invisible to the + * user of the tree walker, since the tree walker will walk + * those using the tree definitions. + * + * NOTE! "struct tree_entry" is an opaque type, and is just a + * used as a pointer to the particular level. You can figure + * out which level you are at by looking at the "tree_level", + * but even better is to just use different "lookup()" + * functions for different levels, at which point the + * function is inherent to the level. + * + * NOTE 2! Some trees are fixed-depth, others are not. Each level + * has a lookup function, and can specify whether they are a + * terminal level. It should also fill in the "start" field of + * the tree_level information to point to the next level (or + * to the data). A NULL start is considered to be a hole. + * + * You won't see this hole in the "walk()" callback, but holes do get + * the pre-walk callback so that you can track holes too. + * + * If a "lookup()" function returns that it's a terminal entry and + * has a non-NULL "start", we'll call the "walk()" function with that + * tree-entry _once_, assuming it's a "superpage" that looks like + * a normal final tree-entry but is just much larger. The walk function + * can tell from the size we give it. + * + * NOTE 3! The last level normally doesn't have a (*lookup)() + * function at all, just a "walk" function. For that case, we'll + * call the tree definition "walker()" function instead of + * trying to look anything up, and it is supposed to call the + * "walk()' callback for each entry. + */ +struct tree_level { + unsigned int entry_level; + unsigned int nr_entries; + unsigned int entry_coverage; + unsigned long start, end; + struct tree_entry *entry; +}; + +struct tree_walker { + unsigned long first, last; + const void *data; + const struct tree_walker_definition *def; + int (*walk)(const struct tree_walker *, struct tree_entry *, unsigned long, unsigned int); + int (*hole)(const struct tree_walker *, const struct tree_level *); + int (*pre_walk)(const struct tree_walker *, const struct tree_level *); + int (*post_walk)(const struct tree_walker *, const struct tree_level *); +}; + +/* + * The "lookup()" function needs to return whether this is a terminal + * level or not. + */ +enum walk_tree_lookup { + WALK_TREE_DESCEND, + WALK_TREE_HOLE, + WALK_TREE_SUPERENTRY, +}; + +struct tree_walker_level_definition { + unsigned int level_bits; + enum walk_tree_lookup (*lookup)(struct tree_entry *, unsigned int, struct tree_level *); + int (*walker)(const struct tree_walker *, const struct tree_level *); +}; + +struct tree_walker_definition { + unsigned int total_bits; + unsigned long start, end; + struct tree_walker_level_definition levels[]; +}; + +int walk_tree(struct tree_entry *root, const struct tree_walker_definition *, struct tree_walker *); + +#endif /* __LINUX_WALK_TABLE_H */ diff --git a/lib/walk_tables.c b/lib/walk_tables.c new file mode 100644 index 000000000000..f63ac83f91d7 --- /dev/null +++ b/lib/walk_tables.c @@ -0,0 +1,68 @@ +/* + * Copyright 2014 Linus Torvalds + * + * This code is distrubuted under the GPLv2 + */ +#include + +int walk_tree(struct tree_entry *root, const struct tree_walker_definition *def, struct tree_walker *walk) +{ + walk->def = def; + if (walk->first < def->start) + walk->first = def->start; + if (walk->last > def->end) + walk->last = def->end; + while (walk->first < walk->last) { + const struct tree_walker_level_definition *ldef = def->levels; + unsigned int shift = def->total_bits; + struct tree_level level; + struct tree_entry *tree = root; + + for (level.entry_level = 0; ; level.entry_level++) { + unsigned long mask = (1ul << shift)-1; + + /* Fill in the level description */ + level.nr_entries = 1u << ldef->level_bits; + level.entry_coverage = 1ul << (shift - ldef->level_bits); + level.start = walk->first; + level.end = (level.start | mask)+1; + if (level.end > walk->last) + level.end = walk->last; + + if (ldef->lookup) { + unsigned int index = level.start >> (shift - ldef->level_bits); + index &= level.nr_entries-1; + + switch (ldef->lookup(tree, index, &level)) { + case WALK_TREE_DESCEND: + tree = level.entry; + shift -= ldef->level_bits; + ldef++; + continue; + + case WALK_TREE_HOLE: + if (walk->hole) + walk->hole(walk, &level); + break; + + case WALK_TREE_SUPERENTRY: + if (walk->walk) + walk->walk(walk, level.entry, level.start, level.end - level.start); + break; + } + } else { + if (walk->pre_walk) + walk->pre_walk(walk, &level); + if (walk->walk) + ldef->walker(walk, &level); + if (walk->post_walk) + walk->post_walk(walk, &level); + } + + /* Ok, done with this level */ + walk->first = level.end; + break; + } + } + return 0; +} diff --git a/test_tables.c b/test_tables.c new file mode 100644 index 000000000000..b50ce33807ff --- /dev/null +++ b/test_tables.c @@ -0,0 +1,101 @@ +#include +#include "include/linux/walk_tables.h" + +/* Fake x86-64-like definitions */ +#define PRESENT (1ul << 0) +#define HUGEPAGE (1ul << 7) +enum walk_tree_lookup pgd_lookup(struct tree_entry *root, unsigned int index, struct tree_level *level) +{ + unsigned long entry = ((unsigned long *)root)[index]; + + level->entry = (void *)(entry & ~0xffful); + if (!(entry * PRESENT)) + return WALK_TREE_HOLE; + return (entry & HUGEPAGE) ? WALK_TREE_SUPERENTRY: WALK_TREE_DESCEND; +} + +/* For x86-64, the different levels are the same, so we can reuse the pgd walker */ +#define pud_lookup pgd_lookup +#define pmd_lookup pgd_lookup + +int pte_walker(const struct tree_walker *walk, const struct tree_level *level) +{ + unsigned long start = level->start; + unsigned long end = level->end; + unsigned int idx = (start >> 12) & 511; + unsigned long *ptep = idx + (unsigned long *)level->entry; + + while (start < end) { + unsigned long pte = *ptep; + if (pte & PRESENT) + walk->walk(walk, (struct tree_entry *)ptep, start, level->entry_coverage); + ptep++; + start += level->entry_coverage; + } +} + +struct tree_walker_definition x86_64_def = { + .total_bits = 48, + .start = 0, + .end = 0x7fffffffffff, + .levels = { + { .level_bits = 9, .lookup = pgd_lookup }, + { .level_bits = 9, .lookup = pud_lookup }, + { .level_bits = 9, .lookup = pmd_lookup }, + { .level_bits = 9, .walker = pte_walker } + } +}; + +/* + * And this is a fake walker. + * + * NOTE! The definitions and the walker are separate entities, but the walker + * obviously knows what it is walking, so it can look at the data + */ +static int show_walk(const struct tree_walker *walk, struct tree_entry *pte, unsigned long address, unsigned int size) +{ + unsigned long entry = *(unsigned long *)pte; + printf("%08lx: %08lx (%d)\n", address, entry, size); + return 0; +} + +static int show_hole(const struct tree_walker *walk, const struct tree_level *level) +{ + printf("hole at %08lx (%d)\n", level->start, level->end - level->start); +} + +static int show_pre_walk(const struct tree_walker *walk, const struct tree_level *level) +{ + printf("pre_walk %p at %08lx (%d)\n", level->start, level->entry, level->end - level->start); +} + +static int show_post_walk(const struct tree_walker *walk, const struct tree_level *level) +{ + printf("post_walk %p at %08lx (%d)\n", level->start, level->entry, level->end - level->start); +} + + +/* + * initial 1:1 mapping in fake test page tables for the first 8 pages, + * with page index 5 missing. + * + * And mostly empty page tables. + */ +unsigned long pte[512] __attribute__ ((aligned (4096))) = { 0x0001, 0x1001, 0x2001, 0x3001, 0x4001, 0, 0x6001, 0x7001 }; +unsigned long pmd[512] __attribute__ ((aligned (4096))) = { 1 + (unsigned long) pte, 0, 1 + (unsigned long) pte, }; +unsigned long pud[512] __attribute__ ((aligned (4096))) = { 1 + (unsigned long) pmd, 0, 1 + (unsigned long) pmd, }; +unsigned long pgd[512] __attribute__ ((aligned (4096))) = { 1 + (unsigned long) pud, 0, 1 + (unsigned long) pud, }; + +int main(int argc, char **argv) +{ + struct tree_walker walk = { + .first = 4096, + .last = 4096*512*3, + .walk = show_walk, + .hole = show_hole, + .pre_walk = show_pre_walk, + .post_walk = show_post_walk, + }; + + walk_tree((struct tree_entry *)pgd, &x86_64_def, &walk); +}