Message ID | 20140109201657.28381.9305.stgit@viggo.jf.intel.com (mailing list archive)
---|---
State | Superseded
On Thu, 09 Jan 2014 12:17:26 -0800 Dan Williams <dan.j.williams@intel.com> wrote:

> Record actively mapped pages and provide an api for asserting a given
> page is dma inactive before execution proceeds. Placing
> debug_dma_assert_idle() in cow_user_page() flagged the violation of the
> dma-api in the NET_DMA implementation (see commit 77873803363c "net_dma:
> mark broken").
>
> --- a/include/linux/dma-debug.h
> +++ b/include/linux/dma-debug.h
> @@ -185,4 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
>
>  #endif /* CONFIG_DMA_API_DEBUG */
>
> +#ifdef CONFIG_DMA_VS_CPU_DEBUG
> +extern void debug_dma_assert_idle(struct page *page);
> +#else
> +static inline void debug_dma_assert_idle(struct page *page) { }

Surely this breaks the build when CONFIG_DMA_VS_CPU_DEBUG=n?
lib/dma-debug.c is missing the necessary "#ifdef
CONFIG_DMA_VS_CPU_DEBUG"s.

Do we really need this config setting anyway? What goes bad if we
permanently enable this subfeature when dma debugging is enabled?

>
> ...
>
> index d87a17a819d0..f67ae111cd2f 100644
> --- a/lib/dma-debug.c
> +++ b/lib/dma-debug.c
> @@ -57,7 +57,8 @@ struct dma_debug_entry {
>          struct list_head list;
>          struct device    *dev;
>          int              type;
> -        phys_addr_t      paddr;
> +        unsigned long    pfn;
> +        size_t           offset;

Some documentation for the fields would be nice. offset of what
relative to what, in what units?

>          u64              dev_addr;
>          u64              size;
>          int              direction;
> @@ -372,6 +373,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
>          list_del(&entry->list);
>  }
>
> ...
>
> +/* memory usage is constrained by the maximum number of available
> + * dma-debug entries
> + */

A brief design overview would be useful. What goes in the tree, how is it
indexed, when and why do we add/remove/test items, etc.

> +static RADIX_TREE(dma_active_pfn, GFP_NOWAIT);
> +static DEFINE_SPINLOCK(radix_lock);
> +
> +static void __active_pfn_inc_overlap(struct dma_debug_entry *entry)
> +{
> +        unsigned long pfn = entry->pfn;
> +        int i;
> +
> +        for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
> +                if (radix_tree_tag_get(&dma_active_pfn, pfn, i) == 0) {
> +                        radix_tree_tag_set(&dma_active_pfn, pfn, i);
> +                        return;
> +                }
> +        pr_debug("DMA-API: max overlap count (%d) reached for pfn 0x%lx\n",
> +                 RADIX_TREE_MAX_TAGS, pfn);
> +}
> +
>
> ...
>
> +void debug_dma_assert_idle(struct page *page)
> +{
> +        unsigned long flags;
> +        struct dma_debug_entry *entry;
> +
> +        if (!page)
> +                return;
> +
> +        spin_lock_irqsave(&radix_lock, flags);
> +        entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page));
> +        spin_unlock_irqrestore(&radix_lock, flags);
> +
> +        if (!entry)
> +                return;
> +
> +        err_printk(entry->dev, entry,
> +                   "DMA-API: cpu touching an active dma mapped page "
> +                   "[pfn=0x%lx]\n", entry->pfn);
> +}
> +EXPORT_SYMBOL_GPL(debug_dma_assert_idle);

The export isn't needed for mm/memory.c

> ...
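For reference, the missing guard would take roughly the following shape in
lib/dma-debug.c. This is a sketch only, assuming the feature keeps its own
Kconfig symbol; the stub names mirror the patch below, but this hunk is not
part of the posted series. Without such a guard the out-of-line
debug_dma_assert_idle() collides with the static inline stub that
<linux/dma-debug.h> provides when CONFIG_DMA_VS_CPU_DEBUG=n.

/* lib/dma-debug.c: hypothetical =n handling (not from the posted patch) */
#ifdef CONFIG_DMA_VS_CPU_DEBUG
/* ... the radix tree, __active_pfn_{inc,dec}_overlap(),
 * active_pfn_insert(), active_pfn_remove() and
 * debug_dma_assert_idle() would live here ...
 */
#else
/* stubs so add_dma_entry()/dma_entry_free() build unchanged */
static inline int active_pfn_insert(struct dma_debug_entry *entry)
{
        return 0;
}

static inline void active_pfn_remove(struct dma_debug_entry *entry)
{
}
#endif /* CONFIG_DMA_VS_CPU_DEBUG */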
On Thu, Jan 9, 2014 at 4:38 PM, Andrew Morton <akpm@linux-foundation.org> wrote:
> On Thu, 09 Jan 2014 12:17:26 -0800 Dan Williams <dan.j.williams@intel.com> wrote:
>
>> Record actively mapped pages and provide an api for asserting a given
>> page is dma inactive before execution proceeds. Placing
>> debug_dma_assert_idle() in cow_user_page() flagged the violation of the
>> dma-api in the NET_DMA implementation (see commit 77873803363c "net_dma:
>> mark broken").
>>
>> --- a/include/linux/dma-debug.h
>> +++ b/include/linux/dma-debug.h
>> @@ -185,4 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
>>
>>  #endif /* CONFIG_DMA_API_DEBUG */
>>
>> +#ifdef CONFIG_DMA_VS_CPU_DEBUG
>> +extern void debug_dma_assert_idle(struct page *page);
>> +#else
>> +static inline void debug_dma_assert_idle(struct page *page) { }
>
> Surely this breaks the build when CONFIG_DMA_VS_CPU_DEBUG=n?
> lib/dma-debug.c is missing the necessary "#ifdef
> CONFIG_DMA_VS_CPU_DEBUG"s.

facepalm

> Do we really need this config setting anyway? What goes bad if we
> permanently enable this subfeature when dma debugging is enabled?

I did want to provide notification/description of this extra check, but
I'll go ahead and fold it into the DMA_API_DEBUG description.

The only thing that potentially goes bad is that we lose a hard bound on
memory consumption. Before the patch the footprint is a fixed
sizeof(struct dma_debug_entry) * PREALLOC_DMA_DEBUG_ENTRIES; after the
patch the radix tree adds a variable amount on top of that, depending on
how sparse the mapped pfns are and on how many pages are included in each
dma_map_sg() call. (The testing with NET_DMA did not involve dma_map_sg()
calls.)

>> ...
>>
>> index d87a17a819d0..f67ae111cd2f 100644
>> --- a/lib/dma-debug.c
>> +++ b/lib/dma-debug.c
>> @@ -57,7 +57,8 @@ struct dma_debug_entry {
>>          struct list_head list;
>>          struct device    *dev;
>>          int              type;
>> -        phys_addr_t      paddr;
>> +        unsigned long    pfn;
>> +        size_t           offset;
>
> Some documentation for the fields would be nice. offset of what
> relative to what, in what units?

This is the same 'offset' passed to dma_map_page(), will document.

>>          u64              dev_addr;
>>          u64              size;
>>          int              direction;
>> @@ -372,6 +373,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
>>          list_del(&entry->list);
>>  }
>>
>> ...
>>
>> +/* memory usage is constrained by the maximum number of available
>> + * dma-debug entries
>> + */
>
> A brief design overview would be useful. What goes in the tree, how is it
> indexed, when and why do we add/remove/test items, etc.

...added this documentation to dma_active_pfn for the next revision:

/*
 * For each page mapped (initial page in the case of
 * dma_alloc_coherent/dma_map_{single|page}, or each page in a
 * scatterlist) insert into this tree using the pfn as the key. At
 * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If
 * the pfn already exists at insertion time add a tag as a reference
 * count for the overlapping mappings. For now the overlap tracking
 * just ensures that 'unmaps' balance 'maps' before marking the pfn
 * idle, but we should also be flagging overlaps as an API violation.
 *
 * Memory usage is mostly constrained by the maximum number of available
 * dma-debug entries. In the case of dma_map_{single|page} and
 * dma_alloc_coherent there is only one dma_debug_entry and one pfn to
 * track per each of these calls. dma_map_sg(), on the other hand,
 * consumes a single dma_debug_entry, but inserts 'nents' entries into
 * the tree.
 *
 * At any time debug_dma_assert_idle() can be called to trigger a
 * warning if the given page is in the active set.
 */
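To make that lifecycle concrete, here is a minimal sketch of the sequence
the tree is designed to catch. It is hypothetical driver code: "dev" and
"page" stand in for real driver state, and only the map/unmap calls and
debug_dma_assert_idle() come from the actual API.

#include <linux/dma-mapping.h>
#include <linux/dma-debug.h>

static void example_cpu_vs_dma(struct device *dev, struct page *page)
{
        dma_addr_t addr;

        addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
        if (dma_mapping_error(dev, addr))
                return;

        /* page_to_pfn(page) is now in dma_active_pfn, so this warns */
        debug_dma_assert_idle(page);

        dma_unmap_page(dev, addr, PAGE_SIZE, DMA_FROM_DEVICE);

        /* the pfn has been deleted from the tree, so this is silent */
        debug_dma_assert_idle(page);
}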
>> +static RADIX_TREE(dma_active_pfn, GFP_NOWAIT);
>> +static DEFINE_SPINLOCK(radix_lock);
>> +
>> +static void __active_pfn_inc_overlap(struct dma_debug_entry *entry)
>> +{
>> +        unsigned long pfn = entry->pfn;
>> +        int i;
>> +
>> +        for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
>> +                if (radix_tree_tag_get(&dma_active_pfn, pfn, i) == 0) {
>> +                        radix_tree_tag_set(&dma_active_pfn, pfn, i);
>> +                        return;
>> +                }
>> +        pr_debug("DMA-API: max overlap count (%d) reached for pfn 0x%lx\n",
>> +                 RADIX_TREE_MAX_TAGS, pfn);
>> +}
>> +
>>
>> ...
>>
>> +void debug_dma_assert_idle(struct page *page)
>> +{
>> +        unsigned long flags;
>> +        struct dma_debug_entry *entry;
>> +
>> +        if (!page)
>> +                return;
>> +
>> +        spin_lock_irqsave(&radix_lock, flags);
>> +        entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page));
>> +        spin_unlock_irqrestore(&radix_lock, flags);
>> +
>> +        if (!entry)
>> +                return;
>> +
>> +        err_printk(entry->dev, entry,
>> +                   "DMA-API: cpu touching an active dma mapped page "
>> +                   "[pfn=0x%lx]\n", entry->pfn);
>> +}
>> +EXPORT_SYMBOL_GPL(debug_dma_assert_idle);
>
> The export isn't needed for mm/memory.c

True, it can wait until other call sites arise.

Thanks Andrew.
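A side note on __active_pfn_inc_overlap() as quoted above: the tree holds
one dma_debug_entry pointer per pfn, and the radix-tree tag bits double as
a small saturating counter for additional overlapping mappings. Assuming
RADIX_TREE_MAX_TAGS is 3 (its value at the time), the bookkeeping for a
single pfn works out as:

/*
 * Worked trace for repeated maps of the same pfn (illustrative):
 *
 *   1st map    radix_tree_insert() == 0       entry stored, no tags
 *   2nd map    -EEXIST -> set tag 0           one overlap
 *   3rd map    -EEXIST -> set tag 1           two overlaps
 *   4th map    -EEXIST -> set tag 2           three overlaps
 *   5th map    all tags set -> pr_debug("max overlap count ...")
 *
 * Unmaps clear tags 2, 1, 0 in reverse order; the final unmap finds no
 * set tags and radix_tree_delete()s the pfn, marking it idle.
 */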
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index fc0e34ce038f..27a029fb2ead 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -185,4 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
 
 #endif /* CONFIG_DMA_API_DEBUG */
 
+#ifdef CONFIG_DMA_VS_CPU_DEBUG
+extern void debug_dma_assert_idle(struct page *page);
+#else
+static inline void debug_dma_assert_idle(struct page *page) { }
+#endif /* CONFIG_DMA_VS_CPU_DEBUG */
+
 #endif /* __DMA_DEBUG_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index db25707aa41b..99751c47fa87 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1575,9 +1575,20 @@ config DMA_API_DEBUG
 	  With this option you will be able to detect common bugs in device
 	  drivers like double-freeing of DMA mappings or freeing mappings that
 	  were never allocated.
-	  This option causes a performance degredation.  Use only if you want
+	  This option causes a performance degradation.  Use only if you want
 	  to debug device drivers. If unsure, say N.
 
+config DMA_VS_CPU_DEBUG
+	bool "Enable debugging CPU vs DMA ownership of pages"
+	depends on DMA_API_DEBUG
+	help
+	  Enable this option to attempt to catch cases where a page owned by
+	  DMA is being touched by the cpu in a way that could cause data
+	  corruption.  For example, this enables cow_user_page() to check
+	  that the source page is not undergoing DMA.  This option
+	  further increases the overhead of DMA_API_DEBUG.  Use only if
+	  debugging device drivers. If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d87a17a819d0..f67ae111cd2f 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -57,7 +57,8 @@ struct dma_debug_entry {
 	struct list_head list;
 	struct device    *dev;
 	int              type;
-	phys_addr_t      paddr;
+	unsigned long    pfn;
+	size_t           offset;
 	u64              dev_addr;
 	u64              size;
 	int              direction;
@@ -372,6 +373,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
 	list_del(&entry->list);
 }
 
+static unsigned long long phys_addr(struct dma_debug_entry *entry)
+{
+	return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
+}
+
 /*
  * Dump mapping entries for debugging purposes
  */
@@ -389,9 +395,9 @@ void debug_dma_dump_mappings(struct device *dev)
 		list_for_each_entry(entry, &bucket->list, list) {
 			if (!dev || dev == entry->dev) {
 				dev_info(entry->dev,
-					 "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
+					 "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
 					 type2name[entry->type], idx,
-					 (unsigned long long)entry->paddr,
+					 phys_addr(entry), entry->pfn,
 					 entry->dev_addr, entry->size,
 					 dir2name[entry->direction],
 					 maperr2str[entry->map_err_type]);
@@ -403,6 +409,83 @@ void debug_dma_dump_mappings(struct device *dev)
 }
 EXPORT_SYMBOL(debug_dma_dump_mappings);
 
+/* memory usage is constrained by the maximum number of available
+ * dma-debug entries
+ */
+static RADIX_TREE(dma_active_pfn, GFP_NOWAIT);
+static DEFINE_SPINLOCK(radix_lock);
+
+static void __active_pfn_inc_overlap(struct dma_debug_entry *entry)
+{
+	unsigned long pfn = entry->pfn;
+	int i;
+
+	for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
+		if (radix_tree_tag_get(&dma_active_pfn, pfn, i) == 0) {
+			radix_tree_tag_set(&dma_active_pfn, pfn, i);
+			return;
+		}
+	pr_debug("DMA-API: max overlap count (%d) reached for pfn 0x%lx\n",
+		 RADIX_TREE_MAX_TAGS, pfn);
+}
+
+static void __active_pfn_dec_overlap(struct dma_debug_entry *entry)
+{
+	unsigned long pfn = entry->pfn;
+	int i;
+
+	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+		if (radix_tree_tag_get(&dma_active_pfn, pfn, i)) {
+			radix_tree_tag_clear(&dma_active_pfn, pfn, i);
+			return;
+		}
+	radix_tree_delete(&dma_active_pfn, pfn);
+}
+
+static int active_pfn_insert(struct dma_debug_entry *entry)
+{
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry);
+	if (rc == -EEXIST)
+		__active_pfn_inc_overlap(entry);
+	spin_unlock_irqrestore(&radix_lock, flags);
+
+	return rc;
+}
+
+static void active_pfn_remove(struct dma_debug_entry *entry)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	__active_pfn_dec_overlap(entry);
+	spin_unlock_irqrestore(&radix_lock, flags);
+}
+
+void debug_dma_assert_idle(struct page *page)
+{
+	unsigned long flags;
+	struct dma_debug_entry *entry;
+
+	if (!page)
+		return;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page));
+	spin_unlock_irqrestore(&radix_lock, flags);
+
+	if (!entry)
+		return;
+
+	err_printk(entry->dev, entry,
+		   "DMA-API: cpu touching an active dma mapped page "
+		   "[pfn=0x%lx]\n", entry->pfn);
+}
+EXPORT_SYMBOL_GPL(debug_dma_assert_idle);
+
 /*
  * Wrapper function for adding an entry to the hash.
  * This function takes care of locking itself.
@@ -411,10 +494,22 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 {
 	struct hash_bucket *bucket;
 	unsigned long flags;
+	int rc;
 
 	bucket = get_hash_bucket(entry, &flags);
 	hash_bucket_add(bucket, entry);
 	put_hash_bucket(bucket, &flags);
+
+	rc = active_pfn_insert(entry);
+	if (rc == -ENOMEM) {
+		pr_err("DMA-API: pfn tracking out of memory - "
+		       "disabling dma-debug\n");
+		global_disable = true;
+	}
+
+	/* TODO: report -EEXIST errors as overlapping mappings are not
+	 * supported by the DMA API
+	 */
 }
 
 static struct dma_debug_entry *__dma_entry_alloc(void)
@@ -469,6 +564,8 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 {
 	unsigned long flags;
 
+	active_pfn_remove(entry);
+
 	/*
 	 * add to beginning of the list - this way the entries are
 	 * more likely cache hot when they are reallocated.
@@ -895,15 +992,15 @@ static void check_unmap(struct dma_debug_entry *ref)
 			   ref->dev_addr, ref->size,
 			   type2name[entry->type], type2name[ref->type]);
 	} else if ((entry->type == dma_debug_coherent) &&
-		   (ref->paddr != entry->paddr)) {
+		   (phys_addr(ref) != phys_addr(entry))) {
 		err_printk(ref->dev, entry, "DMA-API: device driver frees "
 			   "DMA memory with different CPU address "
 			   "[device address=0x%016llx] [size=%llu bytes] "
 			   "[cpu alloc address=0x%016llx] "
 			   "[cpu free address=0x%016llx]",
 			   ref->dev_addr, ref->size,
-			   (unsigned long long)entry->paddr,
-			   (unsigned long long)ref->paddr);
+			   phys_addr(entry),
+			   phys_addr(ref));
 	}
 
 	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
@@ -1052,7 +1149,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 	entry->dev       = dev;
 	entry->type      = dma_debug_page;
-	entry->paddr     = page_to_phys(page) + offset;
+	entry->pfn       = page_to_pfn(page);
+	entry->offset    = offset,
 	entry->dev_addr  = dma_addr;
 	entry->size      = size;
 	entry->direction = direction;
@@ -1148,7 +1246,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		entry->type           = dma_debug_sg;
 		entry->dev            = dev;
-		entry->paddr          = sg_phys(s);
+		entry->pfn            = page_to_pfn(sg_page(s));
+		entry->offset         = s->offset,
 		entry->size           = sg_dma_len(s);
 		entry->dev_addr       = sg_dma_address(s);
 		entry->direction      = direction;
@@ -1198,7 +1297,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn            = page_to_pfn(sg_page(s)),
+			.offset         = s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = dir,
@@ -1233,7 +1333,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
 	entry->type      = dma_debug_coherent;
 	entry->dev       = dev;
-	entry->paddr     = virt_to_phys(virt);
+	entry->pfn       = page_to_pfn(virt_to_page(virt));
+	entry->offset    = (size_t) virt & PAGE_MASK;
 	entry->size      = size;
 	entry->dev_addr  = dma_addr;
 	entry->direction = DMA_BIDIRECTIONAL;
@@ -1248,7 +1349,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
 	struct dma_debug_entry ref = {
 		.type           = dma_debug_coherent,
 		.dev            = dev,
-		.paddr          = virt_to_phys(virt),
+		.pfn            = page_to_pfn(virt_to_page(virt)),
+		.offset         = (size_t) virt & PAGE_MASK,
 		.dev_addr       = addr,
 		.size           = size,
 		.direction      = DMA_BIDIRECTIONAL,
@@ -1356,7 +1458,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn            = page_to_pfn(sg_page(s)),
+			.offset         = s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = direction,
@@ -1388,7 +1491,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn            = page_to_pfn(sg_page(s)),
+			.offset         = s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = direction,
diff --git a/mm/memory.c b/mm/memory.c
index 5d9025f3b3e1..c89788436f81 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -59,6 +59,7 @@
 #include <linux/gfp.h>
 #include <linux/migrate.h>
 #include <linux/string.h>
+#include <linux/dma-debug.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -2559,6 +2560,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
 static inline void cow_user_page(struct page *dst, struct page *src,
 				 unsigned long va, struct vm_area_struct *vma)
 {
+	debug_dma_assert_idle(src);
+
 	/*
 	 * If the source page was a PFN mapping, we don't have
 	 * a "struct page" for it. We do a best-effort copy by
Record actively mapped pages and provide an api for asserting a given
page is dma inactive before execution proceeds. Placing
debug_dma_assert_idle() in cow_user_page() flagged the violation of the
dma-api in the NET_DMA implementation (see commit 77873803363c "net_dma:
mark broken").

Cc: Joerg Roedel <joro@8bytes.org>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: James Bottomley <JBottomley@Parallels.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
Added CONFIG_DMA_VS_CPU_DEBUG

 include/linux/dma-debug.h |    6 ++
 lib/Kconfig.debug         |   13 ++++-
 lib/dma-debug.c           |  130 +++++++++++++++++++++++++++++++++++++++++----
 mm/memory.c               |    3 +
 4 files changed, 138 insertions(+), 14 deletions(-)
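To exercise the check as posted, both options need to be enabled. A sketch
of the relevant .config fragment (symbol names as in this revision; per the
discussion above they may be folded into DMA_API_DEBUG in a later one):

CONFIG_DMA_API_DEBUG=y
CONFIG_DMA_VS_CPU_DEBUG=y

With both set, a cow_user_page() that hits a page with a live DMA mapping
reports "DMA-API: cpu touching an active dma mapped page [pfn=...]" through
err_printk().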