@@ -31,6 +31,14 @@ struct scatterlist;
#define IO_TLB_SHIFT 11
#define IO_TLB_SIZE (1 << IO_TLB_SHIFT)
+/*
+ * IO_TLB_BLOCK_UNIT is the allocation unit for device bounce buffers.
+ * It allows a device to allocate its own bounce buffers from the
+ * default io tlb pool.
+ */
+#define IO_TLB_BLOCKSIZE (8 * IO_TLB_SEGSIZE)
+#define IO_TLB_BLOCK_UNIT (IO_TLB_BLOCKSIZE << IO_TLB_SHIFT)
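+
+/*
+ * With IO_TLB_SEGSIZE of 128 and IO_TLB_SHIFT of 11 (2KB slots), one
+ * block spans 1024 slots, so IO_TLB_BLOCK_UNIT works out to 2MB.
+ */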
+
/* default to 64MB */
#define IO_TLB_DEFAULT_SIZE (64UL<<20)
@@ -89,6 +97,11 @@ extern enum swiotlb_force swiotlb_force;
* @late_alloc: %true if allocated using the page allocator
* @force_bounce: %true if swiotlb bouncing is forced
* @for_alloc: %true if the pool is used for memory allocation
+ * @num_child: The number of child io_tlb_mem areas in the pool.
+ * @child_nslot: The number of IO TLB slots in each child io_tlb_mem.
+ * @child_nblock: The number of IO TLB blocks in each child io_tlb_mem.
+ * @child_start: The child index to start searching from in the next round.
+ * @block_index: The block index to start searching from in the next round.
+ * @child: The array of child io_tlb_mem areas, or %NULL if the pool is not split.
+ * @parent: The io_tlb_mem this area was carved out of, or %NULL for the default pool.
+ * @block: The IO TLB block descriptors for this pool.
*/
struct io_tlb_mem {
phys_addr_t start;
@@ -102,6 +115,16 @@ struct io_tlb_mem {
bool late_alloc;
bool force_bounce;
bool for_alloc;
+ unsigned int num_child;
+ unsigned int child_nslot;
+ unsigned int child_nblock;
+ unsigned int child_start;
+ unsigned int block_index;
+ struct io_tlb_mem *child;
+ struct io_tlb_mem *parent;
+ struct io_tlb_block {
+ unsigned int list;
+ } *block;
struct io_tlb_slot {
phys_addr_t orig_addr;
size_t alloc_size;
@@ -130,6 +153,10 @@ unsigned int swiotlb_max_segment(void);
size_t swiotlb_max_mapping_size(struct device *dev);
bool is_swiotlb_active(struct device *dev);
void __init swiotlb_adjust_size(unsigned long size);
+int swiotlb_device_allocate(struct device *dev,
+ unsigned int queue_num,
+ unsigned long size);
+void swiotlb_device_free(struct device *dev);
#else
static inline void swiotlb_init(bool addressing_limited, unsigned int flags)
{
@@ -162,6 +189,17 @@ static inline bool is_swiotlb_active(struct device *dev)
static inline void swiotlb_adjust_size(unsigned long size)
{
}
+
+static inline void swiotlb_device_free(struct device *dev)
+{
+}
+
+static inline int swiotlb_device_allocate(struct device *dev,
+ unsigned int queue_num,
+ unsigned long size)
+{
+ return -ENOMEM;
+}
#endif /* CONFIG_SWIOTLB */
extern void swiotlb_print_info(void);
@@ -195,7 +195,8 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
unsigned long nslabs, bool late_alloc)
{
void *vaddr = phys_to_virt(start);
- unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
+ unsigned long bytes = nslabs << IO_TLB_SHIFT, i, j;
+ unsigned int block_num = nslabs / IO_TLB_BLOCKSIZE;
mem->nslabs = nslabs;
mem->start = start;
@@ -207,7 +208,36 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
mem->force_bounce = true;
spin_lock_init(&mem->lock);
- for (i = 0; i < mem->nslabs; i++) {
+
+ if (mem->num_child) {
+ mem->child_nslot = nslabs / mem->num_child;
+ mem->child_nblock = block_num / mem->num_child;
+ mem->child_start = 0;
+
+ /*
+ * Initialize the child IO TLB areas by dividing the pool
+ * evenly among them. Each child reuses a slice of the
+ * parent's mem->slots and mem->block arrays.
+ */
+ for (i = 0; i < mem->num_child; i++) {
+ mem->child[i].slots = mem->slots + i * mem->child_nslot;
+ mem->child[i].block = mem->block + i * mem->child_nblock;
+ mem->child[i].num_child = 0;
+
+ swiotlb_init_io_tlb_mem(&mem->child[i],
+ start + ((i * mem->child_nslot) << IO_TLB_SHIFT),
+ mem->child_nslot, late_alloc);
+ }
+
+ return;
+ }
+
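+ /*
+ * Like the per-slot free list below, block[i].list records how many
+ * contiguous free blocks, including block i itself, start at block i.
+ */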
+ for (i = 0, j = 0; i < mem->nslabs; i++) {
+ if (!(i % IO_TLB_BLOCKSIZE)) {
+ mem->block[j].list = block_num--;
+ j++;
+ }
+
mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
@@ -272,6 +302,13 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
__func__, alloc_size, PAGE_SIZE);
+ mem->num_child = 0;
+ mem->block = memblock_alloc(sizeof(struct io_tlb_block) *
+ (default_nslabs / IO_TLB_BLOCKSIZE),
+ SMP_CACHE_BYTES);
+ if (!mem->block)
+ panic("%s: Failed to allocate mem->block.\n", __func__);
+
swiotlb_init_io_tlb_mem(mem, __pa(tlb), default_nslabs, false);
mem->force_bounce = flags & SWIOTLB_FORCE;
@@ -296,7 +333,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
unsigned long bytes;
unsigned char *vstart = NULL;
- unsigned int order;
+ unsigned int order, block_order;
int rc = 0;
if (swiotlb_force_disable)
@@ -334,18 +371,29 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
goto retry;
}
+ block_order = get_order(array_size(sizeof(*mem->block),
+ nslabs / IO_TLB_BLOCKSIZE));
+ mem->block = (struct io_tlb_block *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO, block_order);
+ if (!mem->block)
+ goto error_block;
+
mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(array_size(sizeof(*mem->slots), nslabs)));
- if (!mem->slots) {
- free_pages((unsigned long)vstart, order);
- return -ENOMEM;
- }
+ if (!mem->slots)
+ goto error_slots;
set_memory_decrypted((unsigned long)vstart, bytes >> PAGE_SHIFT);
swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, true);
swiotlb_print_info();
return 0;
+
+error_slots:
+ free_pages((unsigned long)mem->block, block_order);
+error_block:
+ free_pages((unsigned long)vstart, order);
+ return -ENOMEM;
}
void __init swiotlb_exit(void)
@@ -353,6 +401,7 @@ void __init swiotlb_exit(void)
struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long tbl_vaddr;
size_t tbl_size, slots_size;
+ size_t block_array_size;
+ unsigned int block_order;
if (swiotlb_force_bounce)
return;
@@ -364,12 +413,16 @@ void __init swiotlb_exit(void)
tbl_vaddr = (unsigned long)phys_to_virt(mem->start);
tbl_size = PAGE_ALIGN(mem->end - mem->start);
slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs));
+ block_array_size = array_size(sizeof(*mem->block), mem->nslabs / IO_TLB_BLOCKSIZE);
set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
if (mem->late_alloc) {
+ block_order = get_order(block_array_size);
+ free_pages((unsigned long)mem->block, block_order);
free_pages(tbl_vaddr, get_order(tbl_size));
free_pages((unsigned long)mem->slots, get_order(slots_size));
} else {
+ memblock_free_late(__pa(mem->block), block_array_size);
memblock_free_late(mem->start, tbl_size);
memblock_free_late(__pa(mem->slots), slots_size);
}
@@ -483,10 +536,11 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
* Find a suitable number of IO TLB entries size that will fit this request and
* allocate a buffer from that IO TLB pool.
*/
-static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
- size_t alloc_size, unsigned int alloc_align_mask)
+static int swiotlb_do_find_slots(struct io_tlb_mem *mem,
+ struct device *dev, phys_addr_t orig_addr,
+ size_t alloc_size,
+ unsigned int alloc_align_mask)
{
- struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
unsigned long boundary_mask = dma_get_seg_boundary(dev);
dma_addr_t tbl_dma_addr =
phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
@@ -546,6 +600,9 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
mem->slots[i].list = 0;
mem->slots[i].alloc_size =
alloc_size - (offset + ((i - index) << IO_TLB_SHIFT));
+
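+ /*
+ * Mark the block as busy so swiotlb_alloc_block() will not start
+ * a block allocation here.
+ */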
+ if (!(i % IO_TLB_BLOCKSIZE))
+ mem->block[i / IO_TLB_BLOCKSIZE].list = 0;
}
for (i = index - 1;
io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
@@ -565,6 +622,47 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
return index;
}
+static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+ size_t alloc_size, unsigned int alloc_align_mask)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ struct io_tlb_mem *target_mem = mem;
+ int start = 0, i = 0, index;
+
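+ /*
+ * If the pool is split into child areas, search them round-robin,
+ * starting from the child after the one used last time.
+ */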
+ if (mem->num_child) {
+ i = start = mem->child_start;
+ mem->child_start = (mem->child_start + 1) % mem->num_child;
+
+ target_mem = mem->child;
+ }
+
+ do {
+ index = swiotlb_do_find_slots(target_mem + i, dev, orig_addr,
+ alloc_size, alloc_align_mask);
+ if (index >= 0)
+ return i * mem->child_nslot + index;
+ if (++i >= mem->num_child)
+ i = 0;
+ } while (i != start);
+
+ return -1;
+}
+
+static unsigned long mem_used(struct io_tlb_mem *mem)
+{
+ int i;
+ unsigned long used = 0;
+
+ if (mem->num_child) {
+ for (i = 0; i < mem->num_child; i++)
+ used += mem->child[i].used;
+ } else {
+ used = mem->used;
+ }
+
+ return used;
+}
+
phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
unsigned int alloc_align_mask, enum dma_data_direction dir,
@@ -594,7 +692,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- alloc_size, mem->nslabs, mem->used);
+ alloc_size, mem->nslabs, mem_used(mem));
return (phys_addr_t)DMA_MAPPING_ERROR;
}
@@ -617,9 +715,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
return tlb_addr;
}
-static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
+static void swiotlb_do_release_slots(struct io_tlb_mem *mem,
+ struct device *dev, phys_addr_t tlb_addr)
{
- struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
unsigned long flags;
unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
@@ -660,6 +758,20 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
spin_unlock_irqrestore(&mem->lock, flags);
}
+static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ int index, offset;
+
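+ /* Route the release to the child area that owns this bounce address. */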
+ if (mem->num_child) {
+ offset = swiotlb_align_offset(dev, tlb_addr);
+ index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+ mem = &mem->child[index / mem->child_nslot];
+ }
+
+ swiotlb_do_release_slots(mem, dev, tlb_addr);
+}
+
/*
* tlb_addr is the physical address of the bounce buffer to unmap.
*/
@@ -762,6 +874,172 @@ static int __init __maybe_unused swiotlb_create_default_debugfs(void)
late_initcall(swiotlb_create_default_debugfs);
#endif
+static void swiotlb_do_free_block(struct io_tlb_mem *mem,
+ phys_addr_t start, unsigned int block_num)
+{
+ unsigned int start_slot = (start - mem->start) >> IO_TLB_SHIFT;
+ unsigned int block_index = start_slot / IO_TLB_BLOCKSIZE;
+ unsigned int mem_block_num = mem->nslabs / IO_TLB_BLOCKSIZE;
+ unsigned int nslot;
+ unsigned long flags;
+ int count, i, j;
+
+ spin_lock_irqsave(&mem->lock, flags);
+ if (block_index + block_num < mem_block_num)
+ count = mem->block[block_index + block_num].list;
+ else
+ count = 0;
+
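+ /*
+ * Free the blocks and merge them with the run of free blocks that
+ * follows, mirroring what swiotlb_do_release_slots() does for slots.
+ */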
+ for (i = block_index + block_num - 1; i >= block_index; i--) {
+ mem->block[i].list = ++count;
+
+ for (j = 0; j < IO_TLB_BLOCKSIZE; j++) {
+ nslot = i * IO_TLB_BLOCKSIZE + j;
+ mem->slots[nslot].list = IO_TLB_SEGSIZE - io_tlb_offset(nslot);
+ mem->slots[nslot].orig_addr = INVALID_PHYS_ADDR;
+ mem->slots[nslot].alloc_size = 0;
+ }
+ }
+
+ for (i = block_index - 1; i >= 0 && mem->block[i].list; i--)
+ mem->block[i].list = ++count;
+
+ spin_unlock_irqrestore(&mem->lock, flags);
+}
+
+static void swiotlb_free_block(struct io_tlb_mem *mem,
+ phys_addr_t start, unsigned int block_num)
+{
+ unsigned int slot_index, child_index;
+
+ if (mem->num_child) {
+ slot_index = (start - mem->start) >> IO_TLB_SHIFT;
+ child_index = slot_index / mem->child_nslot;
+
+ swiotlb_do_free_block(&mem->child[child_index],
+ start, block_num);
+ } else {
+ swiotlb_do_free_block(mem, start, block_num);
+ }
+}
+
+void swiotlb_device_free(struct device *dev)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ struct io_tlb_mem *parent_mem = mem->parent;
+
+ swiotlb_free_block(parent_mem, mem->start, mem->nslabs / IO_TLB_BLOCKSIZE);
+
+ /* Hand the device back to the parent pool and free the child state. */
+ dev->dma_io_tlb_mem = parent_mem;
+ kfree(mem->child);
+ kfree(mem->block);
+ kfree(mem->slots);
+ kfree(mem);
+}
+
+static struct page *swiotlb_alloc_block(struct io_tlb_mem *mem, unsigned int block_num)
+{
+ unsigned int mem_block_num;
+ unsigned int block_index, nslot;
+ phys_addr_t tlb_addr;
+ unsigned long flags;
+ int i, j;
+
+ if (!mem || !mem->block)
+ return NULL;
+
+ mem_block_num = mem->nslabs / IO_TLB_BLOCKSIZE;
+
+ spin_lock_irqsave(&mem->lock, flags);
+
+ /* TODO: wrap around the block array so the whole pool is searched. */
+ for (block_index = mem->block_index;
+ block_index < mem_block_num; block_index++)
+ if (mem->block[block_index].list >= block_num)
+ break;
+
+ if (block_index == mem_block_num) {
+ spin_unlock_irqrestore(&mem->lock, flags);
+ return NULL;
+ }
+
+ /* Update block and slot list. */
+ for (i = block_index; i < block_index + block_num; i++) {
+ mem->block[i].list = 0;
+
+ for (j = 0; j < IO_TLB_BLOCKSIZE; j++) {
+ nslot = i * IO_TLB_BLOCKSIZE + j;
+ mem->slots[nslot].list = 0;
+ mem->slots[nslot].alloc_size = IO_TLB_SIZE;
+ }
+ }
+
+ mem->index = nslot + 1;
+ mem->block_index += block_num;
+ mem->used += block_num * IO_TLB_BLOCKSIZE;
+ spin_unlock_irqrestore(&mem->lock, flags);
+
+ tlb_addr = slot_addr(mem->start, block_index * IO_TLB_BLOCKSIZE);
+ return pfn_to_page(PFN_DOWN(tlb_addr));
+}
+
+/*
+ * swiotlb_device_allocate - Allocate a per-device bounce buffer from the
+ * default io tlb pool and split it into per-queue child areas. The
+ * allocation size should be a multiple of IO_TLB_BLOCK_UNIT.
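+ *
+ * A minimal usage sketch (nr_queues and the requested size are
+ * hypothetical values chosen by the driver):
+ *
+ *	ret = swiotlb_device_allocate(dev, nr_queues,
+ *				      nr_queues * IO_TLB_BLOCK_UNIT);
+ *	if (ret)
+ *		dev_warn(dev, "no dedicated bounce buffer, using shared pool\n");
+ *	...
+ *	if (!ret)
+ *		swiotlb_device_free(dev);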
+ */
+int swiotlb_device_allocate(struct device *dev,
+ unsigned int queue_num,
+ unsigned long size)
+{
+ struct io_tlb_mem *mem, *parent_mem = dev->dma_io_tlb_mem;
+ unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_BLOCKSIZE);
+ struct page *page;
+ int ret = -ENOMEM;
+
+ page = swiotlb_alloc_block(parent_mem, nslabs / IO_TLB_BLOCKSIZE);
+ if (!page)
+ return -ENOMEM;
+
+ mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ if (!mem)
+ goto error_mem;
+
+ mem->slots = kzalloc(array_size(sizeof(*mem->slots), nslabs),
+ GFP_KERNEL);
+ if (!mem->slots)
+ goto error_slots;
+
+ mem->block = kcalloc(nslabs / IO_TLB_BLOCKSIZE,
+ sizeof(struct io_tlb_block),
+ GFP_KERNEL);
+ if (!mem->block)
+ goto error_block;
+
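+ /* Split the new pool into per-queue child areas. */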
+ mem->num_child = queue_num;
+ mem->child = kcalloc(queue_num,
+ sizeof(struct io_tlb_mem),
+ GFP_KERNEL);
+ if (!mem->child)
+ goto error_child;
+
+ swiotlb_init_io_tlb_mem(mem, page_to_phys(page), nslabs, true);
+ mem->force_bounce = true;
+ mem->for_alloc = true;
+
+ mem->vaddr = parent_mem->vaddr + page_to_phys(page) - parent_mem->start;
+ mem->parent = parent_mem;
+ dev->dma_io_tlb_mem = mem;
+ return 0;
+
+error_child:
+ kfree(mem->block);
+error_block:
+ kfree(mem->slots);
+error_slots:
+ kfree(mem);
+error_mem:
+ swiotlb_free_block(parent_mem, page_to_phys(page), nslabs / IO_TLB_BLOCKSIZE);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(swiotlb_device_allocate);
+
#ifdef CONFIG_DMA_RESTRICTED_POOL
struct page *swiotlb_alloc(struct device *dev, size_t size)