On Tue, Jul 06, 2021 at 06:48:48AM +0200, Christoph Hellwig wrote:
On Mon, Jul 05, 2021 at 08:03:52PM +0100, Will Deacon wrote:
So at this point, the AMD IOMMU driver does:
swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
where 'swiotlb' is a global variable indicating whether or not swiotlb is in use. It's picked up a bit later on by pci_swiotlb_late_init(), which will call swiotlb_exit() if 'swiotlb' is false.
Now, that used to work fine, because swiotlb_exit() clears 'io_tlb_default_mem' to NULL, but now with the restricted DMA changes, I think that all the devices which have successfully probed beforehand will have stale pointers to the freed structure in their 'dev->dma_io_tlb_mem' field.
Yeah. I don't think we can do that anymore, and I also think it is a bad idea to start with.
I've had a crack at reworking things along the following lines:
- io_tlb_default_mem now lives in the BSS, the flexible array member is now a pointer and that part is allocated dynamically (downside of this is an extra indirection to get at the slots).
- io_tlb_default_mem.nslabs tells you whether the thing is valid
- swiotlb_exit() frees the slots array and clears the rest of the structure to 0. I also extended it to free the actual slabs, but I'm not sure why it wasn't doing that before.
So a non-NULL dev->dma_io_tlb_mem should always be valid to follow.
Untested diff below... Nathan, it would be ace if you're brave enough to give this a shot.
Will
--->8
diff --git a/drivers/base/core.c b/drivers/base/core.c index bbad7c559901..9e1218f89e4b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2820,7 +2820,7 @@ void device_initialize(struct device *dev) dev->dma_coherent = dma_default_coherent; #endif #ifdef CONFIG_SWIOTLB - dev->dma_io_tlb_mem = io_tlb_default_mem; + dev->dma_io_tlb_mem = &io_tlb_default_mem; #endif } EXPORT_SYMBOL_GPL(device_initialize); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 785ec7e8be01..f06d9b4f1e0f 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -164,7 +164,7 @@ int __ref xen_swiotlb_init(void) int rc = -ENOMEM; char *start;
- if (io_tlb_default_mem != NULL) { + if (io_tlb_default_mem.nslabs) { pr_warn("swiotlb buffer already initialized\n"); return -EEXIST; } @@ -547,7 +547,7 @@ xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, static int xen_swiotlb_dma_supported(struct device *hwdev, u64 mask) { - return xen_phys_to_dma(hwdev, io_tlb_default_mem->end - 1) <= mask; + return xen_phys_to_dma(hwdev, io_tlb_default_mem.end - 1) <= mask; }
const struct dma_map_ops xen_swiotlb_dma_ops = { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 39284ff2a6cd..b0cb2a9973f4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -103,9 +103,9 @@ struct io_tlb_mem { phys_addr_t orig_addr; size_t alloc_size; unsigned int list; - } slots[]; + } *slots; }; -extern struct io_tlb_mem *io_tlb_default_mem; +extern struct io_tlb_mem io_tlb_default_mem;
static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 0ffbaae9fba2..91cd1d413027 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -70,7 +70,7 @@
enum swiotlb_force swiotlb_force;
-struct io_tlb_mem *io_tlb_default_mem; +struct io_tlb_mem io_tlb_default_mem;
/* * Max segment that we can provide which (if pages are contingous) will @@ -101,7 +101,7 @@ early_param("swiotlb", setup_io_tlb_npages);
unsigned int swiotlb_max_segment(void) { - return io_tlb_default_mem ? max_segment : 0; + return io_tlb_default_mem.nslabs ? max_segment : 0; } EXPORT_SYMBOL_GPL(swiotlb_max_segment);
@@ -134,9 +134,9 @@ void __init swiotlb_adjust_size(unsigned long size)
void swiotlb_print_info(void) { - struct io_tlb_mem *mem = io_tlb_default_mem; + struct io_tlb_mem *mem = &io_tlb_default_mem;
- if (!mem) { + if (!mem->nslabs) { pr_warn("No low mem\n"); return; } @@ -163,11 +163,11 @@ static inline unsigned long nr_slots(u64 val) */ void __init swiotlb_update_mem_attributes(void) { - struct io_tlb_mem *mem = io_tlb_default_mem; + struct io_tlb_mem *mem = &io_tlb_default_mem; void *vaddr; unsigned long bytes;
- if (!mem || mem->late_alloc) + if (!mem->nslabs || mem->late_alloc) return; vaddr = phys_to_virt(mem->start); bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT); @@ -201,25 +201,24 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) { - struct io_tlb_mem *mem; + struct io_tlb_mem *mem = &io_tlb_default_mem; size_t alloc_size;
if (swiotlb_force == SWIOTLB_NO_FORCE) return 0;
/* protect against double initialization */ - if (WARN_ON_ONCE(io_tlb_default_mem)) + if (WARN_ON_ONCE(mem->nslabs)) return -ENOMEM;
- alloc_size = PAGE_ALIGN(struct_size(mem, slots, nslabs)); - mem = memblock_alloc(alloc_size, PAGE_SIZE); - if (!mem) + alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs)); + mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); + if (!mem->slots) panic("%s: Failed to allocate %zu bytes align=0x%lx\n", __func__, alloc_size, PAGE_SIZE);
swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
- io_tlb_default_mem = mem; if (verbose) swiotlb_print_info(); swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT); @@ -304,26 +303,24 @@ swiotlb_late_init_with_default_size(size_t default_size) int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) { - struct io_tlb_mem *mem; + struct io_tlb_mem *mem = &io_tlb_default_mem; unsigned long bytes = nslabs << IO_TLB_SHIFT;
if (swiotlb_force == SWIOTLB_NO_FORCE) return 0;
/* protect against double initialization */ - if (WARN_ON_ONCE(io_tlb_default_mem)) + if (WARN_ON_ONCE(mem->nslabs)) return -ENOMEM;
- mem = (void *)__get_free_pages(GFP_KERNEL, - get_order(struct_size(mem, slots, nslabs))); - if (!mem) + mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(array_size(sizeof(*mem->slots), nslabs))); + if (!mem->slots) return -ENOMEM;
- memset(mem, 0, sizeof(*mem)); set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
- io_tlb_default_mem = mem; swiotlb_print_info(); swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT); return 0; @@ -331,18 +328,23 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
void __init swiotlb_exit(void) { - struct io_tlb_mem *mem = io_tlb_default_mem; - size_t size; + struct io_tlb_mem *mem = &io_tlb_default_mem; + size_t tbl_size, slots_size;
- if (!mem) + if (!mem->nslabs) return;
- size = struct_size(mem, slots, mem->nslabs); - if (mem->late_alloc) - free_pages((unsigned long)mem, get_order(size)); - else - memblock_free_late(__pa(mem), PAGE_ALIGN(size)); - io_tlb_default_mem = NULL; + tbl_size = mem->end - mem->start; + slots_size = array_size(sizeof(*mem->slots), mem->nslabs); + if (mem->late_alloc) { + free_pages((unsigned long)mem->start, get_order(tbl_size)); + free_pages((unsigned long)mem->slots, get_order(slots_size)); + } else { + memblock_free_late(__pa(mem->start), PAGE_ALIGN(tbl_size)); + memblock_free_late(__pa(mem->slots), PAGE_ALIGN(slots_size)); + } + + memset(mem, 0, sizeof(*mem)); }
/* @@ -682,7 +684,9 @@ size_t swiotlb_max_mapping_size(struct device *dev)
bool is_swiotlb_active(struct device *dev) { - return dev->dma_io_tlb_mem != NULL; + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + + return mem && mem->nslabs; } EXPORT_SYMBOL_GPL(is_swiotlb_active);
@@ -697,10 +701,10 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem)
static int __init swiotlb_create_default_debugfs(void) { - struct io_tlb_mem *mem = io_tlb_default_mem; + struct io_tlb_mem *mem = &io_tlb_default_mem;
debugfs_dir = debugfs_create_dir("swiotlb", NULL); - if (mem) { + if (mem->nslabs) { mem->debugfs = debugfs_dir; swiotlb_create_debugfs_files(mem); } @@ -754,10 +758,17 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, * to it. */ if (!mem) { - mem = kzalloc(struct_size(mem, slots, nslabs), GFP_KERNEL); + mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return -ENOMEM;
+ mem->slots = kzalloc(array_size(sizeof(*mem->slots), nslabs), + GFP_KERNEL); + if (!mem->slots) { + kfree(mem); + return -ENOMEM; + } + set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), rmem->size >> PAGE_SHIFT); swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false); @@ -781,7 +792,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, static void rmem_swiotlb_device_release(struct reserved_mem *rmem, struct device *dev) { - dev->dma_io_tlb_mem = io_tlb_default_mem; + dev->dma_io_tlb_mem = &io_tlb_default_mem; }
static const struct reserved_mem_ops rmem_swiotlb_ops = {
On 2021-07-06 14:24, Will Deacon wrote:
On Tue, Jul 06, 2021 at 06:48:48AM +0200, Christoph Hellwig wrote:
On Mon, Jul 05, 2021 at 08:03:52PM +0100, Will Deacon wrote:
So at this point, the AMD IOMMU driver does:
swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
where 'swiotlb' is a global variable indicating whether or not swiotlb is in use. It's picked up a bit later on by pci_swiotlb_late_init(), which will call swiotlb_exit() if 'swiotlb' is false.
Now, that used to work fine, because swiotlb_exit() clears 'io_tlb_default_mem' to NULL, but now with the restricted DMA changes, I think that all the devices which have successfully probed beforehand will have stale pointers to the freed structure in their 'dev->dma_io_tlb_mem' field.
Yeah. I don't think we can do that anymore, and I also think it is a bad idea to start with.
I've had a crack at reworking things along the following lines:
io_tlb_default_mem now lives in the BSS, the flexible array member is now a pointer and that part is allocated dynamically (downside of this is an extra indirection to get at the slots).
io_tlb_default_mem.nslabs tells you whether the thing is valid
swiotlb_exit() frees the slots array and clears the rest of the structure to 0. I also extended it to free the actual slabs, but I'm not sure why it wasn't doing that before.
So a non-NULL dev->dma_io_tlb_mem should always be valid to follow.
FWIW I was pondering the question of whether to do something along those lines or just scrap the default assignment entirely, so since I hadn't got round to saying that I've gone ahead and hacked up the alternative (similarly untested) for comparison :)
TBH I'm still not sure which one I prefer...
Robin.
----->8----- diff --git a/drivers/base/core.c b/drivers/base/core.c index ea5b85354526..394abf184c1a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2847,9 +2847,6 @@ void device_initialize(struct device *dev) defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) dev->dma_coherent = dma_default_coherent; #endif -#ifdef CONFIG_SWIOTLB - dev->dma_io_tlb_mem = io_tlb_default_mem; -#endif } EXPORT_SYMBOL_GPL(device_initialize);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 39284ff2a6cd..620f16d89a98 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -107,16 +107,21 @@ struct io_tlb_mem { }; extern struct io_tlb_mem *io_tlb_default_mem;
+static inline struct io_tlb_mem *dev_iotlb_mem(struct device *dev) +{ + return dev->dma_io_tlb_mem ?: io_tlb_default_mem; +} + static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_mem *mem = dev_iotlb_mem(dev);
return mem && paddr >= mem->start && paddr < mem->end; }
static inline bool is_swiotlb_force_bounce(struct device *dev) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_mem *mem = dev_iotlb_mem(dev);
return mem && mem->force_bounce; } @@ -167,7 +172,7 @@ bool swiotlb_free(struct device *dev, struct page *page, size_t size);
static inline bool is_swiotlb_for_alloc(struct device *dev) { - return dev->dma_io_tlb_mem->for_alloc; + return dev_iotlb_mem(dev)->for_alloc; } #else static inline struct page *swiotlb_alloc(struct device *dev, size_t size) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index b7f76bca89bf..f4942149f87d 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -359,7 +359,7 @@ static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_mem *mem = dev_iotlb_mem(dev); int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; @@ -440,7 +440,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index) static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, size_t alloc_size) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_mem *mem = dev_iotlb_mem(dev); unsigned long boundary_mask = dma_get_seg_boundary(dev); dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, mem->start) & boundary_mask; @@ -522,7 +522,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, size_t mapping_size, size_t alloc_size, enum dma_data_direction dir, unsigned long attrs) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_mem *mem = dev_iotlb_mem(dev); unsigned int offset = swiotlb_align_offset(dev, orig_addr); unsigned int i; int index; @@ -565,7 +565,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_mem *mem = dev_iotlb_mem(dev); unsigned long flags; unsigned int offset = swiotlb_align_offset(dev, tlb_addr); int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; @@ -682,7 +682,7 @@ size_t swiotlb_max_mapping_size(struct device *dev)
bool is_swiotlb_active(struct device *dev) { - return dev->dma_io_tlb_mem != NULL; + return dev_iotlb_mem(dev) != NULL; } EXPORT_SYMBOL_GPL(is_swiotlb_active);
@@ -729,7 +729,7 @@ static void rmem_swiotlb_debugfs_init(struct reserved_mem *rmem)
struct page *swiotlb_alloc(struct device *dev, size_t size) { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_mem *mem = dev_iotlb_mem(dev); phys_addr_t tlb_addr; int index;
@@ -792,7 +792,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, static void rmem_swiotlb_device_release(struct reserved_mem *rmem, struct device *dev) { - dev->dma_io_tlb_mem = io_tlb_default_mem; + dev->dma_io_tlb_mem = NULL; }
static const struct reserved_mem_ops rmem_swiotlb_ops = {
dri-devel@lists.freedesktop.org