/*
 * drivers/video/tegra/nvmap/nvmap_heap.c
 *
 * GPU heap allocator.
 *
 * Copyright (c) 2011-2018, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#define pr_fmt(fmt)	"%s: " fmt, __func__

/*
 * NOTE: the <linux/...> header names did not survive in the recovered source;
 * the list below is reconstructed from the interfaces used in this file and
 * may differ from the original.
 */
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
#include <linux/sched/clock.h>
#endif
#include <linux/dma-contiguous.h>

#include "nvmap_priv.h"
#include "nvmap_heap.h"

/*
 * "Carveouts" are platform-defined regions of physically contiguous memory
 * which are not managed by the OS.  A platform may specify multiple
 * carveouts, either for small special-purpose memory regions (like IRAM on
 * Tegra SoCs) or for reserved regions of main system memory.
 *
 * The carveout allocator returns allocations which are physically contiguous.
 */

static struct kmem_cache *heap_block_cache;

struct list_block {
	struct nvmap_heap_block block;
	struct list_head all_list;
	unsigned int mem_prot;
	phys_addr_t orig_addr;
	size_t size;
	size_t align;
	struct nvmap_heap *heap;
	struct list_head free_list;
};

struct nvmap_heap {
	struct list_head all_list;
	struct mutex lock;
	const char *name;
	void *arg;
	/* heap base */
	phys_addr_t base;
	/* heap size */
	size_t len;
	struct device *cma_dev;
	struct device *dma_dev;
	bool is_ivm;
	bool can_alloc;	/* Used only if is_ivm == true */
	int peer;	/* Used only if is_ivm == true */
	int vm_id;	/* Used only if is_ivm == true */
	struct nvmap_pm_ops pm_ops;
};

struct device *dma_dev_from_handle(unsigned long type)
{
	int i;
	struct nvmap_carveout_node *co_heap;

	for (i = 0; i < nvmap_dev->nr_carveouts; i++) {
		co_heap = &nvmap_dev->heaps[i];

		if (!(co_heap->heap_bit & type))
			continue;

		return co_heap->carveout->dma_dev;
	}
	return ERR_PTR(-ENODEV);
}

int nvmap_query_heap_peer(struct nvmap_heap *heap)
{
	if (!heap || !heap->is_ivm)
		return -EINVAL;

	return heap->peer;
}

size_t nvmap_query_heap_size(struct nvmap_heap *heap)
{
	if (!heap)
		return -EINVAL;

	return heap->len;
}

void nvmap_heap_debugfs_init(struct dentry *heap_root, struct nvmap_heap *heap)
{
	if (sizeof(heap->base) == sizeof(u64))
		debugfs_create_x64("base", S_IRUGO, heap_root,
				   (u64 *)&heap->base);
	else
		debugfs_create_x32("base", S_IRUGO, heap_root,
				   (u32 *)&heap->base);

	if (sizeof(heap->len) == sizeof(u64))
		debugfs_create_x64("size", S_IRUGO, heap_root,
				   (u64 *)&heap->len);
	else
		debugfs_create_x32("size", S_IRUGO, heap_root,
				   (u32 *)&heap->len);
}
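/*
 * Illustrative usage sketch (not part of the driver, kept out of the build):
 * resolve the DMA device backing a carveout heap type with
 * dma_dev_from_handle() above and propagate -ENODEV when no carveout
 * matches.  The helper name is hypothetical.
 */
#if 0
static int example_carveout_dma_dev(unsigned long type)
{
	struct device *dev = dma_dev_from_handle(type);

	if (IS_ERR(dev))
		return PTR_ERR(dev);	/* no carveout matches this type bit */

	/* dev can now back dma_alloc_attrs()/dma_free_attrs() calls */
	return 0;
}
#endif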
static phys_addr_t nvmap_alloc_mem(struct nvmap_heap *h, size_t len,
				   phys_addr_t *start)
{
	phys_addr_t pa;
	DEFINE_DMA_ATTRS(attrs);
	struct device *dev = h->dma_dev;

	dma_set_attr(DMA_ATTR_ALLOC_EXACT_SIZE, __DMA_ATTR(attrs));

#ifdef CONFIG_TEGRA_VIRTUALIZATION
	if (start && h->is_ivm) {
		void *ret;

		pa = h->base + (*start);
		ret = dma_mark_declared_memory_occupied(dev, pa, len,
							__DMA_ATTR(attrs));
		if (IS_ERR(ret)) {
			dev_err(dev, "Failed to reserve (%pa) len(%zu)\n",
				&pa, len);
			return DMA_ERROR_CODE;
		} else {
			dev_dbg(dev, "reserved (%pa) len(%zu)\n", &pa, len);
		}
	} else
#endif
	{
		(void)dma_alloc_attrs(dev, len, &pa, GFP_KERNEL,
				      __DMA_ATTR(attrs));
		if (!dma_mapping_error(dev, pa)) {
			int ret;

			dev_dbg(dev, "Allocated addr (%pa) len(%zu)\n",
				&pa, len);
			if (!dma_is_coherent_dev(dev) && h->cma_dev) {
				ret = nvmap_cache_maint_phys_range(
					NVMAP_CACHE_OP_WB, pa, pa + len,
					true, true);
				if (!ret)
					return pa;
				dev_err(dev, "cache WB on (%pa, %zu) failed\n",
					&pa, len);
			}
		}
	}

	return pa;
}

static void nvmap_free_mem(struct nvmap_heap *h, phys_addr_t base, size_t len)
{
	struct device *dev = h->dma_dev;
	DEFINE_DMA_ATTRS(attrs);

	dma_set_attr(DMA_ATTR_ALLOC_EXACT_SIZE, __DMA_ATTR(attrs));
	dev_dbg(dev, "Free base (%pa) size (%zu)\n", &base, len);
#ifdef CONFIG_TEGRA_VIRTUALIZATION
	if (h->is_ivm && !h->can_alloc) {
		dma_mark_declared_memory_unoccupied(dev, base, len,
						    __DMA_ATTR(attrs));
	} else
#endif
	{
		dma_free_attrs(dev, len,
			       (void *)(uintptr_t)base,
			       (dma_addr_t)base, __DMA_ATTR(attrs));
	}
}

/*
 * base_max limits the position of the allocated chunk in memory.
 * If base_max is 0, there is no such limitation.
 */
static struct nvmap_heap_block *do_heap_alloc(struct nvmap_heap *heap,
					      size_t len, size_t align,
					      unsigned int mem_prot,
					      phys_addr_t base_max,
					      phys_addr_t *start)
{
	struct list_block *heap_block = NULL;
	dma_addr_t dev_base;
	struct device *dev = heap->dma_dev;

	/* Since pages are only mappable with one cache attribute,
	 * and most allocations from carveout heaps are DMA coherent
	 * (i.e., non-cacheable), round cacheable allocations up to
	 * a page boundary to ensure that the physical pages will
	 * only be mapped one way. */
	if (mem_prot == NVMAP_HANDLE_CACHEABLE ||
	    mem_prot == NVMAP_HANDLE_INNER_CACHEABLE) {
		align = max_t(size_t, align, PAGE_SIZE);
		len = PAGE_ALIGN(len);
	}

	if (heap->is_ivm)
		align = max_t(size_t, align, NVMAP_IVM_ALIGNMENT);

	heap_block = kmem_cache_zalloc(heap_block_cache, GFP_KERNEL);
	if (!heap_block) {
		dev_err(dev, "%s: failed to alloc heap block %s\n",
			__func__, dev_name(dev));
		goto fail_heap_block_alloc;
	}

	dev_base = nvmap_alloc_mem(heap, len, start);
	if (dma_mapping_error(dev, dev_base)) {
		dev_err(dev, "failed to alloc mem of size (%zu)\n", len);
		if (dma_is_coherent_dev(dev)) {
			struct dma_coherent_stats stats;

			dma_get_coherent_stats(dev, &stats);
			dev_err(dev, "used:%zu,curr_size:%zu max:%zu\n",
				stats.used, stats.size, stats.max);
		}
		goto fail_dma_alloc;
	}

	heap_block->block.base = dev_base;
	heap_block->orig_addr = dev_base;
	heap_block->size = len;

	list_add_tail(&heap_block->all_list, &heap->all_list);

	heap_block->heap = heap;
	heap_block->mem_prot = mem_prot;
	heap_block->align = align;

	return &heap_block->block;

fail_dma_alloc:
	kmem_cache_free(heap_block_cache, heap_block);
fail_heap_block_alloc:
	return NULL;
}

static struct list_block *do_heap_free(struct nvmap_heap_block *block)
{
	struct list_block *b = container_of(block, struct list_block, block);
	struct nvmap_heap *heap = b->heap;

	list_del(&b->all_list);

	nvmap_free_mem(heap, block->base, b->size);
	kmem_cache_free(heap_block_cache, b);

	return b;
}
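/*
 * Worked example of the rounding done in do_heap_alloc() (added for
 * illustration; the numbers are hypothetical): on a 4 KiB-page system, a
 * NVMAP_HANDLE_CACHEABLE request of len = 3000 bytes with align = 64 is
 * rounded up to len = PAGE_ALIGN(3000) = 4096 and align = PAGE_SIZE, so the
 * underlying pages can never end up mapped with two different cache
 * attributes.  On an IVM heap the alignment is further raised to
 * NVMAP_IVM_ALIGNMENT before the block is carved out.
 */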
/* nvmap_heap_alloc: allocates a block of memory of len bytes, aligned to
 * align bytes. */
struct nvmap_heap_block *nvmap_heap_alloc(struct nvmap_heap *h,
					  struct nvmap_handle *handle,
					  phys_addr_t *start)
{
	struct nvmap_heap_block *b;
	size_t len = handle->size;
	size_t align = handle->align;
	unsigned int prot = handle->flags;

	mutex_lock(&h->lock);

	if (h->is_ivm) { /* Is this an IVM carveout? */
		/* Check that this is the correct IVM heap. */
		if (handle->peer != h->peer) {
			mutex_unlock(&h->lock);
			return NULL;
		} else {
			if (h->can_alloc && start) {
				/* If this partition does the actual
				 * allocation, it should not specify
				 * start_offset.
				 */
				mutex_unlock(&h->lock);
				return NULL;
			} else if (!h->can_alloc && !start) {
				/* If this partition does not do the actual
				 * allocation, it should specify start_offset.
				 */
				mutex_unlock(&h->lock);
				return NULL;
			}
		}
	}

	/*
	 * If this heap has pm_ops defined and powering on the RAM
	 * attached to the heap returns an error, don't allocate from
	 * the heap and return NULL.
	 */
	if (h->pm_ops.busy) {
		if (h->pm_ops.busy() < 0) {
			pr_err("Unable to power on the heap device\n");
			mutex_unlock(&h->lock);
			return NULL;
		}
	}

	align = max_t(size_t, align, L1_CACHE_BYTES);
	b = do_heap_alloc(h, len, align, prot, 0, start);
	if (b) {
		b->handle = handle;
		handle->carveout = b;
		/* Generate the IVM id for the partition that can alloc. */
		if (h->is_ivm && h->can_alloc) {
			unsigned int offs = (b->base - h->base);

			BUG_ON(offs & (NVMAP_IVM_ALIGNMENT - 1));
			BUG_ON((offs >> ffs(NVMAP_IVM_ALIGNMENT)) &
			       ~((1 << NVMAP_IVM_OFFSET_WIDTH) - 1));
			BUG_ON(h->vm_id & ~(NVMAP_IVM_IVMID_MASK));
			/* So, page alignment is a sufficient check. */
			BUG_ON(len & ~(PAGE_MASK));
			handle->ivm_id = ((u64)h->vm_id << NVMAP_IVM_IVMID_SHIFT);
			handle->ivm_id |= (((offs >> (ffs(NVMAP_IVM_ALIGNMENT) - 1)) &
					    ((1ULL << NVMAP_IVM_OFFSET_WIDTH) - 1)) <<
					   NVMAP_IVM_OFFSET_SHIFT);
			handle->ivm_id |= (len >> PAGE_SHIFT);
		}
	}
	mutex_unlock(&h->lock);

	return b;
}

struct nvmap_heap *nvmap_block_to_heap(struct nvmap_heap_block *b)
{
	struct list_block *lb;

	lb = container_of(b, struct list_block, block);
	return lb->heap;
}

/* nvmap_heap_free: frees block b */
void nvmap_heap_free(struct nvmap_heap_block *b)
{
	struct nvmap_heap *h;
	struct list_block *lb;

	if (!b)
		return;

	h = nvmap_block_to_heap(b);
	mutex_lock(&h->lock);

	lb = container_of(b, struct list_block, block);
	nvmap_flush_heap_block(NULL, b, lb->size, lb->mem_prot);
	do_heap_free(b);

	/*
	 * If this heap has pm_ops defined and powering off the RAM
	 * attached to the heap returns an error, raise a warning.
	 */
	if (h->pm_ops.idle) {
		if (h->pm_ops.idle() < 0)
			WARN_ON(1);
	}

	mutex_unlock(&h->lock);
}
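/*
 * Illustrative sketch (not part of the driver, kept out of the build): the
 * inverse of the ivm_id packing performed in nvmap_heap_alloc() above.  It
 * assumes the layout implied by the shifts used there: the VM id in the bits
 * from NVMAP_IVM_IVMID_SHIFT up, the offset (in units of NVMAP_IVM_ALIGNMENT)
 * in NVMAP_IVM_OFFSET_WIDTH bits at NVMAP_IVM_OFFSET_SHIFT, and the length in
 * pages in the bits below NVMAP_IVM_OFFSET_SHIFT.  The helper names are
 * hypothetical.
 */
#if 0
static u64 example_ivm_vmid(u64 ivm_id)
{
	return ivm_id >> NVMAP_IVM_IVMID_SHIFT;
}

static u64 example_ivm_offset(u64 ivm_id)
{
	u64 units = (ivm_id >> NVMAP_IVM_OFFSET_SHIFT) &
		    ((1ULL << NVMAP_IVM_OFFSET_WIDTH) - 1);

	/* the offset was packed in units of NVMAP_IVM_ALIGNMENT */
	return units << (ffs(NVMAP_IVM_ALIGNMENT) - 1);
}

static u64 example_ivm_len(u64 ivm_id)
{
	/* assumes the length-in-pages field occupies the low bits */
	return (ivm_id & ((1ULL << NVMAP_IVM_OFFSET_SHIFT) - 1)) << PAGE_SHIFT;
}
#endif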
/* nvmap_heap_create: create a heap object of len bytes, starting from
 * address base.
 */
struct nvmap_heap *nvmap_heap_create(struct device *parent,
				     const struct nvmap_platform_carveout *co,
				     phys_addr_t base, size_t len, void *arg)
{
	struct nvmap_heap *h;

	h = kzalloc(sizeof(*h), GFP_KERNEL);
	if (!h) {
		dev_err(parent, "%s: out of memory\n", __func__);
		return NULL;
	}

	h->dma_dev = co->dma_dev;
	if (co->cma_dev) {
#ifdef CONFIG_DMA_CMA
		struct dma_contiguous_stats stats;

		if (dma_get_contiguous_stats(co->cma_dev, &stats))
			goto fail;

		base = stats.base;
		len = stats.size;
		h->cma_dev = co->cma_dev;
#else
		dev_err(parent, "invalid resize config for carveout %s\n",
			co->name);
		goto fail;
#endif
	} else if (!co->init_done) {
		int err;

		/* declare a non-CMA heap */
		err = dma_declare_coherent_memory(h->dma_dev, 0, base, len,
						  DMA_MEMORY_NOMAP |
						  DMA_MEMORY_EXCLUSIVE);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
		if (!err) {
#else
		if (err & DMA_MEMORY_NOMAP) {
#endif
			dev_info(parent,
				 "%s: dma coherent mem declare %pa,%zu\n",
				 co->name, &base, len);
		} else {
			dev_err(parent,
				"%s: dma coherent declare fail %pa,%zu\n",
				co->name, &base, len);
			goto fail;
		}
	}

	dev_set_name(h->dma_dev, "%s", co->name);
	dma_set_coherent_mask(h->dma_dev, DMA_BIT_MASK(64));
	h->name = co->name;
	h->arg = arg;
	h->base = base;
	h->can_alloc = !!co->can_alloc;
	h->is_ivm = co->is_ivm;
	h->len = len;
	h->peer = co->peer;
	h->vm_id = co->vmid;
	if (co->pm_ops.busy)
		h->pm_ops.busy = co->pm_ops.busy;

	if (co->pm_ops.idle)
		h->pm_ops.idle = co->pm_ops.idle;

	INIT_LIST_HEAD(&h->all_list);
	mutex_init(&h->lock);

	if (!co->no_cpu_access &&
	    nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV,
					 base, base + len, true, true)) {
		dev_err(parent, "cache flush failed\n");
		goto fail;
	}
	wmb();

	if (co->disable_dynamic_dma_map)
		nvmap_dev->dynamic_dma_map_mask &= ~co->usage_mask;

	if (co->no_cpu_access)
		nvmap_dev->cpu_access_mask &= ~co->usage_mask;

	dev_info(parent, "created heap %s base 0x%p size (%zuKiB)\n",
		 co->name, (void *)(uintptr_t)base, len / 1024);
	return h;

fail:
	kfree(h);
	return NULL;
}

/* nvmap_heap_destroy: frees all resources in heap */
void nvmap_heap_destroy(struct nvmap_heap *heap)
{
	WARN_ON(!list_is_singular(&heap->all_list));
	while (!list_empty(&heap->all_list)) {
		struct list_block *l;

		l = list_first_entry(&heap->all_list,
				     struct list_block, all_list);
		list_del(&l->all_list);
		kmem_cache_free(heap_block_cache, l);
	}
	kfree(heap);
}

int nvmap_heap_init(void)
{
	ulong start_time = sched_clock();

	heap_block_cache = KMEM_CACHE(list_block, 0);
	if (!heap_block_cache) {
		pr_err("%s: unable to create heap block cache\n", __func__);
		return -ENOMEM;
	}
	pr_info("%s: created heap block cache\n", __func__);
	nvmap_init_time += sched_clock() - start_time;
	return 0;
}

void nvmap_heap_deinit(void)
{
	if (heap_block_cache)
		kmem_cache_destroy(heap_block_cache);
	heap_block_cache = NULL;
}
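/*
 * Illustrative sketch (not part of the driver, kept out of the build): how a
 * platform might describe a carveout before handing it to nvmap_heap_create().
 * Only fields that this file reads are shown; struct nvmap_platform_carveout
 * is defined elsewhere, and every name and value below (the carveout name,
 * base address and size) is hypothetical.  co.dma_dev must point at a real
 * struct device before the call.
 */
#if 0
static struct nvmap_platform_carveout example_co = {
	.name		= "example-carveout",
	.is_ivm		= false,
	.can_alloc	= true,
	.no_cpu_access	= false,
};

static struct nvmap_heap *example_register(struct device *parent,
					   struct device *dma_dev)
{
	example_co.dma_dev = dma_dev;

	/* base/size would normally come from a device-tree reservation */
	return nvmap_heap_create(parent, &example_co,
				 0x80000000, 64 * SZ_1M, NULL);
}
#endif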
/*
 * This routine flushes carveout memory from the cache.
 *
 * Why is a cache flush needed for carveout memory?  Consider the case where
 * a piece of carveout is allocated as cached and then released.  If the same
 * memory is later allocated for an uncached request without being flushed
 * from the cache, the client might hand it to a H/W engine, which could start
 * modifying the memory.  Because the memory was cached earlier, some portion
 * of it may still be resident in the cache.  When the CPU later reads or
 * writes other memory, those stale cache lines may be written back to main
 * memory and corrupt the buffer if that happens after the H/W engine has
 * already written its data.
 *
 * However, blindly flushing the memory on every carveout allocation is
 * redundant.
 *
 * To optimize carveout buffer cache flushes, the following strategy is used:
 *
 * - The whole carveout is flushed from the cache during its initialization.
 * - During allocation, carveout buffers are not flushed from the cache.
 * - During deallocation, carveout buffers are flushed if they were allocated
 *   as cached.  If they were allocated as uncached/write-combined, no cache
 *   flush is needed; just draining the store buffers is enough.
 */
int nvmap_flush_heap_block(struct nvmap_client *client,
			   struct nvmap_heap_block *block,
			   size_t len, unsigned int prot)
{
	phys_addr_t phys = block->base;
	phys_addr_t end = block->base + len;
	int ret = 0;

	if (prot == NVMAP_HANDLE_UNCACHEABLE ||
	    prot == NVMAP_HANDLE_WRITE_COMBINE)
		goto out;

	ret = nvmap_cache_maint_phys_range(NVMAP_CACHE_OP_WB_INV, phys, end,
					   true,
					   prot != NVMAP_HANDLE_INNER_CACHEABLE);
	if (ret)
		goto out;
out:
	wmb();
	return ret;
}
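/*
 * Overview of the carveout heap lifecycle implemented in this file (summary
 * added for reference):
 *
 *   nvmap_heap_init()    - create the heap_block_cache kmem cache
 *   nvmap_heap_create()  - resolve or declare the backing DMA memory and
 *                          register one heap per platform carveout
 *   nvmap_heap_alloc()   - reserve a physically contiguous block for a
 *                          handle (and derive ivm_id on IVM carveouts)
 *   nvmap_heap_free()    - flush cacheable blocks and release the memory
 *   nvmap_heap_destroy() - free the heap bookkeeping
 *   nvmap_heap_deinit()  - destroy the kmem cache
 */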