tegrakernel/kernel/nvidia/drivers/video/tegra/nvmap/nvmap_cache.c

668 lines
17 KiB
C
Raw Normal View History

2022-02-16 09:13:02 -06:00
/*
* drivers/video/tegra/nvmap/nvmap_cache.c
*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#define pr_fmt(fmt) "nvmap: %s() " fmt, __func__
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/debugfs.h>
#include <linux/of.h>
#include <soc/tegra/chip-id.h>
#include <trace/events/nvmap.h>
#include "nvmap_priv.h"
#ifndef CONFIG_NVMAP_CACHE_MAINT_BY_SET_WAYS
/*
 * Set/ways maintenance is off by default in this configuration: the enable
 * flag below is zero-initialised and the threshold starts at SIZE_MAX.
 */
/* This is basically the L2 cache size but may be tuned as per requirement */
size_t cache_maint_inner_threshold = SIZE_MAX;
int nvmap_cache_maint_by_set_ways;
#else
/*
 * Set/ways maintenance is on by default in this configuration; ranges of at
 * least cache_maint_inner_threshold bytes qualify for a whole-cache
 * operation (see can_fast_cache_maint()).
 */
int nvmap_cache_maint_by_set_ways = 1;
size_t cache_maint_inner_threshold = 8 * SZ_2M;
#endif
/*
 * Static key tested in heap_page_cache_maint(): when it evaluates true the
 * single-mapping path is skipped and per-page maintenance is used instead.
 * Its enable count is exposed through debugfs in nvmap_cache_debugfs_init().
 */
static struct static_key nvmap_disable_vaddr_for_cache_maint;
/*
 * Hook reporting the inner/outer cacheability of a handle. It has no
 * initialiser here; nvmap_select_cache_ops() assigns it.
 */
void (*nvmap_get_cacheability)(struct nvmap_handle *h,
bool *inner, bool *outer);
/*
 * Flush the whole data cache.
 *
 * With CONFIG_DENVER_CPU, a core identified as Denver through MIDR_EL1 that
 * advertises the capability in ID_AFR0_EL1 performs the flush with a single
 * system-register write followed by a barrier. Every other case falls back
 * to tegra_flush_dcache_all().
 *
 * @dummy: unused; lets the function match a void (*)(void *) callback shape.
 */
static inline void nvmap_flush_dcache_all(void *dummy)
{
#if defined(CONFIG_DENVER_CPU)
	u64 main_id;
	u64 aux_features;

	asm volatile ("mrs %0, MIDR_EL1" : "=r"(main_id));

	/* Only Denver cores are candidates for the single-write flush. */
	if ((main_id & 0xFF8FFFF0) == 0x4e0f0000) {
		asm volatile ("mrs %0, ID_AFR0_EL1" : "=r"(aux_features));

		/* Is a complete cache flush through msr supported? */
		if (likely((aux_features & 0xf00) == 0x100)) {
			asm volatile ("msr s3_0_c15_c13_0, %0" : : "r" (0));
			asm volatile ("dsb sy");
			return;
		}
	}
#endif
	tegra_flush_dcache_all(NULL);
}
/*
 * Adapter with the void(void) signature required by the
 * inner_flush_cache_all hook; forwards to nvmap_flush_dcache_all().
 */
static void nvmap_inner_flush_cache_all(void)
{
nvmap_flush_dcache_all(NULL);
}
/*
 * Whole-cache flush hook used by the fast maintenance paths in this file.
 * Defaults to the adapter above; nvmap_select_cache_ops() may reassign it.
 */
void (*inner_flush_cache_all)(void) = nvmap_inner_flush_cache_all;
/* Defined outside this file; invoked on every CPU on Tegra210 builds. */
extern void __clean_dcache_louis(void *);
/*
 * Default whole-cache clean routine.
 *
 * On CONFIG_ARCH_TEGRA_210_SOC builds __clean_dcache_louis() is first run on
 * each CPU (on_each_cpu() is called with its wait argument set to 1); in all
 * builds tegra_clean_dcache_all() is then called.
 */
static void nvmap_inner_clean_cache_all(void)
{
#ifdef CONFIG_ARCH_TEGRA_210_SOC
on_each_cpu(__clean_dcache_louis, NULL, 1);
#endif
tegra_clean_dcache_all(NULL);
}
/*
 * Whole-cache clean hook used by the fast maintenance paths in this file.
 * Defaults to the routine above; nvmap_select_cache_ops() may reassign it.
 */
void (*inner_clean_cache_all)(void) = nvmap_inner_clean_cache_all;
/*
 * Derive cacheability from the handle flags.
 *
 * @h:     handle to inspect
 * @inner: set to true when h->flags is NVMAP_HANDLE_CACHEABLE or
 *         NVMAP_HANDLE_INNER_CACHEABLE
 * @outer: set to true only when h->flags is NVMAP_HANDLE_CACHEABLE
 */
static void nvmap_handle_get_cacheability(struct nvmap_handle *h,
					  bool *inner, bool *outer)
{
	const bool fully_cacheable = (h->flags == NVMAP_HANDLE_CACHEABLE);
	const bool inner_only = (h->flags == NVMAP_HANDLE_INNER_CACHEABLE);

	*inner = fully_cacheable || inner_only;
	*outer = fully_cacheable;
}
/*
 * Fill @op with this file's default cache operations and a heap-allocated
 * copy of the name "set/ways".
 *
 * The name is allocated with kstrdup(); nvmap_select_cache_ops() frees it
 * after logging. An allocation failure triggers BUG_ON().
 */
static void nvmap_cache_of_setup(struct nvmap_chip_cache_op *op)
{
op->inner_clean_cache_all = nvmap_inner_clean_cache_all;
op->inner_flush_cache_all = nvmap_inner_flush_cache_all;
op->nvmap_get_cacheability = nvmap_handle_get_cacheability;
op->name = kstrdup("set/ways", GFP_KERNEL);
BUG_ON(!op->name);
}
/* Register the setup routine for device nodes compatible with "nvidia,carveouts". */
NVMAP_CACHE_OF_DECLARE("nvidia,carveouts", nvmap_cache_of_setup);
/*
 * nvmap_select_cache_ops() - install the cache hooks matching @dev.
 * @dev: device whose of_node is compared against the nvmap cache OF table.
 *
 * Walks the table starting at __nvmapcache_of_table, runs the setup function
 * of the first compatible entry and copies the resulting callbacks into the
 * file-level hooks (inner_flush_cache_all, inner_clean_cache_all,
 * nvmap_get_cacheability). If the setup function requested it through
 * op.flags, the clean and/or flush hook is invoked once immediately.
 *
 * When no entry matches, a warning is emitted and the hooks are left
 * unchanged.
 */
void nvmap_select_cache_ops(struct device *dev)
{
	struct nvmap_chip_cache_op op;
	bool match_found = false;
	const struct of_device_id *matches = &__nvmapcache_of_table;

	memset(&op, 0, sizeof(op));

	/*
	 * Incrementing a table pointer never yields NULL, so testing the
	 * pointer itself cannot terminate the walk. Stop at the first entry
	 * with an empty compatible string instead.
	 * NOTE(review): this assumes the table ends with an empty sentinel
	 * entry, as OF match tables conventionally do - confirm for the
	 * nvmapcache table.
	 */
	for (; matches->compatible[0]; matches++) {
		if (of_device_is_compatible(dev->of_node,
					    matches->compatible)) {
			const nvmap_setup_chip_cache_fn init_fn = matches->data;

			init_fn(&op);
			match_found = true;
			break;
		}
	}

	if (WARN_ON(match_found == false)) {
		pr_err("%s: no cache ops found\n", __func__);
		return;
	}

	inner_flush_cache_all = op.inner_flush_cache_all;
	inner_clean_cache_all = op.inner_clean_cache_all;
	nvmap_get_cacheability = op.nvmap_get_cacheability;
	pr_info("nvmap cache ops set to %s\n", op.name);
	/* The setup function allocated the name; it is only needed for the log. */
	kfree(op.name);

	if (inner_clean_cache_all && (op.flags & CALL_CLEAN_CACHE_ON_INIT)) {
		pr_info("calling cache operation %pF\n",
			inner_clean_cache_all);
		inner_clean_cache_all();
	}

	if (inner_flush_cache_all && (op.flags & CALL_FLUSH_CACHE_ON_INIT)) {
		pr_info("calling cache operation %pF\n",
			inner_flush_cache_all);
		inner_flush_cache_all();
	}
}
/*
* FIXME:
*
* __clean_dcache_page() is only available on ARM64 (well, we haven't
* implemented it on ARMv7).
*/
/* Clean the data cache for a single page by delegating to __clean_dcache_page(). */
void nvmap_clean_cache_page(struct page *page)
{
__clean_dcache_page(page);
}
/*
 * nvmap_clean_cache() - clean the data cache for an array of pages.
 * @pages:    array of page pointers
 * @numpages: number of entries in @pages
 *
 * Accounts numpages * PAGE_SIZE bytes under NS_CFLUSH_DONE, emits the
 * cache-flush trace event and then cleans each page in turn.
 */
void nvmap_clean_cache(struct page **pages, int numpages)
{
	/*
	 * Widen before shifting: shifting the int directly overflows once
	 * the byte count no longer fits in a signed int.
	 */
	size_t bytes = (size_t)numpages << PAGE_SHIFT;
	int i;

	/* Not technically a flush but that's what nvmap knows about. */
	nvmap_stats_inc(NS_CFLUSH_DONE, bytes);
	trace_nvmap_cache_flush(bytes,
				nvmap_stats_read(NS_ALLOC),
				nvmap_stats_read(NS_CFLUSH_RQ),
				nvmap_stats_read(NS_CFLUSH_DONE));

	for (i = 0; i < numpages; i++)
		nvmap_clean_cache_page(pages[i]);
}
/*
 * Weak default: select cache ops based on the parent of nvmap's user device.
 * Being __weak, another definition of this symbol elsewhere takes precedence.
 */
__weak void nvmap_override_cache_ops(void)
{
nvmap_select_cache_ops(nvmap_dev->dev_user.parent);
}
/*
 * inner_cache_maint() - run one cache operation on a kernel virtual range.
 * @op:    NVMAP_CACHE_OP_WB_INV, NVMAP_CACHE_OP_INV, or anything else
 *         (handled as a write-back)
 * @vaddr: start of the range
 * @size:  length of the range in bytes
 */
void inner_cache_maint(unsigned int op, void *vaddr, size_t size)
{
	if (op == NVMAP_CACHE_OP_WB_INV) {
		/* The flush helper was renamed and re-parameterised in 4.9. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
		__dma_flush_range(vaddr, vaddr + size);
#else
		__dma_flush_area(vaddr, size);
#endif
		return;
	}

	/* Invalidate maps as device-to-CPU, everything else as CPU-to-device. */
	__dma_map_area(vaddr, size,
		       (op == NVMAP_CACHE_OP_INV) ? DMA_FROM_DEVICE
						  : DMA_TO_DEVICE);
}
/*
 * Perform cache maintenance on the byte range [start, end) of a handle whose
 * backing store is the page array h->pgalloc.pages.
 *
 * @h:      handle to maintain
 * @start:  byte offset of the first byte (start >> PAGE_SHIFT indexes pages)
 * @end:    byte offset one past the last byte
 * @op:     NVMAP_CACHE_OP_* operation
 * @inner:  request inner-cache maintenance
 * @outer:  request outer-cache maintenance
 * @clean_only_dirty: accepted but not referenced in this function
 *
 * Two strategies are used: a single inner_cache_maint() call through the
 * handle's kernel mapping (h->vaddr), or a page-by-page walk that calls
 * nvmap_cache_maint_phys_range() on each page's physical address.
 */
static void heap_page_cache_maint(
struct nvmap_handle *h, unsigned long start, unsigned long end,
unsigned int op, bool inner, bool outer, bool clean_only_dirty)
{
if (h->userflags & NVMAP_HANDLE_CACHE_SYNC) {
/*
* zap user VA->PA mappings so that any access to the pages
* will result in a fault and can be marked dirty
*/
nvmap_handle_mkclean(h, start, end-start);
nvmap_zap_handle(h, start, end - start);
}
/* Debug knob: force the per-page path even when a mapping could be used. */
if (static_key_false(&nvmap_disable_vaddr_for_cache_maint))
goto per_page_cache_maint;
if (inner) {
if (!h->vaddr) {
/*
 * No kernel mapping yet: try to create one. On success the mapping is
 * released again right away, yet h->vaddr is used below.
 * NOTE(review): presumably __nvmap_munmap() only drops a reference and
 * h->vaddr stays valid - confirm against its implementation.
 */
if (__nvmap_mmap(h))
__nvmap_munmap(h, h->vaddr);
else
goto per_page_cache_maint;
}
/* Fast inner cache maintenance using single mapping */
inner_cache_maint(op, h->vaddr + start, end - start);
if (!outer)
return;
/* Skip per-page inner maintenance in loop below */
inner = false;
}
per_page_cache_maint:
/* Walk the range one page (or partial page) at a time. */
while (start < end) {
struct page *page;
phys_addr_t paddr;
unsigned long next;
unsigned long off;
size_t size;
int ret;
page = nvmap_to_page(h->pgalloc.pages[start >> PAGE_SHIFT]);
/* next = start of the following page, clamped to the end of the range */
next = min(((start + PAGE_SIZE) & PAGE_MASK), end);
off = start & ~PAGE_MASK;
size = next - start;
paddr = page_to_phys(page) + off;
ret = nvmap_cache_maint_phys_range(op, paddr, paddr + size,
inner, outer);
/* Failures are only reported via WARN; the walk continues. */
WARN_ON(ret != 0);
start = next;
}
}
/*
 * Decide whether a whole-cache operation may replace ranged maintenance.
 *
 * True only when set/ways maintenance is enabled, the operation is not a
 * pure invalidate, and the range [start, end) is at least
 * cache_maint_inner_threshold bytes long.
 */
static inline bool can_fast_cache_maint(unsigned long start,
					unsigned long end, unsigned int op)
{
	return nvmap_cache_maint_by_set_ways &&
	       op != NVMAP_CACHE_OP_INV &&
	       (end - start) >= cache_maint_inner_threshold;
}
/*
 * Try to satisfy a maintenance request with a whole-cache operation.
 *
 * @h:      handle being maintained
 * @start:  start offset of the requested range
 * @end:    end offset of the requested range
 * @op:     NVMAP_CACHE_OP_* operation
 * @clean_only_dirty: accepted but not referenced in this function
 *
 * Returns true when the request was handled here, false when the caller
 * must fall back to ranged maintenance.
 */
static bool fast_cache_maint(struct nvmap_handle *h,
			     unsigned long start,
			     unsigned long end, unsigned int op,
			     bool clean_only_dirty)
{
	if (can_fast_cache_maint(start, end, op)) {
		/* The whole cache is affected, so reset tracking for the whole handle. */
		if (h->userflags & NVMAP_HANDLE_CACHE_SYNC) {
			nvmap_handle_mkclean(h, 0, h->size);
			nvmap_zap_handle(h, 0, h->size);
		}

		if (op == NVMAP_CACHE_OP_WB)
			inner_clean_cache_all();
		else if (op == NVMAP_CACHE_OP_WB_INV)
			inner_flush_cache_all();

		return true;
	}

	return false;
}
/* Describes one cache maintenance request consumed by do_cache_maint(). */
struct cache_maint_op {
/* Range to maintain; do_cache_maint() bounds-checks both against h->size. */
phys_addr_t start;
phys_addr_t end;
/* NVMAP_CACHE_OP_* operation to perform. */
unsigned int op;
/* Handle the request applies to. */
struct nvmap_handle *h;
/* Filled by nvmap_get_cacheability() in __nvmap_do_cache_maint(). */
bool inner;
bool outer;
/* Forwarded to fast_cache_maint() and heap_page_cache_maint(). */
bool clean_only_dirty;
};
/*
 * nvmap_cache_maint_phys_range() - cache maintenance on a physical range.
 * @op:     NVMAP_CACHE_OP_* operation
 * @pstart: first physical address of the range
 * @pend:   physical address one past the end of the range
 * @inner:  non-zero to perform inner-cache maintenance
 * @outer:  accepted for interface symmetry; no outer work is done here
 *
 * Large enough ranges (see can_fast_cache_maint()) are served by a
 * whole-cache operation. Otherwise each page of the range is temporarily
 * mapped into a one-page kernel VM area, maintained through that mapping
 * and unmapped again.
 *
 * Return: 0 on success, -ENOMEM when the VM area cannot be allocated, or
 * the error from ioremap_page_range() when a page cannot be mapped.
 */
int nvmap_cache_maint_phys_range(unsigned int op, phys_addr_t pstart,
				 phys_addr_t pend, int inner, int outer)
{
	unsigned long kaddr;
	struct vm_struct *area = NULL;
	phys_addr_t loop;
	int err = 0;

	if (!inner)
		goto do_outer;

	if (can_fast_cache_maint((unsigned long)pstart,
				 (unsigned long)pend, op)) {
		if (op == NVMAP_CACHE_OP_WB_INV)
			inner_flush_cache_all();
		else if (op == NVMAP_CACHE_OP_WB)
			inner_clean_cache_all();
		goto do_outer;
	}

	area = alloc_vm_area(PAGE_SIZE, NULL);
	if (!area)
		return -ENOMEM;
	kaddr = (ulong)area->addr;

	loop = pstart;
	while (loop < pend) {
		/* End of the current page, clamped to the end of the range. */
		phys_addr_t next = (loop + PAGE_SIZE) & PAGE_MASK;
		void *base = (void *)kaddr + (loop & ~PAGE_MASK);

		next = min(next, pend);

		/*
		 * Do not touch the window unless the mapping succeeded;
		 * running cache maintenance on an unmapped address would
		 * fault.
		 */
		err = ioremap_page_range(kaddr, kaddr + PAGE_SIZE,
					 loop, PG_PROT_KERNEL);
		if (err)
			break;

		inner_cache_maint(op, base, next - loop);
		loop = next;
		unmap_kernel_range(kaddr, PAGE_SIZE);
	}

	free_vm_area(area);
do_outer:
	return err;
}
/*
 * do_cache_maint() - execute one cache maintenance request.
 * @cache_work: request describing handle, range and operation
 *
 * Uncacheable and write-combined handles, as well as empty ranges, need no
 * work. Otherwise a whole-cache operation is tried first, then either the
 * page-array path (heap_pgalloc handles) or the physical-range path
 * (carveout handles, offset by the carveout base).
 *
 * Return: 0 on success, -EFAULT for a missing or unallocated handle,
 * -EINVAL when the range lies outside the handle, or the error reported by
 * nvmap_cache_maint_phys_range().
 */
static int do_cache_maint(struct cache_maint_op *cache_work)
{
	phys_addr_t pstart = cache_work->start;
	phys_addr_t pend = cache_work->end;
	int err = 0;
	struct nvmap_handle *h = cache_work->h;
	unsigned int op = cache_work->op;

	if (!h || !h->alloc)
		return -EFAULT;

	wmb();
	if (h->flags == NVMAP_HANDLE_UNCACHEABLE ||
	    h->flags == NVMAP_HANDLE_WRITE_COMBINE || pstart == pend)
		goto out;

	trace_nvmap_cache_maint(h->owner, h, pstart, pend, op, pend - pstart);
	if (pstart > h->size || pend > h->size) {
		pr_warn("cache maintenance outside handle\n");
		err = -EINVAL;
		goto out;
	}

	if (fast_cache_maint(h, pstart, pend, op, cache_work->clean_only_dirty))
		goto out;

	if (h->heap_pgalloc) {
		heap_page_cache_maint(h, pstart, pend, op, true,
			(h->flags == NVMAP_HANDLE_INNER_CACHEABLE) ?
			false : true, cache_work->clean_only_dirty);
		goto out;
	}

	/* Carveout memory: turn handle offsets into physical addresses. */
	pstart += h->carveout->base;
	pend += h->carveout->base;

	err = nvmap_cache_maint_phys_range(op, pstart, pend, true,
			h->flags != NVMAP_HANDLE_INNER_CACHEABLE);

out:
	if (!err) {
		/* A whole-cache operation is accounted as one threshold's worth. */
		if (can_fast_cache_maint(pstart, pend, op))
			nvmap_stats_inc(NS_CFLUSH_DONE,
					cache_maint_inner_threshold);
		else
			nvmap_stats_inc(NS_CFLUSH_DONE, pend - pstart);
	}

	trace_nvmap_cache_flush(pend - pstart,
				nvmap_stats_read(NS_ALLOC),
				nvmap_stats_read(NS_CFLUSH_RQ),
				nvmap_stats_read(NS_CFLUSH_DONE));

	/* Report failures instead of unconditionally claiming success. */
	return err;
}
/*
 * __nvmap_do_cache_maint() - validate and run cache maintenance on a handle.
 * @client: not referenced in this function
 * @h:      handle to maintain; a reference is taken for the duration
 * @start:  start offset within the handle
 * @end:    end offset within the handle; 0 selects the end of the handle
 * @op:     NVMAP_CACHE_OP_* operation (a pure invalidate is upgraded to
 *          write-back + invalidate)
 * @clean_only_dirty: honoured only for NVMAP_CACHE_OP_WB
 *
 * Return: 0 on success, -EFAULT when the handle cannot be referenced or the
 * range is invalid, -EPERM when the handle's heap is not CPU accessible, or
 * the result of do_cache_maint().
 */
int __nvmap_do_cache_maint(struct nvmap_client *client,
			   struct nvmap_handle *h,
			   unsigned long start, unsigned long end,
			   unsigned int op, bool clean_only_dirty)
{
	int err;
	struct cache_maint_op cache_op;

	h = nvmap_handle_get(h);
	if (!h)
		return -EFAULT;

	/*
	 * Besides the bounds check, reject a reversed range: "end - start"
	 * would wrap to a huge length further down. end == 0 is exempt
	 * because it means "to the end of the handle".
	 */
	if ((start >= h->size) || (end > h->size) || (end && end < start)) {
		pr_debug("%s start: %ld end: %ld h->size: %zu\n", __func__,
			 start, end, h->size);
		nvmap_handle_put(h);
		return -EFAULT;
	}

	if (!(h->heap_type & nvmap_dev->cpu_access_mask)) {
		pr_debug("%s heap_type %u access_mask 0x%x\n", __func__,
			 h->heap_type, nvmap_dev->cpu_access_mask);
		nvmap_handle_put(h);
		return -EPERM;
	}

	nvmap_kmaps_inc(h);
	if (op == NVMAP_CACHE_OP_INV)
		op = NVMAP_CACHE_OP_WB_INV;

	/* clean only dirty is applicable only for Write Back operation */
	if (op != NVMAP_CACHE_OP_WB)
		clean_only_dirty = false;

	cache_op.h = h;
	cache_op.start = start;
	cache_op.end = end ? end : h->size;
	cache_op.op = op;
	nvmap_get_cacheability(h, &cache_op.inner, &cache_op.outer);
	cache_op.clean_only_dirty = clean_only_dirty;

	/*
	 * Account the resolved range: with end == 0 the raw "end - start"
	 * would wrap (or count zero bytes for a whole-handle request).
	 */
	nvmap_stats_inc(NS_CFLUSH_RQ, cache_op.end - cache_op.start);
	err = do_cache_maint(&cache_op);
	nvmap_kmaps_dec(h);
	nvmap_handle_put(h);
	return err;
}
/*
 * __nvmap_cache_maint() - cache maintenance on a user-mapped handle range.
 * @client: forwarded to __nvmap_do_cache_maint()
 * @op:     request holding the handle fd, user address, length and operation
 *
 * The user address must fall inside an nvmap VMA that maps the handle named
 * by op->handle, and op->len must not run past the end of that VMA. The
 * address is translated into a handle offset before maintenance is issued.
 *
 * Return: 0 on success, -EINVAL for a bad address/operation/handle,
 * -EADDRNOTAVAIL when no suitable VMA covers the range, -EFAULT when the VMA
 * belongs to a different handle, or the result of __nvmap_do_cache_maint().
 */
int __nvmap_cache_maint(struct nvmap_client *client,
			struct nvmap_cache_op_64 *op)
{
	struct nvmap_handle *handle;
	struct nvmap_vma_priv *priv;
	struct vm_area_struct *vma;
	unsigned long uaddr;
	unsigned long start;
	unsigned long end;
	int err = 0;

	if (!op->addr)
		return -EINVAL;
	if (op->op < NVMAP_CACHE_OP_WB || op->op > NVMAP_CACHE_OP_WB_INV)
		return -EINVAL;

	handle = nvmap_handle_get_from_fd(op->handle);
	if (!handle)
		return -EINVAL;

	uaddr = (unsigned long)op->addr;

	down_read(&current->mm->mmap_sem);

	vma = find_vma(current->active_mm, uaddr);
	if (!vma || !is_nvmap_vma(vma) ||
	    uaddr < vma->vm_start ||
	    uaddr >= vma->vm_end ||
	    op->len > vma->vm_end - uaddr) {
		err = -EADDRNOTAVAIL;
		goto out;
	}

	/* The mapping must belong to the handle the caller named. */
	priv = (struct nvmap_vma_priv *)vma->vm_private_data;
	if (priv->handle != handle) {
		err = -EFAULT;
		goto out;
	}

	/* User address -> byte offset within the handle. */
	start = uaddr - vma->vm_start + (vma->vm_pgoff << PAGE_SHIFT);
	end = start + op->len;

	err = __nvmap_do_cache_maint(client, priv->handle, start, end, op->op,
				     false);
out:
	up_read(&current->mm->mmap_sem);
	nvmap_handle_put(handle);
	return err;
}
/*
* Perform cache op on the list of memory regions within passed handles.
* A memory region within handle[i] is identified by offsets[i], sizes[i]
*
* sizes[i] == 0 is a special case which causes handle wide operation,
* this is done by replacing offsets[i] = 0, sizes[i] = handles[i]->size.
* So, the input arrays sizes, offsets are not guaranteed to be read-only
*
* This will optimize the op if it can.
* In the case that all the handles together are larger than the inner cache
* maint threshold it is possible to just do an entire inner cache flush.
*
* NOTE: this omits outer cache operations which is fine for ARM64
*/
static int __nvmap_do_cache_maint_list(struct nvmap_handle **handles,
				       u64 *offsets, u64 *sizes, int op, int nr,
				       bool is_32)
{
	/* With is_32 set the arrays are read as packed 32-bit values. */
	u32 *offsets32 = (u32 *)offsets;
	u32 *sizes32 = (u32 *)sizes;
	u64 threshold = ~0ULL;
	u64 requested = 0;
	int idx;

	WARN(!IS_ENABLED(CONFIG_ARM64),
		"cache list operation may not function properly");

	if (nvmap_cache_maint_by_set_ways)
		threshold = cache_maint_inner_threshold;

	/* Pass 1: add up how much cacheable memory the request covers. */
	for (idx = 0; idx < nr; idx++) {
		struct nvmap_handle *h = handles[idx];
		u64 len = is_32 ? sizes32[idx] : sizes[idx];
		bool inner, outer;

		nvmap_get_cacheability(h, &inner, &outer);
		if (!inner && !outer)
			continue;

		if ((op == NVMAP_CACHE_OP_WB) && nvmap_handle_track_dirty(h))
			requested += atomic_read(&h->pgalloc.ndirty);
		else if (len)
			requested += len;
		else
			requested += h->size;
	}

	if (!requested)
		return 0;

	/* Below the threshold: maintain every region individually. */
	if (requested < threshold) {
		for (idx = 0; idx < nr; idx++) {
			struct nvmap_handle *h = handles[idx];
			u64 len = is_32 ? sizes32[idx] : sizes[idx];
			u64 off = is_32 ? offsets32[idx] : offsets[idx];
			int err;

			/* A zero size selects the whole handle. */
			if (!len)
				len = h->size;

			err = __nvmap_do_cache_maint(h->owner, h, off,
						     off + len, op, false);
			if (err) {
				pr_err("cache maint per handle failed [%d]\n",
					err);
				return err;
			}
		}
		return 0;
	}

	/*
	 * The list is at least as big as the threshold: one operation on the
	 * whole inner cache replaces the per-region work.
	 */
	for (idx = 0; idx < nr; idx++) {
		struct nvmap_handle *h = handles[idx];

		if (h->userflags & NVMAP_HANDLE_CACHE_SYNC) {
			nvmap_handle_mkclean(h, 0, h->size);
			nvmap_zap_handle(h, 0, h->size);
		}
	}

	if (op == NVMAP_CACHE_OP_WB)
		inner_clean_cache_all();
	else
		inner_flush_cache_all();

	nvmap_stats_inc(NS_CFLUSH_RQ, requested);
	nvmap_stats_inc(NS_CFLUSH_DONE, threshold);
	trace_nvmap_cache_flush(requested,
				nvmap_stats_read(NS_ALLOC),
				nvmap_stats_read(NS_CFLUSH_RQ),
				nvmap_stats_read(NS_CFLUSH_DONE));
	return 0;
}
/*
 * nvmap_do_cache_maint_list() - chip-aware entry point for list maintenance.
 *
 * Parameters are forwarded unchanged to __nvmap_do_cache_maint_list().
 * Returns 0 without doing anything on TEGRA194, otherwise the result of the
 * list operation.
 */
inline int nvmap_do_cache_maint_list(struct nvmap_handle **handles,
				     u64 *offsets, u64 *sizes, int op, int nr,
				     bool is_32)
{
	/*
	 * As io-coherency is enabled by default from T194 onwards,
	 * Don't do cache maint from CPU side. The HW, SCF will do.
	 */
	if (tegra_get_chip_id() == TEGRA194)
		return 0;

	return __nvmap_do_cache_maint_list(handles, offsets, sizes, op, nr,
					   is_32);
}
/*
 * seq_file show callback: print the inner-cache maintenance threshold in
 * bytes, or SIZE_MAX while set/ways maintenance is disabled.
 */
static int cache_inner_threshold_show(struct seq_file *m, void *v)
{
	size_t reported = SIZE_MAX;

	if (nvmap_cache_maint_by_set_ways)
		reported = cache_maint_inner_threshold;

	seq_printf(m, "%zuB\n", reported);
	return 0;
}
/* debugfs open callback: bind cache_inner_threshold_show() via single_open(). */
static int cache_inner_threshold_open(struct inode *inode, struct file *file)
{
return single_open(file, cache_inner_threshold_show, inode->i_private);
}
/*
 * debugfs write callback: parse a decimal byte count (at most 16 digits)
 * from user space and store it in cache_maint_inner_threshold.
 *
 * @file:   debugfs file; its private_data is the seq_file set up on open
 * @buffer: user buffer holding the number as text
 * @count:  number of bytes offered; at most 16 are consumed
 * @pos:    unused
 *
 * Return: number of bytes consumed, or -EINVAL when the copy fails,
 * set/ways maintenance is disabled, or no number could be parsed.
 */
static ssize_t cache_inner_threshold_write(struct file *file,
					   const char __user *buffer,
					   size_t count, loff_t *pos)
{
	int ret;
	size_t threshold;
	struct seq_file *p = file->private_data;
	/*
	 * Start from an all-zero buffer. A buffer pre-filled with digits
	 * would have its stale tail parsed as part of the number whenever
	 * the user writes fewer than 16 bytes without a trailing newline.
	 */
	char str[17] = { 0 };

	count = min_t(size_t, sizeof(str) - 1, count);
	if (copy_from_user(str, buffer, count))
		return -EINVAL;
	str[count] = '\0';

	if (!nvmap_cache_maint_by_set_ways)
		return -EINVAL;

	/* Parse into a local so a failed parse never touches the global. */
	ret = sscanf(str, "%16zu", &threshold);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&p->lock);
	cache_maint_inner_threshold = threshold;
	mutex_unlock(&p->lock);

	pr_debug("nvmap:cache_maint_inner_threshold is now :%zuB\n",
		 cache_maint_inner_threshold);
	return count;
}
/*
 * File operations for the "cache_maint_inner_threshold" debugfs entry:
 * reads go through the seq_file helpers, writes through
 * cache_inner_threshold_write().
 */
static const struct file_operations cache_inner_threshold_fops = {
.open = cache_inner_threshold_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = cache_inner_threshold_write,
};
/*
 * nvmap_cache_debugfs_init() - create the "cache" debugfs directory.
 * @nvmap_root: parent debugfs directory
 *
 * The set/ways enable flag and the inner threshold file are only created
 * when set/ways maintenance is enabled at the time of the call; the
 * "nvmap_disable_vaddr_for_cache_maint" entry is always created.
 *
 * Return: 0 on success, -ENODEV when @nvmap_root is NULL or the directory
 * cannot be created.
 */
int nvmap_cache_debugfs_init(struct dentry *nvmap_root)
{
	struct dentry *cache_dir;

	if (!nvmap_root)
		return -ENODEV;

	cache_dir = debugfs_create_dir("cache", nvmap_root);
	if (!cache_dir)
		return -ENODEV;

	if (nvmap_cache_maint_by_set_ways) {
		debugfs_create_x32("nvmap_cache_maint_by_set_ways",
				   S_IRUSR | S_IWUSR, cache_dir,
				   &nvmap_cache_maint_by_set_ways);

		debugfs_create_file("cache_maint_inner_threshold",
				    S_IRUSR | S_IWUSR, cache_dir, NULL,
				    &cache_inner_threshold_fops);
	}

	/* Exposes the static key's enable count as a plain atomic_t. */
	debugfs_create_atomic_t("nvmap_disable_vaddr_for_cache_maint",
				S_IRUSR | S_IWUSR, cache_dir,
				&nvmap_disable_vaddr_for_cache_maint.enabled);

	return 0;
}