/*
* drivers/video/tegra/host/nvhost_cdma.c
*
* Tegra Graphics Host Command DMA
*
* Copyright (c) 2010-2018, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "nvhost_cdma.h"
#include "nvhost_channel.h"
#include "nvhost_job.h"
#include "dev.h"
#include "debug.h"
#include "chip_support.h"
#include
#include
#include
#include
#include
#include
/*
* TODO:
* stats
* - for figuring out what to optimize further
* resizable push buffer
* - some channels hardly need any, some channels (3d) could use more
*/
/*
* push_buffer
*
* The push buffer is a circular array of words to be fetched by command DMA.
* Note that it works slightly differently to the sync queue; fence == cur
* means that the push buffer is full, not empty.
*/
/**
* Allocate pushbuffer memory
*/
int nvhost_push_buffer_alloc(struct push_buffer *pb)
{
struct nvhost_cdma *cdma = pb_to_cdma(pb);
pb->mapped = NULL;
pb->dma_addr = 0;
pb->mapped = dma_alloc_coherent(&cdma_to_dev(cdma)->dev->dev,
PUSH_BUFFER_SIZE + 4,
&pb->dma_addr,
GFP_KERNEL);
if (!pb->mapped) {
nvhost_err(NULL, "failed to allocate pushbuffer");
pb->mapped = NULL;
return -ENOMEM;
}
/* for now, map pushbuffer to all address spaces */
nvhost_vm_map_static(cdma_to_dev(cdma)->dev, pb->mapped,
pb->dma_addr, PUSH_BUFFER_SIZE + 4);
return 0;
}
/**
 * nvhost_push_buffer_destroy() - release the backing memory of a push buffer
 * @pb: push buffer to tear down
 *
 * Frees the PUSH_BUFFER_SIZE + 4 byte coherent region if one is mapped and
 * always resets @pb->mapped and @pb->dma_addr afterwards.
 */
void nvhost_push_buffer_destroy(struct push_buffer *pb)
{
	struct nvhost_cdma *cdma = pb_to_cdma(pb);

	if (pb->mapped != NULL) {
		dma_free_coherent(&cdma_to_dev(cdma)->dev->dev,
				  PUSH_BUFFER_SIZE + 4,
				  pb->mapped, pb->dma_addr);
	}

	/* Forget the addresses whether or not anything was freed */
	pb->mapped = NULL;
	pb->dma_addr = 0;
}
/**
 * nvhost_push_buffer_push_to() - write one two-word slot at the put offset
 * @pb: push buffer to write into
 * @op1: first word of the slot
 * @op2: second word of the slot
 *
 * Stores the two words at byte offset @pb->cur of the mapping and advances
 * @pb->cur by 8, wrapping at PUSH_BUFFER_SIZE.
 * Caller must ensure the push buffer is not full (cur == fence triggers a
 * warning).
 */
static void nvhost_push_buffer_push_to(struct push_buffer *pb,
				       u32 op1, u32 op2)
{
	u32 put = pb->cur;
	u32 *slot = (u32 *)((uintptr_t)pb->mapped + put);

	WARN_ON(put == pb->fence);

	slot[0] = op1;
	slot[1] = op2;

	pb->cur = (put + 8) & (PUSH_BUFFER_SIZE - 1);
}
/**
 * nvhost_push_buffer_pop_from() - release two-word slots from the push buffer
 * @pb: push buffer to release slots from
 * @slots: number of two-word (8 byte) slots to release
 *
 * Moves @pb->fence forward by @slots * 8 bytes, wrapping at PUSH_BUFFER_SIZE.
 * Caller must ensure the push buffer is not empty.
 */
static void nvhost_push_buffer_pop_from(struct push_buffer *pb,
					unsigned int slots)
{
	unsigned int bytes = slots * 8;

	/* Advance the fence past the slots that are no longer in use */
	pb->fence = (pb->fence + bytes) & (PUSH_BUFFER_SIZE - 1);
}
/**
 * nvhost_push_buffer_space() - number of free two-word slots
 * @pb: push buffer to query
 *
 * Return: the wrapped byte distance from @pb->cur to @pb->fence divided by
 * the slot size (8). This is 0 when fence == cur.
 */
static u32 nvhost_push_buffer_space(struct push_buffer *pb)
{
	u32 free_bytes = (pb->fence - pb->cur) & (PUSH_BUFFER_SIZE - 1);

	return free_bytes / 8;
}
/**
 * nvhost_push_buffer_putptr() - current put offset of the push buffer
 * @pb: push buffer to query
 *
 * Return: @pb->cur, the byte offset at which the next slot is written.
 */
u32 nvhost_push_buffer_putptr(struct push_buffer *pb)
{
	u32 put = pb->cur;

	return put;
}
/**
 * nvhost_push_buffer_start() - bus address of the start of the push buffer
 * @pb: push buffer to query
 *
 * Return: @pb->dma_addr.
 */
dma_addr_t nvhost_push_buffer_start(struct push_buffer *pb)
{
	dma_addr_t base = pb->dma_addr;

	return base;
}
/**
 * nvhost_push_buffer_end() - bus address just past the push buffer
 * @pb: push buffer to query
 *
 * Return: @pb->dma_addr plus PUSH_BUFFER_SIZE + 4, i.e. the first address
 * after the region sized in nvhost_push_buffer_alloc().
 */
dma_addr_t nvhost_push_buffer_end(struct push_buffer *pb)
{
	dma_addr_t base = pb->dma_addr;

	return base + PUSH_BUFFER_SIZE + 4;
}
/**
 * add_to_sync_queue() - append a job to the tail of the sync queue
 * @cdma: channel DMA state owning the queue
 * @job: job to enqueue
 * @nr_slots: number of push buffer slots the job occupies
 * @first_get: push buffer offset at which the job starts
 *
 * Records the slot bookkeeping on the job, takes a job reference with
 * nvhost_job_get() and links the job in under sync_queue_lock.
 */
static void add_to_sync_queue(struct nvhost_cdma *cdma,
			      struct nvhost_job *job,
			      u32 nr_slots,
			      u32 first_get)
{
	job->first_get = first_get;
	job->num_slots = nr_slots;

	nvhost_job_get(job);

	mutex_lock(&cdma->sync_queue_lock);
	list_add_tail(&job->list, &cdma->sync_queue);
	mutex_unlock(&cdma->sync_queue_lock);
}
/**
 * cdma_status_locked() - sample the state behind a cdma event
 * @cdma: channel DMA state to inspect
 * @event: which condition to report on
 *
 * - CDMA_EVENT_SYNC_QUEUE_EMPTY: 1 if the sync queue is empty, otherwise 0
 * - CDMA_EVENT_PUSH_BUFFER_SPACE: number of free slots in the push buffer
 * - anything else: 0
 *
 * Must be called with the cdma lock held.
 */
static unsigned int cdma_status_locked(struct nvhost_cdma *cdma,
				       enum cdma_event event)
{
	if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY)
		return list_empty(&cdma->sync_queue) ? 1 : 0;

	if (event == CDMA_EVENT_PUSH_BUFFER_SPACE)
		return nvhost_push_buffer_space(&cdma->push_buffer);

	return 0;
}
/**
 * nvhost_cdma_wait_locked() - sleep (if necessary) until an event happens
 * @cdma: channel DMA state to wait on
 * @event: CDMA_EVENT_SYNC_QUEUE_EMPTY or CDMA_EVENT_PUSH_BUFFER_SPACE
 *
 * - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty.
 *   - Returns 1
 * - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer
 *   - Return the amount of space (> 0)
 *
 * Must be called with cdma->lock held for read. The read lock is dropped
 * while yielding or sleeping and is held again when the function returns.
 *
 * NOTE(review): for any other @event the function returns -EINVAL through an
 * unsigned int, so a caller sees a large positive value rather than a
 * negative error code.
 */
unsigned int nvhost_cdma_wait_locked(struct nvhost_cdma *cdma,
enum cdma_event event)
{
struct mutex *lock;
/*
 * Each event has its own mutex; it is the one update_cdma_locked() holds
 * when it signals that event.
 */
if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY)
lock = &cdma->sync_queue_lock;
else if (event == CDMA_EVENT_PUSH_BUFFER_SPACE)
lock = &cdma->push_buffer_lock;
else {
nvhost_err(&cdma->pdev->dev,
"invalid event %d", event);
return -EINVAL;
}
mutex_lock(lock);
/*
 * Note that we call this function with read lock held on cdma->lock
 * So we need to drop both cdma->lock and event lock (either
 * sync_queue_lock or push_buffer_lock) before we start waiting on
 * event or before calling schedule()
 *
 * We need to drop event lock since the event is signalled with
 * the event lock acquired
 * We need to drop cdma->lock since in case event is never signalled
 * and timeout routine is invoked - this will result in deadlock since
 * the timeout routine will also request write lock on cdma->lock
 */
for (;;) {
unsigned int space;
/* Re-check the condition each time round, with the event lock held */
space = cdma_status_locked(cdma, event);
if (space) {
mutex_unlock(lock);
return space;
}
trace_nvhost_wait_cdma(cdma_to_channel(cdma)->dev->name,
event);
/* If somebody has managed to already start waiting, yield */
if (cdma->event != CDMA_EVENT_NONE) {
mutex_unlock(lock);
up_read(&cdma->lock);
schedule();
down_read(&cdma->lock);
mutex_lock(lock);
continue;
}
/*
 * Register as the waiter: update_cdma_locked() resets cdma->event to
 * CDMA_EVENT_NONE and does up(&cdma->sem) when this event occurs.
 */
cdma->event = event;
mutex_unlock(lock);
up_read(&cdma->lock);
/* start waiting */
down(&cdma->sem);
/* Take the locks back in the same order as on entry, then re-check */
down_read(&cdma->lock);
mutex_lock(lock);
}
/* Not reached: the loop above only exits through its return statement */
return 0;
}
/**
 * cdma_start_timer_locked() - arm the timeout for a job that has not
 * completed yet
 * @cdma: channel DMA state holding the timeout bookkeeping
 * @job: job whose client, syncpoints and timeout are recorded
 *
 * Does nothing for virtual devices or when a timer is already armed
 * (cdma->timeout.clientid is non-zero). Otherwise copies the job's timeout
 * parameters into cdma->timeout and, if the job has a non-zero timeout,
 * schedules the delayed work.
 *
 * Must be called with the cdma lock held.
 */
static void cdma_start_timer_locked(struct nvhost_cdma *cdma,
				    struct nvhost_job *job)
{
	/* In the virtual case, timeouts are handled by the server */
	if (nvhost_dev_is_virtual(cdma_to_dev(cdma)->dev))
		return;

	mutex_lock(&cdma->timeout_lock);

	/* A non-zero clientid means the timer was already started */
	if (!cdma->timeout.clientid) {
		cdma->timeout.clientid = job->clientid;
		cdma->timeout.sp = job->sp;
		cdma->timeout.num_syncpts = job->num_syncpts;
		cdma->timeout.start_ktime = ktime_get();
		cdma->timeout.timeout_debug_dump = job->timeout_debug_dump;
		cdma->timeout.timeout = job->timeout;
		cdma->timeout.allow_dependency = true;

		if (job->timeout)
			schedule_delayed_work(&cdma->timeout.wq,
				msecs_to_jiffies(cdma->timeout.timeout));
	}

	mutex_unlock(&cdma->timeout_lock);
}
/**
 * stop_cdma_timer_locked() - disarm the timeout when a submission completes
 * @cdma: channel DMA state holding the timeout bookkeeping
 *
 * Cancels the delayed work synchronously, then clears
 * cdma->timeout.clientid under timeout_lock so a new timer can be started.
 *
 * Must be called with the cdma lock held.
 */
static void stop_cdma_timer_locked(struct nvhost_cdma *cdma)
{
	cancel_delayed_work_sync(&cdma->timeout.wq);

	mutex_lock(&cdma->timeout_lock);
	/* Storing 0 over an already-zero id is harmless under the lock */
	cdma->timeout.clientid = 0;
	mutex_unlock(&cdma->timeout_lock);
}
/**
 * update_cdma_locked() - retire completed jobs from the head of the sync queue
 * @cdma: channel DMA state to update
 *
 * For all sync queue entries that have already finished according to the
 * current sync point registers:
 *  - unpin & unref their mems
 *  - pop their push buffer slots
 *  - remove them from the sync queue
 * Processing stops at the first job that still has an unexpired syncpoint;
 * the timeout timer is (re)started for that job.
 * This is normally called from the host code's worker thread, but can be
 * called manually if necessary.
 * Must be called with the cdma lock held.
 */
static void update_cdma_locked(struct nvhost_cdma *cdma)
{
struct nvhost_master *dev = cdma_to_dev(cdma);
struct nvhost_syncpt *sp = &dev->syncpt;
struct nvhost_job *job;
/* If CDMA is stopped, queue is cleared and we can return */
if (!cdma->running)
return;
/*
 * Walk the sync queue, reading the sync point registers as necessary,
 * to consume as many sync queue entries as possible without blocking
 */
while (1) {
bool completed = true;
int i;
mutex_lock(&cdma->sync_queue_lock);
if (list_empty(&cdma->sync_queue)) {
/* Wake a waiter registered in nvhost_cdma_wait_locked(), if any */
if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY) {
cdma->event = CDMA_EVENT_NONE;
up(&cdma->sem);
}
mutex_unlock(&cdma->sync_queue_lock);
break;
}
job = list_first_entry(&cdma->sync_queue,
struct nvhost_job, list);
/*
 * Check whether this job's syncpts have completed, and bail if not.
 * The loop stops at the first syncpt that has not expired.
 */
for (i = 0; completed && i < job->num_syncpts; ++i)
completed &= nvhost_syncpt_is_expired(sp,
job->sp[i].id, job->sp[i].fence);
if (!completed) {
/* Start timer on next pending syncpt */
mutex_unlock(&cdma->sync_queue_lock);
cdma_start_timer_locked(cdma, job);
break;
}
/* Unlink while still holding sync_queue_lock; the rest runs without it */
list_del(&job->list);
mutex_unlock(&cdma->sync_queue_lock);
/* Cancel timeout, when a buffer completes */
stop_cdma_timer_locked(cdma);
/* Drop syncpoint references from this job */
for (i = 0; i < job->num_syncpts; ++i)
nvhost_syncpt_put_ref(sp, job->sp[i].id);
/* Unpin the memory */
nvhost_job_unpin(job);
/* Pop push buffer slots */
mutex_lock(&cdma->push_buffer_lock);
if (job->num_slots) {
struct push_buffer *pb = &cdma->push_buffer;
nvhost_push_buffer_pop_from(pb, job->num_slots);
/* Space was just freed: wake a waiter for push buffer space, if any */
if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE) {
cdma->event = CDMA_EVENT_NONE;
up(&cdma->sem);
}
}
mutex_unlock(&cdma->push_buffer_lock);
/* presumably balances the nvhost_job_get() in add_to_sync_queue() */
nvhost_job_put(job);
}
}
/**
 * nvhost_cdma_finalize_job_incrs() - complete a job syncpoint from the CPU
 * @syncpt: syncpoint state holding the cached minimum values
 * @sp: job syncpoint whose id and fence are applied
 *
 * Sets the cached minimum of syncpoint @sp->id to @sp->fence, invokes the
 * chip-specific reset operation for that syncpoint and then calls
 * nvhost_syncpt_update_min() on it.
 */
void nvhost_cdma_finalize_job_incrs(struct nvhost_syncpt *syncpt,
				    struct nvhost_job_syncpt *sp)
{
	u32 target = sp->fence;
	u32 syncpt_id = sp->id;

	atomic_set(&syncpt->min_val[syncpt_id], target);
	syncpt_op().reset(syncpt, syncpt_id);
	nvhost_syncpt_update_min(syncpt, syncpt_id);
}
void nvhost_cdma_update_sync_queue(struct nvhost_cdma *cdma,
struct nvhost_syncpt *syncpt, struct platform_device *dev)
{
u32 get_restart;
struct nvhost_job *job = NULL;
int nb_pts = nvhost_syncpt_nb_hw_pts(syncpt);
DECLARE_BITMAP(syncpt_used, nb_pts);
bool is_empty;
bitmap_zero(syncpt_used, nb_pts);
/* ensure that no-one in CPU updates syncpoint values */
mutex_lock(&syncpt->cpu_increment_mutex);
/*
* Move the sync_queue read pointer to the first entry that hasn't
* completed based on the current HW syncpt value. It's likely there
* won't be any (i.e. we're still at the head), but covers the case
* where a syncpt incr happens just prior/during the teardown.
*/
dev_dbg(&dev->dev,
"%s: skip completed buffers still in sync_queue\n",
__func__);
mutex_lock(&cdma->sync_queue_lock);
list_for_each_entry(job, &cdma->sync_queue, list) {
int i;
for (i = 0; i < job->num_syncpts; ++i) {
u32 id = job->sp[i].id;
if (!test_bit(id, syncpt_used))
nvhost_syncpt_update_min(syncpt, id);
set_bit(id, syncpt_used);
if (!nvhost_syncpt_is_expired(syncpt, id,
job->sp[i].fence))
goto out;
}
if (nvhost_debug_force_timeout_dump ||
cdma->timeout.timeout_debug_dump)
nvhost_job_dump(&dev->dev, job);
}
out:
mutex_unlock(&cdma->sync_queue_lock);
/*
* Walk the sync_queue, first incrementing with the CPU syncpts that
* are partially executed (the first buffer) or fully skipped while
* still in the current context (slots are also NOP-ed).
*
* At the point contexts are interleaved, syncpt increments must be
* done inline with the pushbuffer from a GATHER buffer to maintain
* the order (slots are modified to be a GATHER of syncpt incrs).
*
* Note: save in get_restart the location where the timed out buffer
* started in the PB, so we can start the refetch from there (with the
* modified NOP-ed PB slots). This lets things appear to have completed
* properly for this buffer and resources are freed.
*/
dev_dbg(&dev->dev,
"%s: perform CPU incr on pending same ctx buffers\n",
__func__);
get_restart = cdma->last_put;
mutex_lock(&cdma->sync_queue_lock);
is_empty = list_empty(&cdma->sync_queue);
mutex_unlock(&cdma->sync_queue_lock);
if (!is_empty)
get_restart = job->first_get;
/* do CPU increments as long as this context continues */
mutex_lock(&cdma->sync_queue_lock);
list_for_each_entry_from(job, &cdma->sync_queue, list) {
int i;
/* different context, gets us out of this loop */
if (job->clientid != cdma->timeout.clientid)
break;
if (nvhost_debug_force_timeout_dump ||
cdma->timeout.timeout_debug_dump)
nvhost_job_dump(&dev->dev, job);
/* won't need a timeout when replayed */
job->timeout = 0;
/* set notifier to userspace about submit timeout */
nvhost_job_set_notifier(job, NVHOST_CHANNEL_SUBMIT_TIMEOUT);
for (i = 0; i < job->num_syncpts; ++i)
nvhost_cdma_finalize_job_incrs(syncpt, job->sp + i);
/* cleanup push buffer */
cdma_op().timeout_pb_cleanup(cdma, job->first_get,
job->num_slots);
}
mutex_unlock(&cdma->sync_queue_lock);
mutex_unlock(&syncpt->cpu_increment_mutex);
mutex_lock(&cdma->sync_queue_lock);
list_for_each_entry_from(job, &cdma->sync_queue, list)
if (job->clientid == cdma->timeout.clientid)
job->timeout = min(job->timeout, 500);
mutex_unlock(&cdma->sync_queue_lock);
dev_dbg(&dev->dev,
"%s: finished sync_queue modification\n", __func__);
/* roll back DMAGET and start up channel again */
cdma_op().timeout_teardown_end(cdma, get_restart);
}
/**
 * nvhost_cdma_init() - initialise a cdma
 * @pdev: platform device stored in @cdma->pdev
 * @cdma: channel DMA state to initialise
 *
 * Sets up the locks, the wait semaphore and the empty sync queue, marks the
 * cdma as neither running nor torn down, and then runs the chip-specific
 * push buffer init operation.
 *
 * Return: the result of the push buffer init operation (0 on success).
 */
int nvhost_cdma_init(struct platform_device *pdev,
		     struct nvhost_cdma *cdma)
{
	init_rwsem(&cdma->lock);
	sema_init(&cdma->sem, 0);
	mutex_init(&cdma->push_buffer_lock);
	mutex_init(&cdma->sync_queue_lock);
	mutex_init(&cdma->timeout_lock);

	INIT_LIST_HEAD(&cdma->sync_queue);

	cdma->event = CDMA_EVENT_NONE;
	cdma->running = false;
	cdma->torndown = false;
	cdma->pdev = pdev;

	return cdma_pb_op().init(&cdma->push_buffer);
}
/**
 * nvhost_cdma_deinit() - destroy a cdma
 * @cdma: channel DMA state to tear down
 *
 * Warns if the cdma is still running, releases the push buffer memory and
 * then runs the chip-specific timeout_destroy operation.
 */
void nvhost_cdma_deinit(struct nvhost_cdma *cdma)
{
	WARN_ON(cdma->running);

	nvhost_push_buffer_destroy(&cdma->push_buffer);
	cdma_op().timeout_destroy(cdma);
}
/**
 * nvhost_cdma_begin() - begin a cdma submit
 * @cdma: channel DMA state to submit on
 * @job: job about to be pushed
 *
 * Takes cdma->lock for read. On the first submit that carries a timeout the
 * chip-specific timeout state is initialised. The channel is started if it
 * is not running, and the per-submit slot counters and first_get are reset.
 *
 * Return: 0 on success, with cdma->lock still held for read (it is released
 * by nvhost_cdma_end()); otherwise the error from timeout_init, with the
 * lock released.
 */
int nvhost_cdma_begin(struct nvhost_cdma *cdma, struct nvhost_job *job)
{
	down_read(&cdma->lock);

	/* init state on first submit with timeout value */
	if (job->timeout && !cdma->timeout.initialized) {
		int ret = cdma_op().timeout_init(cdma, job->sp->id);

		if (ret) {
			up_read(&cdma->lock);
			return ret;
		}
	}

	if (!cdma->running)
		cdma_op().start(cdma);

	cdma->slots_free = 0;
	cdma->slots_used = 0;
	cdma->first_get = nvhost_push_buffer_putptr(&cdma->push_buffer);

	return 0;
}
/**
 * trace_write_gather() - emit trace events for a gather
 * @cdma: channel DMA state, used to look up the channel device name
 * @cpuva: CPU address of the gather, passed through to the trace event
 * @iova: device address of the gather; nothing is traced when it is 0
 * @offset: starting offset passed to the trace event
 * @words: number of words to report
 *
 * Emits one trace event per batch of at most TRACE_MAX_LENGTH words, as
 * there seems to be a limit of how much you can output to ftrace at once.
 */
static void trace_write_gather(struct nvhost_cdma *cdma,
			       u32 *cpuva, dma_addr_t iova,
			       u32 offset, u32 words)
{
	u32 done;

	if (!iova)
		return;

	for (done = 0; done < words; done += TRACE_MAX_LENGTH) {
		trace_nvhost_cdma_push_gather(
			cdma_to_channel(cdma)->dev->name,
			(u32)((uintptr_t)iova),
			min(words - done, TRACE_MAX_LENGTH),
			offset + done * sizeof(u32),
			cpuva);
	}
}
/**
 * nvhost_cdma_push() - push two words into a push buffer slot
 * @cdma: channel DMA state to push to
 * @op1: first word
 * @op2: second word
 *
 * Optionally traces the two words (when nvhost_debug_trace_cmdbuf is set)
 * and forwards them to nvhost_cdma_push_gather() with no gather attached.
 * Blocks as necessary if the push buffer is full.
 */
void nvhost_cdma_push(struct nvhost_cdma *cdma, u32 op1, u32 op2)
{
	if (nvhost_debug_trace_cmdbuf) {
		trace_nvhost_cdma_push(cdma_to_channel(cdma)->dev->name,
				       op1, op2);
	}

	nvhost_cdma_push_gather(cdma, NULL, 0, 0, op1, op2);
}
/**
 * nvhost_cdma_push_gather() - push two words, optionally tracing a gather
 * @cdma: channel DMA state to push to
 * @cpuva: CPU address of the gather, or NULL when there is nothing to trace
 * @iova: device address of the gather
 * @offset: offset of the gather, used for tracing only
 * @op1: first word; its low 13 bits are used as the traced word count
 * @op2: second word
 *
 * Blocks as necessary if the push buffer is full: when the cached
 * cdma->slots_free count is 0 it waits for CDMA_EVENT_PUSH_BUFFER_SPACE.
 * The slot is then written under push_buffer_lock.
 */
void nvhost_cdma_push_gather(struct nvhost_cdma *cdma,
			     u32 *cpuva, dma_addr_t iova,
			     u32 offset, u32 op1, u32 op2)
{
	u32 avail = cdma->slots_free;

	if (cpuva)
		trace_write_gather(cdma, cpuva, iova, offset, op1 & 0x1fff);

	if (!avail)
		avail = nvhost_cdma_wait_locked(cdma,
						CDMA_EVENT_PUSH_BUFFER_SPACE);

	/* Account for the slot about to be consumed */
	cdma->slots_free = avail - 1;
	cdma->slots_used++;

	mutex_lock(&cdma->push_buffer_lock);
	nvhost_push_buffer_push_to(&cdma->push_buffer, op1, op2);
	mutex_unlock(&cdma->push_buffer_lock);
}
/**
 * nvhost_cdma_end() - end a cdma submit
 * @cdma: channel DMA state the job was pushed to
 * @job: job that was pushed
 *
 * Adds the job to the sync queue together with the number of slots to be
 * freed from the pushbuffer, kicks off DMA, and starts the timeout timer if
 * the sync queue was empty beforehand. Releases the read lock on cdma->lock
 * taken in nvhost_cdma_begin().
 * The handles for a submit must all be pinned at the same time, but they can
 * be unpinned in smaller chunks.
 */
void nvhost_cdma_end(struct nvhost_cdma *cdma,
		     struct nvhost_job *job)
{
	bool queue_was_empty;

	mutex_lock(&cdma->sync_queue_lock);
	queue_was_empty = list_empty(&cdma->sync_queue);
	mutex_unlock(&cdma->sync_queue_lock);

	add_to_sync_queue(cdma, job, cdma->slots_used, cdma->first_get);

	cdma_op().kick(cdma);

	/* start timer on idle -> active transitions */
	if (queue_was_empty)
		cdma_start_timer_locked(cdma, job);

	trace_nvhost_cdma_end(job->ch->dev->name);

	up_read(&cdma->lock);
}
/**
 * nvhost_cdma_update() - update cdma state from current sync point values
 * @cdma: channel DMA state to update
 *
 * Locked wrapper: runs update_cdma_locked() while holding cdma->lock for
 * read.
 */
void nvhost_cdma_update(struct nvhost_cdma *cdma)
{
	down_read(&cdma->lock);

	update_cdma_locked(cdma);

	up_read(&cdma->lock);
}