tegrakernel/kernel/nvidia/drivers/video/tegra/host/pva/pva_queue.c

/*
* PVA Task Management
*
* Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/delay.h>
#include <asm/ioctls.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/nvhost.h>
#include <linux/cvnas.h>
#include <trace/events/nvhost.h>
#ifdef CONFIG_EVENTLIB
#include <linux/keventlib.h>
#include <uapi/linux/nvdev_fence.h>
#include <uapi/linux/nvhost_events.h>
#endif
#include <uapi/linux/nvhost_pva_ioctl.h>
#include "nvhost_syncpt_unit_interface.h"
#include "../drivers/staging/android/sync.h"
#include "pva.h"
#include "pva-task.h"
#include "nvhost_buffer.h"
#include "nvhost_queue.h"
#include "pva_mailbox.h"
#include "pva_ccq.h"
#include "pva_queue.h"
#include "dev.h"
#include "hw_cfg_pva.h"
#include "t194/hardware_t194.h"
#include "pva-vpu-perf.h"
#include <trace/events/nvhost_pva.h>
#define ACTION_LIST_FENCE_SIZE 13
#define ACTION_LIST_STATUS_OPERATION_SIZE 11
#define ACTION_LIST_TERMINATION_SIZE 1
#define ACTION_LIST_STATS_SIZE 9
/*
* The worst-case input action buffer size:
* - Prefences trigger a word memory operation (size 13 bytes)
* - Input status reads trigger a half-word memory operation (size 11 bytes)
* - The action list is terminated by a null action (1 byte)
*/
#define INPUT_ACTION_BUFFER_SIZE ( \
ALIGN((PVA_MAX_PREFENCES + 10) * ACTION_LIST_FENCE_SIZE + \
PVA_MAX_INPUT_STATUS * ACTION_LIST_STATUS_OPERATION_SIZE + \
ACTION_LIST_TERMINATION_SIZE, 256))
/*
* The worst-case output action buffer size:
* - Postfences trigger a word memory operation (size 13 bytes)
* - Output status write triggers a half-word memory operation (size 11 bytes)
* - Output action list includes an operation for statistics purposes (size 9 bytes)
* - Output action list includes syncpoint and semaphore increments
* - The action list is terminated by a null action (1 byte)
*/
#define OUTPUT_ACTION_BUFFER_SIZE ( \
ALIGN((PVA_MAX_POSTFENCES + 10) * ACTION_LIST_FENCE_SIZE + \
PVA_MAX_OUTPUT_STATUS * ACTION_LIST_STATUS_OPERATION_SIZE + \
ACTION_LIST_STATS_SIZE * 2 + \
ACTION_LIST_FENCE_SIZE * 2 + \
ACTION_LIST_TERMINATION_SIZE, 256))
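/*
* Illustrative sizing sketch (comment only, not used by the code): assuming
* PVA_MAX_PREFENCES == 32 and PVA_MAX_INPUT_STATUS == 16 (the real limits
* come from uapi/linux/nvhost_pva_ioctl.h), the input action buffer rounds
* up as
*
*   (32 + 10) * 13 + 16 * 11 + 1 = 723 bytes
*   ALIGN(723, 256)              = 768 bytes
*
* The "+ 10" headroom covers the extra fence-style writes that
* pva_task_write_preactions() emits for the generalized pvafences
* (SOT timestamps and syncpoint increments) beyond the plain prefences.
*/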
struct pva_hw_task {
struct pva_task task;
struct pva_action_list preaction_list;
struct pva_action_list postaction_list;
struct pva_task_parameter_array input_parameter_array[PVA_PARAM_LAST];
struct pva_task_parameter_array output_parameter_array[PVA_PARAM_LAST];
u8 preactions[INPUT_ACTION_BUFFER_SIZE];
u8 postactions[OUTPUT_ACTION_BUFFER_SIZE];
struct pva_task_parameter_desc input_surface_desc;
struct pva_task_surface input_surfaces[PVA_MAX_INPUT_SURFACES];
struct pva_task_parameter_desc output_surface_desc;
struct pva_task_surface output_surfaces[PVA_MAX_OUTPUT_SURFACES];
struct pva_task_statistics statistics;
struct pva_task_vpu_perf_counter
vpu_perf_counters[PVA_TASK_VPU_NUM_PERF_COUNTERS];
u8 opaque_data[PVA_MAX_PRIMARY_PAYLOAD_SIZE];
};
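/*
* Layout note: struct pva_hw_task is the single DMA-visible blob handed to
* the PVA R5 firmware, and its sub-structures are referenced by byte offset
* within that blob. The writer functions below therefore build firmware
* pointers as task->dma_addr plus an offsetof() into this struct, e.g.
* (sketch of the pattern used by pva_task_write_output_surfaces()):
*
*   surface_parameter->address = task->dma_addr +
*                                offsetof(struct pva_hw_task,
*                                         output_surface_desc);
*/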
static void pva_task_dump(struct pva_submit_task *task)
{
int i;
nvhost_dbg_info("task=%p, "
"input_scalars=(handle=%u, offset=%x), "
"input_surfaces=%p, "
"output_scalars=(handle=%u, offset=%u), "
"output_surfaces=%p, "
"primary_payload=%p (size=%u)",
task,
task->input_scalars.handle, task->input_scalars.offset,
task->input_surfaces,
task->output_scalars.handle, task->output_scalars.offset,
task->output_surfaces,
task->primary_payload, task->primary_payload_size);
for (i = 0; i < task->num_prefences; i++)
nvhost_dbg_info("prefence %d: type=%u, "
"syncpoint_index=%u, syncpoint_value=%u, "
"sync_fd=%u, semaphore_handle=%u, "
"semaphore_offset=%u, semaphore_value=%u", i,
task->prefences[i].type,
task->prefences[i].syncpoint_index,
task->prefences[i].syncpoint_value,
task->prefences[i].sync_fd,
task->prefences[i].semaphore_handle,
task->prefences[i].semaphore_offset,
task->prefences[i].semaphore_value);
for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) {
int j;
for (j = 0; j < task->num_pvafences[i]; j++) {
nvhost_dbg_info("pvafence %d: type=%u, "
"syncpoint_index=%u, syncpoint_value=%u, "
"sync_fd=%u, semaphore_handle=%u, "
"semaphore_offset=%u, semaphore_value=%u", i,
task->pvafences[i][j].fence.type,
task->pvafences[i][j].fence.syncpoint_index,
task->pvafences[i][j].fence.syncpoint_value,
task->pvafences[i][j].fence.sync_fd,
task->pvafences[i][j].fence.semaphore_handle,
task->pvafences[i][j].fence.semaphore_offset,
task->pvafences[i][j].fence.semaphore_value);
}
}
for (i = 0; i < task->num_input_surfaces; i++)
nvhost_dbg_info("input surface %d: format=%llu, "
"surface_handle=%u, surface_offset=%u, "
"roi_handle=%u, roi_offset=%u, surface_stride=%u, "
"line_stride=%u, depth=%u, width=%u, height=%u, "
"layout=%u", i,
task->input_surfaces[i].format,
task->input_surfaces[i].surface_handle,
task->input_surfaces[i].surface_offset,
task->input_surfaces[i].roi_handle,
task->input_surfaces[i].roi_offset,
task->input_surfaces[i].surface_stride,
task->input_surfaces[i].line_stride,
task->input_surfaces[i].depth,
task->input_surfaces[i].width,
task->input_surfaces[i].height,
task->input_surfaces[i].layout);
for (i = 0; i < task->num_output_surfaces; i++)
nvhost_dbg_info("output surface %d: format=%llu, "
"surface_handle=%u, surface_offset=%u, "
"roi_handle=%u, roi_offset=%u, surface_stride=%u,"
"line_stride=%u, depth=%u, width=%u, height=%u, "
"layout=%u", i,
task->output_surfaces[i].format,
task->output_surfaces[i].surface_handle,
task->output_surfaces[i].surface_offset,
task->output_surfaces[i].roi_handle,
task->output_surfaces[i].roi_offset,
task->output_surfaces[i].surface_stride,
task->output_surfaces[i].line_stride,
task->output_surfaces[i].depth,
task->output_surfaces[i].width,
task->output_surfaces[i].height,
task->output_surfaces[i].layout);
for (i = 0; i < task->num_pointers; i++)
nvhost_dbg_info("pointer %d: handle=%u, offset=%u",
i, task->pointers[i].handle,
task->pointers[i].offset);
for (i = 0; i < task->num_input_task_status; i++)
nvhost_dbg_info("input task status %d: handle=%u, offset=%u",
i, task->input_task_status[i].handle,
task->input_task_status[i].offset);
for (i = 0; i < task->num_output_task_status; i++)
nvhost_dbg_info("output task status %d: handle=%u, offset=%u",
i, task->output_task_status[i].handle,
task->output_task_status[i].offset);
}
static void pva_task_get_memsize(size_t *dma_size, size_t *kmem_size)
{
/* Align the task address to a 64-byte boundary for DMA use */
*dma_size = ALIGN(sizeof(struct pva_hw_task) + 64, 64);
*kmem_size = sizeof(struct pva_submit_task);
}
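/*
* Worked example of the padding above (hypothetical size, for illustration
* only): if sizeof(struct pva_hw_task) were 7000 bytes, the allocation
* would be ALIGN(7000 + 64, 64) = 7104 bytes. The extra 64 bytes of slack
* guarantee that the task base can be placed on a 64-byte boundary no
* matter where the pool allocation starts.
*/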
static void pva_task_unpin_mem(struct pva_submit_task *task)
{
int i;
int j;
#define UNPIN_MEMORY(dst_name) \
do { \
if ((((dst_name).dmabuf) != NULL) && \
((dst_name).dma_addr != 0)) { \
nvhost_buffer_submit_unpin(task->buffers, \
&((dst_name).dmabuf), 1); \
dma_buf_put((dst_name).dmabuf); \
} \
} while (0)
for (i = 0; i < task->num_input_surfaces; i++) {
UNPIN_MEMORY(task->input_surfaces_ext[i]);
UNPIN_MEMORY(task->input_surface_rois_ext[i]);
}
for (i = 0; i < task->num_output_surfaces; i++) {
UNPIN_MEMORY(task->output_surfaces_ext[i]);
UNPIN_MEMORY(task->output_surface_rois_ext[i]);
}
for (i = 0; i < task->num_prefences; i++) {
if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SEMAPHORE)
&& task->prefences[i].semaphore_handle)
UNPIN_MEMORY(task->prefences_sema_ext[i]);
}
for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) {
for (j = 0; j < task->num_pvafences[i]; j++) {
struct nvpva_fence *fence = &task->pvafences[i][j];
if ((fence->fence.type == NVDEV_FENCE_TYPE_SEMAPHORE)
&& fence->fence.semaphore_handle) {
UNPIN_MEMORY(task->pvafences_sema_ext[i][j]);
}
}
for (j = 0; j < task->num_pva_ts_buffers[i]; j++) {
struct nvpva_fence *fence = &task->pvafences[i][j];
if (fence->ts_buf_ptr.handle) {
UNPIN_MEMORY(task->pva_ts_buffers_ext[i][j]);
}
}
}
for (i = 0; i < task->num_input_task_status; i++) {
if (task->input_task_status[i].handle) {
UNPIN_MEMORY(task->input_task_status_ext[i]);
}
}
for (i = 0; i < task->num_output_task_status; i++) {
if (task->output_task_status[i].handle) {
UNPIN_MEMORY(task->output_task_status_ext[i]);
}
}
for (i = 0; i < task->num_pointers; i++) {
if (task->pointers[i].handle) {
UNPIN_MEMORY(task->pointers_ext[i]);
}
}
UNPIN_MEMORY(task->input_scalars_ext);
UNPIN_MEMORY(task->output_scalars_ext);
#undef UNPIN_MEMORY
}
static int pva_task_pin_mem(struct pva_submit_task *task)
{
u32 cvsram_base = nvcvnas_get_cvsram_base();
u32 cvsram_sz = nvcvnas_get_cvsram_size();
int err;
int i;
int j;
#define PIN_MEMORY(dst_name, dmabuf_fd) \
do { \
if (!(dmabuf_fd)) { \
err = -EFAULT; \
goto err_map_handle; \
} \
\
((dst_name).dmabuf) = dma_buf_get(dmabuf_fd); \
if (IS_ERR_OR_NULL((dst_name).dmabuf)) { \
(dst_name).dmabuf = NULL; \
err = -EFAULT; \
goto err_map_handle; \
} \
\
err = nvhost_buffer_submit_pin(task->buffers, \
&(dst_name).dmabuf, 1, \
&(dst_name).dma_addr, \
&(dst_name).size, \
&(dst_name).heap); \
if (err < 0) \
goto err_map_handle; \
} while (0)
/* Pin input surfaces */
for (i = 0; i < task->num_input_surfaces; i++) {
/* HACK: nvmap doesn't support CVNAS yet */
if (task->input_surfaces[i].surface_handle == 0) {
u32 offset = task->input_surfaces[i].surface_offset;
if (offset > cvsram_sz) {
err = -EINVAL;
goto err_map_handle;
}
task->input_surfaces_ext[i].dma_addr = cvsram_base;
task->input_surfaces_ext[i].size = cvsram_sz - offset;
task->input_surfaces_ext[i].heap =
NVHOST_BUFFERS_HEAP_CVNAS;
} else {
PIN_MEMORY(task->input_surfaces_ext[i],
task->input_surfaces[i].surface_handle);
}
if (task->input_surfaces[i].roi_handle)
PIN_MEMORY(task->input_surface_rois_ext[i],
task->input_surfaces[i].roi_handle);
}
/* ...and then output surfaces */
for (i = 0; i < task->num_output_surfaces; i++) {
if (task->output_surfaces[i].surface_handle == 0) {
/* HACK: To support the MISR test.
* The kernel is not supposed to convert the address being
* passed from the UMD, so set dma_addr to the offset passed
* from the UMD and the size to 4 MB.
*/
u32 offset = task->output_surfaces[i].surface_offset;
/* Only root is allowed to use offsets */
if (current_uid().val != 0) {
err = -EINVAL;
goto err_map_handle;
}
task->output_surfaces_ext[i].dma_addr = offset;
task->output_surfaces_ext[i].size = 0x400000;
} else {
PIN_MEMORY(task->output_surfaces_ext[i],
task->output_surfaces[i].surface_handle);
}
if (task->output_surfaces[i].roi_handle)
PIN_MEMORY(task->output_surface_rois_ext[i],
task->output_surfaces[i].roi_handle);
}
/* check fence semaphore_type before memory pin */
for (i = 0; i < task->num_prefences; i++) {
if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SEMAPHORE)
&& task->prefences[i].semaphore_handle) {
PIN_MEMORY(task->prefences_sema_ext[i],
task->prefences[i].semaphore_handle);
}
}
/* check the generalized fence structures */
for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) {
for (j = 0; j < task->num_pvafences[i]; j++) {
struct nvpva_fence *fence = &task->pvafences[i][j];
if ((fence->fence.type == NVDEV_FENCE_TYPE_SEMAPHORE)
&& fence->fence.semaphore_handle) {
PIN_MEMORY(task->pvafences_sema_ext[i][j],
fence->fence.semaphore_handle);
}
}
for (j = 0; j < task->num_pva_ts_buffers[i]; j++) {
struct nvpva_fence *fence = &task->pvafences[i][j];
if (fence->ts_buf_ptr.handle) {
PIN_MEMORY(task->pva_ts_buffers_ext[i][j],
fence->ts_buf_ptr.handle);
}
}
}
/* Pin the input and output action status */
for (i = 0; i < task->num_input_task_status; i++) {
if (task->input_task_status[i].handle) {
PIN_MEMORY(task->input_task_status_ext[i],
task->input_task_status[i].handle);
}
}
for (i = 0; i < task->num_output_task_status; i++) {
if (task->output_task_status[i].handle) {
PIN_MEMORY(task->output_task_status_ext[i],
task->output_task_status[i].handle);
}
}
/* Pin task pointers */
for (i = 0; i < task->num_pointers; i++) {
if (task->pointers[i].handle) {
PIN_MEMORY(task->pointers_ext[i],
task->pointers[i].handle);
}
}
/* Pin rest */
if (task->input_scalars.handle)
PIN_MEMORY(task->input_scalars_ext,
task->input_scalars.handle);
if (task->output_scalars.handle)
PIN_MEMORY(task->output_scalars_ext,
task->output_scalars.handle);
#undef PIN_MEMORY
return 0;
err_map_handle:
pva_task_unpin_mem(task);
return err;
}
static void pva_task_write_surfaces(struct pva_task_surface *hw_surface,
struct pva_surface *surface,
struct pva_parameter_ext *surface_ext,
struct pva_parameter_ext *roi_ext,
unsigned int count)
{
int i;
for (i = 0; i < count; i++) {
hw_surface[i].address = surface_ext[i].dma_addr +
surface[i].surface_offset;
hw_surface[i].surface_size = surface_ext[i].size;
hw_surface[i].roi_addr = roi_ext[i].dma_addr +
surface[i].roi_offset;
hw_surface[i].roi_size = roi_ext[i].size;
hw_surface[i].format = surface[i].format;
hw_surface[i].width = surface[i].width;
hw_surface[i].height = surface[i].height;
hw_surface[i].line_stride = surface[i].line_stride;
hw_surface[i].plane_stride = surface[i].surface_stride;
hw_surface[i].num_planes = surface[i].depth;
hw_surface[i].layout = surface[i].layout;
hw_surface[i].block_height_log2 = surface[i].block_height_log2;
/* Set bit 39 for block linear surfaces in the address field.
* This bit indicates that the memory subsystem should convert
* the block linear format into the common block linear format
* used by other engines in Tegra. The bit itself is dropped
* before the address translation is made in the SMMU.
*/
if (surface[i].layout == PVA_TASK_SURFACE_LAYOUT_BLOCK_LINEAR)
hw_surface[i].address |= PVA_BIT64(39);
hw_surface[i].memory = surface_ext[i].heap;
}
}
static inline int pva_task_write_atomic_op(u8 *base, u8 action)
{
*base = action;
return 1;
}
static inline int
pva_task_write_struct_ptr_op(u8 *base, u8 action, u64 addr, u16 val)
{
int i = 0;
base[i++] = action;
base[i++] = (u8)((addr >> 0) & 0xff);
base[i++] = (u8)((addr >> 8) & 0xff);
base[i++] = (u8)((addr >> 16) & 0xff);
base[i++] = (u8)((addr >> 24) & 0xff);
base[i++] = (u8)((addr >> 32) & 0xff);
base[i++] = (u8)((addr >> 40) & 0xff);
base[i++] = (u8)((addr >> 48) & 0xff);
base[i++] = (u8)((addr >> 56) & 0xff);
return i;
}
static inline int pva_task_write_ptr_16b_op(u8 *base, u8 action, u64 addr, u16 val)
{
int i = 0;
base[i++] = action;
base[i++] = (u8)((addr >> 0) & 0xff);
base[i++] = (u8)((addr >> 8) & 0xff);
base[i++] = (u8)((addr >> 16) & 0xff);
base[i++] = (u8)((addr >> 24) & 0xff);
base[i++] = (u8)((addr >> 32) & 0xff);
base[i++] = (u8)((addr >> 40) & 0xff);
base[i++] = (u8)((addr >> 48) & 0xff);
base[i++] = (u8)((addr >> 56) & 0xff);
base[i++] = (u8)((val >> 0) & 0xff);
base[i++] = (u8)((val >> 8) & 0xff);
return i;
}
static inline int pva_task_write_ptr_op(u8 *base, u8 action, u64 addr, u32 val)
{
int i = 0;
base[i++] = action;
base[i++] = (u8)((addr >> 0) & 0xff);
base[i++] = (u8)((addr >> 8) & 0xff);
base[i++] = (u8)((addr >> 16) & 0xff);
base[i++] = (u8)((addr >> 24) & 0xff);
base[i++] = (u8)((addr >> 32) & 0xff);
base[i++] = (u8)((addr >> 40) & 0xff);
base[i++] = (u8)((addr >> 48) & 0xff);
base[i++] = (u8)((addr >> 56) & 0xff);
base[i++] = (u8)((val >> 0) & 0xff);
base[i++] = (u8)((val >> 8) & 0xff);
base[i++] = (u8)((val >> 16) & 0xff);
base[i++] = (u8)((val >> 24) & 0xff);
return i;
}
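/*
* Byte layout of the serialized actions produced by the helpers above
* (all fields little-endian):
*
*   atomic op:       [action]                     ->  1 byte
*   struct ptr op:   [action][addr: 8B]           ->  9 bytes
*   16-bit ptr op:   [action][addr: 8B][val: 2B]  -> 11 bytes
*   32-bit ptr op:   [action][addr: 8B][val: 4B]  -> 13 bytes
*
* which is where ACTION_LIST_FENCE_SIZE (13),
* ACTION_LIST_STATUS_OPERATION_SIZE (11), ACTION_LIST_STATS_SIZE (9) and
* ACTION_LIST_TERMINATION_SIZE (1) come from. A minimal usage sketch:
*
*   u8 buf[ACTION_LIST_FENCE_SIZE];
*   int used = pva_task_write_ptr_op(buf, TASK_ACT_PTR_WRITE_VAL,
*                                    0x1122334455667788ULL, 0xdeadbeef);
*   // used == 13, buf[1] == 0x88 (address LSB), buf[9] == 0xef (value LSB)
*/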
static int pva_task_write_preactions(struct pva_submit_task *task,
struct pva_hw_task *hw_task)
{
struct platform_device *host1x_pdev =
to_platform_device(task->pva->pdev->dev.parent);
u8 *hw_preactions = hw_task->preactions;
int i = 0, j = 0, ptr = 0;
u8 action_ts;
u8 action_f;
u32 increment;
/* Add waits to preactions list */
for (i = 0; i < task->num_prefences; i++) {
struct nvdev_fence *fence = task->prefences + i;
switch (fence->type) {
case NVDEV_FENCE_TYPE_SYNCPT: {
dma_addr_t syncpt_addr = nvhost_syncpt_gos_address(
task->pva->pdev,
fence->syncpoint_index);
if (!syncpt_addr)
syncpt_addr = nvhost_syncpt_address(
task->queue->vm_pdev,
fence->syncpoint_index);
ptr += pva_task_write_ptr_op(&hw_preactions[ptr],
TASK_ACT_PTR_BLK_GTREQL, syncpt_addr,
fence->syncpoint_value);
break;
}
case NVDEV_FENCE_TYPE_SEMAPHORE:
case NVDEV_FENCE_TYPE_SEMAPHORE_TS:{
ptr += pva_task_write_ptr_op(&hw_preactions[ptr],
TASK_ACT_PTR_BLK_GTREQL,
task->prefences_sema_ext[i].dma_addr +
fence->semaphore_offset,
fence->semaphore_value);
break;
}
case NVDEV_FENCE_TYPE_SYNC_FD: {
int thresh, id;
dma_addr_t syncpt_addr;
struct sync_fence *syncfd_fence;
struct sync_pt *pt;
struct nvhost_master *host = nvhost_get_host(
task->pva->pdev);
struct nvhost_syncpt *sp = &host->syncpt;
if (!fence->sync_fd)
break;
syncfd_fence = nvhost_sync_fdget(fence->sync_fd);
if (!syncfd_fence)
break;
for (j = 0; j < syncfd_fence->num_fences; j++) {
pt = sync_pt_from_fence(
syncfd_fence->cbs[j].sync_pt);
if (!pt)
break;
id = nvhost_sync_pt_id(pt);
thresh = nvhost_sync_pt_thresh(pt);
/* validate the syncpt ids */
if (!id ||
!nvhost_syncpt_is_valid_hw_pt(sp, id)) {
sync_fence_put(syncfd_fence);
break;
}
if (nvhost_syncpt_is_expired(sp,
id, thresh))
continue;
syncpt_addr = nvhost_syncpt_gos_address(
task->pva->pdev, id);
if (!syncpt_addr)
syncpt_addr = nvhost_syncpt_address(
task->queue->vm_pdev, id);
ptr += pva_task_write_ptr_op(
&hw_preactions[ptr],
TASK_ACT_PTR_BLK_GTREQL,
syncpt_addr, thresh);
}
break;
}
default:
return -ENOSYS;
}
}
for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) {
increment = 0;
switch (i) {
case PVA_FENCE_SOT_V:
action_ts = TASK_ACT_PTR_WRITE_SOT_V_TS;
action_f = TASK_ACT_PTR_WRITE_VAL_SOT_V;
increment = 1;
break;
case PVA_FENCE_SOT_R:
action_ts = TASK_ACT_PTR_WRITE_SOT_R_TS;
action_f = TASK_ACT_PTR_WRITE_VAL_SOT_R;
increment = 1;
break;
default:
action_ts = 0;
action_f = 0;
break;
}
if ((action_ts == 0) || (task->num_pvafences[i] == 0))
continue;
for (j = 0; j < task->num_pva_ts_buffers[i]; j++) {
if (task->pvafences[i][j].ts_buf_ptr.handle) {
int dif;
dif = pva_task_write_ptr_op(
&hw_preactions[ptr],
action_ts,
task->pva_ts_buffers_ext[i][j].dma_addr +
task->pvafences[i][j].ts_buf_ptr.offset,
1U);
ptr += dif;
}
}
for (j = 0; j < task->num_pvafences[i]; j++) {
struct nvdev_fence *fence =
&task->pvafences[i][j].fence;
u32 thresh;
switch (fence->type) {
case NVDEV_FENCE_TYPE_SYNCPT: {
dma_addr_t syncpt_gos_addr =
nvhost_syncpt_gos_address(
task->pva->pdev,
fence->syncpoint_index);
dma_addr_t syncpt_addr =
nvhost_syncpt_address(
task->queue->vm_pdev,
task->queue->syncpt_id);
ptr += pva_task_write_ptr_op(
&hw_preactions[ptr],
action_f,
syncpt_addr,
1U);
task->fence_num += increment;
/* Make a syncpoint increment */
if (syncpt_gos_addr) {
thresh = nvhost_syncpt_read_maxval(
host1x_pdev,
task->queue->syncpt_id) +
task->fence_num;
ptr += pva_task_write_ptr_op(
&hw_preactions[ptr],
TASK_ACT_PTR_WRITE_VAL,
syncpt_gos_addr, thresh);
}
break;
}
case NVDEV_FENCE_TYPE_SEMAPHORE:
case NVDEV_FENCE_TYPE_SEMAPHORE_TS: {
int dif;
dif = pva_task_write_ptr_op(&hw_preactions[ptr],
action_f,
task->pvafences_sema_ext[i][j].dma_addr +
fence->semaphore_offset,
fence->semaphore_value);
ptr += dif;
break;
}
case NVDEV_FENCE_TYPE_SYNC_FD:
/* TODO XXX*/
default:
return -ENOSYS;
}
}
}
/* Perform input status checks */
for (i = 0; i < task->num_input_task_status; i++) {
struct pva_status_handle *input_status =
task->input_task_status + i;
dma_addr_t input_status_addr =
task->input_task_status_ext[i].dma_addr +
input_status->offset;
ptr += pva_task_write_ptr_16b_op(
&hw_preactions[ptr],
TASK_ACT_READ_STATUS,
input_status_addr, 0);
}
ptr += pva_task_write_atomic_op(&hw_preactions[ptr],
TASK_ACT_TERMINATE);
/* Store the preaction list */
hw_task->preaction_list.offset = offsetof(struct pva_hw_task, preactions);
hw_task->preaction_list.length = ptr;
nvhost_dbg_info("preaction buffer alloted size %d: used size %d",
INPUT_ACTION_BUFFER_SIZE, ptr);
return 0;
}
static void pva_task_write_postactions(struct pva_submit_task *task,
struct pva_hw_task *hw_task)
{
dma_addr_t syncpt_addr = nvhost_syncpt_address(task->queue->vm_pdev,
task->queue->syncpt_id);
dma_addr_t syncpt_gos_addr = nvhost_syncpt_gos_address(task->pva->pdev,
task->queue->syncpt_id);
u8 *hw_postactions = hw_task->postactions;
int ptr = 0, i = 0;
struct platform_device *host1x_pdev =
to_platform_device(task->pva->pdev->dev.parent);
dma_addr_t output_status_addr;
u32 thresh;
u8 action_ts;
int j;
/* Write Output action status */
for (i = 0; i < task->num_output_task_status; i++) {
struct pva_status_handle *output_status =
task->output_task_status + i;
output_status_addr = task->output_task_status_ext[i].dma_addr +
output_status->offset;
ptr += pva_task_write_ptr_16b_op(
&hw_postactions[ptr],
TASK_ACT_WRITE_STATUS,
output_status_addr, 1);
}
for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) {
switch (i) {
case PVA_FENCE_EOT_V:
action_ts = TASK_ACT_PTR_WRITE_EOT_V_TS;
break;
case PVA_FENCE_EOT_R:
action_ts = TASK_ACT_PTR_WRITE_EOT_R_TS;
break;
case PVA_FENCE_POST:
action_ts = TASK_ACT_PTR_WRITE_TS;
break;
default:
action_ts = 0;
break;
}
if (action_ts == 0)
continue;
for (j = 0; j < task->num_pva_ts_buffers[i]; j++) {
if (task->pvafences[i][j].ts_buf_ptr.handle) {
int dif;
dif = pva_task_write_ptr_op(
&hw_postactions[ptr],
action_ts,
task->pva_ts_buffers_ext[i][j].dma_addr +
task->pvafences[i][j].ts_buf_ptr.offset,
1U);
ptr += dif;
}
}
}
/* Add postactions list for semaphore */
j = PVA_FENCE_POST;
for (i = 0; i < task->num_pvafences[j]; i++) {
struct nvdev_fence *fence = &task->pvafences[j][i].fence;
if (fence->type == NVDEV_FENCE_TYPE_SEMAPHORE) {
ptr += pva_task_write_ptr_op(&hw_postactions[ptr],
TASK_ACT_PTR_WRITE_VAL,
task->pvafences_sema_ext[j][i].dma_addr +
fence->semaphore_offset,
fence->semaphore_value);
} else if (fence->type == NVDEV_FENCE_TYPE_SEMAPHORE_TS) {
/*
* Timestamp will be filled by ucode hence making the
* place holder for timestamp size, sizeof(u64).
*/
ptr += sizeof(u64) +
pva_task_write_ptr_op(
&hw_postactions[ptr],
TASK_ACT_PTR_WRITE_VAL_TS,
task->pvafences_sema_ext[j][i].dma_addr +
fence->semaphore_offset,
fence->semaphore_value);
}
}
task->fence_num += 1;
/* Make a syncpoint increment */
if (syncpt_gos_addr) {
thresh = nvhost_syncpt_read_maxval(
host1x_pdev,
task->queue->syncpt_id) + task->fence_num;
ptr += pva_task_write_ptr_op(
&hw_postactions[ptr],
TASK_ACT_PTR_WRITE_VAL,
syncpt_gos_addr, thresh);
}
ptr += pva_task_write_ptr_op(&hw_postactions[ptr],
TASK_ACT_PTR_WRITE_VAL, syncpt_addr, 1);
output_status_addr = task->dma_addr +
offsetof(struct pva_hw_task, statistics);
ptr += pva_task_write_struct_ptr_op(
&hw_postactions[ptr],
TASK_ACT_PVA_STATISTICS,
output_status_addr, 1);
if (task->pva->vpu_perf_counters_enable) {
ptr += pva_task_write_struct_ptr_op(
&hw_postactions[ptr],
TASK_ACT_PVA_VPU_PERF_COUNTERS,
task->dma_addr + offsetof(struct pva_hw_task,
vpu_perf_counters),
1);
}
ptr += pva_task_write_atomic_op(&hw_postactions[ptr],
TASK_ACT_TERMINATE);
/* Store the postaction list */
hw_task->postaction_list.offset = offsetof(struct pva_hw_task,
postactions);
hw_task->postaction_list.length = ptr;
nvhost_dbg_info("postaction buffer alloted size %d: used size %d",
OUTPUT_ACTION_BUFFER_SIZE, ptr);
}
static void pva_task_write_output_surfaces(struct pva_submit_task *task,
struct pva_hw_task *hw_task)
{
struct pva_task_parameter_array *surface_parameter;
if (task->num_output_surfaces == 0)
return;
surface_parameter = hw_task->output_parameter_array +
hw_task->task.num_output_parameters;
/* Write parameter descriptor */
surface_parameter->address = task->dma_addr +
offsetof(struct pva_hw_task,
output_surface_desc);
surface_parameter->type = PVA_PARAM_SURFACE_LIST;
surface_parameter->size = sizeof(struct pva_task_parameter_desc) +
sizeof(struct pva_task_surface) *
task->num_output_surfaces;
hw_task->task.num_output_parameters++;
/* Write the surface descriptor base information */
hw_task->output_surface_desc.num_parameters = task->num_output_surfaces;
hw_task->output_surface_desc.reserved = 0;
/* Write the output surfaces */
pva_task_write_surfaces(hw_task->output_surfaces,
task->output_surfaces,
task->output_surfaces_ext,
task->output_surface_rois_ext,
task->num_output_surfaces);
}
static void pva_task_write_input_surfaces(struct pva_submit_task *task,
struct pva_hw_task *hw_task)
{
struct pva_task_parameter_array *surface_parameter;
if (task->num_input_surfaces == 0)
return;
surface_parameter = hw_task->input_parameter_array +
hw_task->task.num_input_parameters;
/* Write parameter descriptor */
surface_parameter->address = task->dma_addr +
offsetof(struct pva_hw_task,
input_surface_desc);
surface_parameter->type = PVA_PARAM_SURFACE_LIST;
surface_parameter->size = sizeof(struct pva_task_parameter_desc) +
sizeof(struct pva_task_surface) *
task->num_input_surfaces;
hw_task->task.num_input_parameters++;
/* Write the surface descriptor base information */
hw_task->input_surface_desc.num_parameters = task->num_input_surfaces;
hw_task->input_surface_desc.reserved = 0;
/* Write the input surfaces */
pva_task_write_surfaces(hw_task->input_surfaces,
task->input_surfaces,
task->input_surfaces_ext,
task->input_surface_rois_ext,
task->num_input_surfaces);
}
static void pva_task_write_non_surfaces(struct pva_submit_task *task,
struct pva_hw_task *hw_task)
{
struct pva_task_parameter_array *hw_input_parameters =
hw_task->input_parameter_array;
struct pva_task_parameter_array *hw_output_parameters =
hw_task->output_parameter_array;
#define COPY_PARAMETER(target, name, name_ext, param_type, count) \
do { \
if ((name).handle) { \
target[(count)].address = (name_ext).dma_addr + \
(name).offset; \
target[(count)].size = (name_ext).size - \
(name).offset; \
target[(count)].type = (param_type); \
(count)++; \
} \
} while (0)
COPY_PARAMETER(hw_input_parameters, task->input_scalars,
task->input_scalars_ext,
PVA_PARAM_SCALAR_LIST,
hw_task->task.num_input_parameters);
COPY_PARAMETER(hw_output_parameters, task->output_scalars,
task->output_scalars_ext,
PVA_PARAM_SCALAR_LIST,
hw_task->task.num_output_parameters);
#undef COPY_PARAMETER
}
static int pva_task_write_opaque_data(struct pva_submit_task *task,
struct pva_hw_task *hw_task)
{
struct pva_task_parameter_array *opaque_parameter;
struct pva_task_opaque_data_desc *opaque_desc;
struct pva_parameter_ext *handle_ext;
unsigned int primary_payload_offset;
struct pva_memory_handle *handle;
unsigned int pointer_list_offset;
struct pva_task_pointer pointer;
u8 *primary_payload, *pointers;
unsigned int num_bytes;
u64 aux, size, flags;
unsigned int i;
if (task->num_pointers == 0 && task->primary_payload_size == 0)
return 0;
/* Calculate size of the opaque data */
num_bytes = sizeof(struct pva_task_opaque_data_desc);
num_bytes += task->primary_payload_size;
num_bytes += sizeof(struct pva_task_pointer) * task->num_pointers;
if (num_bytes > PVA_MAX_PRIMARY_PAYLOAD_SIZE)
return -ENOMEM;
/* The opaque parameter always resides in the input parameter block */
opaque_parameter = hw_task->input_parameter_array +
hw_task->task.num_input_parameters;
/* Write parameter descriptor */
opaque_parameter->address = task->dma_addr +
offsetof(struct pva_hw_task,
opaque_data);
opaque_parameter->type = PVA_PARAM_OPAQUE_DATA;
opaque_parameter->size = num_bytes;
hw_task->task.num_input_parameters++;
/* Determine offset to the primary_payload start */
primary_payload_offset = sizeof(struct pva_task_opaque_data_desc);
primary_payload = hw_task->opaque_data + primary_payload_offset;
/* Determine offset to the start of the pointer list */
pointer_list_offset = primary_payload_offset +
task->primary_payload_size;
pointers = hw_task->opaque_data + pointer_list_offset;
/* Initialize the opaque data descriptor */
opaque_desc = (void *)hw_task->opaque_data;
opaque_desc->primary_payload_size = task->primary_payload_size;
/* Copy the primary_payload */
memcpy(primary_payload,
task->primary_payload,
task->primary_payload_size);
/* Copy the pointers */
for (i = 0; i < task->num_pointers; i++) {
handle = task->pointers + i;
handle_ext = task->pointers_ext + i;
size = handle_ext->size & PVA_TASK_POINTER_AUX_SIZE_MASK;
if (size != handle_ext->size) {
return -EINVAL;
}
flags = 0;
if (handle_ext->heap == NVHOST_BUFFERS_HEAP_CVNAS)
flags |= PVA_TASK_POINTER_AUX_FLAGS_CVNAS;
aux = (size << PVA_TASK_POINTER_AUX_SIZE_SHIFT) |
(flags << PVA_TASK_POINTER_AUX_FLAGS_SHIFT);
pointer.address = handle_ext->dma_addr + handle->offset;
pointer.aux = aux;
/* The data might be unaligned. Copy it byte by byte */
memcpy(pointers, &pointer, sizeof(pointer));
pointers += sizeof(pointer);
}
return 0;
}
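/*
* The aux word built above packs the pointer size and heap flags next to the
* IOVA. An illustrative unpack on the consumer side (sketch only; the
* authoritative shifts and masks live in pva-task.h):
*
*   u64 size      = (pointer.aux >> PVA_TASK_POINTER_AUX_SIZE_SHIFT) &
*                   PVA_TASK_POINTER_AUX_SIZE_MASK;
*   u64 flags     = pointer.aux >> PVA_TASK_POINTER_AUX_FLAGS_SHIFT;
*   bool in_cvnas = (flags & PVA_TASK_POINTER_AUX_FLAGS_CVNAS) != 0;
*/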
static int pva_task_write(struct pva_submit_task *task, bool atomic)
{
struct pva_hw_task *hw_task;
int err;
int i;
/* Task start from the memory base */
hw_task = task->va;
/* Write the preaction list */
err = pva_task_write_preactions(task, hw_task);
if (err < 0)
return err;
/* Write the postaction list */
pva_task_write_postactions(task, hw_task);
/* Initialize parameters */
pva_task_write_non_surfaces(task, hw_task);
/* Write the pointers and the primary payload */
err = pva_task_write_opaque_data(task, hw_task);
if (err < 0)
return err;
/* Write input surfaces */
pva_task_write_input_surfaces(task, hw_task);
/* Write output surfaces */
pva_task_write_output_surfaces(task, hw_task);
hw_task->task.input_parameters = offsetof(struct pva_hw_task,
input_parameter_array);
hw_task->task.output_parameters = offsetof(struct pva_hw_task,
output_parameter_array);
hw_task->task.gen_task.versionid = TASK_VERSION_ID;
hw_task->task.gen_task.engineid = PVA_ENGINE_ID;
hw_task->task.gen_task.sequence = 0;
hw_task->task.gen_task.length = offsetof(struct pva_hw_task,
input_surface_desc);
hw_task->task.gen_task.n_preaction_lists = 1;
hw_task->task.gen_task.preaction_lists_p = offsetof(struct pva_hw_task,
preaction_list);
hw_task->task.gen_task.n_postaction_lists = 1;
hw_task->task.gen_task.postaction_lists_p = offsetof(struct pva_hw_task,
postaction_list);
hw_task->task.runlist_version = PVA_TASK_VERSION_ID;
hw_task->task.queue_id = task->queue->id;
hw_task->task.flags = atomic ? PVA_TASK_FL_ATOMIC : 0;
hw_task->task.operation = task->operation;
hw_task->task.timeout = task->timeout;
/* Set flags to debug the vpu application if debugfs node is set for vpu id */
if (task->pva->dbg_vpu_app_id == task->operation)
hw_task->task.flags |= PVA_TASK_FL_VPU_DEBUG;
/* This should be delivered from userspace - hard-code
* until the mechanism is in place.
*/
hw_task->task.operation_version = 1;
for (i = 0; i < roundup(sizeof(struct pva_hw_task), 16) / 16; i++) {
u8 *task_va = task->va;
u32 base = i * 16;
nvhost_dbg_info("%02x, %02x, %02x, %02x, %02x, %02x, %02x %02x, "
"%02x, %02x, %02x, %02x, %02x, %02x, %02x %02x",
task_va[base],
task_va[base + 1],
task_va[base + 2],
task_va[base + 3],
task_va[base + 4],
task_va[base + 5],
task_va[base + 6],
task_va[base + 7],
task_va[base + 8],
task_va[base + 9],
task_va[base + 10],
task_va[base + 11],
task_va[base + 12],
task_va[base + 13],
task_va[base + 14],
task_va[base + 15]);
}
return 0;
}
#ifdef CONFIG_EVENTLIB
static void
pva_eventlib_record_perf_counter(struct platform_device *pdev,
u32 syncpt_id,
u32 syncpt_thresh,
u32 operation,
u32 tag,
u32 count,
u32 sum,
u64 sum_squared,
u32 min,
u32 max,
u64 timestamp_begin,
u64 timestamp_end)
{
struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
struct nvhost_vpu_perf_counter perf_counter;
if (!pdata->eventlib_id)
return;
perf_counter.class_id = pdata->class;
perf_counter.syncpt_id = syncpt_id;
perf_counter.syncpt_thresh = syncpt_thresh;
perf_counter.operation = operation;
perf_counter.tag = tag;
perf_counter.count = count;
perf_counter.average = sum / count;
perf_counter.variance =
((u64)count * sum_squared - (u64)sum * (u64)sum)
/ (u64)count / (u64)count;
perf_counter.minimum = min;
perf_counter.maximum = max;
keventlib_write(pdata->eventlib_id,
&perf_counter,
sizeof(perf_counter),
NVHOST_VPU_PERF_COUNTER_BEGIN,
timestamp_begin);
keventlib_write(pdata->eventlib_id,
&perf_counter,
sizeof(perf_counter),
NVHOST_VPU_PERF_COUNTER_END,
timestamp_end);
}
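/*
* The variance above is the standard one-pass identity
*
*   Var = (n * sum(x^2) - (sum(x))^2) / n^2
*
* computed from the running sum and sum of squares that the VPU reports,
* so no second pass over the samples is needed. Quick check with
* x = {1, 3}: n = 2, sum = 4, sum_sq = 10, giving (2*10 - 16) / 4 = 1,
* which matches a mean of 2 with deviations of +/- 1.
*/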
static void
pva_eventlib_record_r5_states(struct platform_device *pdev,
u32 syncpt_id,
u32 syncpt_thresh,
struct pva_task_statistics *stats,
u32 operation)
{
struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
struct nvhost_pva_task_state state;
if (!pdata->eventlib_id)
return;
state.class_id = pdata->class;
state.syncpt_id = syncpt_id;
state.syncpt_thresh = syncpt_thresh;
state.operation = operation;
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
NVHOST_PVA_QUEUE_BEGIN,
stats->queued_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
NVHOST_PVA_QUEUE_END,
stats->vpu_assigned_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
NVHOST_PVA_PREPARE_BEGIN,
stats->vpu_assigned_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
NVHOST_PVA_PREPARE_END,
stats->vpu_start_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
stats->vpu_assigned == 0 ? NVHOST_PVA_VPU0_BEGIN
: NVHOST_PVA_VPU1_BEGIN,
stats->vpu_start_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
stats->vpu_assigned == 0 ? NVHOST_PVA_VPU0_END
: NVHOST_PVA_VPU1_END,
stats->vpu_complete_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
NVHOST_PVA_POST_BEGIN,
stats->vpu_complete_time);
keventlib_write(pdata->eventlib_id,
&state,
sizeof(state),
NVHOST_PVA_POST_END,
stats->complete_time);
}
#else
static void
pva_eventlib_record_perf_counter(struct platform_device *pdev,
u32 syncpt_id,
u32 syncpt_thresh,
u32 operation,
u32 tag,
u32 count,
u32 sum,
u64 sum_squared,
u32 min,
u32 max,
u64 timestamp_begin,
u64 timestamp_end)
{
}
static void
pva_eventlib_record_r5_states(struct platform_device *pdev,
u32 syncpt_id,
u32 syncpt_thresh,
struct pva_task_statistics *stats,
u32 operation)
{
}
#endif
static void pva_task_update(struct pva_submit_task *task)
{
struct nvhost_queue *queue = task->queue;
struct pva_hw_task *hw_task = task->va;
struct pva *pva = task->pva;
struct platform_device *pdev = pva->pdev;
struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
struct pva_task_statistics *stats = &hw_task->statistics;
struct pva_task_vpu_perf_counter *perf;
u32 idx;
trace_nvhost_task_timestamp(dev_name(&pdev->dev),
pdata->class,
queue->syncpt_id,
task->syncpt_thresh,
stats->vpu_assigned_time,
stats->complete_time);
nvhost_eventlib_log_task(pdev,
queue->syncpt_id,
task->syncpt_thresh,
stats->vpu_assigned_time,
stats->complete_time);
nvhost_dbg_info("Completed task %p (0x%llx), start_time=%llu, end_time=%llu",
task, (u64)task->dma_addr,
stats->vpu_assigned_time,
stats->complete_time);
trace_nvhost_pva_task_stats(pdev->name,
stats->queued_time,
stats->head_time,
stats->input_actions_complete,
stats->vpu_assigned_time,
stats->vpu_start_time,
stats->vpu_complete_time,
stats->complete_time,
stats->vpu_assigned);
nvhost_dbg_info("QueuedTime %llu, HeadTime 0x%llu, "
"InputActionComplete %llu, VpuAssignedTime %llu, "
"VpuStartTime %llu, VpuCompleteTime %llu, "
"TaskCompeteTime %llu, AssignedVpu %d",
stats->queued_time,
stats->head_time,
stats->input_actions_complete,
stats->vpu_assigned_time,
stats->vpu_start_time,
stats->vpu_complete_time,
stats->complete_time,
stats->vpu_assigned);
pva_eventlib_record_r5_states(pdev,
queue->syncpt_id,
task->syncpt_thresh,
stats, task->operation);
/* Record task postfences */
nvhost_eventlib_log_fences(pdev,
queue->syncpt_id,
task->syncpt_thresh,
&(task->pvafences[PVA_FENCE_POST][0].fence),
1,
NVDEV_FENCE_KIND_POST,
stats->complete_time);
if (task->pva->vpu_perf_counters_enable) {
for (idx = 0; idx < PVA_TASK_VPU_NUM_PERF_COUNTERS; idx++) {
perf = &hw_task->vpu_perf_counters[idx];
if (perf->count != 0) {
trace_nvhost_pva_task_vpu_perf(
pdev->name, idx, perf->count,
perf->sum, perf->sum_squared,
perf->min, perf->max);
pva_eventlib_record_perf_counter(
pdev, queue->syncpt_id,
task->syncpt_thresh,
task->operation, idx, perf->count,
perf->sum, perf->sum_squared,
perf->min, perf->max,
stats->vpu_assigned_time,
stats->complete_time);
}
}
}
/* Unpin job memory. PVA shouldn't be using it anymore */
pva_task_unpin_mem(task);
/* Drop PM runtime reference of PVA */
nvhost_module_idle(task->pva->pdev);
/* remove the task from the queue */
list_del(&task->node);
/* Not linked anymore so drop the reference */
kref_put(&task->ref, pva_task_free);
/* Drop queue reference to allow reusing it */
nvhost_queue_put(queue);
}
void pva_task_free(struct kref *ref)
{
struct pva_submit_task *task =
container_of(ref, struct pva_submit_task, ref);
/* Release memory that was allocated for the task */
nvhost_queue_free_task_memory(task->queue, task->pool_index);
}
static void pva_queue_update(void *priv, int nr_completed)
{
struct nvhost_queue *queue = priv;
struct pva_submit_task *task, *n;
struct list_head completed;
INIT_LIST_HEAD(&completed);
/* Move completed tasks to a separate list */
mutex_lock(&queue->list_lock);
list_for_each_entry_safe(task, n, &queue->tasklist, node) {
if (!nvhost_syncpt_is_expired_ext(queue->pool->pdev,
queue->syncpt_id,
task->syncpt_thresh))
break;
list_move_tail(&task->node, &completed);
}
mutex_unlock(&queue->list_lock);
/* Handle completed tasks */
list_for_each_entry_safe(task, n, &completed, node)
pva_task_update(task);
}
static void pva_queue_dump(struct nvhost_queue *queue, struct seq_file *s)
{
struct pva_submit_task *task;
int i = 0;
int k = PVA_FENCE_POST;
seq_printf(s, "Queue %u, Tasks\n", queue->id);
mutex_lock(&queue->list_lock);
list_for_each_entry(task, &queue->tasklist, node) {
int j;
seq_printf(s, " #%u: Operation = %u\n",
i++, task->operation);
for (j = 0; j < task->num_prefences; j++)
seq_printf(s, " prefence %d: \n\t"
"syncpoint_index=%u, syncpoint_value=%u\n",
j,
task->prefences[j].syncpoint_index,
task->prefences[j].syncpoint_value);
for (j = 0; j < task->num_pvafences[k]; j++)
seq_printf(s, " postfence %d: \n\t"
"syncpoint_index=%u, syncpoint_value=%u\n",
j,
task->pvafences[k][j].fence.syncpoint_index,
task->pvafences[k][j].fence.syncpoint_value);
}
mutex_unlock(&queue->list_lock);
}
static int pva_task_submit_channel_ccq(struct pva_submit_task *task,
u32 *thresh)
{
struct nvhost_queue *queue = task->queue;
u64 fifo_flags = PVA_FIFO_INT_ON_ERR;
u64 fifo_cmd = pva_fifo_submit(queue->id,
task->dma_addr,
fifo_flags);
u32 syncpt_wait_ids[PVA_MAX_PREFENCES];
u32 syncpt_wait_thresh[PVA_MAX_PREFENCES];
unsigned int i;
u32 cmdbuf[4];
int err = 0;
/* Pick up fences... */
for (i = 0; i < task->num_prefences; i++) {
/* ..and ensure that we have only syncpoints present */
if (task->prefences[i].type != NVDEV_FENCE_TYPE_SYNCPT)
return -EINVAL;
/* Put fences into a separate array */
syncpt_wait_ids[i] =
task->prefences[i].syncpoint_index;
syncpt_wait_thresh[i] =
task->prefences[i].syncpoint_value;
}
/* A simple command buffer: Write two words into the ccq
* register
*/
cmdbuf[0] = nvhost_opcode_setpayload(2);
cmdbuf[1] = nvhost_opcode_nonincr_w(cfg_ccq_r() >> 2);
cmdbuf[2] = (u32)(fifo_cmd >> 32);
cmdbuf[3] = (u32)(fifo_cmd & 0xffffffff);
/* Submit the command buffer and the syncpoint waits to the channel */
err = nvhost_queue_submit_to_host1x(queue,
cmdbuf,
ARRAY_SIZE(cmdbuf),
1,
syncpt_wait_ids,
syncpt_wait_thresh,
task->num_prefences,
thresh);
return err;
}
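/*
* The four-word command buffer above expands to the host1x method stream
*
*   SETPAYLOAD(2)
*   NONINCR(cfg_ccq_r() >> 2)
*   fifo_cmd[63:32]
*   fifo_cmd[31:0]
*
* i.e. two back-to-back 32-bit writes into the PVA CCQ register. In this
* submit mode the prefences are also expressed as host1x syncpoint waits
* via nvhost_queue_submit_to_host1x(), which is why only
* NVDEV_FENCE_TYPE_SYNCPT prefences are accepted here.
*/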
static int pva_task_submit_mmio_ccq(struct pva_submit_task *task,
u32 *thresh)
{
struct platform_device *host1x_pdev =
to_platform_device(task->pva->pdev->dev.parent);
struct nvhost_queue *queue = task->queue;
u32 old_maxval, new_maxval;
u64 fifo_flags = PVA_FIFO_INT_ON_ERR;
u64 fifo_cmd = pva_fifo_submit(queue->id,
task->dma_addr,
fifo_flags);
int err = 0;
/* Increment syncpoint to capture threshold */
old_maxval = nvhost_syncpt_read_maxval(host1x_pdev, queue->syncpt_id);
new_maxval = nvhost_syncpt_incr_max_ext(host1x_pdev,
queue->syncpt_id,
task->fence_num);
err = pva_ccq_send(task->pva, fifo_cmd);
if (err < 0)
goto err_submit;
*thresh = new_maxval;
return 0;
err_submit:
nvhost_syncpt_set_maxval(host1x_pdev, queue->syncpt_id, old_maxval);
return err;
}
static int pva_task_submit_mailbox(struct pva_submit_task *task,
u32 *thresh)
{
struct platform_device *host1x_pdev =
to_platform_device(task->pva->pdev->dev.parent);
struct nvhost_queue *queue = task->queue;
struct pva_mailbox_status_regs status;
u32 old_maxval, new_maxval;
struct pva_cmd cmd;
u32 flags, nregs;
int err = 0;
/* Construct submit command */
flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE;
nregs = pva_cmd_submit(&cmd, queue->id,
task->dma_addr, flags);
/* Increment syncpoint to capture threshold */
old_maxval = nvhost_syncpt_read_maxval(host1x_pdev, queue->syncpt_id);
new_maxval = nvhost_syncpt_incr_max_ext(host1x_pdev,
queue->syncpt_id,
task->fence_num);
/* Submit request to PVA and wait for response */
err = pva_mailbox_send_cmd_sync(task->pva, &cmd, nregs, &status);
if (err < 0) {
nvhost_warn(&task->pva->pdev->dev,
"Failed to submit task: %d", err);
goto err_submit;
}
/* Ensure that response is valid */
if (status.error != PVA_ERR_NO_ERROR) {
nvhost_warn(&task->pva->pdev->dev, "PVA task rejected: %u",
status.error);
err = -EINVAL;
goto err_submit;
}
*thresh = new_maxval;
return 0;
err_submit:
nvhost_syncpt_set_maxval(host1x_pdev, queue->syncpt_id, old_maxval);
return err;
}
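/*
* Both the MMIO CCQ and mailbox paths above follow the same reservation
* pattern: bump the syncpoint max by task->fence_num (the increments counted
* while writing the action lists) before talking to the firmware, and roll
* the max back if the submission fails so that waiters never block on a
* threshold that will not be signalled. Sketch of the pattern
* (send_to_firmware() is a placeholder for pva_ccq_send() or
* pva_mailbox_send_cmd_sync()):
*
*   old_maxval = nvhost_syncpt_read_maxval(host1x_pdev, queue->syncpt_id);
*   new_maxval = nvhost_syncpt_incr_max_ext(host1x_pdev, queue->syncpt_id,
*                                           task->fence_num);
*   if (send_to_firmware() < 0)
*           nvhost_syncpt_set_maxval(host1x_pdev, queue->syncpt_id,
*                                    old_maxval);
*   else
*           *thresh = new_maxval;
*/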
static int pva_task_submit(struct pva_submit_task *task,
u32 *task_thresh)
{
struct platform_device *host1x_pdev =
to_platform_device(task->pva->pdev->dev.parent);
struct nvhost_queue *queue = task->queue;
u32 thresh = 0;
u64 timestamp;
int err = 0;
nvhost_dbg_info("Submitting task %p (0x%llx)", task,
(u64)task->dma_addr);
/* Get a reference of the queue to avoid it being reused. It
* gets freed in the callback...
*/
nvhost_queue_get(queue);
/* Turn on the hardware */
err = nvhost_module_busy(task->pva->pdev);
if (err)
goto err_module_busy;
/*
* The TSC timestamp is the same as CNTVCT. Task statistics are
* reported in TSC ticks.
*/
timestamp = arch_counter_get_cntvct();
/* Choose the submit policy based on the mode */
switch (task->pva->submit_mode) {
case PVA_SUBMIT_MODE_MAILBOX:
err = pva_task_submit_mailbox(task, &thresh);
break;
case PVA_SUBMIT_MODE_MMIO_CCQ:
err = pva_task_submit_mmio_ccq(task, &thresh);
break;
case PVA_SUBMIT_MODE_CHANNEL_CCQ:
err = pva_task_submit_channel_ccq(task, &thresh);
break;
}
if (err < 0)
goto err_submit;
/* Record task prefences */
nvhost_eventlib_log_fences(task->pva->pdev,
queue->syncpt_id,
thresh,
task->prefences,
task->num_prefences,
NVDEV_FENCE_KIND_PRE,
timestamp);
nvhost_eventlib_log_submit(task->pva->pdev,
queue->syncpt_id,
thresh,
timestamp);
task->syncpt_thresh = thresh;
nvhost_dbg_info("Postfence id=%u, value=%u",
queue->syncpt_id, thresh);
*task_thresh = thresh;
/* Going to be linked so obtain the reference */
kref_get(&task->ref);
/*
* Tasks in the queue list can be modified by the interrupt handler.
* Adding the task into the list must be the last step before
* registering the interrupt handler.
*/
mutex_lock(&queue->list_lock);
list_add_tail(&task->node, &queue->tasklist);
mutex_unlock(&queue->list_lock);
/*
* Register the interrupt handler. This must be done after adding
* the tasks into the queue since otherwise we may miss the completion
* event.
*/
WARN_ON(nvhost_intr_register_notifier(host1x_pdev,
queue->syncpt_id, thresh,
pva_queue_update, queue));
return err;
err_submit:
nvhost_module_idle(task->pva->pdev);
err_module_busy:
nvhost_queue_put(queue);
return err;
}
static int pva_queue_submit(struct nvhost_queue *queue, void *args)
{
struct pva_submit_tasks *task_header = args;
int err = 0;
int i;
for (i = 0; i < task_header->num_tasks; i++) {
struct pva_submit_task *task = task_header->tasks[i];
u32 *thresh = &task_header->task_thresh[i];
task->fence_num = 0;
/* First, dump the task that we are submitting */
pva_task_dump(task);
/* Pin job memory */
err = pva_task_pin_mem(task);
if (err < 0)
break;
/* Write the task data */
pva_task_write(task, false);
err = pva_task_submit(task, thresh);
if (err < 0)
break;
}
return err;
}
static int pva_queue_set_attribute(struct nvhost_queue *queue, void *args)
{
uint32_t flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE;
struct pva_queue_set_attribute *set_attr = args;
struct pva_queue_attribute *attr = set_attr->attr;
struct pva_mailbox_status_regs status;
struct pva_cmd cmd;
int err = 0;
u32 nregs;
nregs = pva_cmd_set_queue_attributes(&cmd, queue->id, attr->id,
attr->value,
flags);
/* Submit request to PVA and wait for response */
if (set_attr->bootup)
err = pva_mailbox_send_cmd_sync_locked(set_attr->pva,
&cmd,
nregs,
&status);
else
err = pva_mailbox_send_cmd_sync(set_attr->pva,
&cmd,
nregs,
&status);
if (err < 0) {
nvhost_warn(&set_attr->pva->pdev->dev,
"Failed to set attributes: %d\n",
err);
goto end;
}
/* Ensure that response is valid */
if (status.error != PVA_ERR_NO_ERROR) {
nvhost_warn(&set_attr->pva->pdev->dev,
"PVA Q attribute rejected: %u\n",
status.error);
err = -EINVAL;
}
end:
return err;
}
static void pva_queue_cleanup_fence(struct nvdev_fence *fence,
struct pva_parameter_ext *fence_ext)
{
struct dma_buf *dmabuf;
u8 *dmabuf_cpuva;
u32 *fence_cpuva;
if (fence->type != NVDEV_FENCE_TYPE_SEMAPHORE)
return;
dmabuf = fence_ext->dmabuf;
dmabuf_cpuva = dma_buf_vmap(dmabuf);
if (!dmabuf_cpuva)
return;
/* The semaphore word must be 4-byte aligned before it is written */
if (fence->semaphore_offset % 4) {
dma_buf_vunmap(dmabuf, dmabuf_cpuva);
return;
}
fence_cpuva = (void *)&dmabuf_cpuva[fence->semaphore_offset];
*fence_cpuva = fence->semaphore_value;
dma_buf_vunmap(dmabuf, dmabuf_cpuva);
}
static void pva_queue_cleanup_status(struct pva_status_handle *status_h,
struct pva_parameter_ext *status_h_ext)
{
struct dma_buf *dmabuf = status_h_ext->dmabuf;
u8 *dmabuf_cpuva = dma_buf_vmap(dmabuf);
struct nvhost_notification *status_ptr;
if (!dmabuf_cpuva)
return;
status_ptr = (void *)&dmabuf_cpuva[status_h->offset];
status_ptr->status = 0x8888;
dma_buf_vunmap(dmabuf, dmabuf_cpuva);
}
static void pva_queue_cleanup(struct nvhost_queue *queue,
struct pva_submit_task *task)
{
struct platform_device *pdev = queue->pool->pdev;
struct nvhost_master *host = nvhost_get_host(pdev);
bool expired = nvhost_syncpt_is_expired(&host->syncpt,
queue->syncpt_id,
task->syncpt_thresh);
unsigned int i;
unsigned int j;
/*
* Ensure that there won't be communication with PVA for
* checking the task status
*/
task->invalid = true;
/* Ignore expired fences */
if (expired)
return;
/* Write task status first */
for (i = 0; i < task->num_output_task_status; i++)
pva_queue_cleanup_status(task->output_task_status + i,
task->output_task_status_ext + i);
/* Finish up non-syncpoint fences */
for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) {
for (j = 0; j < task->num_pvafences[i]; j++) {
pva_queue_cleanup_fence(&task->pvafences[i][j].fence,
&task->pvafences_sema_ext[i][j]);
}
}
/* Finish syncpoint increments to release waiters */
nvhost_syncpt_cpu_incr_ext(pdev, queue->syncpt_id);
}
static int pva_queue_abort(struct nvhost_queue *queue)
{
struct pva_submit_task *task;
mutex_lock(&queue->list_lock);
list_for_each_entry(task, &queue->tasklist, node)
pva_queue_cleanup(queue, task);
mutex_unlock(&queue->list_lock);
return 0;
}
struct nvhost_queue_ops pva_queue_ops = {
.abort = pva_queue_abort,
.submit = pva_queue_submit,
.get_task_size = pva_task_get_memsize,
.dump = pva_queue_dump,
.set_attribute = pva_queue_set_attribute,
};
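/*
* pva_queue_ops is consumed by the shared nvhost queue framework: the PVA
* probe path hands it to the queue pool so that queue->ops->submit(),
* ->abort(), etc. dispatch back into this file. Illustrative wiring
* (a sketch; the exact call site and the queue-count macro live in
* pva.c / pva.h):
*
*   pva->pool = nvhost_queue_init(pdev, &pva_queue_ops, MAX_PVA_QUEUE_COUNT);
*/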