/*
 * PVA Task Management
 *
 * Copyright (c) 2016-2019, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/dma-buf.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/platform_device.h>
#include <linux/nvhost.h>
#include <linux/cvnas.h>

#ifdef CONFIG_EVENTLIB
#include <linux/keventlib.h>
#include <uapi/linux/nvdev_fence.h>
#include <uapi/linux/nvhost_events.h>
#endif

#include <trace/events/nvhost.h>
#include <asm/arch_timer.h>

#include "nvhost_syncpt_unit_interface.h"
#include "../drivers/staging/android/sync.h"
#include "pva.h"
#include "pva-task.h"
#include "nvhost_buffer.h"
#include "nvhost_queue.h"
#include "pva_mailbox.h"
#include "pva_ccq.h"
#include "pva_queue.h"
#include "dev.h"
#include "hw_cfg_pva.h"
#include "t194/hardware_t194.h"
#include "pva-vpu-perf.h"

#include <linux/seq_file.h>

#define ACTION_LIST_FENCE_SIZE			13
#define ACTION_LIST_STATUS_OPERATION_SIZE	11
#define ACTION_LIST_TERMINATION_SIZE		1
#define ACTION_LIST_STATS_SIZE			9

/*
 * The worst-case input action buffer size:
 * - Prefences trigger a word memory operation (size 13 bytes)
 * - Input status reads trigger a half-word memory operation (size 11 bytes)
 * - The action list is terminated by a null action (1 byte)
 */
#define INPUT_ACTION_BUFFER_SIZE ( \
		ALIGN((PVA_MAX_PREFENCES + 10) * ACTION_LIST_FENCE_SIZE + \
		      PVA_MAX_INPUT_STATUS * ACTION_LIST_STATUS_OPERATION_SIZE + \
		      ACTION_LIST_TERMINATION_SIZE, 256))

/*
 * The worst-case output action buffer size:
 * - Postfences trigger a word memory operation (size 13 bytes)
 * - Output status writes trigger a half-word memory operation (size 11 bytes)
 * - The output action list includes an operation for statistics (size 9 bytes)
 * - The output action list includes syncpoint and semaphore increments
 * - The action list is terminated by a null action (1 byte)
 */
#define OUTPUT_ACTION_BUFFER_SIZE ( \
		ALIGN((PVA_MAX_POSTFENCES + 10) * ACTION_LIST_FENCE_SIZE + \
		      PVA_MAX_OUTPUT_STATUS * ACTION_LIST_STATUS_OPERATION_SIZE + \
		      ACTION_LIST_STATS_SIZE * 2 + \
		      ACTION_LIST_FENCE_SIZE * 2 + \
		      ACTION_LIST_TERMINATION_SIZE, 256))

struct pva_hw_task {
	struct pva_task task;
	struct pva_action_list preaction_list;
	struct pva_action_list postaction_list;
	struct pva_task_parameter_array input_parameter_array[PVA_PARAM_LAST];
	struct pva_task_parameter_array output_parameter_array[PVA_PARAM_LAST];
	u8 preactions[INPUT_ACTION_BUFFER_SIZE];
	u8 postactions[OUTPUT_ACTION_BUFFER_SIZE];
	struct pva_task_parameter_desc input_surface_desc;
	struct pva_task_surface input_surfaces[PVA_MAX_INPUT_SURFACES];
	struct pva_task_parameter_desc output_surface_desc;
	struct pva_task_surface output_surfaces[PVA_MAX_OUTPUT_SURFACES];
	struct pva_task_statistics statistics;
	struct pva_task_vpu_perf_counter
			vpu_perf_counters[PVA_TASK_VPU_NUM_PERF_COUNTERS];
	u8 opaque_data[PVA_MAX_PRIMARY_PAYLOAD_SIZE];
};

static void pva_task_dump(struct pva_submit_task *task)
{
	int i;

	nvhost_dbg_info("task=%p, "
			"input_scalars=(handle=%u, offset=%x), "
			"input_surfaces=%p, "
			"output_scalars=(handle=%u, offset=%u), "
			"output_surfaces=%p, "
			"primary_payload=%p (size=%u)",
			task,
			task->input_scalars.handle,
			task->input_scalars.offset,
			task->input_surfaces,
			task->output_scalars.handle,
			task->output_scalars.offset,
task->output_surfaces, task->primary_payload, task->primary_payload_size); for (i = 0; i < task->num_prefences; i++) nvhost_dbg_info("prefence %d: type=%u, " "syncpoint_index=%u, syncpoint_value=%u, " "sync_fd=%u, semaphore_handle=%u, " "semaphore_offset=%u, semaphore_value=%u", i, task->prefences[i].type, task->prefences[i].syncpoint_index, task->prefences[i].syncpoint_value, task->prefences[i].sync_fd, task->prefences[i].semaphore_handle, task->prefences[i].semaphore_offset, task->prefences[i].semaphore_value); for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) { int j; for (j = 0; j < task->num_pvafences[i]; j++) { nvhost_dbg_info("pvafence %d: type=%u, " "syncpoint_index=%u, syncpoint_value=%u, " "sync_fd=%u, semaphore_handle=%u, " "semaphore_offset=%u, semaphore_value=%u", i, task->pvafences[i][j].fence.type, task->pvafences[i][j].fence.syncpoint_index, task->pvafences[i][j].fence.syncpoint_value, task->pvafences[i][j].fence.sync_fd, task->pvafences[i][j].fence.semaphore_handle, task->pvafences[i][j].fence.semaphore_offset, task->pvafences[i][j].fence.semaphore_value); } } for (i = 0; i < task->num_input_surfaces; i++) nvhost_dbg_info("input surface %d: format=%llu, " "surface_handle=%u, surface_offset=%u, " "roi_handle=%u, roi_offset=%u, surface_stride=%u, " "line_stride=%u, depth=%u, width=%u, height=%u, " "layout=%u", i, task->input_surfaces[i].format, task->input_surfaces[i].surface_handle, task->input_surfaces[i].surface_offset, task->input_surfaces[i].roi_handle, task->input_surfaces[i].roi_offset, task->input_surfaces[i].surface_stride, task->input_surfaces[i].line_stride, task->input_surfaces[i].depth, task->input_surfaces[i].width, task->input_surfaces[i].height, task->input_surfaces[i].layout); for (i = 0; i < task->num_output_surfaces; i++) nvhost_dbg_info("output surface %d: format=%llu, " "surface_handle=%u, surface_offset=%u, " "roi_handle=%u, roi_offset=%u, surface_stride=%u," "line_stride=%u, depth=%u, width=%u, height=%u, " "layout=%u", i, task->output_surfaces[i].format, task->output_surfaces[i].surface_handle, task->output_surfaces[i].surface_offset, task->output_surfaces[i].roi_handle, task->output_surfaces[i].roi_offset, task->output_surfaces[i].surface_stride, task->output_surfaces[i].line_stride, task->output_surfaces[i].depth, task->output_surfaces[i].width, task->output_surfaces[i].height, task->output_surfaces[i].layout); for (i = 0; i < task->num_pointers; i++) nvhost_dbg_info("pointer %d: handle=%u, offset=%u", i, task->pointers[i].handle, task->pointers[i].offset); for (i = 0; i < task->num_input_task_status; i++) nvhost_dbg_info("input task status %d: handle=%u, offset=%u", i, task->input_task_status[i].handle, task->input_task_status[i].offset); for (i = 0; i < task->num_output_task_status; i++) nvhost_dbg_info("output task status %d: handle=%u, offset=%u", i, task->output_task_status[i].handle, task->output_task_status[i].offset); } static void pva_task_get_memsize(size_t *dma_size, size_t *kmem_size) { /* Align task addr to 64bytes boundary for DMA use*/ *dma_size = ALIGN(sizeof(struct pva_hw_task) + 64, 64); *kmem_size = sizeof(struct pva_submit_task); } static void pva_task_unpin_mem(struct pva_submit_task *task) { int i; int j; #define UNPIN_MEMORY(dst_name) \ do { \ if ((((dst_name).dmabuf) != NULL) && \ ((dst_name).dma_addr != 0)) { \ nvhost_buffer_submit_unpin(task->buffers, \ &((dst_name).dmabuf), 1); \ dma_buf_put((dst_name).dmabuf); \ } \ } while (0) for (i = 0; i < task->num_input_surfaces; i++) { UNPIN_MEMORY(task->input_surfaces_ext[i]); 
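		/* The ROI buffer is optional; UNPIN_MEMORY() is a no-op when
		 * the dmabuf pointer is NULL or the buffer was never pinned.
		 */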
UNPIN_MEMORY(task->input_surface_rois_ext[i]); } for (i = 0; i < task->num_output_surfaces; i++) { UNPIN_MEMORY(task->output_surfaces_ext[i]); UNPIN_MEMORY(task->output_surface_rois_ext[i]); } for (i = 0; i < task->num_prefences; i++) { if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SEMAPHORE) && task->prefences[i].semaphore_handle) UNPIN_MEMORY(task->prefences_sema_ext[i]); } for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) { for (j = 0; j < task->num_pvafences[i]; j++) { struct nvpva_fence *fence = &task->pvafences[i][j]; if ((fence->fence.type == NVDEV_FENCE_TYPE_SEMAPHORE) && fence->fence.semaphore_handle) { UNPIN_MEMORY(task->pvafences_sema_ext[i][j]); } } for (j = 0; j < task->num_pva_ts_buffers[i]; j++) { struct nvpva_fence *fence = &task->pvafences[i][j]; if (fence->ts_buf_ptr.handle) { UNPIN_MEMORY(task->pva_ts_buffers_ext[i][j]); } } } for (i = 0; i < task->num_input_task_status; i++) { if (task->input_task_status[i].handle) { UNPIN_MEMORY(task->input_task_status_ext[i]); } } for (i = 0; i < task->num_output_task_status; i++) { if (task->output_task_status[i].handle) { UNPIN_MEMORY(task->output_task_status_ext[i]); } } for (i = 0; i < task->num_pointers; i++) { if (task->pointers[i].handle) { UNPIN_MEMORY(task->pointers_ext[i]); } } UNPIN_MEMORY(task->input_scalars_ext); UNPIN_MEMORY(task->output_scalars_ext); #undef UNPIN_MEMORY } static int pva_task_pin_mem(struct pva_submit_task *task) { u32 cvsram_base = nvcvnas_get_cvsram_base(); u32 cvsram_sz = nvcvnas_get_cvsram_size(); int err; int i; int j; #define PIN_MEMORY(dst_name, dmabuf_fd) \ do { \ if (!(dmabuf_fd)) { \ err = -EFAULT; \ goto err_map_handle; \ } \ \ ((dst_name).dmabuf) = dma_buf_get(dmabuf_fd); \ if (IS_ERR_OR_NULL((dst_name).dmabuf)) { \ (dst_name).dmabuf = NULL; \ err = -EFAULT; \ goto err_map_handle; \ } \ \ err = nvhost_buffer_submit_pin(task->buffers, \ &(dst_name).dmabuf, 1, \ &(dst_name).dma_addr, \ &(dst_name).size, \ &(dst_name).heap); \ if (err < 0) \ goto err_map_handle; \ } while (0) /* Pin input surfaces */ for (i = 0; i < task->num_input_surfaces; i++) { /* HACK: nvmap doesn't support CVNAS yet */ if (task->input_surfaces[i].surface_handle == 0) { u32 offset = task->input_surfaces[i].surface_offset; if (offset > cvsram_sz) { err = -EINVAL; goto err_map_handle; } task->input_surfaces_ext[i].dma_addr = cvsram_base; task->input_surfaces_ext[i].size = cvsram_sz - offset; task->input_surfaces_ext[i].heap = NVHOST_BUFFERS_HEAP_CVNAS; } else { PIN_MEMORY(task->input_surfaces_ext[i], task->input_surfaces[i].surface_handle); } if (task->input_surfaces[i].roi_handle) PIN_MEMORY(task->input_surface_rois_ext[i], task->input_surfaces[i].roi_handle); } /* ...and then output surfaces */ for (i = 0; i < task->num_output_surfaces; i++) { if (task->output_surfaces[i].surface_handle == 0) { /* HACK: To support the MISR test * Kernel is not suppose to convert the address being * passed from the UMD. 
So setting dma_addr as the * offset passed from KMD and size to 4MB */ u32 offset = task->output_surfaces[i].surface_offset; /* Only root is allowed to use offsets */ if (current_uid().val != 0) { err = -EINVAL; goto err_map_handle; } task->output_surfaces_ext[i].dma_addr = offset; task->output_surfaces_ext[i].size = 0x400000; } else { PIN_MEMORY(task->output_surfaces_ext[i], task->output_surfaces[i].surface_handle); } if (task->output_surfaces[i].roi_handle) PIN_MEMORY(task->output_surface_rois_ext[i], task->output_surfaces[i].roi_handle); } /* check fence semaphore_type before memory pin */ for (i = 0; i < task->num_prefences; i++) { if ((task->prefences[i].type == NVDEV_FENCE_TYPE_SEMAPHORE) && task->prefences[i].semaphore_handle) { PIN_MEMORY(task->prefences_sema_ext[i], task->prefences[i].semaphore_handle); } } /* check the generalized fence structures */ for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) { for (j = 0; j < task->num_pvafences[i]; j++) { struct nvpva_fence *fence = &task->pvafences[i][j]; if ((fence->fence.type == NVDEV_FENCE_TYPE_SEMAPHORE) && fence->fence.semaphore_handle) { PIN_MEMORY(task->pvafences_sema_ext[i][j], fence->fence.semaphore_handle); } } for (j = 0; j < task->num_pva_ts_buffers[i]; j++) { struct nvpva_fence *fence = &task->pvafences[i][j]; if (fence->ts_buf_ptr.handle) { PIN_MEMORY(task->pva_ts_buffers_ext[i][j], fence->ts_buf_ptr.handle); } } } /* Pin the input and output action status */ for (i = 0; i < task->num_input_task_status; i++) { if (task->input_task_status[i].handle) { PIN_MEMORY(task->input_task_status_ext[i], task->input_task_status[i].handle); } } for (i = 0; i < task->num_output_task_status; i++) { if (task->output_task_status[i].handle) { PIN_MEMORY(task->output_task_status_ext[i], task->output_task_status[i].handle); } } /* Pin task pointers */ for (i = 0; i < task->num_pointers; i++) { if (task->pointers[i].handle) { PIN_MEMORY(task->pointers_ext[i], task->pointers[i].handle); } } /* Pin rest */ if (task->input_scalars.handle) PIN_MEMORY(task->input_scalars_ext, task->input_scalars.handle); if (task->output_scalars.handle) PIN_MEMORY(task->output_scalars_ext, task->output_scalars.handle); #undef PIN_MEMORY return 0; err_map_handle: pva_task_unpin_mem(task); return err; } static void pva_task_write_surfaces(struct pva_task_surface *hw_surface, struct pva_surface *surface, struct pva_parameter_ext *surface_ext, struct pva_parameter_ext *roi_ext, unsigned int count) { int i; for (i = 0; i < count; i++) { hw_surface[i].address = surface_ext[i].dma_addr + surface[i].surface_offset; hw_surface[i].surface_size = surface_ext[i].size; hw_surface[i].roi_addr = roi_ext[i].dma_addr + surface[i].roi_offset; hw_surface[i].roi_size = roi_ext[i].size; hw_surface[i].format = surface[i].format; hw_surface[i].width = surface[i].width; hw_surface[i].height = surface[i].height; hw_surface[i].line_stride = surface[i].line_stride; hw_surface[i].plane_stride = surface[i].surface_stride; hw_surface[i].num_planes = surface[i].depth; hw_surface[i].layout = surface[i].layout; hw_surface[i].block_height_log2 = surface[i].block_height_log2; /* Set bit 39 for block linear surfaces in the address field. * This bit is used for indicating that memory subsystem should * convert the block linear format into common block linear format * that is used by other engines in Tegra. Thebit in itself is * dropped before making the address translation in SMMU. 
*/ if (surface[i].layout == PVA_TASK_SURFACE_LAYOUT_BLOCK_LINEAR) hw_surface[i].address |= PVA_BIT64(39); hw_surface[i].memory = surface_ext[i].heap; } } static inline int pva_task_write_atomic_op(u8 *base, u8 action) { *base = action; return 1; } static inline int pva_task_write_struct_ptr_op(u8 *base, u8 action, u64 addr, u16 val) { int i = 0; base[i++] = action; base[i++] = (u8)((addr >> 0) & 0xff); base[i++] = (u8)((addr >> 8) & 0xff); base[i++] = (u8)((addr >> 16) & 0xff); base[i++] = (u8)((addr >> 24) & 0xff); base[i++] = (u8)((addr >> 32) & 0xff); base[i++] = (u8)((addr >> 40) & 0xff); base[i++] = (u8)((addr >> 48) & 0xff); base[i++] = (u8)((addr >> 56) & 0xff); return i; } static inline int pva_task_write_ptr_16b_op(u8 *base, u8 action, u64 addr, u16 val) { int i = 0; base[i++] = action; base[i++] = (u8)((addr >> 0) & 0xff); base[i++] = (u8)((addr >> 8) & 0xff); base[i++] = (u8)((addr >> 16) & 0xff); base[i++] = (u8)((addr >> 24) & 0xff); base[i++] = (u8)((addr >> 32) & 0xff); base[i++] = (u8)((addr >> 40) & 0xff); base[i++] = (u8)((addr >> 48) & 0xff); base[i++] = (u8)((addr >> 56) & 0xff); base[i++] = (u8)((val >> 0) & 0xff); base[i++] = (u8)((val >> 8) & 0xff); return i; } static inline int pva_task_write_ptr_op(u8 *base, u8 action, u64 addr, u32 val) { int i = 0; base[i++] = action; base[i++] = (u8)((addr >> 0) & 0xff); base[i++] = (u8)((addr >> 8) & 0xff); base[i++] = (u8)((addr >> 16) & 0xff); base[i++] = (u8)((addr >> 24) & 0xff); base[i++] = (u8)((addr >> 32) & 0xff); base[i++] = (u8)((addr >> 40) & 0xff); base[i++] = (u8)((addr >> 48) & 0xff); base[i++] = (u8)((addr >> 56) & 0xff); base[i++] = (u8)((val >> 0) & 0xff); base[i++] = (u8)((val >> 8) & 0xff); base[i++] = (u8)((val >> 16) & 0xff); base[i++] = (u8)((val >> 24) & 0xff); return i; } static int pva_task_write_preactions(struct pva_submit_task *task, struct pva_hw_task *hw_task) { struct platform_device *host1x_pdev = to_platform_device(task->pva->pdev->dev.parent); u8 *hw_preactions = hw_task->preactions; int i = 0, j = 0, ptr = 0; u8 action_ts; u8 action_f; u32 increment; /* Add waits to preactions list */ for (i = 0; i < task->num_prefences; i++) { struct nvdev_fence *fence = task->prefences + i; switch (fence->type) { case NVDEV_FENCE_TYPE_SYNCPT: { dma_addr_t syncpt_addr = nvhost_syncpt_gos_address( task->pva->pdev, fence->syncpoint_index); if (!syncpt_addr) syncpt_addr = nvhost_syncpt_address( task->queue->vm_pdev, fence->syncpoint_index); ptr += pva_task_write_ptr_op(&hw_preactions[ptr], TASK_ACT_PTR_BLK_GTREQL, syncpt_addr, fence->syncpoint_value); break; } case NVDEV_FENCE_TYPE_SEMAPHORE: case NVDEV_FENCE_TYPE_SEMAPHORE_TS:{ ptr += pva_task_write_ptr_op(&hw_preactions[ptr], TASK_ACT_PTR_BLK_GTREQL, task->prefences_sema_ext[i].dma_addr + fence->semaphore_offset, fence->semaphore_value); break; } case NVDEV_FENCE_TYPE_SYNC_FD: { int thresh, id; dma_addr_t syncpt_addr; struct sync_fence *syncfd_fence; struct sync_pt *pt; struct nvhost_master *host = nvhost_get_host( task->pva->pdev); struct nvhost_syncpt *sp = &host->syncpt; if (!fence->sync_fd) break; syncfd_fence = nvhost_sync_fdget(fence->sync_fd); if (!syncfd_fence) break; for (j = 0; j < syncfd_fence->num_fences; j++) { pt = sync_pt_from_fence( syncfd_fence->cbs[j].sync_pt); if (!pt) break; id = nvhost_sync_pt_id(pt); thresh = nvhost_sync_pt_thresh(pt); /* validate the synpt ids */ if (!id || !nvhost_syncpt_is_valid_hw_pt(sp, id)) { sync_fence_put(syncfd_fence); break; } if (nvhost_syncpt_is_expired(sp, id, thresh)) continue; syncpt_addr = 
nvhost_syncpt_gos_address( task->pva->pdev, id); if (!syncpt_addr) syncpt_addr = nvhost_syncpt_address( task->queue->vm_pdev, id); ptr += pva_task_write_ptr_op( &hw_preactions[ptr], TASK_ACT_PTR_BLK_GTREQL, syncpt_addr, thresh); } break; } default: return -ENOSYS; } } for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) { increment = 0; switch (i) { case PVA_FENCE_SOT_V: action_ts = TASK_ACT_PTR_WRITE_SOT_V_TS; action_f = TASK_ACT_PTR_WRITE_VAL_SOT_V; increment = 1; break; case PVA_FENCE_SOT_R: action_ts = TASK_ACT_PTR_WRITE_SOT_R_TS; action_f = TASK_ACT_PTR_WRITE_VAL_SOT_R; increment = 1; break; default: action_ts = 0; action_f = 0; break; }; if ((action_ts == 0) || (task->num_pvafences[i] == 0)) continue; for (j = 0; j < task->num_pva_ts_buffers[i]; j++) { if (task->pvafences[i][j].ts_buf_ptr.handle) { int dif; dif = pva_task_write_ptr_op( &hw_preactions[ptr], action_ts, task->pva_ts_buffers_ext[i][j].dma_addr + task->pvafences[i][j].ts_buf_ptr.offset, 1U); ptr += dif; } } for (j = 0; j < task->num_pvafences[i]; j++) { struct nvdev_fence *fence = &task->pvafences[i][j].fence; u32 thresh; switch (fence->type) { case NVDEV_FENCE_TYPE_SYNCPT: { dma_addr_t syncpt_gos_addr = nvhost_syncpt_gos_address( task->pva->pdev, fence->syncpoint_index); dma_addr_t syncpt_addr = nvhost_syncpt_address( task->queue->vm_pdev, task->queue->syncpt_id); ptr += pva_task_write_ptr_op( &hw_preactions[ptr], action_f, syncpt_addr, 1U); task->fence_num += increment; /* Make a syncpoint increment */ if (syncpt_gos_addr) { thresh = nvhost_syncpt_read_maxval( host1x_pdev, task->queue->syncpt_id) + task->fence_num; ptr += pva_task_write_ptr_op( &hw_preactions[ptr], TASK_ACT_PTR_WRITE_VAL, syncpt_gos_addr, thresh); } break; } case NVDEV_FENCE_TYPE_SEMAPHORE: case NVDEV_FENCE_TYPE_SEMAPHORE_TS: { int dif; dif = pva_task_write_ptr_op(&hw_preactions[ptr], action_f, task->pvafences_sema_ext[i][j].dma_addr + fence->semaphore_offset, fence->semaphore_value); ptr += dif; break; } case NVDEV_FENCE_TYPE_SYNC_FD: /* TODO XXX*/ default: return -ENOSYS; } } } /* Perform input status checks */ for (i = 0; i < task->num_input_task_status; i++) { struct pva_status_handle *input_status = task->input_task_status + i; dma_addr_t input_status_addr = task->input_task_status_ext[i].dma_addr + input_status->offset; ptr += pva_task_write_ptr_16b_op( &hw_preactions[ptr], TASK_ACT_READ_STATUS, input_status_addr, 0); } ptr += pva_task_write_atomic_op(&hw_preactions[ptr], TASK_ACT_TERMINATE); /* Store the preaction list */ hw_task->preaction_list.offset = offsetof(struct pva_hw_task, preactions); hw_task->preaction_list.length = ptr; nvhost_dbg_info("preaction buffer alloted size %d: used size %d", INPUT_ACTION_BUFFER_SIZE, ptr); return 0; } static void pva_task_write_postactions(struct pva_submit_task *task, struct pva_hw_task *hw_task) { dma_addr_t syncpt_addr = nvhost_syncpt_address(task->queue->vm_pdev, task->queue->syncpt_id); dma_addr_t syncpt_gos_addr = nvhost_syncpt_gos_address(task->pva->pdev, task->queue->syncpt_id); u8 *hw_postactions = hw_task->postactions; int ptr = 0, i = 0; struct platform_device *host1x_pdev = to_platform_device(task->pva->pdev->dev.parent); dma_addr_t output_status_addr; u32 thresh; u8 action_ts; int j; /* Write Output action status */ for (i = 0; i < task->num_output_task_status; i++) { struct pva_status_handle *output_status = task->output_task_status + i; output_status_addr = task->output_task_status_ext[i].dma_addr + output_status->offset; ptr += pva_task_write_ptr_16b_op( &hw_postactions[ptr], TASK_ACT_WRITE_STATUS, 
output_status_addr, 1); } for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) { switch (i) { case PVA_FENCE_EOT_V: action_ts = TASK_ACT_PTR_WRITE_EOT_V_TS; break; case PVA_FENCE_EOT_R: action_ts = TASK_ACT_PTR_WRITE_EOT_R_TS; break; case PVA_FENCE_POST: action_ts = TASK_ACT_PTR_WRITE_TS; break; default: action_ts = 0; break; }; if (action_ts == 0) continue; for (j = 0; j < task->num_pva_ts_buffers[i]; j++) { if (task->pvafences[i][j].ts_buf_ptr.handle) { int dif; dif = pva_task_write_ptr_op( &hw_postactions[ptr], action_ts, task->pva_ts_buffers_ext[i][j].dma_addr + task->pvafences[i][j].ts_buf_ptr.offset, 1U); ptr += dif; } } } /* Add postactions list for semaphore */ j = PVA_FENCE_POST; for (i = 0; i < task->num_pvafences[j]; i++) { struct nvdev_fence *fence = &task->pvafences[j][i].fence; if (fence->type == NVDEV_FENCE_TYPE_SEMAPHORE) { ptr += pva_task_write_ptr_op(&hw_postactions[ptr], TASK_ACT_PTR_WRITE_VAL, task->pvafences_sema_ext[j][i].dma_addr + fence->semaphore_offset, fence->semaphore_value); } else if (fence->type == NVDEV_FENCE_TYPE_SEMAPHORE_TS) { /* * Timestamp will be filled by ucode hence making the * place holder for timestamp size, sizeof(u64). */ ptr += sizeof(u64) + pva_task_write_ptr_op( &hw_postactions[ptr], TASK_ACT_PTR_WRITE_VAL_TS, task->pvafences_sema_ext[j][i].dma_addr + fence->semaphore_offset, fence->semaphore_value); } } task->fence_num += 1; /* Make a syncpoint increment */ if (syncpt_gos_addr) { thresh = nvhost_syncpt_read_maxval( host1x_pdev, task->queue->syncpt_id) + task->fence_num; ptr += pva_task_write_ptr_op( &hw_postactions[ptr], TASK_ACT_PTR_WRITE_VAL, syncpt_gos_addr, thresh); } ptr += pva_task_write_ptr_op(&hw_postactions[ptr], TASK_ACT_PTR_WRITE_VAL, syncpt_addr, 1); output_status_addr = task->dma_addr + offsetof(struct pva_hw_task, statistics); ptr += pva_task_write_struct_ptr_op( &hw_postactions[ptr], TASK_ACT_PVA_STATISTICS, output_status_addr, 1); if (task->pva->vpu_perf_counters_enable) { ptr += pva_task_write_struct_ptr_op( &hw_postactions[ptr], TASK_ACT_PVA_VPU_PERF_COUNTERS, task->dma_addr + offsetof(struct pva_hw_task, vpu_perf_counters), 1); } ptr += pva_task_write_atomic_op(&hw_postactions[ptr], TASK_ACT_TERMINATE); /* Store the postaction list */ hw_task->postaction_list.offset = offsetof(struct pva_hw_task, postactions); hw_task->postaction_list.length = ptr; nvhost_dbg_info("postaction buffer alloted size %d: used size %d", OUTPUT_ACTION_BUFFER_SIZE, ptr); } static void pva_task_write_output_surfaces(struct pva_submit_task *task, struct pva_hw_task *hw_task) { struct pva_task_parameter_array *surface_parameter; if (task->num_output_surfaces == 0) return; surface_parameter = hw_task->output_parameter_array + hw_task->task.num_output_parameters; /* Write parameter descriptor */ surface_parameter->address = task->dma_addr + offsetof(struct pva_hw_task, output_surface_desc); surface_parameter->type = PVA_PARAM_SURFACE_LIST; surface_parameter->size = sizeof(struct pva_task_parameter_desc) + sizeof(struct pva_task_surface) * task->num_output_surfaces; hw_task->task.num_output_parameters++; /* Write the surface descriptor base information */ hw_task->output_surface_desc.num_parameters = task->num_output_surfaces; hw_task->output_surface_desc.reserved = 0; /* Write the output surfaces */ pva_task_write_surfaces(hw_task->output_surfaces, task->output_surfaces, task->output_surfaces_ext, task->output_surface_rois_ext, task->num_output_surfaces); } static void pva_task_write_input_surfaces(struct pva_submit_task *task, struct pva_hw_task *hw_task) { 
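	/* Mirrors pva_task_write_output_surfaces() for the input direction:
	 * append a PVA_PARAM_SURFACE_LIST entry to the input parameter array
	 * and fill in the input surface descriptor that follows it.
	 */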
struct pva_task_parameter_array *surface_parameter; if (task->num_input_surfaces == 0) return; surface_parameter = hw_task->input_parameter_array + hw_task->task.num_input_parameters; /* Write parameter descriptor */ surface_parameter->address = task->dma_addr + offsetof(struct pva_hw_task, input_surface_desc); surface_parameter->type = PVA_PARAM_SURFACE_LIST; surface_parameter->size = sizeof(struct pva_task_parameter_desc) + sizeof(struct pva_task_surface) * task->num_input_surfaces; hw_task->task.num_input_parameters++; /* Write the surface descriptor base information */ hw_task->input_surface_desc.num_parameters = task->num_input_surfaces; hw_task->input_surface_desc.reserved = 0; /* Write the input surfaces */ pva_task_write_surfaces(hw_task->input_surfaces, task->input_surfaces, task->input_surfaces_ext, task->input_surface_rois_ext, task->num_input_surfaces); } static void pva_task_write_non_surfaces(struct pva_submit_task *task, struct pva_hw_task *hw_task) { struct pva_task_parameter_array *hw_input_parameters = hw_task->input_parameter_array; struct pva_task_parameter_array *hw_output_parameters = hw_task->output_parameter_array; #define COPY_PARAMETER(target, name, name_ext, param_type, count) \ do { \ if ((name).handle) { \ target[(count)].address = (name_ext).dma_addr + \ (name).offset; \ target[(count)].size = (name_ext).size - \ (name).offset; \ target[(count)].type = (param_type); \ (count)++; \ } \ } while (0) COPY_PARAMETER(hw_input_parameters, task->input_scalars, task->input_scalars_ext, PVA_PARAM_SCALAR_LIST, hw_task->task.num_input_parameters); COPY_PARAMETER(hw_output_parameters, task->output_scalars, task->output_scalars_ext, PVA_PARAM_SCALAR_LIST, hw_task->task.num_output_parameters); #undef COPY_PARAMETER } static int pva_task_write_opaque_data(struct pva_submit_task *task, struct pva_hw_task *hw_task) { struct pva_task_parameter_array *opaque_parameter; struct pva_task_opaque_data_desc *opaque_desc; struct pva_parameter_ext *handle_ext; unsigned int primary_payload_offset; struct pva_memory_handle *handle; unsigned int pointer_list_offset; struct pva_task_pointer pointer; u8 *primary_payload, *pointers; unsigned int num_bytes; u64 aux, size, flags; unsigned int i; if (task->num_pointers == 0 && task->primary_payload_size == 0) return 0; /* Calculate size of the opaque data */ num_bytes = sizeof(struct pva_task_opaque_data_desc); num_bytes += task->primary_payload_size; num_bytes += sizeof(struct pva_task_pointer) * task->num_pointers; if (num_bytes > PVA_MAX_PRIMARY_PAYLOAD_SIZE) return -ENOMEM; /* Opaque parameter resides always in the input parameter block */ opaque_parameter = hw_task->input_parameter_array + hw_task->task.num_input_parameters; /* Write parameter descriptor */ opaque_parameter->address = task->dma_addr + offsetof(struct pva_hw_task, opaque_data); opaque_parameter->type = PVA_PARAM_OPAQUE_DATA; opaque_parameter->size = num_bytes; hw_task->task.num_input_parameters++; /* Determine offset to the primary_payload start */ primary_payload_offset = sizeof(struct pva_task_opaque_data_desc); primary_payload = hw_task->opaque_data + primary_payload_offset; /* Determine offset to the start of the pointer list */ pointer_list_offset = primary_payload_offset + task->primary_payload_size; pointers = hw_task->opaque_data + pointer_list_offset; /* Initialize the opaque data descriptor */ opaque_desc = (void *)hw_task->opaque_data; opaque_desc->primary_payload_size = task->primary_payload_size; /* Copy the primary_payload */ memcpy(primary_payload, 
task->primary_payload, task->primary_payload_size); /* Copy the pointers */ for (i = 0; i < task->num_pointers; i++) { handle = task->pointers + i; handle_ext = task->pointers_ext + i; size = handle_ext->size & PVA_TASK_POINTER_AUX_SIZE_MASK; if (size != handle_ext->size) { return -EINVAL; } flags = 0; if (handle_ext->heap == NVHOST_BUFFERS_HEAP_CVNAS) flags |= PVA_TASK_POINTER_AUX_FLAGS_CVNAS; aux = (size << PVA_TASK_POINTER_AUX_SIZE_SHIFT) | (flags << PVA_TASK_POINTER_AUX_FLAGS_SHIFT); pointer.address = handle_ext->dma_addr + handle->offset; pointer.aux = aux; /* The data might be unaligned. copy it byte-by-byte */ memcpy(pointers, &pointer, sizeof(pointer)); pointers += sizeof(pointer); } return 0; } static int pva_task_write(struct pva_submit_task *task, bool atomic) { struct pva_hw_task *hw_task; int err; int i; /* Task start from the memory base */ hw_task = task->va; /* Write the preaction list */ err = pva_task_write_preactions(task, hw_task); if (err < 0) return err; /* Write the postaction list */ pva_task_write_postactions(task, hw_task); /* Initialize parameters */ pva_task_write_non_surfaces(task, hw_task); /* Write the pointers and the primary payload */ err = pva_task_write_opaque_data(task, hw_task); if (err < 0) return err; /* Write input surfaces */ pva_task_write_input_surfaces(task, hw_task); /* Write output surfaces */ pva_task_write_output_surfaces(task, hw_task); hw_task->task.input_parameters = offsetof(struct pva_hw_task, input_parameter_array); hw_task->task.output_parameters = offsetof(struct pva_hw_task, output_parameter_array); hw_task->task.gen_task.versionid = TASK_VERSION_ID; hw_task->task.gen_task.engineid = PVA_ENGINE_ID; hw_task->task.gen_task.sequence = 0; hw_task->task.gen_task.length = offsetof(struct pva_hw_task, input_surface_desc); hw_task->task.gen_task.n_preaction_lists = 1; hw_task->task.gen_task.preaction_lists_p = offsetof(struct pva_hw_task, preaction_list); hw_task->task.gen_task.n_postaction_lists = 1; hw_task->task.gen_task.postaction_lists_p = offsetof(struct pva_hw_task, postaction_list); hw_task->task.runlist_version = PVA_TASK_VERSION_ID; hw_task->task.queue_id = task->queue->id; hw_task->task.flags = atomic ? PVA_TASK_FL_ATOMIC : 0; hw_task->task.operation = task->operation; hw_task->task.timeout = task->timeout; /* Set flags to debug the vpu application if debugfs node is set for vpu id */ if (task->pva->dbg_vpu_app_id == task->operation) hw_task->task.flags |= PVA_TASK_FL_VPU_DEBUG; /* This should be delivered from userspace - hard-code * until the mechanism is in place. 
*/ hw_task->task.operation_version = 1; for (i = 0; i < roundup(sizeof(struct pva_hw_task), 16) / 16; i++) { u8 *task_va = task->va; u32 base = i * 16; nvhost_dbg_info("%02x, %02x, %02x, %02x, %02x, %02x, %02x %02x, " "%02x, %02x, %02x, %02x, %02x, %02x, %02x %02x", task_va[base], task_va[base + 1], task_va[base + 2], task_va[base + 3], task_va[base + 4], task_va[base + 5], task_va[base + 6], task_va[base + 7], task_va[base + 8], task_va[base + 9], task_va[base + 10], task_va[base + 11], task_va[base + 12], task_va[base + 13], task_va[base + 14], task_va[base + 15]); } return 0; } #ifdef CONFIG_EVENTLIB static void pva_eventlib_record_perf_counter(struct platform_device *pdev, u32 syncpt_id, u32 syncpt_thresh, u32 operation, u32 tag, u32 count, u32 sum, u64 sum_squared, u32 min, u32 max, u64 timestamp_begin, u64 timestamp_end) { struct nvhost_device_data *pdata = platform_get_drvdata(pdev); struct nvhost_vpu_perf_counter perf_counter; if (!pdata->eventlib_id) return; perf_counter.class_id = pdata->class; perf_counter.syncpt_id = syncpt_id; perf_counter.syncpt_thresh = syncpt_thresh; perf_counter.operation = operation; perf_counter.tag = tag; perf_counter.count = count; perf_counter.average = sum / count; perf_counter.variance = ((u64)count * sum_squared - (u64)sum * (u64)sum) / (u64)count / (u64)count; perf_counter.minimum = min; perf_counter.maximum = max; keventlib_write(pdata->eventlib_id, &perf_counter, sizeof(perf_counter), NVHOST_VPU_PERF_COUNTER_BEGIN, timestamp_begin); keventlib_write(pdata->eventlib_id, &perf_counter, sizeof(perf_counter), NVHOST_VPU_PERF_COUNTER_END, timestamp_end); } static void pva_eventlib_record_r5_states(struct platform_device *pdev, u32 syncpt_id, u32 syncpt_thresh, struct pva_task_statistics *stats, u32 operation) { struct nvhost_device_data *pdata = platform_get_drvdata(pdev); struct nvhost_pva_task_state state; if (!pdata->eventlib_id) return; state.class_id = pdata->class; state.syncpt_id = syncpt_id; state.syncpt_thresh = syncpt_thresh; state.operation = operation; keventlib_write(pdata->eventlib_id, &state, sizeof(state), NVHOST_PVA_QUEUE_BEGIN, stats->queued_time); keventlib_write(pdata->eventlib_id, &state, sizeof(state), NVHOST_PVA_QUEUE_END, stats->vpu_assigned_time); keventlib_write(pdata->eventlib_id, &state, sizeof(state), NVHOST_PVA_PREPARE_BEGIN, stats->vpu_assigned_time); keventlib_write(pdata->eventlib_id, &state, sizeof(state), NVHOST_PVA_PREPARE_END, stats->vpu_start_time); keventlib_write(pdata->eventlib_id, &state, sizeof(state), stats->vpu_assigned == 0 ? NVHOST_PVA_VPU0_BEGIN : NVHOST_PVA_VPU1_BEGIN, stats->vpu_start_time); keventlib_write(pdata->eventlib_id, &state, sizeof(state), stats->vpu_assigned == 0 ? 
			NVHOST_PVA_VPU0_END : NVHOST_PVA_VPU1_END,
			stats->vpu_complete_time);
	keventlib_write(pdata->eventlib_id,
			&state,
			sizeof(state),
			NVHOST_PVA_POST_BEGIN,
			stats->vpu_complete_time);
	keventlib_write(pdata->eventlib_id,
			&state,
			sizeof(state),
			NVHOST_PVA_POST_END,
			stats->complete_time);
}
#else
static void pva_eventlib_record_perf_counter(struct platform_device *pdev,
					     u32 syncpt_id, u32 syncpt_thresh,
					     u32 operation, u32 tag,
					     u32 count, u32 sum,
					     u64 sum_squared, u32 min, u32 max,
					     u64 timestamp_begin,
					     u64 timestamp_end)
{
}

/* Keep the stub signature in sync with the CONFIG_EVENTLIB variant above */
static void pva_eventlib_record_r5_states(struct platform_device *pdev,
					  u32 syncpt_id, u32 syncpt_thresh,
					  struct pva_task_statistics *stats,
					  u32 operation)
{
}
#endif

static void pva_task_update(struct pva_submit_task *task)
{
	struct nvhost_queue *queue = task->queue;
	struct pva_hw_task *hw_task = task->va;
	struct pva *pva = task->pva;
	struct platform_device *pdev = pva->pdev;
	struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
	struct pva_task_statistics *stats = &hw_task->statistics;
	struct pva_task_vpu_perf_counter *perf;
	u32 idx;

	trace_nvhost_task_timestamp(dev_name(&pdev->dev),
				    pdata->class,
				    queue->syncpt_id,
				    task->syncpt_thresh,
				    stats->vpu_assigned_time,
				    stats->complete_time);
	nvhost_eventlib_log_task(pdev,
				 queue->syncpt_id,
				 task->syncpt_thresh,
				 stats->vpu_assigned_time,
				 stats->complete_time);
	nvhost_dbg_info("Completed task %p (0x%llx), start_time=%llu, end_time=%llu",
			task, (u64)task->dma_addr,
			stats->vpu_assigned_time,
			stats->complete_time);

	trace_nvhost_pva_task_stats(pdev->name,
				    stats->queued_time,
				    stats->head_time,
				    stats->input_actions_complete,
				    stats->vpu_assigned_time,
				    stats->vpu_start_time,
				    stats->vpu_complete_time,
				    stats->complete_time,
				    stats->vpu_assigned);

	nvhost_dbg_info("QueuedTime %llu, HeadTime %llu, "
			"InputActionComplete %llu, VpuAssignedTime %llu, "
			"VpuStartTime %llu, VpuCompleteTime %llu, "
			"TaskCompleteTime %llu, AssignedVpu %d",
			stats->queued_time, stats->head_time,
			stats->input_actions_complete,
			stats->vpu_assigned_time, stats->vpu_start_time,
			stats->vpu_complete_time, stats->complete_time,
			stats->vpu_assigned);

	pva_eventlib_record_r5_states(pdev, queue->syncpt_id,
				      task->syncpt_thresh, stats,
				      task->operation);

	/* Record task postfences */
	nvhost_eventlib_log_fences(pdev,
				   queue->syncpt_id,
				   task->syncpt_thresh,
				   &(task->pvafences[PVA_FENCE_POST][0].fence),
				   1,
				   NVDEV_FENCE_KIND_POST,
				   stats->complete_time);

	if (task->pva->vpu_perf_counters_enable) {
		for (idx = 0; idx < PVA_TASK_VPU_NUM_PERF_COUNTERS; idx++) {
			perf = &hw_task->vpu_perf_counters[idx];
			if (perf->count != 0) {
				trace_nvhost_pva_task_vpu_perf(
					pdev->name, idx, perf->count,
					perf->sum, perf->sum_squared,
					perf->min, perf->max);
				pva_eventlib_record_perf_counter(
					pdev, queue->syncpt_id,
					task->syncpt_thresh,
					task->operation, idx,
					perf->count, perf->sum,
					perf->sum_squared,
					perf->min, perf->max,
					stats->vpu_assigned_time,
					stats->complete_time);
			}
		}
	}

	/* Unpin job memory.
PVA shouldn't be using it anymore */ pva_task_unpin_mem(task); /* Drop PM runtime reference of PVA */ nvhost_module_idle(task->pva->pdev); /* remove the task from the queue */ list_del(&task->node); /* Not linked anymore so drop the reference */ kref_put(&task->ref, pva_task_free); /* Drop queue reference to allow reusing it */ nvhost_queue_put(queue); } void pva_task_free(struct kref *ref) { struct pva_submit_task *task = container_of(ref, struct pva_submit_task, ref); /* Release memory that was allocated for the task */ nvhost_queue_free_task_memory(task->queue, task->pool_index); } static void pva_queue_update(void *priv, int nr_completed) { struct nvhost_queue *queue = priv; struct pva_submit_task *task, *n; struct list_head completed; INIT_LIST_HEAD(&completed); /* Move completed tasks to a separate list */ mutex_lock(&queue->list_lock); list_for_each_entry_safe(task, n, &queue->tasklist, node) { if (!nvhost_syncpt_is_expired_ext(queue->pool->pdev, queue->syncpt_id, task->syncpt_thresh)) break; list_move_tail(&task->node, &completed); } mutex_unlock(&queue->list_lock); /* Handle completed tasks */ list_for_each_entry_safe(task, n, &completed, node) pva_task_update(task); } static void pva_queue_dump(struct nvhost_queue *queue, struct seq_file *s) { struct pva_submit_task *task; int i = 0; int k = PVA_FENCE_POST; seq_printf(s, "Queue %u, Tasks\n", queue->id); mutex_lock(&queue->list_lock); list_for_each_entry(task, &queue->tasklist, node) { int j; seq_printf(s, " #%u: Operation = %u\n", i++, task->operation); for (j = 0; j < task->num_prefences; j++) seq_printf(s, " prefence %d: \n\t" "syncpoint_index=%u, syncpoint_value=%u\n", j, task->prefences[j].syncpoint_index, task->prefences[j].syncpoint_value); for (j = 0; j < task->num_pvafences[k]; j++) seq_printf(s, " postfence %d: \n\t" "syncpoint_index=%u, syncpoint_value=%u\n", j, task->pvafences[k][j].fence.syncpoint_index, task->pvafences[k][j].fence.syncpoint_value); } mutex_unlock(&queue->list_lock); } static int pva_task_submit_channel_ccq(struct pva_submit_task *task, u32 *thresh) { struct nvhost_queue *queue = task->queue; u64 fifo_flags = PVA_FIFO_INT_ON_ERR; u64 fifo_cmd = pva_fifo_submit(queue->id, task->dma_addr, fifo_flags); u32 syncpt_wait_ids[PVA_MAX_PREFENCES]; u32 syncpt_wait_thresh[PVA_MAX_PREFENCES]; unsigned int i; u32 cmdbuf[4]; int err = 0; /* Pick up fences... 
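	 * In channel CCQ mode the prefence waits are pushed to host1x, so
	 * only syncpoint-backed prefences can be handled on this path.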
*/ for (i = 0; i < task->num_prefences; i++) { /* ..and ensure that we have only syncpoints present */ if (task->prefences[i].type != NVDEV_FENCE_TYPE_SYNCPT) return -EINVAL; /* Put fences into a separate array */ syncpt_wait_ids[i] = task->prefences[i].syncpoint_index; syncpt_wait_thresh[i] = task->prefences[i].syncpoint_value; } /* A simple command buffer: Write two words into the ccq * register */ cmdbuf[0] = nvhost_opcode_setpayload(2); cmdbuf[1] = nvhost_opcode_nonincr_w(cfg_ccq_r() >> 2); cmdbuf[2] = (u32)(fifo_cmd >> 32); cmdbuf[3] = (u32)(fifo_cmd & 0xffffffff); /* Submit the command buffer and waits to channel */ err = nvhost_queue_submit_to_host1x(queue, cmdbuf, ARRAY_SIZE(cmdbuf), 1, syncpt_wait_ids, syncpt_wait_thresh, task->num_prefences, thresh); return err; } static int pva_task_submit_mmio_ccq(struct pva_submit_task *task, u32 *thresh) { struct platform_device *host1x_pdev = to_platform_device(task->pva->pdev->dev.parent); struct nvhost_queue *queue = task->queue; u32 old_maxval, new_maxval; u64 fifo_flags = PVA_FIFO_INT_ON_ERR; u64 fifo_cmd = pva_fifo_submit(queue->id, task->dma_addr, fifo_flags); int err = 0; /* Increment syncpoint to capture threshold */ old_maxval = nvhost_syncpt_read_maxval(host1x_pdev, queue->syncpt_id); new_maxval = nvhost_syncpt_incr_max_ext(host1x_pdev, queue->syncpt_id, task->fence_num); err = pva_ccq_send(task->pva, fifo_cmd); if (err < 0) goto err_submit; *thresh = new_maxval; return 0; err_submit: nvhost_syncpt_set_maxval(host1x_pdev, queue->syncpt_id, old_maxval); return err; } static int pva_task_submit_mailbox(struct pva_submit_task *task, u32 *thresh) { struct platform_device *host1x_pdev = to_platform_device(task->pva->pdev->dev.parent); struct nvhost_queue *queue = task->queue; struct pva_mailbox_status_regs status; u32 old_maxval, new_maxval; struct pva_cmd cmd; u32 flags, nregs; int err = 0; /* Construct submit command */ flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE; nregs = pva_cmd_submit(&cmd, queue->id, task->dma_addr, flags); /* Increment syncpoint to capture threshold */ old_maxval = nvhost_syncpt_read_maxval(host1x_pdev, queue->syncpt_id); new_maxval = nvhost_syncpt_incr_max_ext(host1x_pdev, queue->syncpt_id, task->fence_num); /* Submit request to PVA and wait for response */ err = pva_mailbox_send_cmd_sync(task->pva, &cmd, nregs, &status); if (err < 0) { nvhost_warn(&task->pva->pdev->dev, "Failed to submit task: %d", err); goto err_submit; } /* Ensure that response is valid */ if (status.error != PVA_ERR_NO_ERROR) { nvhost_warn(&task->pva->pdev->dev, "PVA task rejected: %u", status.error); err = -EINVAL; goto err_submit; } *thresh = new_maxval; return 0; err_submit: nvhost_syncpt_set_maxval(host1x_pdev, queue->syncpt_id, old_maxval); return err; } static int pva_task_submit(struct pva_submit_task *task, u32 *task_thresh) { struct platform_device *host1x_pdev = to_platform_device(task->pva->pdev->dev.parent); struct nvhost_queue *queue = task->queue; u32 thresh = 0; u64 timestamp; int err = 0; nvhost_dbg_info("Submitting task %p (0x%llx)", task, (u64)task->dma_addr); /* Get a reference of the queue to avoid it being reused. It * gets freed in the callback... */ nvhost_queue_get(queue); /* Turn on the hardware */ err = nvhost_module_busy(task->pva->pdev); if (err) goto err_module_busy; /* * TSC timestamp is same as CNTVCT. Task statistics are being * reported in TSC ticks. 
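	 * Sampling CNTVCT on the CPU therefore puts the submit timestamp
	 * logged below on the same time base as the statistics reported
	 * back by the firmware.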
	 */
	timestamp = arch_counter_get_cntvct();

	/* Choose the submit policy based on the mode */
	switch (task->pva->submit_mode) {
	case PVA_SUBMIT_MODE_MAILBOX:
		err = pva_task_submit_mailbox(task, &thresh);
		break;

	case PVA_SUBMIT_MODE_MMIO_CCQ:
		err = pva_task_submit_mmio_ccq(task, &thresh);
		break;

	case PVA_SUBMIT_MODE_CHANNEL_CCQ:
		err = pva_task_submit_channel_ccq(task, &thresh);
		break;
	}

	if (err < 0)
		goto err_submit;

	/* Record task prefences */
	nvhost_eventlib_log_fences(task->pva->pdev,
				   queue->syncpt_id,
				   thresh,
				   task->prefences,
				   task->num_prefences,
				   NVDEV_FENCE_KIND_PRE,
				   timestamp);

	nvhost_eventlib_log_submit(task->pva->pdev,
				   queue->syncpt_id,
				   thresh,
				   timestamp);

	task->syncpt_thresh = thresh;

	nvhost_dbg_info("Postfence id=%u, value=%u",
			queue->syncpt_id, thresh);

	*task_thresh = thresh;

	/* Going to be linked so obtain the reference */
	kref_get(&task->ref);

	/*
	 * Tasks in the queue list can be modified by the interrupt handler.
	 * Adding the task into the list must be the last step before
	 * registering the interrupt handler.
	 */
	mutex_lock(&queue->list_lock);
	list_add_tail(&task->node, &queue->tasklist);
	mutex_unlock(&queue->list_lock);

	/*
	 * Register the interrupt handler. This must be done after adding
	 * the tasks into the queue since otherwise we may miss the completion
	 * event.
	 */
	WARN_ON(nvhost_intr_register_notifier(host1x_pdev,
					      queue->syncpt_id, thresh,
					      pva_queue_update, queue));

	return err;

err_submit:
	nvhost_module_idle(task->pva->pdev);
err_module_busy:
	nvhost_queue_put(queue);
	return err;
}

static int pva_queue_submit(struct nvhost_queue *queue, void *args)
{
	struct pva_submit_tasks *task_header = args;
	int err = 0;
	int i;

	for (i = 0; i < task_header->num_tasks; i++) {
		struct pva_submit_task *task = task_header->tasks[i];
		u32 *thresh = &task_header->task_thresh[i];

		task->fence_num = 0;

		/* First, dump the task that we are submitting */
		pva_task_dump(task);

		/* Pin job memory */
		err = pva_task_pin_mem(task);
		if (err < 0)
			break;

		/* Write the task data and bail out if that fails */
		err = pva_task_write(task, false);
		if (err < 0)
			break;

		err = pva_task_submit(task, thresh);
		if (err < 0)
			break;
	}

	return err;
}

static int pva_queue_set_attribute(struct nvhost_queue *queue, void *args)
{
	uint32_t flags = PVA_CMD_INT_ON_ERR | PVA_CMD_INT_ON_COMPLETE;
	struct pva_queue_set_attribute *set_attr = args;
	struct pva_queue_attribute *attr = set_attr->attr;
	struct pva_mailbox_status_regs status;
	struct pva_cmd cmd;
	int err = 0;
	u32 nregs;

	nregs = pva_cmd_set_queue_attributes(&cmd, queue->id, attr->id,
					     attr->value, flags);

	/* Submit request to PVA and wait for response */
	if (set_attr->bootup)
		err = pva_mailbox_send_cmd_sync_locked(set_attr->pva, &cmd,
						       nregs, &status);
	else
		err = pva_mailbox_send_cmd_sync(set_attr->pva, &cmd,
						nregs, &status);

	if (err < 0) {
		nvhost_warn(&set_attr->pva->pdev->dev,
			    "Failed to set attributes: %d\n", err);
		goto end;
	}

	/* Ensure that response is valid */
	if (status.error != PVA_ERR_NO_ERROR) {
		nvhost_warn(&set_attr->pva->pdev->dev,
			    "PVA Q attribute rejected: %u\n", status.error);
		err = -EINVAL;
	}

end:
	return err;
}

static void pva_queue_cleanup_fence(struct nvdev_fence *fence,
				    struct pva_parameter_ext *fence_ext)
{
	struct dma_buf *dmabuf;
	u8 *dmabuf_cpuva;
	u32 *fence_cpuva;

	if (fence->type != NVDEV_FENCE_TYPE_SEMAPHORE)
		return;

	/* The semaphore word is written with a 32-bit store, so the offset
	 * must be 4-byte aligned.
	 */
	if (fence->semaphore_offset % 4)
		return;

	dmabuf = fence_ext->dmabuf;
	dmabuf_cpuva = dma_buf_vmap(dmabuf);
	if (!dmabuf_cpuva)
		return;

	fence_cpuva = (void *)&dmabuf_cpuva[fence->semaphore_offset];
	*fence_cpuva = fence->semaphore_value;

	dma_buf_vunmap(dmabuf, dmabuf_cpuva);
}

static void
pva_queue_cleanup_status(struct pva_status_handle *status_h,
			 struct pva_parameter_ext *status_h_ext)
{
	struct dma_buf *dmabuf = status_h_ext->dmabuf;
	u8 *dmabuf_cpuva = dma_buf_vmap(dmabuf);
	struct nvhost_notification *status_ptr;

	if (!dmabuf_cpuva)
		return;

	status_ptr = (void *)&dmabuf_cpuva[status_h->offset];
	status_ptr->status = 0x8888;

	dma_buf_vunmap(dmabuf, dmabuf_cpuva);
}

static void pva_queue_cleanup(struct nvhost_queue *queue,
			      struct pva_submit_task *task)
{
	struct platform_device *pdev = queue->pool->pdev;
	struct nvhost_master *host = nvhost_get_host(pdev);
	bool expired = nvhost_syncpt_is_expired(&host->syncpt,
						queue->syncpt_id,
						task->syncpt_thresh);
	unsigned int i;
	unsigned int j;

	/*
	 * Ensure that there won't be communication with PVA for
	 * checking the task status
	 */
	task->invalid = true;

	/* Ignore expired fences */
	if (expired)
		return;

	/* Write each output task status first */
	for (i = 0; i < task->num_output_task_status; i++)
		pva_queue_cleanup_status(task->output_task_status + i,
					 task->output_task_status_ext + i);

	/* Finish up non-syncpoint fences */
	for (i = 0; i < PVA_MAX_FENCE_TYPES; i++) {
		for (j = 0; j < task->num_pvafences[i]; j++) {
			pva_queue_cleanup_fence(&task->pvafences[i][j].fence,
						&task->pvafences_sema_ext[i][j]);
		}
	}

	/* Finish syncpoint increments to release waiters */
	nvhost_syncpt_cpu_incr_ext(pdev, queue->syncpt_id);
}

static int pva_queue_abort(struct nvhost_queue *queue)
{
	struct pva_submit_task *task;

	mutex_lock(&queue->list_lock);
	list_for_each_entry(task, &queue->tasklist, node)
		pva_queue_cleanup(queue, task);
	mutex_unlock(&queue->list_lock);

	return 0;
}

struct nvhost_queue_ops pva_queue_ops = {
	.abort = pva_queue_abort,
	.submit = pva_queue_submit,
	.get_task_size = pva_task_get_memsize,
	.dump = pva_queue_dump,
	.set_attribute = pva_queue_set_attribute,
};