/*
 * drivers/misc/tegra-profiler/carmel_pmu.c
 *
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/errno.h>
#include <linux/topology.h>

#include <soc/tegra/chip-id.h>

#include <asm/sysreg.h>

#include "carmel_pmu.h"
#include "quadd.h"
#include "debug.h"

/*
 * Some parts of this code are taken from platform/tegra/tegra19_perf_uncore.c
 * and modified for tegra-profiler.
 */

/* Global registers */
#define SYS_NV_PMSELR_EL0	sys_reg(3, 3, 15, 5, 1)

/* Unit registers */
#define SYS_NV_PMCNTENSET_EL0	sys_reg(3, 3, 15, 4, 0)
#define SYS_NV_PMCNTENCLR_EL0	sys_reg(3, 3, 15, 4, 1)
#define SYS_NV_PMOVSSET_EL0	sys_reg(3, 3, 15, 4, 2)
#define SYS_NV_PMOVSCLR_EL0	sys_reg(3, 3, 15, 4, 3)
#define SYS_NV_PMCR_EL0		sys_reg(3, 3, 15, 4, 4)
#define SYS_NV_PMINTENSET_EL1	sys_reg(3, 0, 15, 2, 0)
#define SYS_NV_PMINTENCLR_EL1	sys_reg(3, 0, 15, 2, 1)

/* Counter registers */
#define SYS_NV_PMEVCNTR0_EL0	sys_reg(3, 3, 15, 0, 0)
#define SYS_NV_PMEVCNTR1_EL0	sys_reg(3, 3, 15, 0, 1)
#define SYS_NV_PMEVTYPER0_EL0	sys_reg(3, 3, 15, 2, 0)
#define SYS_NV_PMEVTYPER1_EL0	sys_reg(3, 3, 15, 2, 1)

/*
 * Format for raw events is 0xEEU
 * EE: event number
 * U: unit (0-3 for L2s, 4 for uncore)
 * eg. all L2D_CACHE: r160,r161,r162,r163
 */
#define CARMEL_UNIT(__id)	(__id & 0xf)
#define CARMEL_EVENT(__id)	(__id >> 4)
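
/*
 * Worked example of the encoding above: raw event r160 selects event 0x16
 * (L2D_CACHE) on L2 unit 0, and r2b4 would select event 0x2B (L3D_CACHE)
 * on unit 4, i.e. the SCF uncore unit.
 */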

#define L2D_CACHE		0x16
#define L2D_CACHE_REFILL	0x17
#define L2D_CACHE_WB		0x18

#define BUS_ACCESS		0x19
#define BUS_CYCLES		0x1D
#define L3D_CACHE_ALLOCATE	0x29
#define L3D_CACHE_REFILL	0x2A
#define L3D_CACHE		0x2B
#define L3D_CACHE_WB		0x2C

#define L2D_CACHE_LD		0x50
#define L2D_CACHE_ST		0x51
#define L2D_CACHE_REFILL_LD	0x52
#define L2D_CACHE_REFILL_ST	0x53
#define L2D_CACHE_REFILL_VICTIM	0x56

#define L2D_PREFETCH_C0		0xC5
#define L2D_PREFETCH_C1		0xC6

#define NV_INT_START		0x200
#define NV_INT_END		0x208

#define CARMEL_PMCR_E		(0x1 << 0) /* Enable all counters */
#define CARMEL_PMCR_P		(0x1 << 1) /* Reset all counters */

/* Counters reading CTR_INVAL are in a powered down unit */
#define CTR_INVAL		0xffffffff

/* L2 units in the Carmel CCPLEX */
#define NUM_L2S			0x4

/* Carmel uncore perfmon supports two counters per unit */
#define UNIT_CTRS		0x2

struct cntr_info {
	u32 prev_val;
	u32 id_raw;
	u32 id_hw;
};

struct carmel_unit {
	u32 id;
	u32 pmselr;
	bool is_used;
	bool is_available;
	struct cntr_info cntrs[UNIT_CTRS];
	DECLARE_BITMAP(used_ctrs, UNIT_CTRS);
	struct list_head next;
};

static struct carmel_pmu_ctx {
	struct carmel_unit clusters[NUM_L2S];
	struct carmel_unit scf;
	DECLARE_BITMAP(used_units, NUM_L2S + 1);
	struct list_head units;
} ctx;
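
/*
 * The uncore perfmon registers are banked per unit: set_unit() writes
 * NV_PMSELR_EL0 so that the NV_PM* accesses that follow target the chosen
 * L2 cluster or the SCF (get_unit_pmselr() combines a group id and a unit
 * number into the selector value).
 */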
static inline void set_unit(struct carmel_unit *unit)
{
	write_sysreg_s(unit->pmselr, SYS_NV_PMSELR_EL0);
	isb();
}

static inline u32 get_unit_pmselr(u32 g, u32 u)
{
	return ((g << 8) | u);
}

static inline void set_event_type(u32 val, u32 idx)
{
	if (idx == 0)
		write_sysreg_s(val, SYS_NV_PMEVTYPER0_EL0);
	else if (idx == 1)
		write_sysreg_s(val, SYS_NV_PMEVTYPER1_EL0);

	isb();
}

static inline u32 read_cntr(u32 idx)
{
	u32 val = 0;

	if (idx == 0)
		val = read_sysreg_s(SYS_NV_PMEVCNTR0_EL0);
	else if (idx == 1)
		val = read_sysreg_s(SYS_NV_PMEVCNTR1_EL0);

	return val;
}

static inline void write_cntr(u32 val, u32 idx)
{
	if (idx == 0)
		write_sysreg_s(val, SYS_NV_PMEVCNTR0_EL0);
	else if (idx == 1)
		write_sysreg_s(val, SYS_NV_PMEVCNTR1_EL0);
}

static int set_event(u32 val, u32 idx)
{
	u32 __val;

	set_event_type(val, idx);
	write_cntr(0, idx);
	isb();

	/* If counter is stuck at CTR_INVAL, the unit is powered down */
	__val = read_cntr(idx);
	if (__val == CTR_INVAL)
		return -ENODEV;

	return 0;
}
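
/*
 * start_unit()/stop_unit() touch only the counters marked in used_ctrs:
 * they set or clear the per-counter enable bits and then write the
 * unit-wide enable (CARMEL_PMCR_E) or reset (CARMEL_PMCR_P) bit.
 */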
static void start_unit(struct carmel_unit *unit)
{
	unsigned long idx = 0;

	if (bitmap_empty(unit->used_ctrs, UNIT_CTRS))
		return;

	set_unit(unit);

	while (idx < UNIT_CTRS) {
		idx = find_next_bit(unit->used_ctrs, UNIT_CTRS, idx);
		if (idx != UNIT_CTRS) {
			unit->cntrs[idx].prev_val = 0;
			write_sysreg_s(BIT(idx), SYS_NV_PMCNTENSET_EL0);
		}
		idx++;
	}

	write_sysreg_s(CARMEL_PMCR_E, SYS_NV_PMCR_EL0);
}

static void stop_unit(struct carmel_unit *unit)
{
	unsigned long idx = 0;

	if (bitmap_empty(unit->used_ctrs, UNIT_CTRS))
		return;

	set_unit(unit);

	while (idx < UNIT_CTRS) {
		idx = find_next_bit(unit->used_ctrs, UNIT_CTRS, idx);
		if (idx != UNIT_CTRS)
			write_sysreg_s(BIT(idx), SYS_NV_PMCNTENCLR_EL0);
		idx++;
	}

	write_sysreg_s(CARMEL_PMCR_P, SYS_NV_PMCR_EL0);
}
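
/*
 * Read the in-use counters of one unit into the events array, computing a
 * 32-bit wrap-aware delta against the previously seen value. A counter that
 * reads CTR_INVAL (powered-down unit) is reported with its last known value,
 * so its delta is zero.
 */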
static int
read_unit_cntrs(struct carmel_unit *unit,
		struct quadd_event_data *events, int max)
{
	unsigned long idx = 0;
	u32 val, prev_val, delta;
	struct cntr_info *cntr;
	struct quadd_event_data *curr, *end;

	if (unlikely(bitmap_empty(unit->used_ctrs, UNIT_CTRS)))
		return 0;

	curr = events;
	end = events + max;

	set_unit(unit);

	while (idx < UNIT_CTRS && curr < end) {
		idx = find_next_bit(unit->used_ctrs, UNIT_CTRS, idx);
		if (idx != UNIT_CTRS) {
			cntr = &unit->cntrs[idx];
			val = read_cntr(idx);
			if (val == CTR_INVAL)
				val = cntr->prev_val;

			curr->event_source =
				QUADD_EVENT_SOURCE_CARMEL_UNCORE_PMU;
			curr->max_count = U32_MAX;

			curr->event.type = QUADD_EVENT_TYPE_RAW_CARMEL_UNCORE;
			curr->event.id = cntr->id_raw;

			prev_val = cntr->prev_val;

			if (prev_val <= val)
				delta = val - prev_val;
			else
				delta = U32_MAX - prev_val + val;

			curr->val = val;
			curr->prev_val = prev_val;
			curr->delta = delta;

			cntr->prev_val = val;
			curr++;
		}
		idx++;
	}

	return curr - events;
}
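
/* Unit ids 0..NUM_L2S-1 select the L2 clusters, NUM_L2S selects the SCF. */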
static inline struct carmel_unit *get_unit(u32 id)
{
	switch (id) {
	case 0 ... (NUM_L2S - 1):
		return &ctx.clusters[id];
	case NUM_L2S:
		return &ctx.scf;
	default:
		return NULL;
	}
}
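
/*
 * Claim a free counter on the unit encoded in the raw event id: -ENOENT for
 * an unknown or unavailable unit, -EOPNOTSUPP when both of the unit's
 * counters are already taken, -ENODEV (from set_event()) when the unit is
 * powered down.
 */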
static int add_event(const struct quadd_event *event)
{
	int err;
	unsigned long idx;
	u32 unit_id, event_raw;
	struct cntr_info *cntr;
	struct carmel_unit *unit;

	unit_id = CARMEL_UNIT(event->id);
	event_raw = CARMEL_EVENT(event->id);

	unit = get_unit(unit_id);
	if (!unit || !unit->is_available)
		return -ENOENT;

	set_unit(unit);

	idx = find_first_zero_bit(unit->used_ctrs, UNIT_CTRS);
	if (idx >= UNIT_CTRS)
		return -EOPNOTSUPP;

	err = set_event(event_raw, idx);
	if (err < 0)
		return err;

	cntr = &unit->cntrs[idx];
	cntr->id_raw = event->id;
	cntr->id_hw = event_raw;

	set_bit(idx, unit->used_ctrs);
	set_bit(unit->id, ctx.used_units);

	unit->is_used = true;

	return 0;
}

static void clean_all_units(void)
{
	struct carmel_unit *unit;

	list_for_each_entry(unit, &ctx.units, next) {
		if (unit->is_used) {
			memset(unit->cntrs, 0, sizeof(unit->cntrs));
			bitmap_zero(unit->used_ctrs, UNIT_CTRS);
		}
	}
	bitmap_zero(ctx.used_units, NUM_L2S + 1);
}

static int carmel_pmu_enable(void)
{
	return 0;
}

static void carmel_pmu_disable(void)
{
	clean_all_units();
}

static void carmel_pmu_start(void)
{
	struct carmel_unit *unit;

	list_for_each_entry(unit, &ctx.units, next) {
		if (unit->is_used)
			start_unit(unit);
	}
}

static void carmel_pmu_stop(void)
{
	struct carmel_unit *unit;

	list_for_each_entry(unit, &ctx.units, next) {
		if (unit->is_used)
			stop_unit(unit);
	}
}

static int carmel_pmu_read(struct quadd_event_data *events, int max)
{
	struct carmel_unit *unit;
	struct quadd_event_data *curr, *end;

	if (max == 0)
		return 0;

	curr = events;
	end = events + max;

	list_for_each_entry(unit, &ctx.units, next) {
		if (unit->is_used) {
			curr += read_unit_cntrs(unit, curr, end - curr);
			if (curr >= end)
				break;
		}
	}

	return curr - events;
}
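
/*
 * Reprogram all units from scratch. Events of any type other than
 * QUADD_EVENT_TYPE_RAW_CARMEL_UNCORE are skipped, and a failure on any raw
 * uncore event rolls back the whole configuration.
 */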
static int
carmel_pmu_set_events(int cpuid, const struct quadd_event *events, int size)
{
	int i, err;

	clean_all_units();

	for (i = 0; i < size; i++) {
		const struct quadd_event *event = &events[i];

		if (event->type == QUADD_EVENT_TYPE_RAW_CARMEL_UNCORE) {
			err = add_event(event);
			if (err < 0) {
				clean_all_units();
				return err;
			}
		}
	}

	return 0;
}
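
/*
 * No named events are advertised here; only the raw-event mask is reported,
 * covering the 12-bit 0xEEU id space described above.
 */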
static int
supported_events(int cpuid, struct quadd_event *events,
		 int max, unsigned int *raw_event_mask)
{
	*raw_event_mask = 0x0fff;
	return 0;
}

static int
current_events(int cpuid, struct quadd_event *events, int max)
{
	struct quadd_event *curr, *end;
	struct carmel_unit *unit;

	if (max == 0)
		return 0;

	curr = events;
	end = curr + max;

	list_for_each_entry(unit, &ctx.units, next) {
		if (unit->is_used) {
			unsigned long idx = 0;

			while (idx < UNIT_CTRS) {
				idx = find_next_bit(unit->used_ctrs,
						    UNIT_CTRS, idx);
				if (idx != UNIT_CTRS) {
					curr->type =
					    QUADD_EVENT_TYPE_RAW_CARMEL_UNCORE;
					curr->id = unit->cntrs[idx].id_raw;

					if (++curr >= end)
						return curr - events;
				}
				idx++;
			}
		}
	}

	return curr - events;
}
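
/* A cluster is available if at least one possible CPU belongs to it. */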
static bool is_cluster_available(int cluster_id)
{
	int cpu;

	for_each_possible_cpu(cpu)
		if (cpu_topology[cpu].cluster_id == cluster_id)
			return true;

	return false;
}
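
/* Named counter descriptors, collected into the carmel_cntrs table below. */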
QUADD_PMU_CNTR_INFO(l2d_cache, L2D_CACHE);
QUADD_PMU_CNTR_INFO(l2d_refill, L2D_CACHE_REFILL);
QUADD_PMU_CNTR_INFO(l2d_cache_wb, L2D_CACHE_WB);
QUADD_PMU_CNTR_INFO(l2d_ld, L2D_CACHE_LD);
QUADD_PMU_CNTR_INFO(l2d_st, L2D_CACHE_ST);
QUADD_PMU_CNTR_INFO(l2d_refill_ld, L2D_CACHE_REFILL_LD);
QUADD_PMU_CNTR_INFO(l2d_refill_st, L2D_CACHE_REFILL_ST);
QUADD_PMU_CNTR_INFO(l2d_refill_victim, L2D_CACHE_REFILL_VICTIM);
QUADD_PMU_CNTR_INFO(l2d_prefetch_c0, L2D_PREFETCH_C0);
QUADD_PMU_CNTR_INFO(l2d_prefetch_c1, L2D_PREFETCH_C1);
QUADD_PMU_CNTR_INFO(bus_access, BUS_ACCESS);
QUADD_PMU_CNTR_INFO(bus_cycles, BUS_CYCLES);
QUADD_PMU_CNTR_INFO(l3d_cache_allocate, L3D_CACHE_ALLOCATE);
QUADD_PMU_CNTR_INFO(l3d_cache_refill, L3D_CACHE_REFILL);
QUADD_PMU_CNTR_INFO(l3d_cache, L3D_CACHE);
QUADD_PMU_CNTR_INFO(l3d_cache_wb, L3D_CACHE_WB);

static const struct quadd_pmu_cntr_info *carmel_cntrs[] = {
	&quadd_pmu_cntr_l2d_cache,
	&quadd_pmu_cntr_l2d_refill,
	&quadd_pmu_cntr_l2d_cache_wb,
	&quadd_pmu_cntr_l2d_ld,
	&quadd_pmu_cntr_l2d_st,
	&quadd_pmu_cntr_l2d_refill_ld,
	&quadd_pmu_cntr_l2d_refill_st,
	&quadd_pmu_cntr_l2d_refill_victim,
	&quadd_pmu_cntr_l2d_prefetch_c0,
	&quadd_pmu_cntr_l2d_prefetch_c1,
	&quadd_pmu_cntr_bus_access,
	&quadd_pmu_cntr_bus_cycles,
	&quadd_pmu_cntr_l3d_cache_allocate,
	&quadd_pmu_cntr_l3d_cache_refill,
	&quadd_pmu_cntr_l3d_cache,
	&quadd_pmu_cntr_l3d_cache_wb,
	NULL,
};

static struct
quadd_event_source carmel_uncore_pmu_int = {
	.name = "carmel_pmu",
	.enable = carmel_pmu_enable,
	.disable = carmel_pmu_disable,
	.start = carmel_pmu_start,
	.stop = carmel_pmu_stop,
	.read = carmel_pmu_read,
	.set_events = carmel_pmu_set_events,
	.supported_events = supported_events,
	.current_events = current_events,
	.pmu_cntrs = carmel_cntrs,
};
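
/*
 * Register the uncore event source on Tegra19x (Carmel) only: one unit per
 * L2 cluster present in the CPU topology, plus the always-available SCF.
 */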
struct quadd_event_source *
quadd_carmel_uncore_pmu_init(void)
{
	int i;
	struct carmel_unit *unit;

	if (tegra_get_chipid() != TEGRA_CHIPID_TEGRA19)
		return NULL;

	INIT_LIST_HEAD(&ctx.units);

	for (i = 0; i < NUM_L2S; i++) {
		unit = &ctx.clusters[i];
		unit->id = i;
		unit->pmselr = get_unit_pmselr(1, i);
		unit->is_used = false;
		unit->is_available = is_cluster_available(i);

		bitmap_zero(unit->used_ctrs, UNIT_CTRS);
		list_add(&unit->next, &ctx.units);
	}

	unit = &ctx.scf;
	unit->id = NUM_L2S;
	unit->pmselr = get_unit_pmselr(0, 0);
	unit->is_used = false;
	unit->is_available = true;

	bitmap_zero(unit->used_ctrs, UNIT_CTRS);
	list_add(&unit->next, &ctx.units);

	bitmap_zero(ctx.used_units, NUM_L2S + 1);

	return &carmel_uncore_pmu_int;
}

void quadd_carmel_uncore_pmu_deinit(void)
{
}