tegrakernel/kernel/nvidia/drivers/platform/tegra/mc/mcerr.c

/*
 * arch/arm/mach-tegra/mcerr.c
 *
 * MC error code common to T3x and T11x. T20 has been left alone.
 *
 * Copyright (c) 2010-2018, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#define pr_fmt(fmt) "mc-err: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/sched.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/moduleparam.h>
#include <linux/platform_device.h>
#include <linux/of_irq.h>
#include <linux/atomic.h>

#include <linux/platform/tegra/mc.h>
#include <linux/platform/tegra/mcerr.h>
#include <linux/platform/tegra/tegra_emc_err.h>
#include <linux/platform/tegra/mc-regs-t18x.h>

/*
 * Zero-initialized entry emitted into the __mcerr_of_table_end section.
 * NOTE(review): presumably the linker script places this section directly
 * after the mcerr OF match table so this acts as its terminator - confirm.
 */
static const struct of_device_id __mcerr_of_table_sentinel
	__used __section(__mcerr_of_table_end);
/* Start of the mcerr OF match table; defined outside this file. */
extern struct of_device_id __mcerr_of_table;

/* When true, the IRQ thread stops logging once error_count hits MAX_PRINTS. */
static bool mcerr_throttle_enabled = true;
/* Exposed read/write through the "quiet" debugfs file. */
u32  mcerr_silenced;
/* Incremented by the IRQ thread for every MC error interrupt it handles. */
static atomic_t error_count;

static void unthrottle_prints(struct work_struct *work);
/* Delayed work that resets error_count; scheduled while throttling. */
static DECLARE_DELAYED_WORK(unthrottle_prints_work, unthrottle_prints);
/* The "err" debugfs directory created by tegra_mcerr_init(). */
static struct dentry *mcerr_debugfs_dir;
/* Value of the "int_mask" DT property; what gets written to MC_INTMASK. */
u32 mc_int_mask;
/* Chip-specific operations returned by the matching OF table init function. */
static struct mcerr_ops *mcerr_ops;

/*
 * Delayed-work callback: reset the error counter to zero so that the IRQ
 * thread's throttle check (error_count >= MAX_PRINTS) passes again.
 */
static void unthrottle_prints(struct work_struct *work)
{
	atomic_set(&error_count, 0);
}

/*
 * Default disable_interrupt op, installed by tegra_mcerr_init() when the
 * chip-specific ops do not provide one: writes 0 to MC_INTMASK.
 * @irq is unused.
 */
static void disable_interrupt(unsigned int irq)
{
	mc_writel(0, MC_INTMASK);
}

/*
 * Default enable_interrupt op, installed by tegra_mcerr_init() when the
 * chip-specific ops do not provide one: writes the saved mc_int_mask back
 * to MC_INTMASK. @irq is unused.
 */
static void enable_interrupt(unsigned int irq)
{
	mc_writel(mc_int_mask, MC_INTMASK);
}

/*
 * Threaded half of the MC error interrupt.
 *
 * Counts the error and then either logs it through the chip-specific
 * log_mcerr_fault op or, when throttling is enabled and MAX_PRINTS errors
 * have been counted, only clears the interrupt and arms the delayed work
 * that resets the counter. In both cases the interrupt is re-enabled
 * before returning.
 */
static irqreturn_t tegra_mcerr_thread(int irq, void *data)
{
	unsigned long nr_errors;

	/* A new error arrived: push back any pending counter reset. */
	cancel_delayed_work(&unthrottle_prints_work);
	nr_errors = atomic_inc_return(&error_count);

	if (!mcerr_throttle_enabled || nr_errors < MAX_PRINTS) {
		mcerr_ops->log_mcerr_fault(irq);
	} else {
		schedule_delayed_work(&unthrottle_prints_work, HZ/2);
		/* Announce the throttling once, when the limit is first hit. */
		if (nr_errors == MAX_PRINTS)
			mcerr_pr("Too many MC errors; throttling prints\n");
		mcerr_ops->clear_interrupt(irq);
	}

	mcerr_ops->enable_interrupt(irq);

	return IRQ_HANDLED;
}

/*
 * Hard-IRQ half of the MC error interrupt.
 *
 * The actual error handling takes longer than is ideal, so it is done in
 * the threaded handler; this half only disables the interrupt through the
 * disable_interrupt op and wakes the thread.
 */
static irqreturn_t tegra_mcerr_hard_irq(int irq, void *data)
{
#ifdef CONFIG_TEGRA_MC_TRACE_PRINTK
	trace_printk("MCERR detected.\n");
#endif
	 /*
	  * Disable the MC error interrupt until the MC error info is logged.
	  * MC errors can be lost, as the MC HW holds one MC error at a time.
	  * The first MC error is good enough to point out potential memory
	  * access issues in SW and to allow further debugging.
	  */
	mcerr_ops->disable_interrupt(irq);
	return IRQ_WAKE_THREAD;
}

/*
 * Print the MC err stats for each client.
 *
 * Default seq_file show callback for the "mcerr" debugfs file, used when the
 * chip-specific ops do not supply mcerr_debugfs_show. Emits a header row
 * followed by one row per client that has a non-zero count for at least one
 * described interrupt. Only interrupt bits with a description get a column.
 *
 * Always returns 0.
 */
static int mcerr_default_debugfs_show(struct seq_file *s, void *v)
{
	int i, j;
	int do_print;

	/*
	 * Label the first two columns in the order the rows below fill them:
	 * client name first, then swgroup.
	 */
	seq_printf(s, "%-18s %-18s", "client", "swgroup");
	for (i = 0; i < (sizeof(u32) * 8); i++) {
		if (mcerr_ops->intr_descriptions[i])
			seq_printf(s, " %-12s",
				   mcerr_ops->intr_descriptions[i]);
	}
	seq_puts(s, "\n");

	for (i = 0; i < mcerr_ops->nr_clients; i++) {
		do_print = 0;

		/* Only print clients who actually have errors. */
		for (j = 0; j < (sizeof(u32) * 8); j++) {
			if (mcerr_ops->intr_descriptions[j] &&
			    mcerr_ops->mc_clients[i].intr_counts[j]) {
				do_print = 1;
				break;
			}
		}

		if (!do_print)
			continue;

		seq_printf(s, "%-18s %-18s",
			   mcerr_ops->mc_clients[i].name,
			   mcerr_ops->mc_clients[i].swgroup);
		/* One count per described interrupt, matching the header. */
		for (j = 0; j < (sizeof(u32) * 8); j++) {
			if (!mcerr_ops->intr_descriptions[j])
				continue;
			seq_printf(s, " %-12u",
				   mcerr_ops->mc_clients[i].intr_counts[j]);
		}
		seq_puts(s, "\n");
	}

	return 0;
}

/*
 * open() handler for the "mcerr" debugfs file: binds the file to the
 * mcerr_debugfs_show callback currently set in mcerr_ops, with no private
 * data. Returns whatever single_open() returns.
 */
static int mcerr_debugfs_open(struct inode *inode, struct file *file)
{
	return single_open(file, mcerr_ops->mcerr_debugfs_show, NULL);
}

/*
 * File operations for the "mcerr" debugfs file: a single_open() based
 * seq_file, read-only as far as these handlers go (no .write is provided).
 */
static const struct file_operations mcerr_debugfs_fops = {
	.open           = mcerr_debugfs_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = single_release,
};

/*
 * Getter for the "mcerr_throttle" debugfs attribute: stores 1 in *val when
 * print throttling is enabled and 0 otherwise. Always returns 0.
 */
static int __get_throttle(void *data, u64 *val)
{
	*val = mcerr_throttle_enabled ? 1 : 0;

	return 0;
}

/*
 * Setter for the "mcerr_throttle" debugfs attribute: any non-zero value
 * enables print throttling, zero disables it. Always returns 0.
 */
static int __set_throttle(void *data, u64 val)
{
	/* Every write restarts the error count from zero. */
	atomic_set(&error_count, 0);
	mcerr_throttle_enabled = !!val;

	return 0;
}
/*
 * File operations for the "mcerr_throttle" debugfs file, built from the
 * getter/setter pair above; the value is formatted as an unsigned decimal.
 */
DEFINE_SIMPLE_ATTRIBUTE(mcerr_throttle_debugfs_fops, __get_throttle,
			__set_throttle, "%llu\n");

/*
 * tegra_mcerr_init() - set up MC error interrupt handling and debugfs nodes.
 * @mc_parent: parent debugfs directory for the "err" directory; when NULL no
 *             debugfs nodes are created.
 * @pdev:      platform device whose device-tree node provides the compatible
 *             string, the "int_mask" property and the error interrupt.
 *
 * Looks up the chip-specific mcerr_ops through the mcerr OF match table,
 * fills in default ops where the chip code left them unset, registers the
 * threaded error interrupt and programs MC_INTMASK from "int_mask".
 *
 * Return: 0 on success, and also when mapping/requesting the interrupt or
 * creating the debugfs directory fails (those failures are only logged);
 * -EINVAL when no table entry matches, the ops are unusable or "int_mask"
 * is missing; -ENODEV when the ops lack client or interrupt description
 * data.
 */
int tegra_mcerr_init(struct dentry *mc_parent, struct platform_device *pdev)
{
	int irq;
	const void *prop;
	bool match_found = false;
	struct device_node *np = pdev->dev.of_node;
	const struct of_device_id *matches = &__mcerr_of_table;

	/*
	 * Stop at the first entry with an empty compatible string. The zeroed
	 * __mcerr_of_table_sentinel is expected to terminate the table
	 * (NOTE(review): relies on the linker script placing it last).
	 * Testing the pointer itself would never end the walk and would run
	 * past the table when no entry matches.
	 */
	for (; matches->compatible[0]; matches++) {
		if (of_device_is_compatible(np, matches->compatible)) {
			const of_mcerr_init_fn init_fn = matches->data;

			mcerr_ops = init_fn(np);
			match_found = true;
			break;
		}
	}

	if (WARN_ON(match_found == false)) {
		pr_err("%s: no mcerr_ops found\n", __func__);
		return -EINVAL;
	}

	/* These two ops have no generic fallback; without them give up. */
	if (!mcerr_ops || !mcerr_ops->clear_interrupt ||
		!mcerr_ops->log_mcerr_fault) {
		pr_err("invalid mcerr ops. disabling mcerr.\n");
		goto fail;
	}

	/* Fall back to the generic implementations for any op left unset. */
	mcerr_ops->mcerr_debugfs_show = mcerr_ops->mcerr_debugfs_show ?: mcerr_default_debugfs_show;
	mcerr_ops->enable_interrupt = mcerr_ops->enable_interrupt ?: enable_interrupt;
	mcerr_ops->disable_interrupt = mcerr_ops->disable_interrupt ?: disable_interrupt;

	if (mcerr_ops->nr_clients == 0 ||
	    mcerr_ops->intr_descriptions == NULL) {
		pr_err("Missing necessary chip_specific functionality!\n");
		return -ENODEV;
	}

	prop = of_get_property(pdev->dev.of_node, "int_mask", NULL);
	if (!prop) {
		pr_err("No int_mask prop for mcerr!\n");
		return -EINVAL;
	}

	/*
	 * irq_of_parse_and_map() reports failure by returning 0, so a
	 * "< 0" test alone would let an unmapped interrupt through.
	 */
	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
	if (irq <= 0) {
		pr_err("Unable to parse/map MC error interrupt\n");
		goto done;
	}

	if (request_threaded_irq(irq, tegra_mcerr_hard_irq,
				 tegra_mcerr_thread, 0, "mc_status", NULL)) {
		pr_err("Unable to register MC error interrupt\n");
		goto done;
	}

	mc_int_mask = be32_to_cpup(prop);
	/* Clear any mc-errs that occurred before. */
	mcerr_ops->clear_interrupt(irq);
	mc_writel(mc_int_mask, MC_INTMASK);
	pr_debug("Set intmask: 0x%x\n", mc_readl(MC_INTMASK));

	/* This needs to be fixed to work for all SoCs. */
	if (IS_ENABLED(CONFIG_ARCH_TEGRA_18x_SOC)) {
		prop = of_get_property(pdev->dev.of_node, "compatible", NULL);
		if (prop && strcmp(prop, "nvidia,tegra-t18x-mc") == 0)
			tegra_emcerr_init(mc_parent, pdev);
	}

	if (!mc_parent)
		goto done;

	/*
	 * Depending on kernel version and configuration the failure value
	 * is either NULL or an ERR_PTR(); treat both as "no directory".
	 */
	mcerr_debugfs_dir = debugfs_create_dir("err", mc_parent);
	if (IS_ERR_OR_NULL(mcerr_debugfs_dir)) {
		pr_err("Failed to make debugfs node: %ld\n",
		       PTR_ERR(mcerr_debugfs_dir));
		goto done;
	}
	debugfs_create_file("mcerr", 0644, mcerr_debugfs_dir, NULL,
			    &mcerr_debugfs_fops);
	debugfs_create_file("mcerr_throttle", S_IRUGO | S_IWUSR,
			    mcerr_debugfs_dir, NULL,
			    &mcerr_throttle_debugfs_fops);
	debugfs_create_u32("quiet", 0644, mcerr_debugfs_dir, &mcerr_silenced);
done:
	return 0;
fail:
	pr_err("init failied\n");
	return -EINVAL;
}