/* * arch/arm/mach-tegra/mcerr.c * * MC error code common to T3x and T11x. T20 has been left alone. * * Copyright (c) 2010-2018, NVIDIA Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #define pr_fmt(fmt) "mc-err: " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static const struct of_device_id __mcerr_of_table_sentinel __used __section(__mcerr_of_table_end); extern struct of_device_id __mcerr_of_table; static bool mcerr_throttle_enabled = true; u32 mcerr_silenced; static atomic_t error_count; static void unthrottle_prints(struct work_struct *work); static DECLARE_DELAYED_WORK(unthrottle_prints_work, unthrottle_prints); static struct dentry *mcerr_debugfs_dir; u32 mc_int_mask; static struct mcerr_ops *mcerr_ops; static void unthrottle_prints(struct work_struct *work) { atomic_set(&error_count, 0); } static void disable_interrupt(unsigned int irq) { mc_writel(0, MC_INTMASK); } static void enable_interrupt(unsigned int irq) { mc_writel(mc_int_mask, MC_INTMASK); } static irqreturn_t tegra_mcerr_thread(int irq, void *data) { unsigned long count; cancel_delayed_work(&unthrottle_prints_work); count = atomic_inc_return(&error_count); if (mcerr_throttle_enabled && count >= MAX_PRINTS) { schedule_delayed_work(&unthrottle_prints_work, HZ/2); if (count == MAX_PRINTS) mcerr_pr("Too many MC errors; throttling prints\n"); mcerr_ops->clear_interrupt(irq); goto exit; } mcerr_ops->log_mcerr_fault(irq); exit: mcerr_ops->enable_interrupt(irq); return IRQ_HANDLED; } /* * The actual error handling takes longer than is ideal so this must be * threaded. */ static irqreturn_t tegra_mcerr_hard_irq(int irq, void *data) { #ifdef CONFIG_TEGRA_MC_TRACE_PRINTK trace_printk("MCERR detected.\n"); #endif /* * Disable MC Error interrupt till the MC Error info is logged. * MC Errors can be lost as MC HW holds one MC error at a time. * The first MC Error is good enough to point out potential memory * access issues in SW and allow debugging further. */ mcerr_ops->disable_interrupt(irq); return IRQ_WAKE_THREAD; } /* * Print the MC err stats for each client. */ static int mcerr_default_debugfs_show(struct seq_file *s, void *v) { int i, j; int do_print; seq_printf(s, "%-18s %-18s", "swgroup", "client"); for (i = 0; i < (sizeof(u32) * 8); i++) { if (mcerr_ops->intr_descriptions[i]) seq_printf(s, " %-12s", mcerr_ops->intr_descriptions[i]); } seq_puts(s, "\n"); for (i = 0; i < mcerr_ops->nr_clients; i++) { do_print = 0; /* Only print clients who actually have errors. */ for (j = 0; j < (sizeof(u32) * 8); j++) { if (mcerr_ops->intr_descriptions[j] && mcerr_ops->mc_clients[i].intr_counts[j]) { do_print = 1; break; } } if (do_print) { seq_printf(s, "%-18s %-18s", mcerr_ops->mc_clients[i].name, mcerr_ops->mc_clients[i].swgroup); for (j = 0; j < (sizeof(u32) * 8); j++) { if (!mcerr_ops->intr_descriptions[j]) continue; seq_printf(s, " %-12u", mcerr_ops->mc_clients[i].intr_counts[j]); } seq_puts(s, "\n"); } } return 0; } static int mcerr_debugfs_open(struct inode *inode, struct file *file) { return single_open(file, mcerr_ops->mcerr_debugfs_show, NULL); } static const struct file_operations mcerr_debugfs_fops = { .open = mcerr_debugfs_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static int __get_throttle(void *data, u64 *val) { *val = mcerr_throttle_enabled; return 0; } static int __set_throttle(void *data, u64 val) { atomic_set(&error_count, 0); mcerr_throttle_enabled = (bool) val; return 0; } DEFINE_SIMPLE_ATTRIBUTE(mcerr_throttle_debugfs_fops, __get_throttle, __set_throttle, "%llu\n"); int tegra_mcerr_init(struct dentry *mc_parent, struct platform_device *pdev) { int irq; const void *prop; bool match_found = false; struct device_node *np = pdev->dev.of_node; const struct of_device_id *matches = &__mcerr_of_table; for (; matches; matches++) { if (of_device_is_compatible(np, matches->compatible)) { const of_mcerr_init_fn init_fn = matches->data; mcerr_ops = init_fn(np); match_found = true; break; } } if (WARN_ON(match_found == false)) { pr_err("%s: no mcerr_ops found\n", __func__); return -EINVAL; } if (!mcerr_ops || !mcerr_ops->clear_interrupt || !mcerr_ops->log_mcerr_fault) { pr_err("invalid mcerr ops. disabling mcerr.\n"); goto fail; } mcerr_ops->mcerr_debugfs_show = mcerr_ops->mcerr_debugfs_show ?: mcerr_default_debugfs_show; mcerr_ops->enable_interrupt = mcerr_ops->enable_interrupt ?: enable_interrupt; mcerr_ops->disable_interrupt = mcerr_ops->disable_interrupt ?: disable_interrupt; if (mcerr_ops->nr_clients == 0 || mcerr_ops->intr_descriptions == NULL) { pr_err("Missing necessary chip_specific functionality!\n"); return -ENODEV; } prop = of_get_property(pdev->dev.of_node, "int_mask", NULL); if (!prop) { pr_err("No int_mask prop for mcerr!\n"); return -EINVAL; } irq = irq_of_parse_and_map(pdev->dev.of_node, 0); if (irq < 0) { pr_err("Unable to parse/map MC error interrupt\n"); goto done; } if (request_threaded_irq(irq, tegra_mcerr_hard_irq, tegra_mcerr_thread, 0, "mc_status", NULL)) { pr_err("Unable to register MC error interrupt\n"); goto done; } mc_int_mask = be32_to_cpup(prop); /* clear any mc-err's that occured before. */ mcerr_ops->clear_interrupt(irq); mc_writel(mc_int_mask, MC_INTMASK); pr_debug("Set intmask: 0x%x\n", mc_readl(MC_INTMASK)); /* This need to be fixed to work for all SOC's. */ if (IS_ENABLED(CONFIG_ARCH_TEGRA_18x_SOC)) { prop = of_get_property(pdev->dev.of_node,"compatible", NULL); if (prop && strcmp(prop, "nvidia,tegra-t18x-mc") == 0) tegra_emcerr_init(mc_parent, pdev); } if (!mc_parent) goto done; mcerr_debugfs_dir = debugfs_create_dir("err", mc_parent); if (mcerr_debugfs_dir == NULL) { pr_err("Failed to make debugfs node: %ld\n", PTR_ERR(mcerr_debugfs_dir)); goto done; } debugfs_create_file("mcerr", 0644, mcerr_debugfs_dir, NULL, &mcerr_debugfs_fops); debugfs_create_file("mcerr_throttle", S_IRUGO | S_IWUSR, mcerr_debugfs_dir, NULL, &mcerr_throttle_debugfs_fops); debugfs_create_u32("quiet", 0644, mcerr_debugfs_dir, &mcerr_silenced); done: return 0; fail: pr_err("init failied\n"); return -EINVAL; }