518 lines
13 KiB
C
518 lines
13 KiB
C
/*
|
|
* RAS driver for T194
|
|
*
|
|
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*/
|
|
#include <linux/module.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/of_device.h>
|
|
#include <linux/arm64_ras.h>
|
|
#include <linux/of_irq.h>
|
|
#include <linux/cpuhotplug.h>
|
|
|
|
static int fhi_irq[CONFIG_NR_CPUS];
|
|
static u8 is_ras_probe_done;
|
|
static LIST_HEAD(fhi_callback_list);
|
|
static DEFINE_RAW_SPINLOCK(fhi_lock);
|
|
|
|
/* saved hotplug state */
|
|
static enum cpuhp_state hp_state;
|
|
|
|
static const struct of_device_id arm64_ras_match[] = {
|
|
{
|
|
.name = "arm64_ras",
|
|
.compatible = "arm,armv8.2-ras",
|
|
},
|
|
{ },
|
|
};
|
|
MODULE_DEVICE_TABLE(of, arm64_ras_match);
|
|
|
|
/* Architecturally defined primary error code
|
|
* Used for decoding ERR<x>STATUS:SERR bits
|
|
*/
|
|
static struct ras_error serr_errors[] = {
|
|
{.name = "No error", .error_code = 0x0},
|
|
{.name = "Implementation defined error", .error_code = 0x1},
|
|
{.name = "Data value from non-associative internal memory",
|
|
.error_code = 0x2},
|
|
{.name = "Implementation defined pin", .error_code = 0x3},
|
|
{.name = "Assertion Failure", .error_code = 0x4},
|
|
{.name = "Internal Data Path", .error_code = 0x5},
|
|
{.name = "Data value from associative memory", .error_code = 0x6},
|
|
{.name = "Address/control value(s) from associative memory",
|
|
.error_code = 0x7},
|
|
{.name = "Data value from a TLB", .error_code = 0x8},
|
|
{.name = "Address/control value(s) from a TLB", .error_code = 0x9},
|
|
{.name = "Data value from producer", .error_code = 0xA},
|
|
{.name = "Address/control value(s) from producer", .error_code = 0xB},
|
|
{.name = "Data value from (non-associative) external memory",
|
|
.error_code = 0xC},
|
|
{.name = "Illegal address (software fault)", .error_code = 0xD},
|
|
{.name = "Illegal access (software fault)", .error_code = 0xE},
|
|
{.name = "Illegal state (software fault)", .error_code = 0xF},
|
|
{.name = "Internal data register", .error_code = 0x10},
|
|
{.name = "Internal control register", .error_code = 0x11},
|
|
{.name = "Error response from slave", .error_code = 0x12},
|
|
{.name = "External timeout", .error_code = 0x13},
|
|
{.name = "Internal timeout", .error_code = 0x14},
|
|
{.name = "Deferred error from slave not supported at master",
|
|
.error_code = 0x15},
|
|
};
|
|
|
|
/* Read ERR<X>SELR */
|
|
u64 ras_read_errselr(void)
|
|
{
|
|
u64 errselr;
|
|
|
|
asm volatile("mrs %0, s3_0_c5_c3_1" : "=r" (errselr));
|
|
return errselr;
|
|
}
|
|
EXPORT_SYMBOL(ras_read_errselr);
|
|
|
|
/* For read/write of any ERR<X> register, error record X is selected
|
|
* by writing to ERRSELR_EL1
|
|
* ISB is needed in the end to ensure this write completes before another
|
|
* read/write to this error record comes in.
|
|
*/
|
|
void ras_write_errselr(u64 errx)
|
|
{
|
|
asm volatile("msr s3_0_c5_c3_1, %0" : : "r" (errx));
|
|
isb();
|
|
}
|
|
EXPORT_SYMBOL(ras_write_errselr);
|
|
|
|
/* Read ERR<X>STATUS */
|
|
u64 ras_read_error_status(void)
|
|
{
|
|
u64 status;
|
|
|
|
asm volatile("mrs %0, s3_0_c5_c4_2" : "=r" (status));
|
|
return status;
|
|
}
|
|
EXPORT_SYMBOL(ras_read_error_status);
|
|
|
|
/* Write to ERR<X>STATUS */
|
|
void ras_write_error_status(u64 status)
|
|
{
|
|
asm volatile("msr s3_0_c5_c4_2, %0" : : "r" (status));
|
|
}
|
|
EXPORT_SYMBOL(ras_write_error_status);
|
|
|
|
/* Read ERR<X>ADDR */
|
|
static u64 ras_read_error_address(void)
|
|
{
|
|
u64 addr;
|
|
|
|
asm volatile("mrs %0, s3_0_c5_c4_3" : "=r" (addr));
|
|
return addr;
|
|
}
|
|
|
|
/* Write to ERR<X>ADDR*/
|
|
void ras_write_error_addr(u64 addr)
|
|
{
|
|
asm volatile("msr s3_0_c5_c4_3, %0" : : "r" (addr));
|
|
}
|
|
EXPORT_SYMBOL(ras_write_error_addr);
|
|
|
|
/* Read ERR<X>MISC0 */
|
|
static u64 ras_read_error_misc0(void)
|
|
{
|
|
u64 misc0;
|
|
|
|
asm volatile("mrs %0, s3_0_c5_c5_0" : "=r" (misc0));
|
|
return misc0;
|
|
}
|
|
|
|
/* Write to ERR<X>MISC0*/
|
|
void ras_write_error_misc0(u64 misc0)
|
|
{
|
|
asm volatile("msr s3_0_c5_c5_0, %0" : : "r" (misc0));
|
|
}
|
|
EXPORT_SYMBOL(ras_write_error_misc0);
|
|
|
|
/* Read ERR<X>MISC1 */
|
|
static u64 ras_read_error_misc1(void)
|
|
{
|
|
u64 misc1;
|
|
|
|
asm volatile("mrs %0, s3_0_c5_c5_1" : "=r" (misc1));
|
|
return misc1;
|
|
}
|
|
|
|
/* Write to ERR<X>MISC1*/
|
|
void ras_write_error_misc1(u64 misc1)
|
|
{
|
|
asm volatile("msr s3_0_c5_c5_1, %0" : : "r" (misc1));
|
|
}
|
|
EXPORT_SYMBOL(ras_write_error_misc1);
|
|
|
|
/* Read ERR<X>CTLR_EL1*/
|
|
u64 ras_read_error_control(void)
|
|
{
|
|
u64 err_ctl;
|
|
|
|
asm volatile("mrs %0, s3_0_c5_c4_1" : "=r" (err_ctl));
|
|
return err_ctl;
|
|
}
|
|
EXPORT_SYMBOL(ras_read_error_control);
|
|
|
|
/* Write to ERR<X>CTLR_EL1*/
|
|
void ras_write_error_control(u64 err_ctl)
|
|
{
|
|
asm volatile("msr s3_0_c5_c4_1, %0" : : "r" (err_ctl));
|
|
}
|
|
EXPORT_SYMBOL(ras_write_error_control);
|
|
|
|
/* Read ERR<X>PFGCTL_EL1*/
|
|
u64 ras_read_pfg_control(void)
|
|
{
|
|
u64 pfg_ctl;
|
|
|
|
asm volatile("mrs %0, s3_0_c15_c1_4" : "=r" (pfg_ctl));
|
|
return pfg_ctl;
|
|
}
|
|
EXPORT_SYMBOL(ras_read_pfg_control);
|
|
|
|
/* Write to ERR<X>PFGCTL_EL1*/
|
|
void ras_write_pfg_control(u64 pfg_ctl)
|
|
{
|
|
asm volatile("msr s3_0_c15_c1_4, %0" : : "r" (pfg_ctl));
|
|
}
|
|
EXPORT_SYMBOL(ras_write_pfg_control);
|
|
|
|
/* Read ERR<X>PFGCDN_EL1*/
|
|
u64 ras_read_pfg_cdn(void)
|
|
{
|
|
u64 pfg_cdn;
|
|
|
|
asm volatile("mrs %0, s3_0_c15_c1_6" : "=r" (pfg_cdn));
|
|
return pfg_cdn;
|
|
}
|
|
EXPORT_SYMBOL(ras_read_pfg_cdn);
|
|
|
|
/* Write to ERR<X>PFGCDN_EL1*/
|
|
void ras_write_pfg_cdn(u64 pfg_cdn)
|
|
{
|
|
asm volatile("msr s3_0_c15_c1_6, %0" : : "r" (pfg_cdn));
|
|
}
|
|
EXPORT_SYMBOL(ras_write_pfg_cdn);
|
|
|
|
void unregister_fhi_callback(struct ras_fhi_callback *callback)
|
|
{
|
|
unsigned long flags;
|
|
|
|
raw_spin_lock_irqsave(&fhi_lock, flags);
|
|
list_del(&callback->node);
|
|
raw_spin_unlock_irqrestore(&fhi_lock, flags);
|
|
}
|
|
EXPORT_SYMBOL(unregister_fhi_callback);
|
|
|
|
static unsigned int read_pfr0_ras_version(void)
|
|
{
|
|
return PFR0_RAS(read_cpuid(ID_AA64PFR0_EL1));
|
|
}
|
|
|
|
static void is_ras_version1(void *ret)
|
|
{
|
|
*(int *)(ret) = (read_pfr0_ras_version() == PFR0_RAS_VERSION_1);
|
|
}
|
|
|
|
/* check if RAS is supported on this CPU
|
|
* ret = 0 if not supported
|
|
* = 1 if supported
|
|
*/
|
|
int is_ras_cpu(int cpu)
|
|
{
|
|
int ret;
|
|
|
|
smp_call_function_single(cpu, is_ras_version1, &ret, 1);
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(is_ras_cpu);
|
|
|
|
int is_this_ras_cpu(void)
|
|
{
|
|
return read_pfr0_ras_version() == PFR0_RAS_VERSION_1;
|
|
}
|
|
EXPORT_SYMBOL(is_this_ras_cpu);
|
|
|
|
int is_ras_ready(void)
|
|
{
|
|
return is_ras_probe_done == 1;
|
|
}
|
|
EXPORT_SYMBOL(is_ras_ready);
|
|
|
|
void print_error_record(struct error_record *record, u64 status, int errselr)
|
|
{
|
|
struct ras_error *errors;
|
|
u64 misc0, misc1, addr;
|
|
u16 ierr, serr;
|
|
u64 i;
|
|
int found = 0;
|
|
u64 err_status = 0;
|
|
|
|
pr_crit("**************************************\n");
|
|
pr_crit("RAS Error in %s, ERRSELR_EL1=%d:\n", record->name, errselr);
|
|
pr_crit("\tStatus = 0x%llx\n", status);
|
|
|
|
/* Find the name of IERR */
|
|
ierr = get_error_status_ierr(status);
|
|
errors = record->errors;
|
|
if (errors) {
|
|
for (i = 0; errors[i].name; i++) {
|
|
if (errors[i].error_code == ierr) {
|
|
pr_crit("\tIERR = %s: 0x%x\n",
|
|
errors[i].name, ierr);
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (!found)
|
|
pr_crit("\tUnknown IERR: 0x%x\n", ierr);
|
|
} else {
|
|
pr_crit("\tBank does not have any known IERR errors\n");
|
|
}
|
|
|
|
/* Find the name of SERR */
|
|
found = 0;
|
|
serr = get_error_status_serr(status);
|
|
|
|
for (i = 0; serr_errors[i].name; i++) {
|
|
if (serr_errors[i].error_code == serr) {
|
|
pr_crit("\tSERR = %s: 0x%x\n", serr_errors[i].name,
|
|
serr);
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (!found)
|
|
pr_crit("\tUnknown SERR: 0x%x\n", serr);
|
|
|
|
if (status & ERRi_STATUS_OF) {
|
|
pr_crit("\tOverflow (there may be more errors) - Uncorrectable\n");
|
|
/* Clear the error by writing 1 to ERRi_STATUS:OF */
|
|
err_status |= ERRi_STATUS_OF;
|
|
}
|
|
if (status & ERRi_STATUS_UE) {
|
|
pr_crit("\tUncorrectable (this is fatal)\n");
|
|
/* Clear the error by writing 1 to ERR_STATUS:UE
|
|
* and 11 to ERR_STATUS:UET
|
|
*/
|
|
err_status |= ERRi_STATUS_UE;
|
|
err_status |= ERRi_STATUS_UET;
|
|
}
|
|
if (get_error_status_ce(status)) {
|
|
pr_crit("\tCorrectable Error\n");
|
|
/* Clear the error by writing 1 to ERR_STATUS:CE */
|
|
err_status |= ERRi_STATUS_CE;
|
|
}
|
|
if (status & ERRi_STATUS_MV) {
|
|
misc0 = ras_read_error_misc0();
|
|
misc1 = ras_read_error_misc1();
|
|
pr_crit("\tMISC0 = 0x%llx\n", misc0);
|
|
pr_crit("\tMISC1 = 0x%llx\n", misc1);
|
|
/* write 1 to clear ERR_STATUS:MV */
|
|
err_status |= ERRi_STATUS_MV;
|
|
}
|
|
if (status & ERRi_STATUS_AV) {
|
|
addr = ras_read_error_address();
|
|
pr_crit("\tADDR = 0x%llx\n", addr);
|
|
/* write 1 to clear ERR_STATUS:AV */
|
|
err_status |= ERRi_STATUS_AV;
|
|
}
|
|
|
|
/* Write 1 to clear ERR_STATUS:V */
|
|
err_status |= ERRi_STATUS_VALID;
|
|
|
|
/* Write to ERR_STATUS to clear the error */
|
|
ras_write_error_status(err_status);
|
|
pr_crit("**************************************\n");
|
|
}
|
|
EXPORT_SYMBOL(print_error_record);
|
|
|
|
/* Fault Handling Interrupt or FHI
|
|
* for handling Correctable Errors
|
|
*/
|
|
static irqreturn_t ras_fhi_isr(int irq, void *dev_id)
|
|
{
|
|
unsigned long flags;
|
|
struct ras_fhi_callback *callback;
|
|
|
|
/* Iterate through the banks looking for one with an error */
|
|
pr_crit("CPU%d: RAS: FHI %d detected\n", smp_processor_id(), irq);
|
|
|
|
raw_spin_lock_irqsave(&fhi_lock, flags);
|
|
list_for_each_entry(callback, &fhi_callback_list, node) {
|
|
callback->fn();
|
|
}
|
|
raw_spin_unlock_irqrestore(&fhi_lock, flags);
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
static int ras_fhi_enable(unsigned int cpu)
|
|
{
|
|
if (irq_force_affinity(fhi_irq[cpu], cpumask_of(cpu))) {
|
|
pr_info("%s: Failed to set IRQ %d affinity to CPU%d\n",
|
|
__func__, fhi_irq[cpu], cpu);
|
|
return -EINVAL;
|
|
}
|
|
enable_irq(fhi_irq[cpu]);
|
|
pr_info("%s: FHI %d enabled on CPU%d\n", __func__, fhi_irq[cpu], cpu);
|
|
return 0;
|
|
}
|
|
|
|
static int ras_fhi_disable(unsigned int cpu)
|
|
{
|
|
int boot_cpu = 0;
|
|
|
|
disable_irq_nosync(fhi_irq[cpu]);
|
|
if (irq_force_affinity(fhi_irq[cpu], cpumask_of(boot_cpu)))
|
|
pr_info("%s: Failed to set IRQ %d affinity to boot cpu %d\n",
|
|
__func__, fhi_irq[cpu], boot_cpu);
|
|
pr_info("%s: FHI %d disabled\n", __func__, fhi_irq[cpu]);
|
|
return 0;
|
|
}
|
|
|
|
/* Register Fault Handling Interrupt or FHI
|
|
* for Correctable Errors
|
|
*/
|
|
static int ras_register_fhi_isr(struct platform_device *pdev)
|
|
{
|
|
int err = 0, cpu, irq_count;
|
|
|
|
irq_count = platform_irq_count(pdev);
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
BUG_ON(cpu >= irq_count);
|
|
fhi_irq[cpu] = platform_get_irq(pdev, cpu);
|
|
if (fhi_irq[cpu] <= 0) {
|
|
dev_err(&pdev->dev, "No IRQ\n");
|
|
err = -ENOENT;
|
|
goto isr_err;
|
|
}
|
|
}
|
|
|
|
isr_err:
|
|
return err;
|
|
}
|
|
|
|
/* This is an API for CPU specific FHI callbacks
|
|
* to be registered with fhi_isr handler
|
|
*/
|
|
int register_fhi_callback(struct ras_fhi_callback *callback, void *cookie)
|
|
{
|
|
unsigned long flags;
|
|
int err = 0, cpu = 0;
|
|
|
|
raw_spin_lock_irqsave(&fhi_lock, flags);
|
|
list_add(&callback->node, &fhi_callback_list);
|
|
raw_spin_unlock_irqrestore(&fhi_lock, flags);
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
err = request_irq(fhi_irq[cpu], ras_fhi_isr,
|
|
IRQ_PER_CPU, "ras-fhi", cookie);
|
|
if (err) {
|
|
pr_err("%s: request_irq(%d) failed (%d)\n", __func__,
|
|
fhi_irq[cpu], err);
|
|
goto isr_err;
|
|
}
|
|
disable_irq(fhi_irq[cpu]);
|
|
}
|
|
/* Ensure that any CPU brought online sets up FHI */
|
|
hp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
|
|
"arm64_ras:online",
|
|
ras_fhi_enable,
|
|
ras_fhi_disable);
|
|
|
|
if (hp_state < 0)
|
|
pr_info("%s: unable to register FHI\n", __func__);
|
|
|
|
isr_err:
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(register_fhi_callback);
|
|
|
|
static int ras_probe(struct platform_device *pdev)
|
|
{
|
|
const struct of_device_id *match;
|
|
int err = 0, cpu, probe = 0;
|
|
struct device *dev = &pdev->dev;
|
|
|
|
/* probe only if RAS is supported on either of the online CPUs */
|
|
for_each_online_cpu(cpu) {
|
|
if (is_ras_cpu(cpu))
|
|
probe = 1;
|
|
}
|
|
|
|
if (!probe) {
|
|
dev_info(dev, "None of the CPUs support RAS");
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* register FHI for Correctable Errors */
|
|
match = of_match_device(of_match_ptr(arm64_ras_match),
|
|
dev);
|
|
if (!match) {
|
|
dev_err(dev, "RAS detected, but no device-tree node found,"
|
|
" Cannot register RAS fault handler interrupt (FHI)");
|
|
return -ENODEV;
|
|
}
|
|
err = ras_register_fhi_isr(pdev);
|
|
if (err < 0) {
|
|
dev_info(dev, "Failed to register Fault Handling Interrupts"
|
|
" ISR");
|
|
return err;
|
|
}
|
|
|
|
/* make sure we have executed everything in the probe
|
|
* before setting is_ras_probe_done
|
|
*/
|
|
smp_mb();
|
|
is_ras_probe_done = 1;
|
|
dev_info(dev, "probed");
|
|
return 0;
|
|
}
|
|
|
|
static int ras_remove(struct platform_device *pdev)
|
|
{
|
|
cpuhp_remove_state(hp_state);
|
|
return 0;
|
|
}
|
|
|
|
static struct platform_driver ras_driver = {
|
|
.probe = ras_probe,
|
|
.remove = ras_remove,
|
|
.driver = {
|
|
.owner = THIS_MODULE,
|
|
.name = "arm64_ras",
|
|
.of_match_table = of_match_ptr(arm64_ras_match),
|
|
},
|
|
};
|
|
|
|
static int __init ras_init(void)
|
|
{
|
|
return platform_driver_register(&ras_driver);
|
|
}
|
|
|
|
static void __exit ras_exit(void)
|
|
{
|
|
platform_driver_unregister(&ras_driver);
|
|
}
|
|
|
|
arch_initcall(ras_init);
|
|
module_exit(ras_exit);
|
|
|
|
MODULE_LICENSE("GPL v2");
|
|
MODULE_DESCRIPTION("ARM64 RAS Driver / FHI handler");
|
|
|