diff --git a/plat/nvidia/tegra/include/tegra_private.h b/plat/nvidia/tegra/include/tegra_private.h index a6d8e68..c181c36 100644 --- a/plat/nvidia/tegra/include/tegra_private.h +++ b/plat/nvidia/tegra/include/tegra_private.h @@ -159,6 +159,7 @@ #if RAS_EXTENSION void tegra194_ras_enable(void); +void tegra194_ras_corrected_err_clear(void); #endif #endif /* TEGRA_PRIVATE_H */ diff --git a/plat/nvidia/tegra/soc/t194/drivers/include/mce_private.h b/plat/nvidia/tegra/soc/t194/drivers/include/mce_private.h index 1fe3aad..6dafeb2 100644 --- a/plat/nvidia/tegra/soc/t194/drivers/include/mce_private.h +++ b/plat/nvidia/tegra/soc/t194/drivers/include/mce_private.h @@ -58,6 +58,7 @@ void nvg_enable_strict_checking_mode(void); void nvg_system_shutdown(void); void nvg_system_reboot(void); +void nvg_clear_hsm_corr_status(void); /* declarations for assembly functions */ void nvg_set_request_data(uint64_t req, uint64_t data); @@ -71,5 +72,6 @@ void mce_enable_strict_checking(void); void mce_system_shutdown(void); void mce_system_reboot(void); +void mce_clear_hsm_corr_status(void); #endif /* MCE_PRIVATE_H */ diff --git a/plat/nvidia/tegra/soc/t194/drivers/mce/mce.c b/plat/nvidia/tegra/soc/t194/drivers/mce/mce.c index 7edd7a0..4663a3d 100644 --- a/plat/nvidia/tegra/soc/t194/drivers/mce/mce.c +++ b/plat/nvidia/tegra/soc/t194/drivers/mce/mce.c @@ -234,3 +234,11 @@ { nvg_system_reboot(); } + +/******************************************************************************* + * Handler to clear CCPLEX->HSM correctable RAS error signal. 
+ ******************************************************************************/ +void mce_clear_hsm_corr_status(void) +{ + nvg_clear_hsm_corr_status(); +} diff --git a/plat/nvidia/tegra/soc/t194/drivers/mce/nvg.c b/plat/nvidia/tegra/soc/t194/drivers/mce/nvg.c index ef740a1..fdf9429 100644 --- a/plat/nvidia/tegra/soc/t194/drivers/mce/nvg.c +++ b/plat/nvidia/tegra/soc/t194/drivers/mce/nvg.c @@ -236,3 +236,15 @@ nvg_set_request_data((uint64_t)TEGRA_NVG_CHANNEL_SHUTDOWN, (uint64_t)TEGRA_NVG_SHUTDOWN); } + +/* + * Request to clear CCPLEX->HSM correctable error signal. + * NVGDATA[1]: A write of 1 clears the CCPLEX->HSM correctable error signal, + * A write of 0 has no effect. + */ +void nvg_clear_hsm_corr_status(void) +{ + nvg_hsm_error_ctrl_channel_t status = { .bits = { .corr = 1U, }, }; + + nvg_set_request_data((uint64_t)TEGRA_NVG_CHANNEL_HSM_ERROR_CTRL, status.flat); +} diff --git a/plat/nvidia/tegra/soc/t194/plat_ras.c b/plat/nvidia/tegra/soc/t194/plat_ras.c index f9ebb37..eb896a4af 100644 --- a/plat/nvidia/tegra/soc/t194/plat_ras.c +++ b/plat/nvidia/tegra/soc/t194/plat_ras.c @@ -60,7 +60,12 @@ ras_unlock(); } -/* Function to enable uncorrectable errors as External abort (SError) */ +/* + * Function to enable reporting of all supported RAS errors. + * + * Uncorrected errors are reported as an External abort (SError). + * Corrected errors are reported as an interrupt. + */ void tegra194_ras_enable(void) { VERBOSE("%s\n", __func__); @@ -86,11 +91,15 @@ assert(aux_data != NULL); for (uint32_t j = 0; j < num_idx; j++) { - uint64_t err_ctrl = 0ULL; - /* enable SError reporting for uncorrectable error */ - ERR_CTLR_ENABLE_FIELD(err_ctrl, UE); - ERR_CTLR_ENABLE_FIELD(err_ctrl, ED); + /* ERRCTLR register value. */ + uint64_t err_ctrl = 0ULL; + /* all supported errors for this node. 
*/ + uint64_t err_fr; + /* uncorrectable errors */ + uint64_t uncorr_errs; + /* correctable errors */ + uint64_t corr_errs; /* * Catch error if something wrong with the RAS aux data @@ -98,13 +107,37 @@ */ assert(aux_data[j].err_ctrl != NULL); - /* enable the specified errors */ - err_ctrl |= aux_data[j].err_ctrl(); - - /* Write to ERRSELR_EL1 to select the error record */ + /* + * Write to ERRSELR_EL1 to select the RAS error node. + * Always program this first so that the corresponding RAS + * node is selected before any other RAS register access. + */ ser_sys_select_record(idx_start + j); - /* enable specified errors */ + err_fr = read_erxfr_el1() & ERR_FR_EN_BITS_MASK; + uncorr_errs = aux_data[j].err_ctrl(); + corr_errs = ~uncorr_errs & err_fr; + + /* enable error reporting */ + ERR_CTLR_ENABLE_FIELD(err_ctrl, ED); + + /* enable SError reporting for uncorrectable errors */ + if ((uncorr_errs & err_fr) != 0ULL) { + ERR_CTLR_ENABLE_FIELD(err_ctrl, UE); + } + + /* generate interrupt for corrected errors. */ + if (corr_errs != 0ULL) { + ERR_CTLR_ENABLE_FIELD(err_ctrl, CFI); + } + + /* enable the supported errors */ + err_ctrl |= err_fr; + + VERBOSE("errselr_el1:0x%x, erxfr:0x%llx, err_ctrl:0x%llx\n", + idx_start + j, err_fr, err_ctrl); + + /* enable specified errors, or set to 0 if no supported error */ write_erxctlr_el1(err_ctrl); /* @@ -116,6 +149,42 @@ } } +/* + * Function to clear RAS ERRSTATUS for corrected RAS error. + * This function ignores any new RAS error signaled during clearing; it is not + * multi-core safe(no ras_lock is taken to reduce overhead). 
+ */ +void tegra194_ras_corrected_err_clear(void) +{ + uint64_t clear_ce_status = 0ULL; + + ERR_STATUS_SET_FIELD(clear_ce_status, AV, 0x1UL); + ERR_STATUS_SET_FIELD(clear_ce_status, V, 0x1UL); + ERR_STATUS_SET_FIELD(clear_ce_status, OF, 0x1UL); + ERR_STATUS_SET_FIELD(clear_ce_status, MV, 0x1UL); + ERR_STATUS_SET_FIELD(clear_ce_status, CE, 0x3UL); + + for (uint32_t i = 0U; i < err_record_mappings.num_err_records; i++) { + + const struct err_record_info *info = &err_record_mappings.err_records[i]; + uint32_t idx_start = info->sysreg.idx_start; + uint32_t num_idx = info->sysreg.num_idx; + + for (uint32_t j = 0U; j < num_idx; j++) { + + uint64_t status; + uint32_t err_idx = idx_start + j; + + write_errselr_el1(err_idx); + status = read_erxstatus_el1(); + + if (ERR_STATUS_GET_FIELD(status, CE) != 0U) { + write_erxstatus_el1(clear_ce_status); + } + } + } +} + /* Function to probe an error from error record group. */ static int32_t tegra194_ras_record_probe(const struct err_record_info *info, int *probe_data) @@ -129,26 +198,43 @@ } /* Function to handle error from one given node */ -static int32_t tegra194_ras_node_handler(const struct ras_error *errors, uint64_t status) +static int32_t tegra194_ras_node_handler(uint32_t errselr, + const struct ras_error *errors, uint64_t status) { bool found = false; uint32_t ierr = (uint32_t)ERR_STATUS_GET_FIELD(status, IERR); uint32_t serr = (uint32_t)ERR_STATUS_GET_FIELD(status, SERR); - /* IERR to error message */ - for (uint32_t i = 0; errors[i].error_msg != NULL; i++) { - if (ierr == errors[i].error_code) { - ERROR("IERR = %s(0x%x)\n", - errors[i].error_msg, errors[i].error_code); - found = true; - break; - } - } - if (!found) { - ERROR("unknown IERR: 0x%x\n", ierr); + /* not a valid error. */ + if (ERR_STATUS_GET_FIELD(status, V) == 0U) { + return 0; } - ERROR("SERR = %s(0x%x)\n", ras_serr_to_str(serr), serr); + /* Print uncorrectable error information. 
*/ + if (ERR_STATUS_GET_FIELD(status, UE) != 0U) { + + /* IERR to error message */ + for (uint32_t i = 0; errors[i].error_msg != NULL; i++) { + if (ierr == errors[i].error_code) { + ERROR("ERRSELR_EL1:0x%x\n, IERR = %s(0x%x)\n", + errselr, errors[i].error_msg, + errors[i].error_code); + found = true; + break; + } + } + + if (!found) { + ERROR("unknown uncorrectable eror, " + "ERRSELR_EL1:0x%x, IERR: 0x%x\n", errselr, ierr); + } + + ERROR("SERR = %s(0x%x)\n", ras_serr_to_str(serr), serr); + } else { + /* For corrected error, simply clear it. */ + VERBOSE("corrected RAS error is cleared: ERRSELR_EL1:0x%x, " + "IERR:0x%x, SERR:0x%x\n", errselr, ierr, serr); + } /* Write to clear reported errors. */ write_erxstatus_el1(status); @@ -158,11 +244,13 @@ /* Function to handle one error node from an error record group. */ static int32_t tegra194_ras_record_handler(const struct err_record_info *info, - int probe_data, const struct err_handler_data *const data) + int probe_data, const struct err_handler_data *const data __unused) { uint32_t num_idx = info->sysreg.num_idx; uint32_t idx_start = info->sysreg.idx_start; const struct ras_aux_data *aux_data = info->aux_data; + const struct ras_error *errors; + uint32_t offset; uint64_t status = 0ULL; @@ -171,8 +259,8 @@ assert(probe_data >= 0); assert((uint32_t)probe_data < num_idx); - uint32_t offset = (uint32_t)probe_data; - const struct ras_error *errors = aux_data[offset].error_records; + offset = (uint32_t)probe_data; + errors = aux_data[offset].error_records; assert(errors != NULL); @@ -182,10 +270,7 @@ /* Retrieve status register from the error record */ status = read_erxstatus_el1(); - assert(ERR_STATUS_GET_FIELD(status, V) != 0U); - assert(ERR_STATUS_GET_FIELD(status, UE) != 0U); - - return tegra194_ras_node_handler(errors, status); + return tegra194_ras_node_handler(idx_start + offset, errors, status); } diff --git a/plat/nvidia/tegra/soc/t194/plat_sip_calls.c b/plat/nvidia/tegra/soc/t194/plat_sip_calls.c index 
884762d..a3f996d 100644 --- a/plat/nvidia/tegra/soc/t194/plat_sip_calls.c +++ b/plat/nvidia/tegra/soc/t194/plat_sip_calls.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,7 @@ * Tegra194 SiP SMCs ******************************************************************************/ #define TEGRA_SIP_GET_SMMU_PER 0xC200FF00U +#define TEGRA_SIP_CLEAR_RAS_CORRECTED_ERRORS 0xC200FF01U /******************************************************************************* * This function is responsible for handling all T194 SiP calls @@ -69,6 +71,15 @@ break; +#if RAS_EXTENSION + case TEGRA_SIP_CLEAR_RAS_CORRECTED_ERRORS: + /* clear all RAS error records for corrected errors at first. */ + tegra194_ras_corrected_err_clear(); + /* clear HSM corrected error status. */ + mce_clear_hsm_corr_status(); + break; +#endif + default: ret = -ENOTSUP; break;