Skip to content

Commit 7a81273

Browse files
yghannam authored and jfvogel committed
x86/mce: Handle varying MCA bank counts
Linux reads MCG_CAP[Count] to find the number of MCA banks visible to a CPU. Currently, this number is the same for all CPUs and a warning is shown if there is a difference. The number of banks is overwritten with the MCG_CAP[Count] value of each following CPU that boots.

According to the Intel SDM and AMD APM, the MCG_CAP[Count] value gives the number of banks that are available to a "processor implementation". The AMD BKDGs/PPRs further clarify that this value is per core. This value has historically been the same for every core in the system, but that is not an architectural requirement.

Future AMD systems may have different MCG_CAP[Count] values per core, so the assumption that all CPUs will have the same MCG_CAP[Count] value will no longer be valid.

Also, the first CPU to boot will allocate the struct mce_banks[] array using the number of banks based on its MCG_CAP[Count] value. The machine check handler and other functions use the global number of banks to iterate and index into the mce_banks[] array. So it's possible to use an out-of-bounds index on an asymmetric system where a following CPU sees an MCG_CAP[Count] value greater than its predecessors.

Thus, allocate the mce_banks[] array to the maximum number of banks. This will avoid the potential out-of-bounds index since the value of mca_cfg.banks is capped to MAX_NR_BANKS.

Set the value of mca_cfg.banks equal to the max of the previous value and the value for the current CPU. This way mca_cfg.banks will always represent the max number of banks detected on any CPU in the system.

This will ensure that all CPUs will access all the banks that are visible to them. A CPU that can access fewer than the max number of banks will find the registers of the extra banks to be read-as-zero.

Furthermore, print the resulting number of MCA banks in use. Do this in mcheck_late_init() so that the final value is printed after all CPUs have been initialized.
Finally, get the bank count from the target CPU when doing injection with the mce-inject module.

[ bp: Remove out-of-bounds example, passify and cleanup commit message. ]

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: Pu Wen <puwen@hygon.cn>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20180727214009.78289-1-Yazen.Ghannam@amd.com

(cherry picked from commit 006c077)

Orabug: 29547647

Signed-off-by: Somasundaram Krishnasamy <somasundaram.krishnasamy@oracle.com>
Reviewed-by: John Donnelly <john.p.donnelly@oracle.com>

Conflicts:
arch/x86/kernel/cpu/mce/core.c
kzalloc is changed to kcalloc in __mcheck_cpu_mce_banks_init
1 parent 6145b29 commit 7a81273

File tree

2 files changed

+14
-22
lines changed

2 files changed

+14
-22
lines changed

arch/x86/kernel/cpu/mce/core.c

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,13 +1498,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
14981498
static int __mcheck_cpu_mce_banks_init(void)
14991499
{
15001500
int i;
1501-
u8 num_banks = mca_cfg.banks;
15021501

1503-
mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
1502+
mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL);
15041503
if (!mce_banks)
15051504
return -ENOMEM;
15061505

1507-
for (i = 0; i < num_banks; i++) {
1506+
for (i = 0; i < MAX_NR_BANKS; i++) {
15081507
struct mce_bank *b = &mce_banks[i];
15091508

15101509
b->ctl = -1ULL;
@@ -1518,28 +1517,19 @@ static int __mcheck_cpu_mce_banks_init(void)
15181517
*/
15191518
static int __mcheck_cpu_cap_init(void)
15201519
{
1521-
unsigned b;
15221520
u64 cap;
1521+
u8 b;
15231522

15241523
rdmsrl(MSR_IA32_MCG_CAP, cap);
15251524

15261525
b = cap & MCG_BANKCNT_MASK;
1527-
if (!mca_cfg.banks)
1528-
pr_info("CPU supports %d MCE banks\n", b);
1529-
1530-
if (b > MAX_NR_BANKS) {
1531-
pr_warn("Using only %u machine check banks out of %u\n",
1532-
MAX_NR_BANKS, b);
1526+
if (WARN_ON_ONCE(b > MAX_NR_BANKS))
15331527
b = MAX_NR_BANKS;
1534-
}
15351528

1536-
/* Don't support asymmetric configurations today */
1537-
WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
1538-
mca_cfg.banks = b;
1529+
mca_cfg.banks = max(mca_cfg.banks, b);
15391530

15401531
if (!mce_banks) {
15411532
int err = __mcheck_cpu_mce_banks_init();
1542-
15431533
if (err)
15441534
return err;
15451535
}
@@ -2471,6 +2461,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key);
24712461

24722462
static int __init mcheck_late_init(void)
24732463
{
2464+
pr_info("Using %d MCE banks\n", mca_cfg.banks);
2465+
24742466
if (mca_cfg.recovery)
24752467
static_branch_inc(&mcsafe_key);
24762468

arch/x86/kernel/cpu/mce/inject.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@
4646
static struct mce i_mce;
4747
static struct dentry *dfs_inj;
4848

49-
static u8 n_banks;
50-
5149
#define MAX_FLAG_OPT_SIZE 4
5250
#define NBCFG 0x44
5351

@@ -567,9 +565,15 @@ static void do_inject(void)
567565
static int inj_bank_set(void *data, u64 val)
568566
{
569567
struct mce *m = (struct mce *)data;
568+
u8 n_banks;
569+
u64 cap;
570+
571+
/* Get bank count on target CPU so we can handle non-uniform values. */
572+
rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
573+
n_banks = cap & MCG_BANKCNT_MASK;
570574

571575
if (val >= n_banks) {
572-
pr_err("Non-existent MCE bank: %llu\n", val);
576+
pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);
573577
return -EINVAL;
574578
}
575579

@@ -659,10 +663,6 @@ static struct dfs_node {
659663
static int __init debugfs_init(void)
660664
{
661665
unsigned int i;
662-
u64 cap;
663-
664-
rdmsrl(MSR_IA32_MCG_CAP, cap);
665-
n_banks = cap & MCG_BANKCNT_MASK;
666666

667667
dfs_inj = debugfs_create_dir("mce-inject", NULL);
668668
if (!dfs_inj)

0 commit comments

Comments
 (0)