From 90dee58c159cf8c6a5426f78d1ef914b91442265 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Thu, 28 Sep 2023 17:33:01 +0800 Subject: [PATCH 1/2] [PowerPC] Mask constant operands in ValueBit tracking In IR or C code, left/right shift amount larger than value size is undefined behavior. But in practise, backend lowering for srl_parts/sra_parts/shl_parts produces add/sub of shift amounts, thus constant shift amounts might be negative or larger than value size. And the lowering depends on behavior in ISA. PowerPC ISA says, the lowest 7 bits (6 bits if in 32-bit instruction) will be taken, and if the highest among them is 1, result will be zero, otherwise the low 6 bits (or 5 on 32-bit) are used as shift amount. This patch emulates the behavior and avoids array overflow in bit permutation's value bits calculator. --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 36 ++++-- llvm/test/CodeGen/PowerPC/pr59074.ll | 132 ++++++++++++++++++++ 2 files changed, 155 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr59074.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6a3710407bc4f..19427c6322373 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1632,7 +1632,7 @@ class BitPermutationSelector { default: break; case ISD::ROTL: if (isa(V.getOperand(1))) { - unsigned RotAmt = V.getConstantOperandVal(1); + unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; @@ -1645,15 +1645,20 @@ class BitPermutationSelector { case ISD::SHL: case PPCISD::SHL: if (isa(V.getOperand(1))) { - unsigned ShiftAmt = V.getConstantOperandVal(1); + // sld takes 7 bits, slw takes 6. + unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; - for (unsigned i = ShiftAmt; i < NumBits; ++i) - Bits[i] = LHSBits[i - ShiftAmt]; - - for (unsigned i = 0; i < ShiftAmt; ++i) - Bits[i] = ValueBit(ValueBit::ConstZero); + if (ShiftAmt >= NumBits) { + for (unsigned i = 0; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } else { + for (unsigned i = ShiftAmt; i < NumBits; ++i) + Bits[i] = LHSBits[i - ShiftAmt]; + for (unsigned i = 0; i < ShiftAmt; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } return std::make_pair(Interesting = true, &Bits); } @@ -1661,15 +1666,20 @@ class BitPermutationSelector { case ISD::SRL: case PPCISD::SRL: if (isa(V.getOperand(1))) { - unsigned ShiftAmt = V.getConstantOperandVal(1); + // srd takes lowest 7 bits, srw takes 6. + unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; - for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) - Bits[i] = LHSBits[i + ShiftAmt]; - - for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) - Bits[i] = ValueBit(ValueBit::ConstZero); + if (ShiftAmt >= NumBits) { + for (unsigned i = 0; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } else { + for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) + Bits[i] = LHSBits[i + ShiftAmt]; + for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } return std::make_pair(Interesting = true, &Bits); } diff --git a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll new file mode 100644 index 0000000000000..3e328c6ad9f0b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr59074.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE64 +; RUN: llc -mtriple=powerpcle-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE32 +; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE64 +; RUN: llc -mtriple=powerpc-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE32 + +; To verify this doesn't crash due to array out of bound. +define void @pr59074(ptr %0) { +; LE64-LABEL: pr59074: +; LE64: # %bb.0: # %entry +; LE64-NEXT: lwz 6, 0(3) +; LE64-NEXT: li 7, 12 +; LE64-NEXT: ld 4, 16(3) +; LE64-NEXT: ld 5, 24(3) +; LE64-NEXT: addi 6, 6, -12 +; LE64-NEXT: std 4, 16(3) +; LE64-NEXT: std 5, 24(3) +; LE64-NEXT: srd 6, 7, 6 +; LE64-NEXT: li 7, 0 +; LE64-NEXT: std 7, 8(3) +; LE64-NEXT: std 6, 0(3) +; LE64-NEXT: blr +; +; LE32-LABEL: pr59074: +; LE32: # %bb.0: # %entry +; LE32-NEXT: stwu 1, -80(1) +; LE32-NEXT: .cfi_def_cfa_offset 80 +; LE32-NEXT: lwz 4, 0(3) +; LE32-NEXT: xxlxor 0, 0, 0 +; LE32-NEXT: li 5, 4 +; LE32-NEXT: addi 6, 1, 16 +; LE32-NEXT: li 7, 0 +; LE32-NEXT: li 8, 12 +; LE32-NEXT: xxswapd 0, 0 +; LE32-NEXT: addi 4, 4, -12 +; LE32-NEXT: rlwinm 9, 4, 29, 28, 31 +; LE32-NEXT: stxvd2x 0, 6, 5 +; LE32-NEXT: stw 7, 44(1) +; LE32-NEXT: stw 7, 40(1) +; LE32-NEXT: stw 7, 36(1) +; LE32-NEXT: stw 8, 16(1) +; LE32-NEXT: lwzux 5, 9, 6 +; LE32-NEXT: li 6, 7 +; LE32-NEXT: lwz 7, 8(9) +; LE32-NEXT: nand 6, 4, 6 +; LE32-NEXT: lwz 8, 4(9) +; LE32-NEXT: clrlwi 4, 4, 29 +; LE32-NEXT: lwz 9, 12(9) +; LE32-NEXT: clrlwi 6, 6, 27 +; LE32-NEXT: subfic 11, 4, 32 +; LE32-NEXT: srw 5, 5, 4 +; LE32-NEXT: slwi 10, 7, 1 +; LE32-NEXT: srw 7, 7, 4 +; LE32-NEXT: slw 6, 10, 6 +; LE32-NEXT: srw 10, 8, 4 +; LE32-NEXT: slw 8, 8, 11 +; LE32-NEXT: slw 11, 9, 11 +; LE32-NEXT: srw 4, 9, 4 +; LE32-NEXT: or 5, 8, 5 +; LE32-NEXT: or 7, 11, 7 +; LE32-NEXT: or 6, 10, 6 +; LE32-NEXT: stw 4, 12(3) +; LE32-NEXT: stw 7, 8(3) +; LE32-NEXT: stw 5, 0(3) +; LE32-NEXT: stw 6, 4(3) +; LE32-NEXT: addi 1, 1, 80 +; LE32-NEXT: blr +; +; BE64-LABEL: pr59074: +; BE64: # %bb.0: # %entry +; BE64-NEXT: lwz 6, 12(3) +; BE64-NEXT: li 7, 12 +; BE64-NEXT: ld 4, 24(3) +; BE64-NEXT: ld 5, 16(3) +; BE64-NEXT: addi 6, 6, -12 +; BE64-NEXT: std 4, 24(3) +; BE64-NEXT: std 5, 16(3) +; BE64-NEXT: srd 6, 7, 6 +; BE64-NEXT: li 7, 0 +; BE64-NEXT: std 7, 0(3) +; BE64-NEXT: std 6, 8(3) +; BE64-NEXT: blr +; +; BE32-LABEL: pr59074: +; BE32: # %bb.0: # %entry +; BE32-NEXT: lwz 4, 12(3) +; BE32-NEXT: xxlxor 0, 0, 0 +; BE32-NEXT: addi 5, 1, -64 +; BE32-NEXT: li 6, 12 +; BE32-NEXT: li 7, 0 +; BE32-NEXT: addi 8, 1, -48 +; BE32-NEXT: li 10, 7 +; BE32-NEXT: stxvw4x 0, 0, 5 +; BE32-NEXT: addi 4, 4, -12 +; BE32-NEXT: stw 6, -36(1) +; BE32-NEXT: stw 7, -40(1) +; BE32-NEXT: stw 7, -44(1) +; BE32-NEXT: rlwinm 9, 4, 29, 28, 31 +; BE32-NEXT: stw 7, -48(1) +; BE32-NEXT: sub 5, 8, 9 +; BE32-NEXT: nand 6, 4, 10 +; BE32-NEXT: clrlwi 4, 4, 29 +; BE32-NEXT: clrlwi 6, 6, 27 +; BE32-NEXT: lwz 7, 4(5) +; BE32-NEXT: lwz 8, 8(5) +; BE32-NEXT: lwz 9, 0(5) +; BE32-NEXT: lwz 5, 12(5) +; BE32-NEXT: slwi 10, 7, 1 +; BE32-NEXT: srw 11, 8, 4 +; BE32-NEXT: srw 7, 7, 4 +; BE32-NEXT: srw 5, 5, 4 +; BE32-NEXT: slw 6, 10, 6 +; BE32-NEXT: subfic 10, 4, 32 +; BE32-NEXT: srw 4, 9, 4 +; BE32-NEXT: slw 8, 8, 10 +; BE32-NEXT: slw 10, 9, 10 +; BE32-NEXT: or 6, 11, 6 +; BE32-NEXT: or 7, 10, 7 +; BE32-NEXT: or 5, 8, 5 +; BE32-NEXT: stw 4, 0(3) +; BE32-NEXT: stw 6, 8(3) +; BE32-NEXT: stw 5, 12(3) +; BE32-NEXT: stw 7, 4(3) +; BE32-NEXT: blr +entry: + %v1 = load <2 x i128>, <2 x i128>* %0 + %v2 = insertelement <2 x i128> %v1, i128 12, i32 0 + %v3 = sub <2 x i128> %v1, %v2 + %v4 = lshr <2 x i128> %v2, %v3 + store <2 x i128> %v4, <2 x i128>* %0 + ret void +} From 6759869abed47f6c488cf17e16ce85d984c58a27 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 6 Feb 2024 11:10:42 +0800 Subject: [PATCH 2/2] Add assert --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 19427c6322373..9e5f0b36616d1 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1632,6 +1632,7 @@ class BitPermutationSelector { default: break; case ISD::ROTL: if (isa(V.getOperand(1))) { + assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!"); unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;