From c914d557f416f3cd9cf94ae9fe69e5648f598ff4 Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Tue, 3 Sep 2024 16:09:43 +0100 Subject: [PATCH 01/14] [llvm][ARM] Add ARM widen strings pass - The pass optimizes memcpys by padding out destinations and sources to a full word so that the ARM backend generates full-word loads instead of loading a single byte (ldrb) and/or a half word (ldrh). It only pads the destination when it is a stack-allocated, constant-size array and the source when it is a constant string. The heuristic that decides whether to pad is very basic and could be improved to allow more cases to be padded. - The pass works at the midend level. Change-Id: I1c6371f0962e7ad3c166602b800d041ac1cc7b04 --- .../llvm/Transforms/Scalar/ARMWidenStrings.h | 30 +++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + .../lib/Transforms/Scalar/ARMWidenStrings.cpp | 227 ++++++++++++++++++ llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 + .../ARMWidenStrings/arm-widen-strings-1.ll | 25 ++ .../ARMWidenStrings/arm-widen-strings-2.ll | 22 ++ .../arm-widen-strings-lengths-dont-match.ll | 28 +++ .../arm-widen-strings-more-than-64-bytes.ll | 29 +++ .../arm-widen-strings-ptrtoint.ll | 42 ++++ .../arm-widen-strings-struct-test.ll | 52 ++++ .../arm-widen-strings-volatile.ll | 29 +++ 12 files changed, 487 insertions(+) create mode 100755 llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h create mode 100644 llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll create mode 100644 
llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll diff --git a/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h b/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h new file mode 100755 index 0000000000000..3bda666660144 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h @@ -0,0 +1,30 @@ +//===- ARMWidenStrings.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface for the ArmWidenStrings pass +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H +#define LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; + +class ARMWidenStringsPass : public PassInfoMixin { +public: + ARMWidenStringsPass() = default; + PreservedAnalyses run(Function &F, FunctionAnalysisManager &); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H \ No newline at end of file diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index ebad3507eb5e2..0b1198e9b47ab 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -212,6 +212,7 @@ #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar/ADCE.h" +#include "llvm/Transforms/Scalar/ARMWidenStrings.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/AnnotationRemarks.h" #include "llvm/Transforms/Scalar/BDCE.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 90859c18c4f49..e4b7697c74b33 
100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -490,6 +490,7 @@ FUNCTION_PASS("view-dom-only", DomOnlyViewer()) FUNCTION_PASS("view-post-dom", PostDomViewer()) FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer()) FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass()) +FUNCTION_PASS("arm-widen-strings", ARMWidenStringsPass()) #undef FUNCTION_PASS #ifndef FUNCTION_PASS_WITH_PARAMS diff --git a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp new file mode 100644 index 0000000000000..dd06c2a7ea10d --- /dev/null +++ b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp @@ -0,0 +1,227 @@ +// ARMWidenStrings.cpp - Widen strings to word boundaries to speed up +// programs that use simple strcpy's with constant strings as source +// and stack allocated array for destination. + +#define DEBUG_TYPE "arm-widen-strings" + +#include "llvm/Transforms/Scalar/ARMWidenStrings.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +cl::opt DisableARMWidenStrings("disable-arm-widen-strings", + cl::init(false)); + +namespace { + +class ARMWidenStrings { +public: + /* + Max number of bytes that memcpy allows for lowering to load/stores before it + uses library function (__aeabi_memcpy). 
This is the same value returned by + ARMSubtarget::getMaxInlineSizeThreshold which I would have called in place of + the constant int but can't get access to the subtarget info class from the + midend. + */ + const unsigned int MemcpyInliningLimit = 64; + + bool run(Function &F); +}; + +static bool IsCharArray(Type *t) { + const unsigned int CHAR_BIT_SIZE = 8; + return t && t->isArrayTy() && t->getArrayElementType()->isIntegerTy() && + t->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE; +} + +bool ARMWidenStrings::run(Function &F) { + if (DisableARMWidenStrings) { + return false; + } + + LLVM_DEBUG(dbgs() << "Running ARMWidenStrings on module " << F.getName() + << "\n"); + + for (Function::iterator b = F.begin(); b != F.end(); ++b) { + for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) { + CallInst *CI = dyn_cast(i); + if (!CI) { + continue; + } + + Function *CallMemcpy = CI->getCalledFunction(); + // find out if the current call instruction is a call to llvm memcpy + // intrinsics + if (CallMemcpy == NULL || !CallMemcpy->isIntrinsic() || + CallMemcpy->getIntrinsicID() != Intrinsic::memcpy) { + continue; + } + + LLVM_DEBUG(dbgs() << "Found call to strcpy/memcpy:\n" << *CI << "\n"); + + auto *Alloca = dyn_cast(CI->getArgOperand(0)); + auto *SourceVar = dyn_cast(CI->getArgOperand(1)); + auto *BytesToCopy = dyn_cast(CI->getArgOperand(2)); + auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); + + if (!BytesToCopy) { + LLVM_DEBUG(dbgs() << "Number of bytes to copy is null\n"); + continue; + } + + uint64_t NumBytesToCopy = BytesToCopy->getZExtValue(); + + if (!Alloca) { + LLVM_DEBUG(dbgs() << "Destination isn't a Alloca\n"); + continue; + } + + if (!SourceVar) { + LLVM_DEBUG(dbgs() << "Source isn't a global constant variable\n"); + continue; + } + + if (!IsVolatile || IsVolatile->isOne()) { + LLVM_DEBUG( + dbgs() << "Not widening strings for this memcpy because it's " + "a volatile operations\n"); + continue; + } + + if (NumBytesToCopy % 4 == 0) { 
+ LLVM_DEBUG(dbgs() << "Bytes to copy in strcpy/memcpy is already word " + "aligned so nothing to do here.\n"); + continue; + } + + if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || + !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) { + LLVM_DEBUG(dbgs() << "Source is not constant global, thus it's " + "mutable therefore it's not safe to pad\n"); + continue; + } + + ConstantDataArray *SourceDataArray = + dyn_cast(SourceVar->getInitializer()); + if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) { + LLVM_DEBUG(dbgs() << "Source isn't a constant data array\n"); + continue; + } + + if (!Alloca->isStaticAlloca()) { + LLVM_DEBUG(dbgs() << "Destination allocation isn't a static " + "constant which is locally allocated in this " + "function, so skipping.\n"); + continue; + } + + // Make sure destination is definitley a char array. + if (!IsCharArray(Alloca->getAllocatedType())) { + LLVM_DEBUG(dbgs() << "Destination doesn't look like a constant char (8 " + "bits) array\n"); + continue; + } + LLVM_DEBUG(dbgs() << "With Alloca: " << *Alloca << "\n"); + + uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); + uint64_t SZSize = SourceDataArray->getType()->getNumElements(); + + // For safety purposes lets add a constraint and only padd when + // num bytes to copy == destination array size == source string + // which is a constant + LLVM_DEBUG(dbgs() << "Number of bytes to copy is: " << NumBytesToCopy + << "\n"); + LLVM_DEBUG(dbgs() << "Size of destination array is: " << DZSize << "\n"); + LLVM_DEBUG(dbgs() << "Size of source array is: " << SZSize << "\n"); + if (NumBytesToCopy != DZSize || DZSize != SZSize) { + LLVM_DEBUG(dbgs() << "Size of number of bytes to copy, destination " + "array and source string don't match, so " + "skipping\n"); + continue; + } + LLVM_DEBUG(dbgs() << "Going to widen.\n"); + unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4); + LLVM_DEBUG(dbgs() << "Number of bytes to pad by is " << 
NumBytesToPad + << "\n"); + unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; + + if (TotalBytes > MemcpyInliningLimit) { + LLVM_DEBUG( + dbgs() << "Not going to pad because total number of bytes is " + << TotalBytes + << " which be greater than the inlining " + "limit for memcpy which is " + << MemcpyInliningLimit << "\n"); + continue; + } + + // update destination char array to be word aligned (memcpy(X,...,...)) + IRBuilder<> BuildAlloca(Alloca); + AllocaInst *NewAlloca = cast(BuildAlloca.CreateAlloca( + ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(), + NumBytesToCopy + NumBytesToPad))); + NewAlloca->takeName(Alloca); + NewAlloca->setAlignment(Alloca->getAlign()); + Alloca->replaceAllUsesWith(NewAlloca); + + LLVM_DEBUG(dbgs() << "Updating users of destination stack object to use " + << "new size\n"); + + // update source to be word aligned (memcpy(...,X,...)) + // create replacement string with padded null bytes. + StringRef Data = SourceDataArray->getRawDataValues(); + std::vector StrData(Data.begin(), Data.end()); + for (unsigned int p = 0; p < NumBytesToPad; p++) + StrData.push_back('\0'); + auto Arr = ArrayRef(StrData.data(), TotalBytes); + + // create new padded version of global variable string. + Constant *SourceReplace = ConstantDataArray::get(F.getContext(), Arr); + GlobalVariable *NewGV = new GlobalVariable( + *F.getParent(), SourceReplace->getType(), true, + SourceVar->getLinkage(), SourceReplace, SourceReplace->getName()); + + // copy any other attributes from original global variable string + // e.g. unamed_addr + NewGV->copyAttributesFrom(SourceVar); + NewGV->takeName(SourceVar); + + // replace intrinsic source. 
+ CI->setArgOperand(1, NewGV); + + // Update number of bytes to copy (memcpy(...,...,X)) + CI->setArgOperand(2, + ConstantInt::get(BytesToCopy->getType(), TotalBytes)); + LLVM_DEBUG(dbgs() << "Padded dest/source and increased number of bytes:\n" + << *CI << "\n" + << *NewAlloca << "\n"); + } + } + return true; +} + +} // end of anonymous namespace + +PreservedAnalyses ARMWidenStringsPass::run(Function &F, + FunctionAnalysisManager &AM) { + if (!ARMWidenStrings().run(F)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index 939a145723956..a9607e4ebc658 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_component_library(LLVMScalarOpts ADCE.cpp AlignmentFromAssumptions.cpp AnnotationRemarks.cpp + ARMWidenStrings.cpp BDCE.cpp CallSiteSplitting.cpp ConstantHoisting.cpp diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll new file mode 100644 index 0000000000000..e11cf372c36a6 --- /dev/null +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default,arm-widen-strings" -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default" -S | FileCheck %s --check-prefix=TURNED-OFF +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +; CHECK: [12 x i8] +; TURNED-OFF-NOT: [12 x i8] +@.str = private unnamed_addr constant [10 x i8] c"123456789\00", align 1 + +; Function Attrs: nounwind +define hidden void @foo() #0 { +entry: +; CHECK: %something = alloca [12 x i8] +; TURNED-OFF-NOT: %something = alloca [12 x i8] + %something = alloca [10 x i8], align 1 + %arraydecay = getelementptr inbounds [10 x i8], ptr %something, i32 0, i32 0 +; CHECK: @llvm.memcpy.p0.p0.i32 + %call = call ptr 
@strcpy(ptr %arraydecay, ptr @.str) + %arraydecay1 = getelementptr inbounds [10 x i8], ptr %something, i32 0, i32 0 + %call2 = call i32 @bar(ptr %arraydecay1) + ret void +} + +declare ptr @strcpy(ptr, ptr) #1 + +declare i32 @bar(...) #1 diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll new file mode 100644 index 0000000000000..2df8108f445fe --- /dev/null +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default,arm-widen-strings" -S | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +; CHECK: [64 x i8] +@.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1 + +; Function Attrs: nounwind +define hidden void @foo() #0 { +entry: +; CHECK: %something = alloca [64 x i8] + %something = alloca [62 x i8], align 1 + %arraydecay = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0 +; CHECK: @llvm.memcpy.p0.p0.i32 + %call = call ptr @strcpy(ptr %arraydecay, ptr @.str) + %arraydecay1 = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0 + %call2 = call i32 @bar(ptr %arraydecay1) + ret void +} + +declare ptr @strcpy(ptr, ptr) #1 + +declare i32 @bar(...) 
#1 diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll new file mode 100644 index 0000000000000..a0c1e21329816 --- /dev/null +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m-arm-none-eabi" + +; CHECK: [17 x i8] +@.str = private unnamed_addr constant [17 x i8] c"aaaaaaaaaaaaaaaa\00", align 1 + +; Function Attrs: nounwind +define hidden void @foo() local_unnamed_addr #0 { +entry: + %something = alloca [20 x i8], align 1 + call void @llvm.lifetime.start(i64 20, ptr nonnull %something) #3 + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %something, ptr align 1 @.str, i32 17, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) #3 + call void @llvm.lifetime.end(i64 20, ptr nonnull %something) #3 + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, ptr nocapture) #1 + +declare i32 @bar(...) 
local_unnamed_addr #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end(i64, ptr nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll new file mode 100644 index 0000000000000..67cb99023c532 --- /dev/null +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m-arm-none-eabi" + +; CHECK: [65 x i8] +; CHECK-NOT: [68 x i8] +@.str = private unnamed_addr constant [65 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzz\00", align 1 + +; Function Attrs: nounwind +define hidden void @foo() local_unnamed_addr #0 { +entry: + %something = alloca [65 x i8], align 1 + call void @llvm.lifetime.start(i64 65, ptr nonnull %something) #3 + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %something, ptr align 1 @.str, i32 65, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) #3 + call void @llvm.lifetime.end(i64 65, ptr nonnull %something) #3 + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, ptr nocapture) #1 + +declare i32 @bar(...) 
local_unnamed_addr #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end(i64, ptr nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll new file mode 100644 index 0000000000000..3f02c02ad845b --- /dev/null +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +; CHECK: [48 x i8] +@f.string1 = private unnamed_addr constant [45 x i8] c"The quick brown dog jumps over the lazy fox.\00", align 1 + +; Function Attrs: nounwind +define hidden i32 @f() { +entry: + %string1 = alloca [45 x i8], align 1 + %pos = alloca i32, align 4 + %token = alloca ptr, align 4 + call void @llvm.lifetime.start.p0i8(i64 45, ptr %string1) + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %string1, ptr align 1 @f.string1, i32 45, i1 false) + call void @llvm.lifetime.start.p0i8(i64 4, ptr %pos) + call void @llvm.lifetime.start.p0i8(i64 4, ptr %token) + %call = call ptr @strchr(ptr %string1, i32 101) + store ptr %call, ptr %token, align 4 + %0 = load ptr, ptr %token, align 4 + %sub.ptr.lhs.cast = ptrtoint ptr %0 to i32 + %sub.ptr.rhs.cast = ptrtoint ptr %string1 to i32 + %sub.ptr.sub = sub i32 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %add = add nsw i32 %sub.ptr.sub, 1 + store i32 %add, ptr %pos, align 4 + %1 = load i32, ptr %pos, align 4 + call void @llvm.lifetime.end.p0i8(i64 4, ptr %token) + call void @llvm.lifetime.end.p0i8(i64 4, ptr %pos) + call void @llvm.lifetime.end.p0i8(i64 45, ptr %string1) + ret i32 %1 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, ptr nocapture) + +; Function Attrs: 
argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) + +; Function Attrs: nounwind +declare ptr @strchr(ptr, i32) + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, ptr nocapture) diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll new file mode 100644 index 0000000000000..937bfaecd8e3e --- /dev/null +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m-arm-none-eabi" + +%struct.P = type { i32, [13 x i8] } + +; CHECK-NOT: [16 x i8] +@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 +@__ARM_use_no_argv = global i32 1, section ".ARM.use_no_argv", align 4 +@llvm.used = appending global [1 x ptr] [ptr @__ARM_use_no_argv], section "llvm.metadata" + +; Function Attrs: nounwind +define hidden i32 @main() local_unnamed_addr #0 { +entry: + %p = alloca %struct.P, align 4 + call void @llvm.lifetime.start(i64 20, ptr nonnull %p) #2 + store i32 10, ptr %p, align 4, !tbaa !3 + %arraydecay = getelementptr inbounds %struct.P, ptr %p, i32 0, i32 1, i32 0 + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %arraydecay, ptr align 1 @.str, i32 13, i1 false) + %puts = call i32 @puts(ptr %arraydecay) + call void @llvm.lifetime.end(i64 20, ptr nonnull %p) #2 + ret i32 0 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, ptr nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end(i64, ptr nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, 
ptr nocapture readonly, i32, i1) #1 + +; Function Attrs: nounwind +declare i32 @puts(ptr nocapture readonly) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m0" "target-features"="+strict-align" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!"Component: ARM Compiler 6 devbuild Tool: armclang [devbuild]"} +!3 = !{!4, !5, i64 0} +!4 = !{!"P", !5, i64 0, !6, i64 4} +!5 = !{!"int", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll new file mode 100644 index 0000000000000..6cbd823a18c36 --- /dev/null +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m-arm-none-eabi" + +; CHECK-NOT: [64 x i8] +@.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1 + +; Function Attrs: nounwind +define hidden void @foo() local_unnamed_addr #0 { +entry: + %something = alloca [62 x i8], align 1 + %0 = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0 + call void @llvm.lifetime.start(i64 62, ptr nonnull %0) #3 + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 
1 nonnull %0, ptr align 1 @.str, i32 62, i1 true) + %call2 = call i32 @bar(ptr nonnull %0) #3 + call void @llvm.lifetime.end(i64 62, ptr nonnull %0) #3 + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, ptr nocapture) #1 + +declare i32 @bar(...) local_unnamed_addr #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end(i64, ptr nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 From d80226770c6f5472654ee2b5f08fc80f2c3053c7 Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Wed, 11 Sep 2024 16:54:42 +0100 Subject: [PATCH 02/14] Responding to review comments Change-Id: I492ea4e5b6f589e5d877eeb6be31f7ab4720be9b --- .../lib/Transforms/Scalar/ARMWidenStrings.cpp | 61 +++++-------------- 1 file changed, 15 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp index dd06c2a7ea10d..1439e8af04292 100644 --- a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp +++ b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp @@ -1,6 +1,16 @@ -// ARMWidenStrings.cpp - Widen strings to word boundaries to speed up -// programs that use simple strcpy's with constant strings as source -// and stack allocated array for destination. +//===- ARMWidenStrings.cpp - Widen strings to ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Widen strings to word boundaries to speed up programs that use simple +// strcpy's with constant strings as source and stack allocated array for +// destination. 
+// +//===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-widen-strings" @@ -25,8 +35,7 @@ using namespace llvm; -cl::opt DisableARMWidenStrings("disable-arm-widen-strings", - cl::init(false)); +cl::opt DisableARMWidenStrings("disable-arm-widen-strings"); namespace { @@ -73,71 +82,53 @@ bool ARMWidenStrings::run(Function &F) { continue; } - LLVM_DEBUG(dbgs() << "Found call to strcpy/memcpy:\n" << *CI << "\n"); - auto *Alloca = dyn_cast(CI->getArgOperand(0)); auto *SourceVar = dyn_cast(CI->getArgOperand(1)); auto *BytesToCopy = dyn_cast(CI->getArgOperand(2)); auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); if (!BytesToCopy) { - LLVM_DEBUG(dbgs() << "Number of bytes to copy is null\n"); continue; } uint64_t NumBytesToCopy = BytesToCopy->getZExtValue(); if (!Alloca) { - LLVM_DEBUG(dbgs() << "Destination isn't a Alloca\n"); continue; } + // Source isn't a global constant variable if (!SourceVar) { - LLVM_DEBUG(dbgs() << "Source isn't a global constant variable\n"); continue; } if (!IsVolatile || IsVolatile->isOne()) { - LLVM_DEBUG( - dbgs() << "Not widening strings for this memcpy because it's " - "a volatile operations\n"); continue; } if (NumBytesToCopy % 4 == 0) { - LLVM_DEBUG(dbgs() << "Bytes to copy in strcpy/memcpy is already word " - "aligned so nothing to do here.\n"); continue; } if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) { - LLVM_DEBUG(dbgs() << "Source is not constant global, thus it's " - "mutable therefore it's not safe to pad\n"); continue; } ConstantDataArray *SourceDataArray = dyn_cast(SourceVar->getInitializer()); if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) { - LLVM_DEBUG(dbgs() << "Source isn't a constant data array\n"); continue; } if (!Alloca->isStaticAlloca()) { - LLVM_DEBUG(dbgs() << "Destination allocation isn't a static " - "constant which is locally allocated in this " - 
"function, so skipping.\n"); continue; } // Make sure destination is definitley a char array. if (!IsCharArray(Alloca->getAllocatedType())) { - LLVM_DEBUG(dbgs() << "Destination doesn't look like a constant char (8 " - "bits) array\n"); continue; } - LLVM_DEBUG(dbgs() << "With Alloca: " << *Alloca << "\n"); uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); uint64_t SZSize = SourceDataArray->getType()->getNumElements(); @@ -145,29 +136,13 @@ bool ARMWidenStrings::run(Function &F) { // For safety purposes lets add a constraint and only padd when // num bytes to copy == destination array size == source string // which is a constant - LLVM_DEBUG(dbgs() << "Number of bytes to copy is: " << NumBytesToCopy - << "\n"); - LLVM_DEBUG(dbgs() << "Size of destination array is: " << DZSize << "\n"); - LLVM_DEBUG(dbgs() << "Size of source array is: " << SZSize << "\n"); if (NumBytesToCopy != DZSize || DZSize != SZSize) { - LLVM_DEBUG(dbgs() << "Size of number of bytes to copy, destination " - "array and source string don't match, so " - "skipping\n"); continue; } - LLVM_DEBUG(dbgs() << "Going to widen.\n"); unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4); - LLVM_DEBUG(dbgs() << "Number of bytes to pad by is " << NumBytesToPad - << "\n"); unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; if (TotalBytes > MemcpyInliningLimit) { - LLVM_DEBUG( - dbgs() << "Not going to pad because total number of bytes is " - << TotalBytes - << " which be greater than the inlining " - "limit for memcpy which is " - << MemcpyInliningLimit << "\n"); continue; } @@ -180,9 +155,6 @@ bool ARMWidenStrings::run(Function &F) { NewAlloca->setAlignment(Alloca->getAlign()); Alloca->replaceAllUsesWith(NewAlloca); - LLVM_DEBUG(dbgs() << "Updating users of destination stack object to use " - << "new size\n"); - // update source to be word aligned (memcpy(...,X,...)) // create replacement string with padded null bytes. 
StringRef Data = SourceDataArray->getRawDataValues(); @@ -208,9 +180,6 @@ bool ARMWidenStrings::run(Function &F) { // Update number of bytes to copy (memcpy(...,...,X)) CI->setArgOperand(2, ConstantInt::get(BytesToCopy->getType(), TotalBytes)); - LLVM_DEBUG(dbgs() << "Padded dest/source and increased number of bytes:\n" - << *CI << "\n" - << *NewAlloca << "\n"); } } return true; From 2a2c6c9ae1fe8e1e4b5fc85cd2da9a50898387f3 Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Fri, 13 Sep 2024 12:24:32 +0100 Subject: [PATCH 03/14] Making ARMWidenStrings to be target independent Change-Id: Ic6ed9a549e39020e8c04b38bc21ba8162b4ebfd9 --- .../llvm/Analysis/TargetTransformInfo.h | 8 + .../llvm/Analysis/TargetTransformInfoImpl.h | 2 + .../llvm/Transforms/Scalar/ARMWidenStrings.h | 30 --- llvm/lib/Analysis/TargetTransformInfo.cpp | 3 + llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/lib/Passes/PassRegistry.def | 1 - .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 6 + llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 3 + llvm/lib/Transforms/IPO/GlobalOpt.cpp | 149 +++++++++++++ .../lib/Transforms/Scalar/ARMWidenStrings.cpp | 196 ------------------ llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 - .../ARMWidenStrings/arm-widen-strings-1.ll | 2 +- .../ARMWidenStrings/arm-widen-strings-2.ll | 2 +- .../arm-widen-strings-lengths-dont-match.ll | 2 +- .../arm-widen-strings-more-than-64-bytes.ll | 2 +- .../arm-widen-strings-ptrtoint.ll | 2 +- .../arm-widen-strings-struct-test.ll | 2 +- .../arm-widen-strings-volatile.ll | 2 +- 18 files changed, 178 insertions(+), 236 deletions(-) delete mode 100755 llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h delete mode 100644 llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 0459941fe05cd..85fa21ebc51f3 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1819,6 +1819,9 @@ 
class TargetTransformInfo { /// \return The maximum number of function arguments the target supports. unsigned getMaxNumArgs() const; + /// \return true if global strings should be padded to an alignment boundary + bool useWidenGlobalStrings() const; + /// @} private: @@ -2225,6 +2228,7 @@ class TargetTransformInfo::Concept { getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; virtual bool hasArmWideBranch(bool Thumb) const = 0; virtual unsigned getMaxNumArgs() const = 0; + virtual bool useWidenGlobalStrings() const = 0; }; template @@ -3026,6 +3030,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { unsigned getMaxNumArgs() const override { return Impl.getMaxNumArgs(); } + + bool useWidenGlobalStrings() const override { + return Impl.useWidenGlobalStrings(); + } }; template diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index dbdfb4d8cdfa3..5b9a7f6cf0ea2 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1006,6 +1006,8 @@ class TargetTransformInfoImplBase { unsigned getMaxNumArgs() const { return UINT_MAX; } + bool useWidenGlobalStrings() const { return false; } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. diff --git a/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h b/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h deleted file mode 100755 index 3bda666660144..0000000000000 --- a/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h +++ /dev/null @@ -1,30 +0,0 @@ -//===- ARMWidenStrings.h --------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file provides the interface for the ArmWidenStrings pass -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H -#define LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H - -#include "llvm/IR/PassManager.h" - -namespace llvm { - -class Module; - -class ARMWidenStringsPass : public PassInfoMixin { -public: - ARMWidenStringsPass() = default; - PreservedAnalyses run(Function &F, FunctionAnalysisManager &); -}; - -} // end namespace llvm - -#endif // LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H \ No newline at end of file diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index a47462b61e03b..dd07223ab6bdd 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1381,6 +1381,9 @@ bool TargetTransformInfo::isProfitableToSinkOperands( bool TargetTransformInfo::isVectorShiftByScalarCheap(Type *Ty) const { return TTIImpl->isVectorShiftByScalarCheap(Ty); + +bool TargetTransformInfo::useWidenGlobalStrings() const { + return TTIImpl->useWidenGlobalStrings(); } TargetTransformInfo::Concept::~Concept() = default; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 0b1198e9b47ab..ebad3507eb5e2 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -212,7 +212,6 @@ #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar/ADCE.h" -#include "llvm/Transforms/Scalar/ARMWidenStrings.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/AnnotationRemarks.h" #include "llvm/Transforms/Scalar/BDCE.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 
e4b7697c74b33..90859c18c4f49 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -490,7 +490,6 @@ FUNCTION_PASS("view-dom-only", DomOnlyViewer()) FUNCTION_PASS("view-post-dom", PostDomViewer()) FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer()) FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass()) -FUNCTION_PASS("arm-widen-strings", ARMWidenStringsPass()) #undef FUNCTION_PASS #ifndef FUNCTION_PASS_WITH_PARAMS diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 835ae98efb852..7991c8bc15311 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -56,6 +56,10 @@ static cl::opt AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true), cl::desc("Enable the generation of WLS loops")); +static cl::opt UseWidenGlobalStrings( + "widen-global-strings", cl::Hidden, cl::init(true), + cl::desc("Enable the widening of global strings to alignment boundaries")); + extern cl::opt EnableTailPredication; extern cl::opt EnableMaskedGatherScatters; @@ -2805,3 +2809,5 @@ bool ARMTTIImpl::isProfitableToSinkOperands(Instruction *I, } return true; } + +bool ARMTTIImpl::useWidenGlobalStrings() const { return UseWidenGlobalStrings; } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index b0a75134ee02b..23a76d7d010f2 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -337,6 +337,9 @@ class ARMTTIImpl : public BasicTTIImplBase { bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl &Ops) const; + + bool useWidenGlobalStrings() const; + /// @} }; diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index aae4926e027ff..84c1585fede11 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -92,6 +92,8 @@ 
STATISTIC(NumInternalFunc, "Number of internal functions"); STATISTIC(NumColdCC, "Number of functions marked coldcc"); STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs"); STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed"); +STATISTIC(NumGlobalStringsPadded, + "Number of global strings padded to alignment boundary"); static cl::opt EnableColdCCStressTest("enable-coldcc-stress-test", @@ -2029,6 +2031,145 @@ OptimizeFunctions(Module &M, return Changed; } +static bool IsCharArray(Type *t) { + const unsigned int CHAR_BIT_SIZE = 8; + return t && t->isArrayTy() && t->getArrayElementType()->isIntegerTy() && + t->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE; +} + +static bool +tryWidenGlobalStrings(Function &F, + function_ref GetTTI) { + bool changed = false; + + for (Function::iterator b = F.begin(); b != F.end(); ++b) { + for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) { + CallInst *CI = dyn_cast(i); + if (!CI) { + continue; + } + + TargetTransformInfo &TTI = GetTTI(F); + + Function *CallMemcpy = CI->getCalledFunction(); + // find out if the current call instruction is a call to llvm memcpy + // intrinsics + if (CallMemcpy == NULL || !CallMemcpy->isIntrinsic() || + CallMemcpy->getIntrinsicID() != Intrinsic::memcpy) { + continue; + } + + auto *Alloca = dyn_cast(CI->getArgOperand(0)); + auto *SourceVar = dyn_cast(CI->getArgOperand(1)); + auto *BytesToCopy = dyn_cast(CI->getArgOperand(2)); + auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); + + if (!BytesToCopy) { + continue; + } + + uint64_t NumBytesToCopy = BytesToCopy->getZExtValue(); + + if (!Alloca) { + continue; + } + + // Source isn't a global constant variable + if (!SourceVar) { + continue; + } + + if (!IsVolatile || IsVolatile->isOne()) { + continue; + } + + if (NumBytesToCopy % 4 == 0) { + continue; + } + + if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || + !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) { + continue; + } + + 
ConstantDataArray *SourceDataArray = + dyn_cast(SourceVar->getInitializer()); + if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) { + continue; + } + + if (!Alloca->isStaticAlloca()) { + continue; + } + + // Make sure destination is definitley a char array. + if (!IsCharArray(Alloca->getAllocatedType())) { + continue; + } + + uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); + uint64_t SZSize = SourceDataArray->getType()->getNumElements(); + + // For safety purposes lets add a constraint and only padd when + // num bytes to copy == destination array size == source string + // which is a constant + if (NumBytesToCopy != DZSize || DZSize != SZSize) { + continue; + } + unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4); + unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; + + /* + Max number of bytes that memcpy allows for lowering to load/stores before + it uses library function (__aeabi_memcpy). + */ + unsigned MaxMemIntrinsicSize = + TTI.getMaxMemIntrinsicInlineSizeThreshold(); + if (TotalBytes > MaxMemIntrinsicSize) { + continue; + } + + // update destination char array to be word aligned (memcpy(X,...,...)) + IRBuilder<> BuildAlloca(Alloca); + AllocaInst *NewAlloca = cast(BuildAlloca.CreateAlloca( + ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(), + NumBytesToCopy + NumBytesToPad))); + NewAlloca->takeName(Alloca); + NewAlloca->setAlignment(Alloca->getAlign()); + Alloca->replaceAllUsesWith(NewAlloca); + + // update source to be word aligned (memcpy(...,X,...)) + // create replacement string with padded null bytes. + StringRef Data = SourceDataArray->getRawDataValues(); + std::vector StrData(Data.begin(), Data.end()); + for (unsigned int p = 0; p < NumBytesToPad; p++) + StrData.push_back('\0'); + auto Arr = ArrayRef(StrData.data(), TotalBytes); + + // create new padded version of global variable string. 
+ Constant *SourceReplace = ConstantDataArray::get(F.getContext(), Arr); + GlobalVariable *NewGV = new GlobalVariable( + *F.getParent(), SourceReplace->getType(), true, + SourceVar->getLinkage(), SourceReplace, SourceReplace->getName()); + + // copy any other attributes from original global variable string + // e.g. unamed_addr + NewGV->copyAttributesFrom(SourceVar); + NewGV->takeName(SourceVar); + + // replace intrinsic source. + CI->setArgOperand(1, NewGV); + + // Update number of bytes to copy (memcpy(...,...,X)) + CI->setArgOperand(2, + ConstantInt::get(BytesToCopy->getType(), TotalBytes)); + NumGlobalStringsPadded++; + changed |= true; + } + } + return changed; +} + static bool OptimizeGlobalVars(Module &M, function_ref GetTTI, @@ -2058,6 +2199,14 @@ OptimizeGlobalVars(Module &M, continue; } + // Pad global strings if allowed + for (Function &F : llvm::make_early_inc_range(M)) { + TargetTransformInfo &TTI = GetTTI(F); + if (TTI.useWidenGlobalStrings()) { + Changed |= tryWidenGlobalStrings(F, GetTTI); + } + } + Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree); } return Changed; diff --git a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp deleted file mode 100644 index 1439e8af04292..0000000000000 --- a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp +++ /dev/null @@ -1,196 +0,0 @@ -//===- ARMWidenStrings.cpp - Widen strings to ---------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Widen strings to word boundaries to speed up programs that use simple -// strcpy's with constant strings as source and stack allocated array for -// destination. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm-widen-strings" - -#include "llvm/Transforms/Scalar/ARMWidenStrings.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/ValueSymbolTable.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/TargetParser/Triple.h" -#include "llvm/Transforms/Scalar.h" - -using namespace llvm; - -cl::opt DisableARMWidenStrings("disable-arm-widen-strings"); - -namespace { - -class ARMWidenStrings { -public: - /* - Max number of bytes that memcpy allows for lowering to load/stores before it - uses library function (__aeabi_memcpy). This is the same value returned by - ARMSubtarget::getMaxInlineSizeThreshold which I would have called in place of - the constant int but can't get access to the subtarget info class from the - midend. 
- */ - const unsigned int MemcpyInliningLimit = 64; - - bool run(Function &F); -}; - -static bool IsCharArray(Type *t) { - const unsigned int CHAR_BIT_SIZE = 8; - return t && t->isArrayTy() && t->getArrayElementType()->isIntegerTy() && - t->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE; -} - -bool ARMWidenStrings::run(Function &F) { - if (DisableARMWidenStrings) { - return false; - } - - LLVM_DEBUG(dbgs() << "Running ARMWidenStrings on module " << F.getName() - << "\n"); - - for (Function::iterator b = F.begin(); b != F.end(); ++b) { - for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) { - CallInst *CI = dyn_cast(i); - if (!CI) { - continue; - } - - Function *CallMemcpy = CI->getCalledFunction(); - // find out if the current call instruction is a call to llvm memcpy - // intrinsics - if (CallMemcpy == NULL || !CallMemcpy->isIntrinsic() || - CallMemcpy->getIntrinsicID() != Intrinsic::memcpy) { - continue; - } - - auto *Alloca = dyn_cast(CI->getArgOperand(0)); - auto *SourceVar = dyn_cast(CI->getArgOperand(1)); - auto *BytesToCopy = dyn_cast(CI->getArgOperand(2)); - auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); - - if (!BytesToCopy) { - continue; - } - - uint64_t NumBytesToCopy = BytesToCopy->getZExtValue(); - - if (!Alloca) { - continue; - } - - // Source isn't a global constant variable - if (!SourceVar) { - continue; - } - - if (!IsVolatile || IsVolatile->isOne()) { - continue; - } - - if (NumBytesToCopy % 4 == 0) { - continue; - } - - if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || - !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) { - continue; - } - - ConstantDataArray *SourceDataArray = - dyn_cast(SourceVar->getInitializer()); - if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) { - continue; - } - - if (!Alloca->isStaticAlloca()) { - continue; - } - - // Make sure destination is definitley a char array. 
- if (!IsCharArray(Alloca->getAllocatedType())) { - continue; - } - - uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); - uint64_t SZSize = SourceDataArray->getType()->getNumElements(); - - // For safety purposes lets add a constraint and only padd when - // num bytes to copy == destination array size == source string - // which is a constant - if (NumBytesToCopy != DZSize || DZSize != SZSize) { - continue; - } - unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4); - unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; - - if (TotalBytes > MemcpyInliningLimit) { - continue; - } - - // update destination char array to be word aligned (memcpy(X,...,...)) - IRBuilder<> BuildAlloca(Alloca); - AllocaInst *NewAlloca = cast(BuildAlloca.CreateAlloca( - ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(), - NumBytesToCopy + NumBytesToPad))); - NewAlloca->takeName(Alloca); - NewAlloca->setAlignment(Alloca->getAlign()); - Alloca->replaceAllUsesWith(NewAlloca); - - // update source to be word aligned (memcpy(...,X,...)) - // create replacement string with padded null bytes. - StringRef Data = SourceDataArray->getRawDataValues(); - std::vector StrData(Data.begin(), Data.end()); - for (unsigned int p = 0; p < NumBytesToPad; p++) - StrData.push_back('\0'); - auto Arr = ArrayRef(StrData.data(), TotalBytes); - - // create new padded version of global variable string. - Constant *SourceReplace = ConstantDataArray::get(F.getContext(), Arr); - GlobalVariable *NewGV = new GlobalVariable( - *F.getParent(), SourceReplace->getType(), true, - SourceVar->getLinkage(), SourceReplace, SourceReplace->getName()); - - // copy any other attributes from original global variable string - // e.g. unamed_addr - NewGV->copyAttributesFrom(SourceVar); - NewGV->takeName(SourceVar); - - // replace intrinsic source. 
- CI->setArgOperand(1, NewGV); - - // Update number of bytes to copy (memcpy(...,...,X)) - CI->setArgOperand(2, - ConstantInt::get(BytesToCopy->getType(), TotalBytes)); - } - } - return true; -} - -} // end of anonymous namespace - -PreservedAnalyses ARMWidenStringsPass::run(Function &F, - FunctionAnalysisManager &AM) { - if (!ARMWidenStrings().run(F)) - return PreservedAnalyses::all(); - - return PreservedAnalyses::none(); -} diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index a9607e4ebc658..939a145723956 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -2,7 +2,6 @@ add_llvm_component_library(LLVMScalarOpts ADCE.cpp AlignmentFromAssumptions.cpp AnnotationRemarks.cpp - ARMWidenStrings.cpp BDCE.cpp CallSiteSplitting.cpp ConstantHoisting.cpp diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll index e11cf372c36a6..6a8adf1af57a4 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default,arm-widen-strings" -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default,globalopt" -S | FileCheck %s ; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default" -S | FileCheck %s --check-prefix=TURNED-OFF target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll index 2df8108f445fe..46bc715b8f750 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default,arm-widen-strings" -S | FileCheck %s +; RUN: opt < %s 
-mtriple=arm-arm-none-eabi -passes="default,globalopt" -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" ; CHECK: [64 x i8] diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll index a0c1e21329816..d5545cb9d6b88 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv6m-arm-none-eabi" diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll index 67cb99023c532..de11c4a899c8d 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv6m-arm-none-eabi" diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll index 3f02c02ad845b..1ec13eb72a6e2 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-arm-none-eabi 
-passes=globalopt -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" ; CHECK: [48 x i8] diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll index 937bfaecd8e3e..7e9ddf7b1a879 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv6m-arm-none-eabi" diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll index 6cbd823a18c36..24e9131b11907 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv6m-arm-none-eabi" From 2a2cfdc92e1eda6e898563e15ab63283b801b41b Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Wed, 18 Sep 2024 11:44:41 +0100 Subject: [PATCH 04/14] Review comments Updating patch so that when attempting to widen global strings we only check whether the variable is being called by a memcpy intrinsic. 
Change-Id: I088403636c2ed0acc231af77b399b1b95f1abbc2 --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 226 ++++++++---------- .../ARMWidenStrings/arm-widen-strings-1.ll | 20 +- .../ARMWidenStrings/arm-widen-strings-2.ll | 19 +- 3 files changed, 122 insertions(+), 143 deletions(-) diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 84c1585fede11..07c0567c9b211 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2031,143 +2031,127 @@ OptimizeFunctions(Module &M, return Changed; } -static bool IsCharArray(Type *t) { +static bool IsCharArray(Type *T) { const unsigned int CHAR_BIT_SIZE = 8; - return t && t->isArrayTy() && t->getArrayElementType()->isIntegerTy() && - t->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE; + return T && T->isArrayTy() && T->getArrayElementType()->isIntegerTy() && + T->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE; } static bool -tryWidenGlobalStrings(Function &F, - function_ref GetTTI) { - bool changed = false; - - for (Function::iterator b = F.begin(); b != F.end(); ++b) { - for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) { - CallInst *CI = dyn_cast(i); - if (!CI) { - continue; - } +tryWidenGlobalString(CallInst *CI, GlobalVariable *SourceVar, + function_ref GetTTI, + function_ref GetTLI) { - TargetTransformInfo &TTI = GetTTI(F); + auto *F = CI->getCalledFunction(); + auto *Alloca = dyn_cast(CI->getArgOperand(0)); + auto *BytesToCopy = dyn_cast(CI->getArgOperand(2)); + auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); - Function *CallMemcpy = CI->getCalledFunction(); - // find out if the current call instruction is a call to llvm memcpy - // intrinsics - if (CallMemcpy == NULL || !CallMemcpy->isIntrinsic() || - CallMemcpy->getIntrinsicID() != Intrinsic::memcpy) { - continue; - } + if (!BytesToCopy) + return false; - auto *Alloca = dyn_cast(CI->getArgOperand(0)); - auto *SourceVar = dyn_cast(CI->getArgOperand(1)); - 
auto *BytesToCopy = dyn_cast(CI->getArgOperand(2)); - auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); + uint64_t NumBytesToCopy = BytesToCopy->getZExtValue(); - if (!BytesToCopy) { - continue; - } + if (!Alloca) + return false; - uint64_t NumBytesToCopy = BytesToCopy->getZExtValue(); + if (!IsVolatile || IsVolatile->isOne()) + return false; - if (!Alloca) { - continue; - } + if (NumBytesToCopy % 4 == 0) + return false; - // Source isn't a global constant variable - if (!SourceVar) { - continue; - } + if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || + !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) + return false; - if (!IsVolatile || IsVolatile->isOne()) { - continue; - } + ConstantDataArray *SourceDataArray = + dyn_cast(SourceVar->getInitializer()); + if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) + return false; - if (NumBytesToCopy % 4 == 0) { - continue; - } + if (!Alloca->isStaticAlloca()) + return false; - if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || - !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) { - continue; - } + // Make sure destination is definitley a char array. + if (!IsCharArray(Alloca->getAllocatedType())) + return false; - ConstantDataArray *SourceDataArray = - dyn_cast(SourceVar->getInitializer()); - if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) { - continue; - } + uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); + uint64_t SZSize = SourceDataArray->getType()->getNumElements(); - if (!Alloca->isStaticAlloca()) { - continue; - } + // For safety purposes lets add a constraint and only padd when + // num bytes to copy == destination array size == source string + // which is a constant + if (NumBytesToCopy != DZSize || DZSize != SZSize) + return false; - // Make sure destination is definitley a char array. 
- if (!IsCharArray(Alloca->getAllocatedType())) { - continue; - } + unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4); + unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; - uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); - uint64_t SZSize = SourceDataArray->getType()->getNumElements(); + // Max number of bytes that memcpy allows for lowering to load/stores before + // it uses library function (__aeabi_memcpy). + TargetTransformInfo &TTI = GetTTI(*F); + unsigned MaxMemIntrinsicSize = TTI.getMaxMemIntrinsicInlineSizeThreshold(); + if (TotalBytes > MaxMemIntrinsicSize) + return false; - // For safety purposes lets add a constraint and only padd when - // num bytes to copy == destination array size == source string - // which is a constant - if (NumBytesToCopy != DZSize || DZSize != SZSize) { - continue; - } - unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4); - unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; - - /* - Max number of bytes that memcpy allows for lowering to load/stores before - it uses library function (__aeabi_memcpy). - */ - unsigned MaxMemIntrinsicSize = - TTI.getMaxMemIntrinsicInlineSizeThreshold(); - if (TotalBytes > MaxMemIntrinsicSize) { - continue; - } + // Update destination char array to be word aligned (memcpy(X,...,...)) + IRBuilder<> BuildAlloca(Alloca); + AllocaInst *NewAlloca = cast(BuildAlloca.CreateAlloca( + ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(), + NumBytesToCopy + NumBytesToPad))); + NewAlloca->takeName(Alloca); + NewAlloca->setAlignment(Alloca->getAlign()); + Alloca->replaceAllUsesWith(NewAlloca); + + // Update source to be word aligned (memcpy(...,X,...)) + // create replacement string with padded null bytes. 
+ StringRef Data = SourceDataArray->getRawDataValues(); + std::vector StrData(Data.begin(), Data.end()); + for (unsigned int p = 0; p < NumBytesToPad; p++) + StrData.push_back('\0'); + auto Arr = ArrayRef(StrData.data(), TotalBytes); + + // Create new padded version of global variable string. + Constant *SourceReplace = ConstantDataArray::get(F->getContext(), Arr); + GlobalVariable *NewGV = new GlobalVariable( + *(F->getParent()), SourceReplace->getType(), true, + SourceVar->getLinkage(), SourceReplace, SourceReplace->getName()); - // update destination char array to be word aligned (memcpy(X,...,...)) - IRBuilder<> BuildAlloca(Alloca); - AllocaInst *NewAlloca = cast(BuildAlloca.CreateAlloca( - ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(), - NumBytesToCopy + NumBytesToPad))); - NewAlloca->takeName(Alloca); - NewAlloca->setAlignment(Alloca->getAlign()); - Alloca->replaceAllUsesWith(NewAlloca); - - // update source to be word aligned (memcpy(...,X,...)) - // create replacement string with padded null bytes. - StringRef Data = SourceDataArray->getRawDataValues(); - std::vector StrData(Data.begin(), Data.end()); - for (unsigned int p = 0; p < NumBytesToPad; p++) - StrData.push_back('\0'); - auto Arr = ArrayRef(StrData.data(), TotalBytes); - - // create new padded version of global variable string. - Constant *SourceReplace = ConstantDataArray::get(F.getContext(), Arr); - GlobalVariable *NewGV = new GlobalVariable( - *F.getParent(), SourceReplace->getType(), true, - SourceVar->getLinkage(), SourceReplace, SourceReplace->getName()); - - // copy any other attributes from original global variable string - // e.g. unamed_addr - NewGV->copyAttributesFrom(SourceVar); - NewGV->takeName(SourceVar); - - // replace intrinsic source. 
- CI->setArgOperand(1, NewGV); - - // Update number of bytes to copy (memcpy(...,...,X)) - CI->setArgOperand(2, - ConstantInt::get(BytesToCopy->getType(), TotalBytes)); - NumGlobalStringsPadded++; - changed |= true; - } + // Copy any other attributes from original global variable string + // e.g. unamed_addr + NewGV->copyAttributesFrom(SourceVar); + NewGV->takeName(SourceVar); + + // Replace intrinsic source. + CI->setArgOperand(1, NewGV); + + // Update number of bytes to copy (memcpy(...,...,X)) + CI->setArgOperand(2, ConstantInt::get(BytesToCopy->getType(), TotalBytes)); + NumGlobalStringsPadded++; + return true; +} + +static bool tryWidenGlobalStringsUsedByMemcpy( + GlobalVariable *GV, function_ref GetTLI, + function_ref GetTTI) { + for (auto *User : GV->users()) { + CallInst *CI = dyn_cast(User); + if (!CI) + continue; + + Function *F = CI->getCalledFunction(); + if (!F || !F->isIntrinsic() || F->getIntrinsicID() != Intrinsic::memcpy) + continue; + + TargetTransformInfo &TTI = GetTTI(*F); + if (!TTI.useWidenGlobalStrings()) + return false; + + return tryWidenGlobalString(CI, GV, GetTTI, GetTLI); } - return changed; + return false; } static bool @@ -2199,13 +2183,9 @@ OptimizeGlobalVars(Module &M, continue; } - // Pad global strings if allowed - for (Function &F : llvm::make_early_inc_range(M)) { - TargetTransformInfo &TTI = GetTTI(F); - if (TTI.useWidenGlobalStrings()) { - Changed |= tryWidenGlobalStrings(F, GetTTI); - } - } + // For global variable strings called in a memcpy + // we try to pad to nearest valid alignment boundary + Changed |= tryWidenGlobalStringsUsedByMemcpy(&GV, GetTLI, GetTTI); Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree); } diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll index 6a8adf1af57a4..432a5728315e6 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll +++ 
b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll @@ -1,25 +1,25 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default,globalopt" -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s ; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default" -S | FileCheck %s --check-prefix=TURNED-OFF + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" ; CHECK: [12 x i8] ; TURNED-OFF-NOT: [12 x i8] @.str = private unnamed_addr constant [10 x i8] c"123456789\00", align 1 -; Function Attrs: nounwind -define hidden void @foo() #0 { +define hidden void @foo() local_unnamed_addr { entry: ; CHECK: %something = alloca [12 x i8] ; TURNED-OFF-NOT: %something = alloca [12 x i8] %something = alloca [10 x i8], align 1 - %arraydecay = getelementptr inbounds [10 x i8], ptr %something, i32 0, i32 0 -; CHECK: @llvm.memcpy.p0.p0.i32 - %call = call ptr @strcpy(ptr %arraydecay, ptr @.str) - %arraydecay1 = getelementptr inbounds [10 x i8], ptr %something, i32 0, i32 0 - %call2 = call i32 @bar(ptr %arraydecay1) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 10, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) ret void } -declare ptr @strcpy(ptr, ptr) #1 +declare i32 @bar(...) local_unnamed_addr + +; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0 -declare i32 @bar(...) 
#1 +attributes #0 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll index 46bc715b8f750..ecbe93411e4eb 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll +++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll @@ -4,19 +4,18 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" ; CHECK: [64 x i8] @.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1 -; Function Attrs: nounwind -define hidden void @foo() #0 { +define hidden void @foo() local_unnamed_addr { entry: -; CHECK: %something = alloca [64 x i8] + ; CHECK: %something = alloca [64 x i8] %something = alloca [62 x i8], align 1 - %arraydecay = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0 -; CHECK: @llvm.memcpy.p0.p0.i32 - %call = call ptr @strcpy(ptr %arraydecay, ptr @.str) - %arraydecay1 = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0 - %call2 = call i32 @bar(ptr %arraydecay1) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) %something, ptr noundef nonnull align 1 dereferenceable(62) @.str, i32 62, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) ret void } -declare ptr @strcpy(ptr, ptr) #1 +declare i32 @bar(...) local_unnamed_addr -declare i32 @bar(...) 
#1 +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0 + +attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } From 72567c4536f03f6d3f530e34ad14fb7d468dd776 Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Fri, 27 Sep 2024 12:47:01 +0100 Subject: [PATCH 05/14] Review comments --- .../llvm/Analysis/TargetTransformInfo.h | 11 ++-- .../llvm/Analysis/TargetTransformInfoImpl.h | 2 +- llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ++ .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 18 +++++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 2 + llvm/lib/Transforms/IPO/GlobalOpt.cpp | 66 +++++++------------ .../ARMWidenStrings/arm-widen-strings-1.ll | 25 ------- .../arm-widen-strings-ptrtoint.ll | 42 ------------ .../arm-widen-strings-struct-test.ll | 52 --------------- .../GlobalOpt/ARM/arm-widen-strings-1.ll | 33 ++++++++++ .../ARM}/arm-widen-strings-2.ll | 14 ++-- .../arm-widen-strings-lengths-dont-match.ll | 21 +++--- .../arm-widen-strings-more-than-64-bytes.ll | 20 +++--- .../ARM/arm-widen-strings-ptrtoint.ll | 59 +++++++++++++++++ .../ARM/arm-widen-strings-struct-test.ll | 48 ++++++++++++++ .../ARM}/arm-widen-strings-volatile.ll | 21 +++--- 16 files changed, 236 insertions(+), 202 deletions(-) delete mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll delete mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll delete mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll create mode 100644 llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll rename llvm/test/Transforms/{ARMWidenStrings => GlobalOpt/ARM}/arm-widen-strings-2.ll (57%) rename llvm/test/Transforms/{ARMWidenStrings => GlobalOpt/ARM}/arm-widen-strings-lengths-dont-match.ll (57%) rename llvm/test/Transforms/{ARMWidenStrings => 
GlobalOpt/ARM}/arm-widen-strings-more-than-64-bytes.ll (59%) create mode 100644 llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll create mode 100644 llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll rename llvm/test/Transforms/{ARMWidenStrings => GlobalOpt/ARM}/arm-widen-strings-volatile.ll (57%) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 85fa21ebc51f3..1e7ca42c92383 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1819,8 +1819,9 @@ class TargetTransformInfo { /// \return The maximum number of function arguments the target supports. unsigned getMaxNumArgs() const; - /// \return true if global strings should be padded to an alignment boundary - bool useWidenGlobalStrings() const; + /// \return For an array of given Size, return alignment boundary to + /// pad to. Default is no padding. + unsigned getNumBytesToPad(unsigned Size) const; /// @} @@ -2228,7 +2229,7 @@ class TargetTransformInfo::Concept { getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; virtual bool hasArmWideBranch(bool Thumb) const = 0; virtual unsigned getMaxNumArgs() const = 0; - virtual bool useWidenGlobalStrings() const = 0; + virtual unsigned getNumBytesToPad(unsigned Size) const = 0; }; template @@ -3031,8 +3032,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.getMaxNumArgs(); } - bool useWidenGlobalStrings() const override { - return Impl.useWidenGlobalStrings(); + unsigned getNumBytesToPad(unsigned Size) const override { + return Impl.getNumBytesToPad(Size); } }; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 5b9a7f6cf0ea2..b72089bad62c7 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1006,7 +1006,7 @@ 
class TargetTransformInfoImplBase { unsigned getMaxNumArgs() const { return UINT_MAX; } - bool useWidenGlobalStrings() const { return false; } + unsigned getNumBytesToPad(unsigned Size) const { return 0; } protected: // Obtain the minimum required size to hold the value (without the sign) diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index dd07223ab6bdd..b15b7af4323aa 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1386,6 +1386,10 @@ bool TargetTransformInfo::useWidenGlobalStrings() const { return TTIImpl->useWidenGlobalStrings(); } +unsigned TargetTransformInfo::getNumBytesToPad(unsigned Size) const { + return TTIImpl->getNumBytesToPad(Size); +} + TargetTransformInfo::Concept::~Concept() = default; TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 7991c8bc15311..526ba8de720af 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2811,3 +2811,21 @@ bool ARMTTIImpl::isProfitableToSinkOperands(Instruction *I, } bool ARMTTIImpl::useWidenGlobalStrings() const { return UseWidenGlobalStrings; } + +unsigned ARMTTIImpl::getNumBytesToPad(unsigned Size) const { + // We pad to 4 byte boundaries; + if (Size % 4 == 0) + return 0; + + unsigned NumBytesToPad = 4 - (Size % 4); + unsigned NewSize = Size + NumBytesToPad; + + // Max number of bytes that memcpy allows for lowering to load/stores before + // it uses library function (__aeabi_memcpy). 
+ unsigned MaxMemIntrinsicSize = getMaxMemIntrinsicInlineSizeThreshold(); + + if (NewSize > MaxMemIntrinsicSize) + return 0; + + return NumBytesToPad; +} diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 23a76d7d010f2..f702b1e9fee39 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -340,6 +340,8 @@ class ARMTTIImpl : public BasicTTIImplBase { bool useWidenGlobalStrings() const; + unsigned getNumBytesToPad(unsigned Size) const; + /// @} }; diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 07c0567c9b211..b1ce4f995d0aa 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2037,70 +2037,44 @@ static bool IsCharArray(Type *T) { T->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE; } -static bool -tryWidenGlobalString(CallInst *CI, GlobalVariable *SourceVar, - function_ref GetTTI, - function_ref GetTLI) { - +static bool tryWidenGlobalString(CallInst *CI, GlobalVariable *SourceVar, + unsigned NumBytesToPad, + unsigned NumBytesToCopy, + ConstantInt *BytesToCopyOp) { auto *F = CI->getCalledFunction(); auto *Alloca = dyn_cast(CI->getArgOperand(0)); - auto *BytesToCopy = dyn_cast(CI->getArgOperand(2)); auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); - if (!BytesToCopy) - return false; - - uint64_t NumBytesToCopy = BytesToCopy->getZExtValue(); - - if (!Alloca) - return false; - - if (!IsVolatile || IsVolatile->isOne()) - return false; - - if (NumBytesToCopy % 4 == 0) + if (!Alloca || !IsVolatile || IsVolatile->isOne()) return false; if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) return false; + if (!Alloca->isStaticAlloca() || !IsCharArray(Alloca->getAllocatedType())) + return false; + ConstantDataArray *SourceDataArray = dyn_cast(SourceVar->getInitializer()); if 
(!SourceDataArray || !IsCharArray(SourceDataArray->getType())) return false; - if (!Alloca->isStaticAlloca()) - return false; - - // Make sure destination is definitley a char array. - if (!IsCharArray(Alloca->getAllocatedType())) - return false; - uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); uint64_t SZSize = SourceDataArray->getType()->getNumElements(); - // For safety purposes lets add a constraint and only padd when + // For safety purposes lets add a constraint and only pad when // num bytes to copy == destination array size == source string // which is a constant if (NumBytesToCopy != DZSize || DZSize != SZSize) return false; - unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4); unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; - // Max number of bytes that memcpy allows for lowering to load/stores before - // it uses library function (__aeabi_memcpy). - TargetTransformInfo &TTI = GetTTI(*F); - unsigned MaxMemIntrinsicSize = TTI.getMaxMemIntrinsicInlineSizeThreshold(); - if (TotalBytes > MaxMemIntrinsicSize) - return false; - // Update destination char array to be word aligned (memcpy(X,...,...)) IRBuilder<> BuildAlloca(Alloca); - AllocaInst *NewAlloca = cast(BuildAlloca.CreateAlloca( - ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(), - NumBytesToCopy + NumBytesToPad))); + AllocaInst *NewAlloca = BuildAlloca.CreateAlloca(ArrayType::get( + Alloca->getAllocatedType()->getArrayElementType(), TotalBytes)); NewAlloca->takeName(Alloca); NewAlloca->setAlignment(Alloca->getAlign()); Alloca->replaceAllUsesWith(NewAlloca); @@ -2128,13 +2102,13 @@ tryWidenGlobalString(CallInst *CI, GlobalVariable *SourceVar, CI->setArgOperand(1, NewGV); // Update number of bytes to copy (memcpy(...,...,X)) - CI->setArgOperand(2, ConstantInt::get(BytesToCopy->getType(), TotalBytes)); + CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(), TotalBytes)); NumGlobalStringsPadded++; return true; } static bool 
tryWidenGlobalStringsUsedByMemcpy( - GlobalVariable *GV, function_ref GetTLI, + GlobalVariable *GV, function_ref GetTTI) { for (auto *User : GV->users()) { CallInst *CI = dyn_cast(User); @@ -2146,10 +2120,16 @@ static bool tryWidenGlobalStringsUsedByMemcpy( continue; TargetTransformInfo &TTI = GetTTI(*F); - if (!TTI.useWidenGlobalStrings()) - return false; + auto *BytesToCopyOp = dyn_cast(CI->getArgOperand(2)); + if (!BytesToCopyOp) + continue; + + unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue(); + unsigned NumBytesToPad = TTI.getNumBytesToPad(NumBytesToCopy); - return tryWidenGlobalString(CI, GV, GetTTI, GetTLI); + if (NumBytesToPad) + return tryWidenGlobalString(CI, GV, NumBytesToPad, NumBytesToCopy, + BytesToCopyOp); } return false; } @@ -2185,7 +2165,7 @@ OptimizeGlobalVars(Module &M, // For global variable strings called in a memcpy // we try to pad to nearest valid alignment boundary - Changed |= tryWidenGlobalStringsUsedByMemcpy(&GV, GetTLI, GetTTI); + Changed |= tryWidenGlobalStringsUsedByMemcpy(&GV, GetTTI); Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree); } diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll deleted file mode 100644 index 432a5728315e6..0000000000000 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default" -S | FileCheck %s --check-prefix=TURNED-OFF - -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" - -; CHECK: [12 x i8] -; TURNED-OFF-NOT: [12 x i8] -@.str = private unnamed_addr constant [10 x i8] c"123456789\00", align 1 - -define hidden void @foo() local_unnamed_addr { -entry: -; CHECK: %something = alloca [12 x i8] -; TURNED-OFF-NOT: %something = alloca [12 x i8] - %something = alloca [10 x i8], align 1 - call void 
@llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 10, i1 false) - %call2 = call i32 @bar(ptr nonnull %something) - ret void -} - -declare i32 @bar(...) local_unnamed_addr - -; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0 - -attributes #0 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll deleted file mode 100644 index 1ec13eb72a6e2..0000000000000 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" - -; CHECK: [48 x i8] -@f.string1 = private unnamed_addr constant [45 x i8] c"The quick brown dog jumps over the lazy fox.\00", align 1 - -; Function Attrs: nounwind -define hidden i32 @f() { -entry: - %string1 = alloca [45 x i8], align 1 - %pos = alloca i32, align 4 - %token = alloca ptr, align 4 - call void @llvm.lifetime.start.p0i8(i64 45, ptr %string1) - call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %string1, ptr align 1 @f.string1, i32 45, i1 false) - call void @llvm.lifetime.start.p0i8(i64 4, ptr %pos) - call void @llvm.lifetime.start.p0i8(i64 4, ptr %token) - %call = call ptr @strchr(ptr %string1, i32 101) - store ptr %call, ptr %token, align 4 - %0 = load ptr, ptr %token, align 4 - %sub.ptr.lhs.cast = ptrtoint ptr %0 to i32 - %sub.ptr.rhs.cast = ptrtoint ptr %string1 to i32 - %sub.ptr.sub = sub i32 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %add = add nsw i32 %sub.ptr.sub, 1 - store i32 %add, ptr %pos, align 4 - %1 = load i32, ptr 
%pos, align 4 - call void @llvm.lifetime.end.p0i8(i64 4, ptr %token) - call void @llvm.lifetime.end.p0i8(i64 4, ptr %pos) - call void @llvm.lifetime.end.p0i8(i64 45, ptr %string1) - ret i32 %1 -} - -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.start.p0i8(i64, ptr nocapture) - -; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) - -; Function Attrs: nounwind -declare ptr @strchr(ptr, i32) - -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.end.p0i8(i64, ptr nocapture) diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll deleted file mode 100644 index 7e9ddf7b1a879..0000000000000 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll +++ /dev/null @@ -1,52 +0,0 @@ -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" -target triple = "thumbv6m-arm-none-eabi" - -%struct.P = type { i32, [13 x i8] } - -; CHECK-NOT: [16 x i8] -@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 -@.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 -@__ARM_use_no_argv = global i32 1, section ".ARM.use_no_argv", align 4 -@llvm.used = appending global [1 x ptr] [ptr @__ARM_use_no_argv], section "llvm.metadata" - -; Function Attrs: nounwind -define hidden i32 @main() local_unnamed_addr #0 { -entry: - %p = alloca %struct.P, align 4 - call void @llvm.lifetime.start(i64 20, ptr nonnull %p) #2 - store i32 10, ptr %p, align 4, !tbaa !3 - %arraydecay = getelementptr inbounds %struct.P, ptr %p, i32 0, i32 1, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %arraydecay, ptr align 1 @.str, i32 13, i1 false) - %puts = call i32 @puts(ptr %arraydecay) - call void @llvm.lifetime.end(i64 20, ptr nonnull %p) #2 - ret 
i32 0 -} - -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.start(i64, ptr nocapture) #1 - -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.end(i64, ptr nocapture) #1 - -; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 - -; Function Attrs: nounwind -declare i32 @puts(ptr nocapture readonly) #2 - -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m0" "target-features"="+strict-align" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 1, !"min_enum_size", i32 4} -!2 = !{!"Component: ARM Compiler 6 devbuild Tool: armclang [devbuild]"} -!3 = !{!4, !5, i64 0} -!4 = !{!"P", !5, i64 0, !6, i64 4} -!5 = !{!"int", !6, i64 0} -!6 = !{!"omnipotent char", !7, i64 0} -!7 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll new file mode 100644 index 0000000000000..1fd82434de681 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default" -S | FileCheck %s --check-prefix=TURNED-OFF + +; CHECK: [12 x i8] +; TURNED-OFF-NOT: [12 x i8] +@.str = private unnamed_addr constant [10 
x i8] c"123456789\00", align 1 + +define hidden void @foo() local_unnamed_addr { +; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [12 x i8], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = alloca [10 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 12, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; +; TURNED-OFF-LABEL: define hidden void @foo() local_unnamed_addr { +; TURNED-OFF-NEXT: [[ENTRY:.*:]] +; TURNED-OFF-NEXT: [[SOMETHING:%.*]] = alloca [10 x i8], align 1 +; TURNED-OFF-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 10, i1 false) +; TURNED-OFF-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; TURNED-OFF-NEXT: ret void +; +entry: + %something = alloca [10 x i8], align 1 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 10, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) + ret void +} + +declare i32 @bar(...) 
local_unnamed_addr +declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0 diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll similarity index 57% rename from llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll rename to llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll index ecbe93411e4eb..2e5f9eddb3a19 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll @@ -1,12 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default,globalopt" -S | FileCheck %s -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" ; CHECK: [64 x i8] @.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1 define hidden void @foo() local_unnamed_addr { +; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [64 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(64) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(64) @.str, i32 64, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; entry: - ; CHECK: %something = alloca [64 x i8] %something = alloca [62 x i8], align 1 call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) %something, ptr noundef nonnull align 1 dereferenceable(62) @.str, i32 62, i1 false) %call2 = call i32 @bar(ptr nonnull %something) @@ -14,8 +20,4 @@ entry: } declare i32 @bar(...) 
local_unnamed_addr - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0 - -attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll similarity index 57% rename from llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll rename to llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll index d5545cb9d6b88..3f2996fc6d357 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll @@ -1,12 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" -target triple = "thumbv6m-arm-none-eabi" - ; CHECK: [17 x i8] @.str = private unnamed_addr constant [17 x i8] c"aaaaaaaaaaaaaaaa\00", align 1 ; Function Attrs: nounwind define hidden void @foo() local_unnamed_addr #0 { +; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [20 x i8], align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 1 [[SOMETHING]], ptr align 1 @.str, i32 17, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; entry: %something = alloca [20 x i8], align 1 call void @llvm.lifetime.start(i64 20, ptr nonnull 
%something) #3 @@ -16,13 +23,7 @@ entry: ret void } -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.start(i64, ptr nocapture) #1 - declare i32 @bar(...) local_unnamed_addr #2 - -; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, ptr nocapture) #1 declare void @llvm.lifetime.end(i64, ptr nocapture) #1 - -; Function Attrs: argmemonly nounwind declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll similarity index 59% rename from llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll rename to llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll index de11c4a899c8d..9aa1255b9310f 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll @@ -1,6 +1,5 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" -target triple = "thumbv6m-arm-none-eabi" ; CHECK: [65 x i8] ; CHECK-NOT: [68 x i8] @@ -8,6 +7,15 @@ target triple = "thumbv6m-arm-none-eabi" ; Function Attrs: nounwind define hidden void @foo() local_unnamed_addr #0 { +; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [65 x i8], align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 65, ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 1 [[SOMETHING]], ptr align 1 @.str, i32 65, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: call void 
@llvm.lifetime.end.p0(i64 65, ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; entry: %something = alloca [65 x i8], align 1 call void @llvm.lifetime.start(i64 65, ptr nonnull %something) #3 @@ -17,13 +25,7 @@ entry: ret void } -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.start(i64, ptr nocapture) #1 - declare i32 @bar(...) local_unnamed_addr #2 - -; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, ptr nocapture) #1 declare void @llvm.lifetime.end(i64, ptr nocapture) #1 - -; Function Attrs: argmemonly nounwind declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll new file mode 100644 index 0000000000000..c9cb442dd6ea4 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s + +; CHECK: [48 x i8] +@f.string1 = private unnamed_addr constant [45 x i8] c"The quick brown dog jumps over the lazy fox.\00", align 1 + +; Function Attrs: nounwind +define hidden i32 @f() { +; CHECK-LABEL: define hidden i32 @f() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[STRING1:%.*]] = alloca [48 x i8], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = alloca [45 x i8], align 1 +; CHECK-NEXT: [[POS:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TOKEN:%.*]] = alloca ptr, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 45, ptr [[STRING1]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STRING1]], ptr align 1 @f.string1, i32 48, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[POS]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TOKEN]]) +; CHECK-NEXT: 
[[CALL:%.*]] = call ptr @strchr(ptr [[STRING1]], i32 101) +; CHECK-NEXT: store ptr [[CALL]], ptr [[TOKEN]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TOKEN]], align 4 +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP1]] to i32 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STRING1]] to i32 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i32 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[SUB_PTR_SUB]], 1 +; CHECK-NEXT: store i32 [[ADD]], ptr [[POS]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[POS]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TOKEN]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[POS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 45, ptr [[STRING1]]) +; CHECK-NEXT: ret i32 [[TMP2]] +; +entry: + %string1 = alloca [45 x i8], align 1 + %pos = alloca i32, align 4 + %token = alloca ptr, align 4 + call void @llvm.lifetime.start.p0i8(i64 45, ptr %string1) + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %string1, ptr align 1 @f.string1, i32 45, i1 false) + call void @llvm.lifetime.start.p0i8(i64 4, ptr %pos) + call void @llvm.lifetime.start.p0i8(i64 4, ptr %token) + %call = call ptr @strchr(ptr %string1, i32 101) + store ptr %call, ptr %token, align 4 + %0 = load ptr, ptr %token, align 4 + %sub.ptr.lhs.cast = ptrtoint ptr %0 to i32 + %sub.ptr.rhs.cast = ptrtoint ptr %string1 to i32 + %sub.ptr.sub = sub i32 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %add = add nsw i32 %sub.ptr.sub, 1 + store i32 %add, ptr %pos, align 4 + %1 = load i32, ptr %pos, align 4 + call void @llvm.lifetime.end.p0i8(i64 4, ptr %token) + call void @llvm.lifetime.end.p0i8(i64 4, ptr %pos) + call void @llvm.lifetime.end.p0i8(i64 45, ptr %string1) + ret i32 %1 +} + +declare ptr @strchr(ptr, i32) +declare void @llvm.lifetime.start.p0i8(i64, ptr nocapture) +declare void @llvm.lifetime.end.p0i8(i64, ptr nocapture) +declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture 
writeonly, ptr nocapture readonly, i32, i1) diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll new file mode 100644 index 0000000000000..9503b6c33a212 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s +%struct.P = type { i32, [13 x i8] } + +; CHECK-NOT: [16 x i8] +@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 + +; Function Attrs: nounwind +define hidden i32 @main() local_unnamed_addr #0 { +; CHECK-LABEL: define hidden i32 @main() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_P:%.*]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr nonnull [[P]]) +; CHECK-NEXT: store i32 10, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [[STRUCT_P]], ptr [[P]], i32 0, i32 1, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[ARRAYDECAY]], ptr align 1 @.str, i32 13, i1 false) +; CHECK-NEXT: [[PUTS:%.*]] = call i32 @puts(ptr [[ARRAYDECAY]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr nonnull [[P]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %p = alloca %struct.P, align 4 + call void @llvm.lifetime.start(i64 20, ptr nonnull %p) #2 + store i32 10, ptr %p, align 4, !tbaa !1 + %arraydecay = getelementptr inbounds %struct.P, ptr %p, i32 0, i32 1, i32 0 + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %arraydecay, ptr align 1 @.str, i32 13, i1 false) + %puts = call i32 @puts(ptr %arraydecay) + call void @llvm.lifetime.end(i64 20, ptr nonnull %p) #2 + ret i32 0 +} + +declare i32 @puts(ptr nocapture readonly) #2 +declare void @llvm.lifetime.start(i64, ptr nocapture) #1 +declare void 
@llvm.lifetime.end(i64, ptr nocapture) #1 +declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 + +!1 = !{!2, !3, i64 0} +!2 = !{!"P", !3, i64 0, !4, i64 4} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0} +; CHECK: [[META1]] = !{!"P", [[META2]], i64 0, [[META3:![0-9]+]], i64 4} +; CHECK: [[META2]] = !{!"int", [[META3]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +;. diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll similarity index 57% rename from llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll rename to llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll index 24e9131b11907..ba7b7d45719bb 100644 --- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll @@ -1,12 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" -target triple = "thumbv6m-arm-none-eabi" ; CHECK-NOT: [64 x i8] @.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1 ; Function Attrs: nounwind define hidden void @foo() local_unnamed_addr #0 { +; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [62 x i8], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [62 x i8], ptr [[SOMETHING]], i32 0, i32 0 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 62, ptr nonnull [[TMP0]]) +; 
CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 1 [[TMP0]], ptr align 1 @.str, i32 62, i1 true) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[TMP0]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 62, ptr nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; entry: %something = alloca [62 x i8], align 1 %0 = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0 @@ -17,13 +26,7 @@ entry: ret void } -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.start(i64, ptr nocapture) #1 - declare i32 @bar(...) local_unnamed_addr #2 - -; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, ptr nocapture) #1 declare void @llvm.lifetime.end(i64, ptr nocapture) #1 - -; Function Attrs: argmemonly nounwind declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 From 21ca2ba468fa23850e1f96fa8d03e8c0319ad9af Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Wed, 2 Oct 2024 12:17:28 +0100 Subject: [PATCH 06/14] Responding to review comments --- .../llvm/Analysis/TargetTransformInfo.h | 10 +-- .../llvm/Analysis/TargetTransformInfoImpl.h | 4 +- llvm/lib/Analysis/TargetTransformInfo.cpp | 6 +- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 11 ++- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 2 +- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 72 ++++++++++--------- .../GlobalOpt/ARM/arm-widen-non-byte-array.ll | 22 ++++++ .../ARM/arm-widen-string-multi-use.ll | 33 +++++++++ .../GlobalOpt/ARM/arm-widen-strings-1.ll | 11 --- .../GlobalOpt/ARM/arm-widen-strings-2.ll | 5 +- .../arm-widen-strings-lengths-dont-match.ll | 3 - .../arm-widen-strings-more-than-64-bytes.ll | 3 - .../ARM/arm-widen-strings-ptrtoint.ll | 4 -- .../ARM/arm-widen-strings-struct-test.ll | 3 - .../ARM/arm-widen-strings-volatile.ll | 3 - 15 files changed, 119 insertions(+), 73 deletions(-) create mode 100644 llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll create mode 100644 
llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 1e7ca42c92383..0dc513d8e65b7 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1821,7 +1821,7 @@ class TargetTransformInfo { /// \return For an array of given Size, return alignment boundary to /// pad to. Default is no padding. - unsigned getNumBytesToPad(unsigned Size) const; + unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const; /// @} @@ -2229,7 +2229,8 @@ class TargetTransformInfo::Concept { getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; virtual bool hasArmWideBranch(bool Thumb) const = 0; virtual unsigned getMaxNumArgs() const = 0; - virtual unsigned getNumBytesToPad(unsigned Size) const = 0; + virtual unsigned getNumBytesToPadGlobalArray(unsigned Size, + Type *ArrayType) const = 0; }; template @@ -3032,8 +3033,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.getMaxNumArgs(); } - unsigned getNumBytesToPad(unsigned Size) const override { - return Impl.getNumBytesToPad(Size); + unsigned getNumBytesToPadGlobalArray(unsigned Size, + Type *ArrayType) const override { + return Impl.getNumBytesToPadGlobalArray(Size, ArrayType); } }; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index b72089bad62c7..0b7792f89a05c 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1006,7 +1006,9 @@ class TargetTransformInfoImplBase { unsigned getMaxNumArgs() const { return UINT_MAX; } - unsigned getNumBytesToPad(unsigned Size) const { return 0; } + unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const { + return 0; + } protected: // Obtain the minimum required size to hold the value 
(without the sign) diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index b15b7af4323aa..163635ef5f6a7 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1386,8 +1386,10 @@ bool TargetTransformInfo::useWidenGlobalStrings() const { return TTIImpl->useWidenGlobalStrings(); } -unsigned TargetTransformInfo::getNumBytesToPad(unsigned Size) const { - return TTIImpl->getNumBytesToPad(Size); +unsigned +TargetTransformInfo::getNumBytesToPadGlobalArray(unsigned Size, + Type *ArrayType) const { + return TTIImpl->getNumBytesToPadGlobalArray(Size, ArrayType); } TargetTransformInfo::Concept::~Concept() = default; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 526ba8de720af..2623fffd29212 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2664,6 +2664,7 @@ bool ARMTTIImpl::hasArmWideBranch(bool Thumb) const { } } +<<<<<<< HEAD /// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth /// of the vector elements. 
static bool areExtractExts(Value *Ext1, Value *Ext2) { @@ -2812,8 +2813,14 @@ bool ARMTTIImpl::isProfitableToSinkOperands(Instruction *I, bool ARMTTIImpl::useWidenGlobalStrings() const { return UseWidenGlobalStrings; } -unsigned ARMTTIImpl::getNumBytesToPad(unsigned Size) const { - // We pad to 4 byte boundaries; +unsigned ARMTTIImpl::getNumBytesToPadGlobalArray(unsigned Size, + Type *ArrayType) const { + // Don't modify none integer array types + if (!ArrayType || !ArrayType->isArrayTy() || + !ArrayType->getArrayElementType()->isIntegerTy()) + return 0; + + // We pad to 4 byte boundaries if (Size % 4 == 0) return 0; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index f702b1e9fee39..a1ba45eaa7021 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -340,7 +340,7 @@ class ARMTTIImpl : public BasicTTIImplBase { bool useWidenGlobalStrings() const; - unsigned getNumBytesToPad(unsigned Size) const; + unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const; /// @} }; diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index b1ce4f995d0aa..ee8a0501a26be 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -92,8 +92,8 @@ STATISTIC(NumInternalFunc, "Number of internal functions"); STATISTIC(NumColdCC, "Number of functions marked coldcc"); STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs"); STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed"); -STATISTIC(NumGlobalStringsPadded, - "Number of global strings padded to alignment boundary"); +STATISTIC(NumGlobalArraysPadded, + "Number of global arrays padded to alignment boundary"); static cl::opt EnableColdCCStressTest("enable-coldcc-stress-test", @@ -2031,16 +2031,10 @@ OptimizeFunctions(Module &M, return Changed; } -static bool IsCharArray(Type *T) { - const unsigned int CHAR_BIT_SIZE = 8; 
- return T && T->isArrayTy() && T->getArrayElementType()->isIntegerTy() && - T->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE; -} - -static bool tryWidenGlobalString(CallInst *CI, GlobalVariable *SourceVar, - unsigned NumBytesToPad, - unsigned NumBytesToCopy, - ConstantInt *BytesToCopyOp) { +static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar, + unsigned NumBytesToPad, unsigned NumBytesToCopy, + ConstantInt *BytesToCopyOp, + ConstantDataArray *SourceDataArray) { auto *F = CI->getCalledFunction(); auto *Alloca = dyn_cast(CI->getArgOperand(0)); auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); @@ -2052,48 +2046,51 @@ static bool tryWidenGlobalString(CallInst *CI, GlobalVariable *SourceVar, !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) return false; - if (!Alloca->isStaticAlloca() || !IsCharArray(Alloca->getAllocatedType())) - return false; - - ConstantDataArray *SourceDataArray = - dyn_cast(SourceVar->getInitializer()); - if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) + if (!Alloca->isStaticAlloca()) return false; uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); uint64_t SZSize = SourceDataArray->getType()->getNumElements(); + unsigned ElementByteWidth = SourceDataArray->getElementByteSize(); + // Calculate the number of elements to copy while avoiding floored + // division of integers returning wrong values i.e. copying one byte + // from an array of i16 would yield 0 elements to copy as supposed to 1. 
+ unsigned NumElementsToCopy = + (NumBytesToCopy + ElementByteWidth - 1) / ElementByteWidth; // For safety purposes lets add a constraint and only pad when - // num bytes to copy == destination array size == source string - // which is a constant - if (NumBytesToCopy != DZSize || DZSize != SZSize) + // NumElementsToCopy == destination array size == + // source string which is a constant + if (NumElementsToCopy != DZSize || DZSize != SZSize) return false; unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; + NumElementsToCopy = (TotalBytes + ElementByteWidth - 1) / ElementByteWidth; - // Update destination char array to be word aligned (memcpy(X,...,...)) + // Update destination array to be word aligned (memcpy(X,...,...)) IRBuilder<> BuildAlloca(Alloca); AllocaInst *NewAlloca = BuildAlloca.CreateAlloca(ArrayType::get( - Alloca->getAllocatedType()->getArrayElementType(), TotalBytes)); + Alloca->getAllocatedType()->getArrayElementType(), NumElementsToCopy)); NewAlloca->takeName(Alloca); NewAlloca->setAlignment(Alloca->getAlign()); Alloca->replaceAllUsesWith(NewAlloca); + Alloca->eraseFromParent(); // Update source to be word aligned (memcpy(...,X,...)) - // create replacement string with padded null bytes. + // create replacement with padded null bytes. StringRef Data = SourceDataArray->getRawDataValues(); std::vector StrData(Data.begin(), Data.end()); for (unsigned int p = 0; p < NumBytesToPad; p++) StrData.push_back('\0'); auto Arr = ArrayRef(StrData.data(), TotalBytes); - // Create new padded version of global variable string. + // Create new padded version of global variable. Constant *SourceReplace = ConstantDataArray::get(F->getContext(), Arr); GlobalVariable *NewGV = new GlobalVariable( *(F->getParent()), SourceReplace->getType(), true, SourceVar->getLinkage(), SourceReplace, SourceReplace->getName()); - // Copy any other attributes from original global variable string + // Copy any other attributes from original global variable // e.g. 
unamed_addr NewGV->copyAttributesFrom(SourceVar); NewGV->takeName(SourceVar); @@ -2103,11 +2100,11 @@ static bool tryWidenGlobalString(CallInst *CI, GlobalVariable *SourceVar, // Update number of bytes to copy (memcpy(...,...,X)) CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(), TotalBytes)); - NumGlobalStringsPadded++; + NumGlobalArraysPadded++; return true; } -static bool tryWidenGlobalStringsUsedByMemcpy( +static bool tryWidenGlobalArraysUsedByMemcpy( GlobalVariable *GV, function_ref GetTTI) { for (auto *User : GV->users()) { @@ -2124,12 +2121,21 @@ static bool tryWidenGlobalStringsUsedByMemcpy( if (!BytesToCopyOp) continue; + if (!GV->hasInitializer()) + continue; + + ConstantDataArray *SourceDataArray = + dyn_cast(GV->getInitializer()); + if (!SourceDataArray) + continue; + unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue(); - unsigned NumBytesToPad = TTI.getNumBytesToPad(NumBytesToCopy); + unsigned NumBytesToPad = TTI.getNumBytesToPadGlobalArray( + NumBytesToCopy, SourceDataArray->getType()); if (NumBytesToPad) - return tryWidenGlobalString(CI, GV, NumBytesToPad, NumBytesToCopy, - BytesToCopyOp); + return tryWidenGlobalArray(CI, GV, NumBytesToPad, NumBytesToCopy, + BytesToCopyOp, SourceDataArray); } return false; } @@ -2163,9 +2169,9 @@ OptimizeGlobalVars(Module &M, continue; } - // For global variable strings called in a memcpy + // For global variable arrays called in a memcpy // we try to pad to nearest valid alignment boundary - Changed |= tryWidenGlobalStringsUsedByMemcpy(&GV, GetTTI); + Changed |= tryWidenGlobalArraysUsedByMemcpy(&GV, GetTTI); Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree); } diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll new file mode 100644 index 0000000000000..346612efdda6b --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s + +@.i16 = private unnamed_addr constant [5 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5] , align 1 + +define hidden void @memcpy_i16_array() local_unnamed_addr { +; CHECK-LABEL: define hidden void @memcpy_i16_array() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [6 x i16], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [5 x i16], align 1 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 10, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) + ret void +} + + +declare i32 @bar(...) 
local_unnamed_addr diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll new file mode 100644 index 0000000000000..7a3bc65edfe66 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s + +@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1 + +define hidden void @memcpy_multiple() local_unnamed_addr { +; CHECK-LABEL: define hidden void @memcpy_multiple() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: [[SOMETHING2:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB1:[0-9]+]], i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0:[0-9]+]], i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]]) +; CHECK-NEXT: [[CALL4:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING2]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [3 x i8], align 1 + %something1 = alloca [3 x i8], align 1 + %something2 = alloca [3 x i8], align 1 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 
dereferenceable(3) @.i8, i32 3, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something1, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something2, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) + %call3 = call i32 @bar(ptr nonnull %something1) + %call4 = call i32 @bar(ptr nonnull %something2) + ret void +} + +declare i32 @bar(...) local_unnamed_addr diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll index 1fd82434de681..0cec5bf1b635f 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll @@ -1,27 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default" -S | FileCheck %s --check-prefix=TURNED-OFF ; CHECK: [12 x i8] -; TURNED-OFF-NOT: [12 x i8] @.str = private unnamed_addr constant [10 x i8] c"123456789\00", align 1 define hidden void @foo() local_unnamed_addr { ; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [12 x i8], align 1 -; CHECK-NEXT: [[TMP0:%.*]] = alloca [10 x i8], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 12, i1 false) ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) ; CHECK-NEXT: ret void ; -; TURNED-OFF-LABEL: define hidden void @foo() local_unnamed_addr { -; TURNED-OFF-NEXT: [[ENTRY:.*:]] -; TURNED-OFF-NEXT: [[SOMETHING:%.*]] = alloca [10 x i8], align 1 
-; TURNED-OFF-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 10, i1 false) -; TURNED-OFF-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) -; TURNED-OFF-NEXT: ret void -; entry: %something = alloca [10 x i8], align 1 call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 10, i1 false) @@ -30,4 +20,3 @@ entry: } declare i32 @bar(...) local_unnamed_addr -declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0 diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll index 2e5f9eddb3a19..5d1d008aafcad 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default,globalopt" -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s ; CHECK: [64 x i8] @.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1 @@ -8,7 +8,7 @@ define hidden void @foo() local_unnamed_addr { ; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [64 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(64) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(64) @.str, i32 64, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(62) @.str, i32 64, i1 
false) ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) ; CHECK-NEXT: ret void ; @@ -20,4 +20,3 @@ entry: } declare i32 @bar(...) local_unnamed_addr -declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #0 diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll index 3f2996fc6d357..f1c8ac260c9ef 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll @@ -24,6 +24,3 @@ entry: } declare i32 @bar(...) local_unnamed_addr #2 -declare void @llvm.lifetime.start(i64, ptr nocapture) #1 -declare void @llvm.lifetime.end(i64, ptr nocapture) #1 -declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll index 9aa1255b9310f..961653753f54c 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll @@ -26,6 +26,3 @@ entry: } declare i32 @bar(...) 
local_unnamed_addr #2 -declare void @llvm.lifetime.start(i64, ptr nocapture) #1 -declare void @llvm.lifetime.end(i64, ptr nocapture) #1 -declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll index c9cb442dd6ea4..e82712ebe22ea 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll @@ -9,7 +9,6 @@ define hidden i32 @f() { ; CHECK-LABEL: define hidden i32 @f() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[STRING1:%.*]] = alloca [48 x i8], align 1 -; CHECK-NEXT: [[TMP0:%.*]] = alloca [45 x i8], align 1 ; CHECK-NEXT: [[POS:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TOKEN:%.*]] = alloca ptr, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 45, ptr [[STRING1]]) @@ -54,6 +53,3 @@ entry: } declare ptr @strchr(ptr, i32) -declare void @llvm.lifetime.start.p0i8(i64, ptr nocapture) -declare void @llvm.lifetime.end.p0i8(i64, ptr nocapture) -declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll index 9503b6c33a212..9cb0c53bf1652 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll @@ -30,9 +30,6 @@ entry: } declare i32 @puts(ptr nocapture readonly) #2 -declare void @llvm.lifetime.start(i64, ptr nocapture) #1 -declare void @llvm.lifetime.end(i64, ptr nocapture) #1 -declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 !1 = !{!2, !3, i64 0} !2 = !{!"P", !3, i64 0, !4, i64 4} diff --git 
a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll index ba7b7d45719bb..4d2559579ce1b 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll @@ -27,6 +27,3 @@ entry: } declare i32 @bar(...) local_unnamed_addr #2 -declare void @llvm.lifetime.start(i64, ptr nocapture) #1 -declare void @llvm.lifetime.end(i64, ptr nocapture) #1 -declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1 From 41aec6f81d49493a9888d1322e934976a7d38607 Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Fri, 4 Oct 2024 13:15:59 +0100 Subject: [PATCH 07/14] Review comments: eliminating generation of multiple globals --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 41 +++++++++++-------- .../GlobalOpt/ARM/arm-widen-non-byte-array.ll | 10 ++--- .../ARM/arm-widen-string-multi-use.ll | 14 +++---- .../GlobalOpt/ARM/arm-widen-strings-1.ll | 11 +++-- .../GlobalOpt/ARM/arm-widen-strings-2.ll | 11 +++-- .../arm-widen-strings-lengths-dont-match.ll | 8 ++-- .../arm-widen-strings-more-than-64-bytes.ll | 8 ++-- .../ARM/arm-widen-strings-ptrtoint.ll | 9 ++-- .../ARM/arm-widen-strings-struct-test.ll | 6 +-- .../ARM/arm-widen-strings-volatile.ll | 8 ++-- 10 files changed, 65 insertions(+), 61 deletions(-) diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index ee8a0501a26be..ba9f6d1a395c5 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2031,11 +2031,10 @@ OptimizeFunctions(Module &M, return Changed; } -static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar, - unsigned NumBytesToPad, unsigned NumBytesToCopy, - ConstantInt *BytesToCopyOp, - ConstantDataArray *SourceDataArray) { - auto *F = CI->getCalledFunction(); +static bool tryWidenDestArray(Function *F, CallInst *CI, + GlobalVariable 
*SourceVar, unsigned NumBytesToPad, + unsigned NumBytesToCopy, + ConstantDataArray *SourceDataArray) { auto *Alloca = dyn_cast(CI->getArgOperand(0)); auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); @@ -2055,8 +2054,7 @@ static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar, // Calculate the number of elements to copy while avoiding floored // division of integers returning wrong values i.e. copying one byte // from an array of i16 would yield 0 elements to copy as supposed to 1. - unsigned NumElementsToCopy = - (NumBytesToCopy + ElementByteWidth - 1) / ElementByteWidth; + unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth); // For safety purposes lets add a constraint and only pad when // NumElementsToCopy == destination array size == @@ -2065,7 +2063,7 @@ static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar, return false; unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; - NumElementsToCopy = (TotalBytes + ElementByteWidth - 1) / ElementByteWidth; + NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth); // Update destination array to be word aligned (memcpy(X,...,...)) IRBuilder<> BuildAlloca(Alloca); @@ -2075,14 +2073,21 @@ static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar, NewAlloca->setAlignment(Alloca->getAlign()); Alloca->replaceAllUsesWith(NewAlloca); Alloca->eraseFromParent(); + return true; +} +static bool widenGlobalArray(Function *F, CallInst *CI, + GlobalVariable *SourceVar, unsigned NumBytesToPad, + unsigned NumBytesToCopy, + ConstantInt *BytesToCopyOp, + ConstantDataArray *SourceDataArray) { // Update source to be word aligned (memcpy(...,X,...)) // create replacement with padded null bytes. 
StringRef Data = SourceDataArray->getRawDataValues(); std::vector StrData(Data.begin(), Data.end()); for (unsigned int p = 0; p < NumBytesToPad; p++) StrData.push_back('\0'); - auto Arr = ArrayRef(StrData.data(), TotalBytes); + auto Arr = ArrayRef(StrData.data(), NumBytesToCopy + NumBytesToPad); // Create new padded version of global variable. Constant *SourceReplace = ConstantDataArray::get(F->getContext(), Arr); @@ -2095,11 +2100,8 @@ static bool tryWidenGlobalArray(CallInst *CI, GlobalVariable *SourceVar, NewGV->copyAttributesFrom(SourceVar); NewGV->takeName(SourceVar); - // Replace intrinsic source. - CI->setArgOperand(1, NewGV); - - // Update number of bytes to copy (memcpy(...,...,X)) - CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(), TotalBytes)); + CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(), + NumBytesToCopy + NumBytesToPad)); NumGlobalArraysPadded++; return true; } @@ -2133,9 +2135,14 @@ static bool tryWidenGlobalArraysUsedByMemcpy( unsigned NumBytesToPad = TTI.getNumBytesToPadGlobalArray( NumBytesToCopy, SourceDataArray->getType()); - if (NumBytesToPad) - return tryWidenGlobalArray(CI, GV, NumBytesToPad, NumBytesToCopy, - BytesToCopyOp, SourceDataArray); + if (NumBytesToPad) { + bool DestWidened = tryWidenDestArray(F, CI, GV, NumBytesToPad, + NumBytesToCopy, SourceDataArray); + if (DestWidened) { + return widenGlobalArray(F, CI, GV, NumBytesToPad, NumBytesToCopy, + BytesToCopyOp, SourceDataArray); + } + } } return false; } diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll index 346612efdda6b..61e72c8981783 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | 
FileCheck %s +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s @.i16 = private unnamed_addr constant [5 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5] , align 1 -define hidden void @memcpy_i16_array() local_unnamed_addr { -; CHECK-LABEL: define hidden void @memcpy_i16_array() local_unnamed_addr { +define void @memcpy_i16_array() { +; CHECK-LABEL: define void @memcpy_i16_array() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [6 x i16], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @[[GLOB0:[0-9]+]], i32 12, i1 false) ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]]) ; CHECK-NEXT: ret void ; @@ -19,4 +19,4 @@ entry: } -declare i32 @bar(...) local_unnamed_addr +declare i32 @bar(...) 
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll index 7a3bc65edfe66..91ffdb5816517 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll @@ -1,17 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s @.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1 -define hidden void @memcpy_multiple() local_unnamed_addr { -; CHECK-LABEL: define hidden void @memcpy_multiple() local_unnamed_addr { +define void @memcpy_multiple() { +; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1 ; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1 ; CHECK-NEXT: [[SOMETHING2:%.*]] = alloca [4 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB1:[0-9]+]], i32 4, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0:[0-9]+]], i32 4, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0:[0-9]+]], i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr 
noundef nonnull align 1 dereferenceable(3) @[[GLOB0]], i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0]], i32 4, i1 false) ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) ; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]]) ; CHECK-NEXT: [[CALL4:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING2]]) @@ -30,4 +30,4 @@ entry: ret void } -declare i32 @bar(...) local_unnamed_addr +declare i32 @bar(...) diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll index 0cec5bf1b635f..91cf90a21de91 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll @@ -1,14 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s -; CHECK: [12 x i8] @.str = private unnamed_addr constant [10 x i8] c"123456789\00", align 1 -define hidden void @foo() local_unnamed_addr { -; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [12 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 12, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @[[GLOB0:[0-9]+]], i32 12, i1 false) ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) ; CHECK-NEXT: ret void ; @@ 
-19,4 +18,4 @@ entry: ret void } -declare i32 @bar(...) local_unnamed_addr +declare i32 @bar(...) diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll index 5d1d008aafcad..30c14af7caf67 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll @@ -1,14 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s -; CHECK: [64 x i8] @.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1 -define hidden void @foo() local_unnamed_addr { -; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [64 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(62) @.str, i32 64, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(62) @[[GLOB0:[0-9]+]], i32 64, i1 false) ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) ; CHECK-NEXT: ret void ; @@ -19,4 +18,4 @@ entry: ret void } -declare i32 @bar(...) local_unnamed_addr +declare i32 @bar(...) 
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll index f1c8ac260c9ef..b8e02c3f996da 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s ; CHECK: [17 x i8] @.str = private unnamed_addr constant [17 x i8] c"aaaaaaaaaaaaaaaa\00", align 1 ; Function Attrs: nounwind -define hidden void @foo() local_unnamed_addr #0 { -; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [20 x i8], align 1 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr nonnull [[SOMETHING]]) @@ -23,4 +23,4 @@ entry: ret void } -declare i32 @bar(...) local_unnamed_addr #2 +declare i32 @bar(...) 
#2 diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll index 961653753f54c..4ac31aa2f976d 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s ; CHECK: [65 x i8] ; CHECK-NOT: [68 x i8] @.str = private unnamed_addr constant [65 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzz\00", align 1 ; Function Attrs: nounwind -define hidden void @foo() local_unnamed_addr #0 { -; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [65 x i8], align 1 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 65, ptr nonnull [[SOMETHING]]) @@ -25,4 +25,4 @@ entry: ret void } -declare i32 @bar(...) local_unnamed_addr #2 +declare i32 @bar(...) 
#2 diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll index e82712ebe22ea..ce29192948a57 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll @@ -1,18 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s -; CHECK: [48 x i8] @f.string1 = private unnamed_addr constant [45 x i8] c"The quick brown dog jumps over the lazy fox.\00", align 1 ; Function Attrs: nounwind -define hidden i32 @f() { -; CHECK-LABEL: define hidden i32 @f() local_unnamed_addr { +define i32 @f() { +; CHECK-LABEL: define i32 @f() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[STRING1:%.*]] = alloca [48 x i8], align 1 ; CHECK-NEXT: [[POS:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TOKEN:%.*]] = alloca ptr, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 45, ptr [[STRING1]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STRING1]], ptr align 1 @f.string1, i32 48, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STRING1]], ptr align 1 @[[GLOB0:[0-9]+]], i32 48, i1 false) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[POS]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TOKEN]]) ; CHECK-NEXT: [[CALL:%.*]] = call ptr @strchr(ptr [[STRING1]], i32 101) diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll index 9cb0c53bf1652..5367572704b14 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll @@ -1,13 +1,13 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s %struct.P = type { i32, [13 x i8] } ; CHECK-NOT: [16 x i8] @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 ; Function Attrs: nounwind -define hidden i32 @main() local_unnamed_addr #0 { -; CHECK-LABEL: define hidden i32 @main() local_unnamed_addr { +define i32 @main() { +; CHECK-LABEL: define i32 @main() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_P:%.*]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr nonnull [[P]]) diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll index 4d2559579ce1b..b735a77887423 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s ; CHECK-NOT: [64 x i8] @.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1 ; Function Attrs: nounwind -define hidden void @foo() local_unnamed_addr #0 { -; CHECK-LABEL: define hidden void @foo() local_unnamed_addr { +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [62 x i8], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [62 x i8], ptr [[SOMETHING]], i32 0, i32 0 @@ -26,4 +26,4 @@ entry: ret void } -declare i32 @bar(...) 
local_unnamed_addr #2 +declare i32 @bar(...) #2 From c17ff0dd8f592e46c2ec248d3ee9027cabaa9084 Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Fri, 4 Oct 2024 21:55:28 +0100 Subject: [PATCH 08/14] Correcting and refactoring elimination --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 113 +++++++++++------- .../GlobalOpt/ARM/arm-widen-non-byte-array.ll | 2 +- .../ARM/arm-widen-string-multi-use.ll | 14 +-- .../GlobalOpt/ARM/arm-widen-strings-1.ll | 2 +- .../GlobalOpt/ARM/arm-widen-strings-2.ll | 2 +- .../ARM/arm-widen-strings-ptrtoint.ll | 2 +- 6 files changed, 78 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index ba9f6d1a395c5..21ec83af02411 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2031,41 +2031,38 @@ OptimizeFunctions(Module &M, return Changed; } -static bool tryWidenDestArray(Function *F, CallInst *CI, - GlobalVariable *SourceVar, unsigned NumBytesToPad, - unsigned NumBytesToCopy, - ConstantDataArray *SourceDataArray) { +static bool callInstIsMemcpy(CallInst *CI) { + if (!CI) + return false; + + Function *F = CI->getCalledFunction(); + if (!F || !F->isIntrinsic() || F->getIntrinsicID() != Intrinsic::memcpy) + return false; + + return true; +} + +static bool destArrayCanBeWidened(CallInst *CI) { auto *Alloca = dyn_cast(CI->getArgOperand(0)); auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); if (!Alloca || !IsVolatile || IsVolatile->isOne()) return false; - if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || - !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) - return false; - if (!Alloca->isStaticAlloca()) return false; - uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); - uint64_t SZSize = SourceDataArray->getType()->getNumElements(); - unsigned ElementByteWidth = SourceDataArray->getElementByteSize(); - // Calculate the number of elements to copy while avoiding floored - // division of 
integers returning wrong values i.e. copying one byte - // from an array of i16 would yield 0 elements to copy as supposed to 1. - unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth); - - // For safety purposes lets add a constraint and only pad when - // NumElementsToCopy == destination array size == - // source string which is a constant - if (NumElementsToCopy != DZSize || DZSize != SZSize) - return false; + return true; +} +static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad, + const unsigned NumBytesToCopy, + ConstantDataArray *SourceDataArray) { + unsigned ElementByteWidth = SourceDataArray->getElementByteSize(); unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; - NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth); - + unsigned NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth); // Update destination array to be word aligned (memcpy(X,...,...)) + auto *Alloca = dyn_cast(CI->getArgOperand(0)); IRBuilder<> BuildAlloca(Alloca); AllocaInst *NewAlloca = BuildAlloca.CreateAlloca(ArrayType::get( Alloca->getAllocatedType()->getArrayElementType(), NumElementsToCopy)); @@ -2073,14 +2070,17 @@ static bool tryWidenDestArray(Function *F, CallInst *CI, NewAlloca->setAlignment(Alloca->getAlign()); Alloca->replaceAllUsesWith(NewAlloca); Alloca->eraseFromParent(); - return true; } -static bool widenGlobalArray(Function *F, CallInst *CI, - GlobalVariable *SourceVar, unsigned NumBytesToPad, - unsigned NumBytesToCopy, - ConstantInt *BytesToCopyOp, - ConstantDataArray *SourceDataArray) { +static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar, + const unsigned NumBytesToPad, + const unsigned NumBytesToCopy, + ConstantInt *BytesToCopyOp, + ConstantDataArray *SourceDataArray) { + if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || + !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) + return false; + // Update source to be word aligned (memcpy(...,X,...)) // 
create replacement with padded null bytes. StringRef Data = SourceDataArray->getRawDataValues(); @@ -2100,8 +2100,20 @@ static bool widenGlobalArray(Function *F, CallInst *CI, NewGV->copyAttributesFrom(SourceVar); NewGV->takeName(SourceVar); - CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(), - NumBytesToCopy + NumBytesToPad)); + // Update arguments of remaining uses that + // are memcpys. + for (auto *User : SourceVar->users()) { + auto *CI = dyn_cast(User); + if (!callInstIsMemcpy(CI)) + continue; + + widenDestArray(CI, NumBytesToPad, NumBytesToCopy, SourceDataArray); + + CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(), + NumBytesToCopy + NumBytesToPad)); + } + SourceVar->replaceAllUsesWith(NewGV); + NumGlobalArraysPadded++; return true; } @@ -2109,39 +2121,48 @@ static bool widenGlobalArray(Function *F, CallInst *CI, static bool tryWidenGlobalArraysUsedByMemcpy( GlobalVariable *GV, function_ref GetTTI) { + + if (!GV->hasInitializer()) + return false; + for (auto *User : GV->users()) { CallInst *CI = dyn_cast(User); - if (!CI) + if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI)) continue; Function *F = CI->getCalledFunction(); - if (!F || !F->isIntrinsic() || F->getIntrinsicID() != Intrinsic::memcpy) - continue; - TargetTransformInfo &TTI = GetTTI(*F); auto *BytesToCopyOp = dyn_cast(CI->getArgOperand(2)); if (!BytesToCopyOp) continue; - if (!GV->hasInitializer()) - continue; - ConstantDataArray *SourceDataArray = dyn_cast(GV->getInitializer()); if (!SourceDataArray) continue; unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue(); - unsigned NumBytesToPad = TTI.getNumBytesToPadGlobalArray( - NumBytesToCopy, SourceDataArray->getType()); + auto *Alloca = dyn_cast(CI->getArgOperand(0)); + uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); + uint64_t SZSize = SourceDataArray->getType()->getNumElements(); + unsigned ElementByteWidth = SourceDataArray->getElementByteSize(); + // Calculate the number of elements to 
copy while avoiding floored + // division of integers returning wrong values i.e. copying one byte + // from an array of i16 would yield 0 elements to copy as supposed to 1. + unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth); + + // For safety purposes lets add a constraint and only pad when + // NumElementsToCopy == destination array size == + // source which is a constant + if (NumElementsToCopy != DZSize || DZSize != SZSize) + continue; + + unsigned NumBytesToPad = GetTTI(*F).getNumBytesToPadGlobalArray( + NumBytesToCopy, SourceDataArray->getType()); if (NumBytesToPad) { - bool DestWidened = tryWidenDestArray(F, CI, GV, NumBytesToPad, - NumBytesToCopy, SourceDataArray); - if (DestWidened) { - return widenGlobalArray(F, CI, GV, NumBytesToPad, NumBytesToCopy, - BytesToCopyOp, SourceDataArray); - } + return tryWidenGlobalArrayAndDests(F, GV, NumBytesToPad, NumBytesToCopy, + BytesToCopyOp, SourceDataArray); } } return false; diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll index 61e72c8981783..c7ca7271fd3d2 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll @@ -7,7 +7,7 @@ define void @memcpy_i16_array() { ; CHECK-LABEL: define void @memcpy_i16_array() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [6 x i16], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @[[GLOB0:[0-9]+]], i32 12, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false) ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]]) ; CHECK-NEXT: ret void ; diff --git 
a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll index 91ffdb5816517..e37925a78d2c3 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll @@ -6,15 +6,15 @@ define void @memcpy_multiple() { ; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1 -; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1 ; CHECK-NEXT: [[SOMETHING2:%.*]] = alloca [4 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0:[0-9]+]], i32 4, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0]], i32 4, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @[[GLOB0]], i32 4, i1 false) -; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: [[SOMETHING3:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING3]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr 
nonnull [[SOMETHING2]]) ; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]]) -; CHECK-NEXT: [[CALL4:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING2]]) +; CHECK-NEXT: [[CALL4:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING3]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll index 91cf90a21de91..8ea9e2804370e 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll @@ -7,7 +7,7 @@ define void @foo() { ; CHECK-LABEL: define void @foo() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [12 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @[[GLOB0:[0-9]+]], i32 12, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 12, i1 false) ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll index 30c14af7caf67..ad3620b14ea23 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll @@ -7,7 +7,7 @@ define void @foo() { ; CHECK-LABEL: define void @foo() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [64 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(62) @[[GLOB0:[0-9]+]], i32 64, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 
dereferenceable(62) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(62) @.str, i32 64, i1 false) ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll index ce29192948a57..64f57884cd39e 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll @@ -11,7 +11,7 @@ define i32 @f() { ; CHECK-NEXT: [[POS:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TOKEN:%.*]] = alloca ptr, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 45, ptr [[STRING1]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STRING1]], ptr align 1 @[[GLOB0:[0-9]+]], i32 48, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STRING1]], ptr align 1 @f.string1, i32 48, i1 false) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[POS]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TOKEN]]) ; CHECK-NEXT: [[CALL:%.*]] = call ptr @strchr(ptr [[STRING1]], i32 101) From 72be4cad68a18275acd3d31c2e31454bf00ea9c1 Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Wed, 9 Oct 2024 17:17:43 +0100 Subject: [PATCH 09/14] Fix bug when copying to global dest The case in which copying from a global source to a global dest wasn't handled and caused opt to crash. 
This is now handled and a new test has been added to check Change-Id: Ieb0467797fcee888f6e95e68af4dac9c05d70a4d --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 91 ++++++++++++------- .../GlobalOpt/ARM/arm-widen-global-dest.ll | 26 ++++++ 2 files changed, 86 insertions(+), 31 deletions(-) create mode 100644 llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 21ec83af02411..4cc968a872957 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2043,9 +2043,13 @@ static bool callInstIsMemcpy(CallInst *CI) { } static bool destArrayCanBeWidened(CallInst *CI) { + auto *GV = dyn_cast(CI->getArgOperand(0)); auto *Alloca = dyn_cast(CI->getArgOperand(0)); auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); + if (!GV || !GV->hasInitializer()) + return false; + if (!Alloca || !IsVolatile || IsVolatile->isOne()) return false; @@ -2055,21 +2059,61 @@ static bool destArrayCanBeWidened(CallInst *CI) { return true; } +static GlobalVariable *widenGlobalVariable(GlobalVariable *OldVar, Function *F, + unsigned NumBytesToPad, + unsigned NumBytesToCopy) { + if (!OldVar->hasInitializer()) + return nullptr; + + ConstantDataArray *DataArray = + dyn_cast(OldVar->getInitializer()); + if (!DataArray) + return nullptr; + + // Update to be word aligned (memcpy(...,X,...)) + // create replacement with padded null bytes. + StringRef Data = DataArray->getRawDataValues(); + std::vector StrData(Data.begin(), Data.end()); + for (unsigned int p = 0; p < NumBytesToPad; p++) + StrData.push_back('\0'); + auto Arr = ArrayRef(StrData.data(), NumBytesToCopy + NumBytesToPad); + // Create new padded version of global variable. 
+ Constant *SourceReplace = ConstantDataArray::get(F->getContext(), Arr); + GlobalVariable *NewGV = new GlobalVariable( + *(F->getParent()), SourceReplace->getType(), true, OldVar->getLinkage(), + SourceReplace, SourceReplace->getName()); + // Copy any other attributes from original global variable + // e.g. unamed_addr + NewGV->copyAttributesFrom(OldVar); + NewGV->takeName(OldVar); + return NewGV; +} + static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad, const unsigned NumBytesToCopy, ConstantDataArray *SourceDataArray) { - unsigned ElementByteWidth = SourceDataArray->getElementByteSize(); - unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; - unsigned NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth); - // Update destination array to be word aligned (memcpy(X,...,...)) + + // Dest array can be global or local + auto *DestGV = dyn_cast(CI->getArgOperand(0)); auto *Alloca = dyn_cast(CI->getArgOperand(0)); - IRBuilder<> BuildAlloca(Alloca); - AllocaInst *NewAlloca = BuildAlloca.CreateAlloca(ArrayType::get( - Alloca->getAllocatedType()->getArrayElementType(), NumElementsToCopy)); - NewAlloca->takeName(Alloca); - NewAlloca->setAlignment(Alloca->getAlign()); - Alloca->replaceAllUsesWith(NewAlloca); - Alloca->eraseFromParent(); + if (DestGV) { + auto *F = CI->getCalledFunction(); + auto *NewDestGV = + widenGlobalVariable(DestGV, F, NumBytesToPad, NumBytesToCopy); + DestGV->replaceAllUsesWith(NewDestGV); + } else if (Alloca) { + unsigned ElementByteWidth = SourceDataArray->getElementByteSize(); + unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; + unsigned NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth); + // Update destination array to be word aligned (memcpy(X,...,...)) + IRBuilder<> BuildAlloca(Alloca); + AllocaInst *NewAlloca = BuildAlloca.CreateAlloca(ArrayType::get( + Alloca->getAllocatedType()->getArrayElementType(), NumElementsToCopy)); + NewAlloca->takeName(Alloca); + 
NewAlloca->setAlignment(Alloca->getAlign()); + Alloca->replaceAllUsesWith(NewAlloca); + Alloca->eraseFromParent(); + } } static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar, @@ -2081,25 +2125,10 @@ static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar, !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) return false; - // Update source to be word aligned (memcpy(...,X,...)) - // create replacement with padded null bytes. - StringRef Data = SourceDataArray->getRawDataValues(); - std::vector StrData(Data.begin(), Data.end()); - for (unsigned int p = 0; p < NumBytesToPad; p++) - StrData.push_back('\0'); - auto Arr = ArrayRef(StrData.data(), NumBytesToCopy + NumBytesToPad); - - // Create new padded version of global variable. - Constant *SourceReplace = ConstantDataArray::get(F->getContext(), Arr); - GlobalVariable *NewGV = new GlobalVariable( - *(F->getParent()), SourceReplace->getType(), true, - SourceVar->getLinkage(), SourceReplace, SourceReplace->getName()); - - // Copy any other attributes from original global variable - // e.g. unamed_addr - NewGV->copyAttributesFrom(SourceVar); - NewGV->takeName(SourceVar); - + auto *NewSourceGV = + widenGlobalVariable(SourceVar, F, NumBytesToPad, NumBytesToCopy); + if (!NewSourceGV) + return false; // Update arguments of remaining uses that // are memcpys. 
for (auto *User : SourceVar->users()) { @@ -2112,7 +2141,7 @@ static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar, CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(), NumBytesToCopy + NumBytesToPad)); } - SourceVar->replaceAllUsesWith(NewGV); + SourceVar->replaceAllUsesWith(NewSourceGV); NumGlobalArraysPadded++; return true; diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll new file mode 100644 index 0000000000000..affa7d620804c --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +; CHECK: [4 x i8] +@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1 +; CHECK: [4 x i8] +@other = private unnamed_addr global [3 x i8] [i8 1, i8 2, i8 3] , align 1 + +define void @memcpy_multiple() { +; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [3 x i8], align 1 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 
dereferenceable(3) @.i8, i32 3, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) + ret void +} + +declare i32 @bar(...) From f55c2398f31a89630ad569babebd55a4adba0dc0 Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Thu, 10 Oct 2024 12:00:32 +0100 Subject: [PATCH 10/14] Addressing review comments Change-Id: I029312362f9dd714b2e9bc206cc002883d761b8b --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 7 ++++++- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 13 +++++++------ .../GlobalOpt/ARM/arm-widen-global-dest.ll | 4 +--- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 2623fffd29212..aa173a1f3833c 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -56,7 +56,7 @@ static cl::opt AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true), cl::desc("Enable the generation of WLS loops")); -static cl::opt UseWidenGlobalStrings( +static cl::opt UseWidenGlobalArrays( "widen-global-strings", cl::Hidden, cl::init(true), cl::desc("Enable the widening of global strings to alignment boundaries")); @@ -2815,6 +2815,11 @@ bool ARMTTIImpl::useWidenGlobalStrings() const { return UseWidenGlobalStrings; } unsigned ARMTTIImpl::getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const { + if (!UseWidenGlobalArrays){ + LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n"); + return false; + } + // Don't modify none integer array types if (!ArrayType || !ArrayType->isArrayTy() || !ArrayType->getArrayElementType()->isIntegerTy()) diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 4cc968a872957..197404d12cb2e 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2043,13 +2043,9 @@ static bool callInstIsMemcpy(CallInst *CI) { } static bool destArrayCanBeWidened(CallInst *CI) { - auto *GV = 
dyn_cast(CI->getArgOperand(0)); auto *Alloca = dyn_cast(CI->getArgOperand(0)); auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); - if (!GV || !GV->hasInitializer()) - return false; - if (!Alloca || !IsVolatile || IsVolatile->isOne()) return false; @@ -2129,12 +2125,17 @@ static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar, widenGlobalVariable(SourceVar, F, NumBytesToPad, NumBytesToCopy); if (!NewSourceGV) return false; + // Update arguments of remaining uses that // are memcpys. for (auto *User : SourceVar->users()) { auto *CI = dyn_cast(User); - if (!callInstIsMemcpy(CI)) - continue; + if (!callInstIsMemcpy(CI)) + continue; + + if (CI->getArgOperand(1) != SourceVar) + continue; + widenDestArray(CI, NumBytesToPad, NumBytesToCopy, SourceDataArray); diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll index affa7d620804c..e1a19a2ab0356 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll @@ -1,9 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt <%s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s -; CHECK: [4 x i8] @.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1 -; CHECK: [4 x i8] @other = private unnamed_addr global [3 x i8] [i8 1, i8 2, i8 3] , align 1 define void @memcpy_multiple() { From e1218a075fb574ef2802822d41eba443367f0aeb Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Thu, 10 Oct 2024 12:00:35 +0100 Subject: [PATCH 11/14] Addressing review comments Change-Id: Idc7b14cc785eb88552dd72947eb0df128baa7e90 --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 8 ++++---- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 7 +++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git 
a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index aa173a1f3833c..94321b1b7620b 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2815,10 +2815,10 @@ bool ARMTTIImpl::useWidenGlobalStrings() const { return UseWidenGlobalStrings; } unsigned ARMTTIImpl::getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const { - if (!UseWidenGlobalArrays){ - LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n"); - return false; - } + if (!UseWidenGlobalArrays) { + LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n"); + return false; + } // Don't modify none integer array types if (!ArrayType || !ArrayType->isArrayTy() || diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 197404d12cb2e..16e60bcc45655 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2130,12 +2130,11 @@ static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar, // are memcpys. for (auto *User : SourceVar->users()) { auto *CI = dyn_cast(User); - if (!callInstIsMemcpy(CI)) - continue; + if (!callInstIsMemcpy(CI)) + continue; if (CI->getArgOperand(1) != SourceVar) - continue; - + continue; widenDestArray(CI, NumBytesToPad, NumBytesToCopy, SourceDataArray); From 9e92588c3d6c11ae42e9850d8b25896109dd77d1 Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Fri, 11 Oct 2024 12:16:34 +0100 Subject: [PATCH 12/14] Review comments - Removed handling of global variable destinations. We simply don't pad these for now - Added check that destination array is an array type and added test. 
Change-Id: Ifc53051952ef69c4af64827402baf7d69cab4824 --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 16 +++----- .../GlobalOpt/ARM/arm-widen-dest-non-array.ll | 39 +++++++++++++++++++ .../GlobalOpt/ARM/arm-widen-global-dest.ll | 16 +++++--- 3 files changed, 55 insertions(+), 16 deletions(-) create mode 100644 llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 16e60bcc45655..80c50d6a4bf59 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2043,8 +2043,8 @@ static bool callInstIsMemcpy(CallInst *CI) { } static bool destArrayCanBeWidened(CallInst *CI) { - auto *Alloca = dyn_cast(CI->getArgOperand(0)); auto *IsVolatile = dyn_cast(CI->getArgOperand(3)); + auto *Alloca = dyn_cast(CI->getArgOperand(0)); if (!Alloca || !IsVolatile || IsVolatile->isOne()) return false; @@ -2052,6 +2052,9 @@ static bool destArrayCanBeWidened(CallInst *CI) { if (!Alloca->isStaticAlloca()) return false; + if (!Alloca->getAllocatedType()->isArrayTy()) + return false; + return true; } @@ -2089,15 +2092,8 @@ static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad, const unsigned NumBytesToCopy, ConstantDataArray *SourceDataArray) { - // Dest array can be global or local - auto *DestGV = dyn_cast(CI->getArgOperand(0)); auto *Alloca = dyn_cast(CI->getArgOperand(0)); - if (DestGV) { - auto *F = CI->getCalledFunction(); - auto *NewDestGV = - widenGlobalVariable(DestGV, F, NumBytesToPad, NumBytesToCopy); - DestGV->replaceAllUsesWith(NewDestGV); - } else if (Alloca) { + if (Alloca) { unsigned ElementByteWidth = SourceDataArray->getElementByteSize(); unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; unsigned NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth); @@ -2130,7 +2126,7 @@ static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar, // are memcpys. 
for (auto *User : SourceVar->users()) { auto *CI = dyn_cast(User); - if (!callInstIsMemcpy(CI)) + if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI)) continue; if (CI->getArgOperand(1) != SourceVar) diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll new file mode 100644 index 0000000000000..ab04e0a5bc697 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1 + +define void @memcpy_struct() { +; CHECK-LABEL: define void @memcpy_struct() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca { i8, i8, i8 }, align 1 +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) +; CHECK-NEXT: ret void +; +entry: + %something = alloca {i8, i8, i8}, align 1 + %call1 = call i32 @bar(ptr nonnull %something) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + ret void +} + + +@.i8_multi = private unnamed_addr constant [2 x [3 x i8]] [[3 x i8] [i8 1, i8 2, i8 3], [3 x i8] [i8 4, i8 5, i8 6]] , align 1 + +define void @memcpy_array_multidimensional() { +; CHECK-LABEL: define void @memcpy_array_multidimensional() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [2 x [3 x i8]], align 1 +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef 
nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [2 x [3 x i8]], align 1 + %call1 = call i32 @bar(ptr nonnull %something) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false) + ret void +} + +declare i32 @bar(...) diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll index e1a19a2ab0356..f435ffdeed2c8 100644 --- a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll @@ -1,23 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt <%s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s -@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1 +; CHECK: [3 x i8] @other = private unnamed_addr global [3 x i8] [i8 1, i8 2, i8 3] , align 1 +; CHECK: [4 x i8] +@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1 define void @memcpy_multiple() { ; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) ; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull @other) +; CHECK-NEXT: call 
void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) ; CHECK-NEXT: ret void ; entry: %something = alloca [3 x i8], align 1 - call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + %call1 = call i32 @bar(ptr nonnull %something) + %call2 = call i32 @bar(ptr nonnull @other) call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) - %call2 = call i32 @bar(ptr nonnull %something) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) ret void } From 2815d59a433a31c0c8eb1387a5801e20ca1cb4ca Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Wed, 16 Oct 2024 11:49:08 +0100 Subject: [PATCH 13/14] Rebasing Change-Id: Iad0539e526fb0fc116217dcbd033f8297fa5ef5f --- llvm/lib/Analysis/TargetTransformInfo.cpp | 3 --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 3 --- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 2 -- 3 files changed, 8 deletions(-) diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 163635ef5f6a7..6070473363761 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1381,9 +1381,6 @@ bool TargetTransformInfo::isProfitableToSinkOperands( bool TargetTransformInfo::isVectorShiftByScalarCheap(Type *Ty) const { return TTIImpl->isVectorShiftByScalarCheap(Ty); - -bool TargetTransformInfo::useWidenGlobalStrings() const { - return TTIImpl->useWidenGlobalStrings(); } unsigned diff 
--git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 94321b1b7620b..9f6e5e5ab1421 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2664,7 +2664,6 @@ bool ARMTTIImpl::hasArmWideBranch(bool Thumb) const { } } -<<<<<<< HEAD /// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth /// of the vector elements. static bool areExtractExts(Value *Ext1, Value *Ext2) { @@ -2811,8 +2810,6 @@ bool ARMTTIImpl::isProfitableToSinkOperands(Instruction *I, return true; } -bool ARMTTIImpl::useWidenGlobalStrings() const { return UseWidenGlobalStrings; } - unsigned ARMTTIImpl::getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const { if (!UseWidenGlobalArrays) { diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index a1ba45eaa7021..3a4f940088b2e 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -338,8 +338,6 @@ class ARMTTIImpl : public BasicTTIImplBase { bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl &Ops) const; - bool useWidenGlobalStrings() const; - unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const; /// @} From 75f951a46cf4112a80e4eec4b63c3a3e26fa5d39 Mon Sep 17 00:00:00 2001 From: nasmnc01 Date: Wed, 16 Oct 2024 12:33:11 +0100 Subject: [PATCH 14/14] Responding to review comments - Added test showing behaviour of attempting to widen non-const globals - Refactoring Change-Id: I566214331bf3d889bd1409d3148aa6eab2530ed5 --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 7 ++----- .../ARM/arm-widen-non-const-global.ll | 21 +++++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 
80c50d6a4bf59..4647c65a5c850 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2113,10 +2113,6 @@ static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar, const unsigned NumBytesToCopy, ConstantInt *BytesToCopyOp, ConstantDataArray *SourceDataArray) { - if (!SourceVar->hasInitializer() || !SourceVar->isConstant() || - !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) - return false; - auto *NewSourceGV = widenGlobalVariable(SourceVar, F, NumBytesToPad, NumBytesToCopy); if (!NewSourceGV) @@ -2147,7 +2143,8 @@ static bool tryWidenGlobalArraysUsedByMemcpy( GlobalVariable *GV, function_ref GetTTI) { - if (!GV->hasInitializer()) + if (!GV->hasInitializer() || !GV->isConstant() || !GV->hasLocalLinkage() || + !GV->hasGlobalUnnamedAddr()) return false; for (auto *User : GV->users()) { diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll new file mode 100644 index 0000000000000..3d9c42fe1f3dd --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +@.str = unnamed_addr global [3 x i8] c"12\00", align 1 + +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [3 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [3 x i8], align 1 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) 
%something, ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false) + %call1 = call i32 @bar(ptr nonnull %something) + ret void +} + +declare i32 @bar(...)