From 84e938c82de806b6c9e16d274c0473ac2747b138 Mon Sep 17 00:00:00 2001 From: Jonathan Bailey Date: Thu, 5 May 2022 14:15:34 -0700 Subject: [PATCH 1/3] Drano integrated w/ AMD llvm, basic functionality working. --- llvm/lib/Transforms/CMakeLists.txt | 1 + .../AbstractExecutionEngine.h | 210 +++++++++ .../UncoalescedAnalysis/AbstractState.h | 102 +++++ .../UncoalescedAnalysis/AbstractValue.h | 24 + .../UncoalescedAnalysis/CMakeLists.txt | 30 ++ .../UncoalescedAnalysis/GPUState.cpp | 99 +++++ .../Transforms/UncoalescedAnalysis/GPUState.h | 50 +++ .../InterprocUncoalescedAnalysisPass.cpp | 48 ++ .../InterprocUncoalescedAnalysisPass.h | 68 +++ .../UncoalescedAnalysis/LICENSE.TXT | 68 +++ .../UncoalescedAnalysis/MultiplierValue.cpp | 182 ++++++++ .../UncoalescedAnalysis/MultiplierValue.h | 97 ++++ .../PointerAbstractValue.h | 37 ++ .../UncoalescedAnalysis.cpp | 419 ++++++++++++++++++ .../UncoalescedAnalysis/UncoalescedAnalysis.h | 78 ++++ .../UncoalescedAnalysisPass.cpp | 27 ++ .../UncoalescedAnalysisPass.h | 62 +++ 17 files changed, 1602 insertions(+) create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/AbstractExecutionEngine.h create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/AbstractState.h create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/AbstractValue.h create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/CMakeLists.txt create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/GPUState.cpp create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/GPUState.h create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.cpp create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.h create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/LICENSE.TXT create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/MultiplierValue.cpp create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/MultiplierValue.h create mode 100644 
llvm/lib/Transforms/UncoalescedAnalysis/PointerAbstractValue.h create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.h create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysisPass.cpp create mode 100644 llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysisPass.h diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt index 4302dac9f2cf4..989092ebd2477 100644 --- a/llvm/lib/Transforms/CMakeLists.txt +++ b/llvm/lib/Transforms/CMakeLists.txt @@ -10,3 +10,4 @@ add_subdirectory(ObjCARC) add_subdirectory(Coroutines) add_subdirectory(CFGuard) add_subdirectory(HC) +add_subdirectory(UncoalescedAnalysis) diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/AbstractExecutionEngine.h b/llvm/lib/Transforms/UncoalescedAnalysis/AbstractExecutionEngine.h new file mode 100644 index 0000000000000..b606d92cc078d --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/AbstractExecutionEngine.h @@ -0,0 +1,210 @@ +#ifndef ABSTRACT_EXECUTION_ENGINE_H +#define ABSTRACT_EXECUTION_ENGINE_H + +#include "AbstractState.h" +#include "AbstractValue.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +#ifndef DEBUG_TYPE +#define DEBUG_TYPE "abstract-execution" +#endif + +#define NUM_RECENT_BLOCKS 16 + +using namespace llvm; + +// This class defines an abstract execution engine. An abstract execution engine +// takes in a program and executes the program abstractly using semantics +// defined for an abstract value. +// T is the type of abstract value used for abstract execution. It must implement +// AbstractValue. +// U is the type of abstract state used for abstract execution. It must implement +// AbstractState. 
+template +class AbstractExecutionEngine { + static_assert( + std::is_base_of, T>::value, + "T must be a descendant of AbstractValue" + ); + static_assert( + std::is_base_of, U>::value, + "U must be a descendant of AbstractState" + ); + public: + AbstractExecutionEngine() + : entryBlock_(nullptr) {} + AbstractExecutionEngine(const BasicBlock* entryBlock, U initialState) + : entryBlock_(entryBlock), initialState_(initialState) {} + + virtual ~AbstractExecutionEngine() = 0; + + // Queries the state before an instruction (a default state is inserted and returned if none was recorded). + const U& getStateBeforeInstruction(const Instruction* inst){ + return StateBeforeInstructionMap_[inst]; + } + + // Adds a block to execute next and the state in which the block must be + // executed. + void AddBlockToExecute(const BasicBlock* b, U st); + + // Executes program (can be overridden). + virtual void Execute(); + + // Executes the instruction on a state and returns the state after execution. + virtual U ExecuteInstruction(const Instruction* inst, + U st) = 0; + + protected: + // Entry block where the abstract execution begins. + const BasicBlock* entryBlock_; + + // Initial state before execution of the program. + U initialState_; + + private: + // Returns the next unit to execute. + std::pair getNextExecutionUnit( + std::list>& worklist); + + // Add block to recently executed blocks. + void AddRecentBlock(const BasicBlock* block); + + // Stores some recent blocks executed by the engine. + SmallVector recentBlocks_; + + // Records abstract state before an instruction is executed. + std::map StateBeforeInstructionMap_; + + // Buffer to store the set of blocks that must be executed after this block + // completes execution. + std::list> BlocksToExecuteBuffer_; +}; + +template +AbstractExecutionEngine::~AbstractExecutionEngine() {} + +template +void AbstractExecutionEngine::AddBlockToExecute(const BasicBlock* b, U st) { + BlocksToExecuteBuffer_.push_back(std::pair(b, st)); +} + +// Returns a block in recentBlocks_ if found. 
Otherwise returns the +// first block in worklist. This optimization is useful for execution +// of loops. All blocks within the loop are given priority over blocks +// after the loop. This ensures that the blocks after the loop are +// executed only after the loop reaches a fixed point. +template +std::pair + AbstractExecutionEngine::getNextExecutionUnit( + std::list>& worklist) { + for (const BasicBlock* block : recentBlocks_) { + auto listIt = find_if(worklist.begin(), worklist.end(), + [block](const std::pair& item){ + if (item.first == block) return true; + else return false; + }); + if (listIt != worklist.end()) { + // Block found. + auto unit = *listIt; + worklist.erase(listIt); + AddRecentBlock(unit.first); + return unit; + } + } + auto unit = worklist.front(); + worklist.pop_front(); + AddRecentBlock(unit.first); + return unit; +} + +// Moves block to the front of the recent blocks: any older entry for it is dropped and the last block is evicted when the list is full. +template +void AbstractExecutionEngine::AddRecentBlock(const BasicBlock* block) { + auto pos = recentBlocks_.begin(); + while (pos != recentBlocks_.end() && *pos != block) ++pos; // bounds check first: never dereference end() + if (pos != recentBlocks_.end()) { recentBlocks_.erase(pos); } + if (recentBlocks_.size() >= NUM_RECENT_BLOCKS) { + recentBlocks_.pop_back(); + } + recentBlocks_.insert(recentBlocks_.begin(), block); +} + +template +void AbstractExecutionEngine::Execute() { + // Worklist to execute basic blocks. + // Each worklist item consists of a basicblock and an abstract state to be + // propagated through the block. + std::list> worklist; + worklist.push_back(std::pair(entryBlock_, initialState_)); + + // Execute work items in worklist. + StateBeforeInstructionMap_.clear(); + while (!worklist.empty()) { + auto unit = getNextExecutionUnit(worklist); + const BasicBlock *b = unit.first; // next block to be executed. + U st = unit.second; // state before next block. + LLVM_DEBUG(errs() << "BasicBlock: " << b->getName() << "\n"); + + // Clear buffer. 
+ BlocksToExecuteBuffer_.clear(); + // Execute instructions within the block. + for (BasicBlock::const_iterator it = b->begin(), ite = b->end(); + it != ite; ++it) { + const Instruction* I = &*it; + // If I is the first statement in the block, merge I's pre-state + // with incoming state. + if (it == b->begin()) { + if(StateBeforeInstructionMap_.find(I) != + StateBeforeInstructionMap_.end()) { + U oldState = StateBeforeInstructionMap_[I]; + U newState = oldState.mergeState(st); + // State before block unchanged; no need to execute block. + if (oldState == newState) break; + + StateBeforeInstructionMap_[I] = newState; + } else { + StateBeforeInstructionMap_[I] = st; + } + } else { + StateBeforeInstructionMap_[I] = st; + } + + LLVM_DEBUG(errs() << " " << *I << ", " << st.printInstructionState(I) << "\n"); + st = ExecuteInstruction(I, st); + } + // Add subsequent blocks to be executed. Note that these were added to + // the buffer during the execution of instructions in the current block. + for (auto bufferIt = BlocksToExecuteBuffer_.begin(), + bufferIte = BlocksToExecuteBuffer_.end(); bufferIt != bufferIte; + ++bufferIt) { + // Check if the key already exists in worklist, if so, merge the two + // work items. This is an optimization that helps scale the execution, + // at the cost of being slightly imprecise. 
+ const BasicBlock* block = bufferIt->first; + auto listIt = find_if(worklist.begin(), worklist.end(), + [block](const std::pair& item){ + if (item.first == block) return true; + else return false; + }); + if (listIt != worklist.end()) { + listIt->second = listIt->second.mergeState(bufferIt->second); + } else { + worklist.push_back(std::pair(bufferIt->first, + bufferIt->second)); + } + } + } +} + +#undef DEBUG_TYPE + +#endif /* AbstractExecutionEngine.h */ diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/AbstractState.h b/llvm/lib/Transforms/UncoalescedAnalysis/AbstractState.h new file mode 100644 index 0000000000000..d7646bfbb5f5d --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/AbstractState.h @@ -0,0 +1,102 @@ +#ifndef ABSTRACT_STATE_H +#define ABSTRACT_STATE_H + +#include "AbstractValue.h" + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Value.h" +#include +#include + +using namespace llvm; + +// Defines an abstract state used by the abstract execution engine to store +// the current state of the abstract execution. +template +class AbstractState { + static_assert( + std::is_base_of, T>::value, + "T must be a descendant of AbstractValue" + ); + public: + AbstractState() {} + + virtual ~AbstractState() = 0; + + void clear() { valueMap_.clear(); } + + bool operator==(const AbstractState& st) const { + return (valueMap_ == st.valueMap_); + } + + void operator=(const AbstractState& st) { + valueMap_ = st.valueMap_; + } + + bool hasValue(const Value* in) const { + return (valueMap_.find(in) != valueMap_.end()); + } + + virtual T getValue(const Value* in) const { + return valueMap_.at(in); + } + + virtual void setValue(const Value* in, T v) { valueMap_[in] = v; } + + virtual U mergeState(const U& st) const; + + // Pretty printing + virtual std::string getString() const; + virtual std::string printInstructionState(const Instruction* I) const; + + private: + // Map from variables to their abstract values. 
+ std::map valueMap_; +}; + +template +AbstractState::~AbstractState() {} + +template +U AbstractState::mergeState(const U& st) const { + U result = st; + for (auto it = this->valueMap_.begin(), ite = this->valueMap_.end(); + it != ite; ++it) { + const Value* in = it->first; + T v = it->second; + if (st.hasValue(in)) { + result.setValue(in, v.join(st.valueMap_.at(in))); + } else { + result.setValue(in, v); + } + } + return result; +} + +template +std::string AbstractState::getString() const { + std::string s; + s.append("["); + for (auto it = valueMap_.begin(), ite = valueMap_.end(); it != ite; ++it) { + const Value* in = it->first; + T v = it->second; + s.append(in->getName().str()).append(":").append(v.getString()).append(", "); + } + s.append("]"); + return s; +} + +template +std::string AbstractState::printInstructionState( + const Instruction* I) const { + std::string s; + s.append("["); + for (unsigned i = 0; i < I->getNumOperands(); i++) { + T v = this->getValue(I->getOperand(i)); + s.append(I->getOperand(i)->getName().str()).append(":").append(v.getString()).append(","); + } + s.append("]"); + return s; +} + +#endif /* AbstractState.h */ diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/AbstractValue.h b/llvm/lib/Transforms/UncoalescedAnalysis/AbstractValue.h new file mode 100644 index 0000000000000..948288d31cb80 --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/AbstractValue.h @@ -0,0 +1,24 @@ +#ifndef ABSTRACT_VALUE_H +#define ABSTRACT_VALUE_H + +#include + +// This class defines an abstract value and semantics for the various +// operations performed during the abstract execution of the program. +template +class AbstractValue { + public: + AbstractValue() {} + + virtual ~AbstractValue() = 0; + + virtual std::string getString() const = 0; + + // Returns a merge of this value with value v. 
+ virtual T join(const T& v) const = 0; +}; + +template +AbstractValue::~AbstractValue() {} + +#endif /* AbstractValue.h */ diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/CMakeLists.txt b/llvm/lib/Transforms/UncoalescedAnalysis/CMakeLists.txt new file mode 100644 index 0000000000000..5f3794231c610 --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/CMakeLists.txt @@ -0,0 +1,30 @@ +# If we don't need RTTI or EH, there's no reason to export anything +# from the hello plugin. +# if( NOT LLVM_REQUIRES_RTTI ) +# if( NOT LLVM_REQUIRES_EH ) +# set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/Hello.exports +# endif() +# endif() + +if(WIN32 OR CYGWIN) + set(LLVM_LINK_COMPONENTS Core Support) +endif() + +#set(CMAKE_BUILD_TYPE "Debug") +set(LLVM_ENABLE_PLUGINS ON) +set(LLVM_LINK_COMPONENTS + Demangle +) + +add_llvm_library(LLVMUncoalescedAnalysis MODULE + InterprocUncoalescedAnalysisPass.cpp + UncoalescedAnalysisPass.cpp + GPUState.cpp + MultiplierValue.cpp + UncoalescedAnalysis.cpp + + DEPENDS + intrinsics_gen + PLUGIN_TOOL + opt + ) diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/GPUState.cpp b/llvm/lib/Transforms/UncoalescedAnalysis/GPUState.cpp new file mode 100644 index 0000000000000..be72500471cd9 --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/GPUState.cpp @@ -0,0 +1,99 @@ +#include "GPUState.h" + +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +MultiplierValue GPUState::getValue(const Value* in) const { + if (isa(in)) { + // Return value (zero); + return MultiplierValue(ZERO); + } else if (hasValue(in)) { + // Return value found in state. + return AbstractState::getValue(in); + } else { + // Default: return (bot) or (unset) value. 
+ return MultiplierValue(BOT); + } +} + +GPUState GPUState::mergeState(const GPUState& st) const { + GPUState result = AbstractState::mergeState(st); + result.numThreads_ = numThreads_.join(st.numThreads_); + return result; +} + +std::string GPUState::getString() const { + std::string s = AbstractState::getString(); + s.append(" #t:").append(numThreads_.getString()); + return s; +} +std::string GPUState::printInstructionState(const Instruction* I) const { + std::string s = AbstractState::printInstructionState(I); + s.append(" #t:").append(numThreads_.getString()); + return s; +} + +bool GPUState::testGPUState() { + GPUState st1, st2; + + static LLVMContext context; + const Value* g = new GlobalVariable( + /*Type */ Type::getInt32Ty(context), + /*isConstant */ false, + /*Linkage */ GlobalValue::CommonLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/"g"); + + st1.setValue(g, MultiplierValue(ZERO)); + st1.setNumThreads(MultiplierValue(ONE)); + errs() << " st1 : " << st1.getString() << "\n"; + errs() << " st2 : " << st2.getString() << "\n"; + errs() << " st1 == st2 : " << (st1 == st2 ? "true": "false") << "\n"; + if (st1 == st2) return false; + + GPUState st3 = st1.mergeState(st2); + errs() << " st1.merge(st2) : " << st3.getString() << "\n"; + st3 = st2.mergeState(st1); + errs() << " st2.merge(st1) : " << st3.getString() << "\n\n"; + + st2 = st1; + errs() << " st1 : " << st1.getString() << "\n"; + errs() << " st2 (st2 := st1) : " << st2.getString() << "\n"; + errs() << " st1 == st2 : " << (st1 == st2 ? "true": "false") << "\n\n"; + if (!(st1 == st2)) return false; + + MultiplierValue v = st1.getValue(g); + v.setAddressType(); + st1.setValue(g, v); + errs() << " st1 : " << st1.getString() << "\n"; + errs() << " st2 : " << st2.getString() << "\n"; + errs() << " st1 == st2 : " << (st1 == st2 ? 
"true": "false") << "\n\n"; + if (!(st1 == st2)) return false; + + st2.setValue(g, MultiplierValue(ONE)); + errs() << " st1 : " << st1.getString() << "\n"; + errs() << " st2 : " << st2.getString() << "\n"; + errs() << " st1 == st2 : " << (st1 == st2 ? "true": "false") << "\n"; + if (st1 == st2) return false; + st3 = st2.mergeState(st1); + errs() << " merge(st1, st2) : " << st3.getString() << "\n\n"; + if (st3.getValue(g) != st1.getValue(g).join(st2.getValue(g))) { + return false; + } + + st1.setValue(g, MultiplierValue(NEGONE)); + errs() << " st1 : " << st1.getString() << "\n"; + errs() << " st2 : " << st2.getString() << "\n"; + st3 = st2.mergeState(st1); + errs() << " merge(st1, st2) : " << st3.getString() << "\n\n"; + if (st3.getValue(g) != st1.getValue(g).join(st2.getValue(g))) { + return false; + } + return true; +} diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/GPUState.h b/llvm/lib/Transforms/UncoalescedAnalysis/GPUState.h new file mode 100644 index 0000000000000..84094689109f3 --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/GPUState.h @@ -0,0 +1,50 @@ +#ifndef GPU_STATE_H +#define GPU_STATE_H + +#include "AbstractState.h" +#include "MultiplierValue.h" + +using namespace llvm; + +// Defines an abstract state used by the increment value analysis. 
+class GPUState : public AbstractState { + public: + GPUState() : numThreads_(MultiplierValue(MultiplierValueType::TOP)) {} + + void clear() { + AbstractState::clear(); + numThreads_ = MultiplierValue(MultiplierValueType::TOP); + } + + bool operator==(const GPUState& st) const { + return AbstractState::operator==(st) && numThreads_ == st.numThreads_; + } + + void operator=(GPUState st) { + AbstractState::operator=(st); + numThreads_ = st.numThreads_; + } + + // Getters and Setters + MultiplierValue getNumThreads() const { return numThreads_; } + void setNumThreads(MultiplierValue numThreads) { numThreads_ = numThreads; } + + MultiplierValue getValue(const Value* in) const override; + + GPUState mergeState(const GPUState& st) const override; + + // Pretty printing (both override the AbstractState versions and append the thread count) + std::string getString() const override; + std::string printInstructionState(const Instruction* I) const override; + + // Test to check correctness. + static bool testGPUState(); + + private: + // Number of active threads. It takes two values: + // - (one): A single thread is active. + // - (unknown): Arbitrary number of threads are active. + MultiplierValue numThreads_; +}; + +#endif /* GPUState.h */ diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.cpp b/llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.cpp new file mode 100644 index 0000000000000..151f9b9b915ac --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.cpp @@ -0,0 +1,48 @@ +#include "InterprocUncoalescedAnalysisPass.h" + +#define DEBUG_TYPE "uncoalesced-analysis" + +using namespace llvm; + +bool InterproceduralUncoalescedAnalysisPass::runOnModule(Module &M) { + auto &CG = getAnalysis().getCallGraph(); + + // Generate topological order of visiting function nodes. 
+ std::vector functionList; + for (scc_iterator I = scc_begin(&CG), IE = scc_end(&CG); + I != IE; ++I) { + const std::vector &SCCCGNs = *I; + for (std::vector::const_iterator CGNI = SCCCGNs.begin(), + CGNIE = SCCCGNs.end(); + CGNI != CGNIE; ++CGNI) { + if ((*CGNI)->getFunction()) { + Function *F = (*CGNI)->getFunction(); + if (!F->isDeclaration()) { + functionList.insert(functionList.begin(), F); + } + } + } + } + + // Map from function to initial argument values. + // It is built by joining all contexts in which the function is called. + std::map> + FunctionArgumentValues; + + // Run analysis on functions. + for (Function *F : functionList) { + LLVM_DEBUG(errs() << "Analyzing function: " << F->getName() << "\n"); + DominatorTree DT(*F); + UncoalescedAnalysis UA(F, &DT, &FunctionArgumentValues); + errs() << "Analysis Results: \n"; + GPUState st = UA.BuildInitialState(); + UA.BuildAnalysisInfo(st); + std::set uncoalesced = UA.getUncoalescedAccesses(); + UncoalescedAccessMap_.emplace(F, uncoalesced); + } + return false; +} + +char InterproceduralUncoalescedAnalysisPass::ID = 0; +static RegisterPass +Y("interproc-uncoalesced-analysis", "Interprocedural analysis to detect uncoalesced accesses in gpu programs."); diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.h b/llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.h new file mode 100644 index 0000000000000..5192c0d82a6a1 --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.h @@ -0,0 +1,68 @@ +//===- InterprocUncoalescedAnalysisPass.h - Interprocedural analysis to identify uncoalesced accesses in GPU programs -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// This pass does an inter-procedural dataflow analysis to detect potentially +// uncoalesced load/store accesses in GPU programs. It uses an abstract- +// interpretation based analysis to compute the dependence of accessed indices +// on threadID (a unique identifier for threads). If the dependence is +// non-linear or a large linear function, the access is labelled as uncoalesced. +// +// It starts with the analysis of the top-most functions in the call-graph and +// then proceeds with the analysis of their callees in a topological order. +// While analyzing a specific callee, it considers the join of the call contexts +// of all its callers. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_INTERPROC_UNCOALESCED_ANALYSIS_PASS_H +#define LLVM_INTERPROC_UNCOALESCED_ANALYSIS_PASS_H + +#include "MultiplierValue.h" +#include "UncoalescedAnalysis.h" + +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include + +namespace llvm { + +struct InterproceduralUncoalescedAnalysisPass : public ModulePass { + std::map> UncoalescedAccessMap_; + + public: + static char ID; // Pass identification, replacement for typeid + InterproceduralUncoalescedAnalysisPass() : ModulePass(ID) {} + + // Interprocedural analysis for uncoalesced accesses. + bool runOnModule(Module &M) override; + + const std::set& getUncoalescedAccesses(const Function* F) + const { + return UncoalescedAccessMap_.at(F); + } + + // We don't modify the program, so we preserve all analyses. 
+ void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesAll(); + } +}; + +} + +#endif /* InterproceduralUncoalescedAnalysisPass.h */ diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/LICENSE.TXT b/llvm/lib/Transforms/UncoalescedAnalysis/LICENSE.TXT new file mode 100644 index 0000000000000..ff63f2b6aae3f --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/LICENSE.TXT @@ -0,0 +1,68 @@ +============================================================================== +LLVM Release License +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2017 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== +Copyrights and Licenses for Third Party Software Distributed with LLVM: +============================================================================== +The LLVM software contains code written by third parties. Such software will +have its own individual LICENSE.TXT file in the directory in which it appears. +This file will describe the copyrights, license, and restrictions which apply +to that code. + +The disclaimer of warranty in the University of Illinois Open Source License +applies to all code in the LLVM Distribution, and nothing in any of the +other licenses gives permission to use the names of the LLVM Team or the +University of Illinois to endorse or promote products derived from this +Software. 
+ +The following pieces of software have additional or alternate copyrights, +licenses, and/or restrictions: + +Program Directory +------- --------- +Google Test llvm/utils/unittest/googletest +OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} +pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT} +ARM contributions llvm/lib/Target/ARM/LICENSE.TXT +md5 contributions llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/MultiplierValue.cpp b/llvm/lib/Transforms/UncoalescedAnalysis/MultiplierValue.cpp new file mode 100644 index 0000000000000..ab16f0560387b --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/MultiplierValue.cpp @@ -0,0 +1,182 @@ +#include "MultiplierValue.h" + +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +// Helper functions: Int to MultiplierValue; MultiplierValue to Int +MultiplierValue MultiplierValue::getMultiplierValue(int x, bool b) { + if (x == 0) return MultiplierValue(ZERO, b); + if (x == 1) return MultiplierValue(ONE, b); + if (x == -1) return MultiplierValue(NEGONE, b); + return MultiplierValue(TOP); +} + +int MultiplierValue::getIntValue() const { + if(t_ == ZERO) return 0; + if(t_ == ONE) return 1; + if(t_ == NEGONE) return -1; + return 2; // TOP +} + +MultiplierValue MultiplierValue::join(const MultiplierValue& v) const { + if (t_ == BOT) return v; + if (v.t_ == BOT) return *this; + if (t_ == v.t_) return v; + return MultiplierValue(TOP); +} + +// Binary Operations + +// Integer addition +MultiplierValue operator+(const MultiplierValue& v1, const MultiplierValue& v2) { + if (v1.t_ == BOT || v2.t_ == BOT) return MultiplierValue(BOT); + if (v1.t_ == TOP || v2.t_ == TOP) return MultiplierValue(TOP); + int out = v1.getIntValue() + v2.getIntValue(); + return MultiplierValue::getMultiplierValue(out, false); +} + +// Integer multiplication +MultiplierValue operator*(const MultiplierValue& v1, const MultiplierValue& v2) { + if (v1.t_ == BOT || 
v2.t_ == BOT) return MultiplierValue(BOT); + if (v1.t_ == ZERO && v2.t_ == ZERO) return MultiplierValue(ZERO); + return MultiplierValue(TOP); +} + +// Returns abstract value for predicate (v1 == v2); +// If the incoming values are equal, then they have the same dependence on +// thread ID, hence the conditional is thread ID independent. Returns boolean +// value (zero). +// If one incoming value is a constant, and other is linear in thread ID, +// they can be equal for at most one thread. Returns boolean value (one). +MultiplierValue eq(const MultiplierValue& v1, const MultiplierValue& v2) { + if (v1.t_ == BOT || v2.t_ == BOT) return MultiplierValue(BOT); + if (v1.t_ == v2.t_ && v1.t_ != TOP) return MultiplierValue(ZERO, true); + if (((v1.t_ == ONE || v1.t_ == NEGONE) && v2.t_ == ZERO) || + ((v2.t_ == ONE || v2.t_ == NEGONE) && v1.t_ == ZERO)) { + return MultiplierValue(ONE, true); + } + return MultiplierValue(TOP); +} + +// Returns abstract value for predicate (v1 != v2); +// If the incoming values are equal, then they have the same dependence on +// thread ID, hence the conditional is thread ID independent. Returns boolean +// value (zero). +// If one incoming value is a constant, and other is linear in thread ID, +// they will be unequal for all but one thread. Returns boolean value (negone). +MultiplierValue neq(const MultiplierValue& v1, const MultiplierValue& v2) { + if (v1.t_ == BOT || v2.t_ == BOT) return MultiplierValue(BOT); + if (v1.t_ == v2.t_ && v1.t_ != TOP) return MultiplierValue(ZERO, true); + if (((v1.t_ == ONE || v1.t_ == NEGONE) && v2.t_ == ZERO) || + ((v2.t_ == ONE || v2.t_ == NEGONE) && v1.t_ == ZERO)) { + return MultiplierValue(NEGONE, true); + } + return MultiplierValue(TOP); +} + +// Returns conjunction of abstract predicate values. +// If both predicates are thread-independent, result is thread-independent. +// If one of the predicates is true for at most one thread, the conjunction is +// true for at most one thread. 
+MultiplierValue operator&&(const MultiplierValue& v1, const MultiplierValue& v2) { + if (v1.t_ == BOT || v2.t_ == BOT) return MultiplierValue(BOT); + if (v1.t_ == ZERO && v2.t_ == ZERO) return MultiplierValue(ZERO, true); + if (v1.t_ == ONE || v2.t_ == ONE) return MultiplierValue(ONE, true); + return MultiplierValue(TOP); +} + +// Returns disjunction of abstract predicate values. +// If both predicates are thread-independent, result is thread-independent. +// If one of the predicates is false for at most one thread, the disjunction is +// false for at most one thread. +MultiplierValue operator||(const MultiplierValue& v1, const MultiplierValue& v2) { + if (v1.t_ == BOT || v2.t_ == BOT) return MultiplierValue(BOT); + if (v1.t_ == ZERO && v2.t_ == ZERO) return MultiplierValue(ZERO, true); + if (v1.t_ == NEGONE || v2.t_ == NEGONE) return MultiplierValue(NEGONE, true); + return MultiplierValue(TOP); +} + +// Negates the abstract value (useful for negation as well.) +MultiplierValue operator-(const MultiplierValue& v) { + if (v.t_ == BOT) return MultiplierValue(BOT); + if (v.t_ == TOP) return MultiplierValue(TOP); + int out = -v.getIntValue(); + return MultiplierValue::getMultiplierValue(out, v.isBool_); +} + +bool operator==(const MultiplierValue& v1, const MultiplierValue& v2) { + return (v1.t_ == v2.t_); +} + +bool operator!=(const MultiplierValue& v1, const MultiplierValue& v2) { + return (v1.t_ != v2.t_); +} + +std::string MultiplierValue::getString() const { + std::string s; + if (isAddressType()) s.append("*"); + switch(t_) { + case BOT: + return s.append("u"); + case ZERO: + return s.append("0"); + case ONE: + return s.append("1"); + case NEGONE: + return s.append("-1"); + case TOP: + default: + return s.append(">1"); + } +} + +bool MultiplierValue::testMultiplierValue() { + MultiplierValue a = MultiplierValue(ZERO); + MultiplierValue b = MultiplierValue(ONE); + MultiplierValue c; + + errs() << " a : " << a.getString() << "\n"; + errs() << " b : " << 
b.getString() << "\n"; + errs() << "\n"; + + // Test binary operations. + c = a + b; + errs() << " c := a + b : " << c.getString() << "\n"; + c = b + c; + errs() << " c := b + c : " << c.getString() << "\n"; + c = a * b; + errs() << " c := a * b : " << c.getString() << "\n"; + c = b * c; + errs() << " c := b * c : " << c.getString() << "\n"; + errs() << "\n"; + + // Test relational and boolean operations. + c = eq(a, b); + errs() << " eq(a, b) : " << c.getString() << "\n"; + c = neq(a, b); + errs() << " neq(a, b) : " << c.getString() << "\n"; + c = a && b; + errs() << " a && b : " << c.getString() << "\n"; + c = a || b; + errs() << " a || b : " << c.getString() << "\n"; + + // Test assignment, equality. + errs() << " a == b : " << ((a == b)? "true": "false") << "\n"; + a = b; + errs() << " a after (a := b) : " << a.getString() << "\n"; + errs() << " a == b : " << ((a == b)? "true": "false") << "\n"; + errs() << "\n"; + + // Test join operation. + c = a.join(b); + errs() << " c := a.join(b) : " << c.getString() << "\n"; + c = a.join(MultiplierValue(BOT)); + errs() << " c := a.join(bot) : " << c.getString() << "\n"; + c = a.join(MultiplierValue(TOP)); + errs() << " c := a.join(top) : " << c.getString() << "\n"; + errs() << "\n"; + + return true; +} diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/MultiplierValue.h b/llvm/lib/Transforms/UncoalescedAnalysis/MultiplierValue.h new file mode 100644 index 0000000000000..8fcfac3eb8d1a --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/MultiplierValue.h @@ -0,0 +1,97 @@ +#ifndef MULTIPLIER_VALUE_H +#define MULTIPLIER_VALUE_H + +#include "PointerAbstractValue.h" + +// Multiplier Value +// An abstract value used to represent values of integer and boolean variables. +// +// For integer variables, the abstraction is as follows: +// - (unset) or (bot) represents undefined values. +// - (zero) represents values which are independent of thread ID. Thus, +// the variable has same value across threads. 
+// - (one) represents values which have a unit dependence on threadID. Thus the +// variable has an expression of the form (thread ID + constant). +// - (negone) represents values which have a negative unit dependence on thread ID +// (-thread ID + constant). +// - (unknown) or (top): other kinds of unknown dependences on thread ID (e.g. 2.threadID, +// 10.threadID + 1). +// For integers it is also referred to as the "thread ID multiplier" (since it +// tracks the multiplier for thread ID). +// +// For boolean variables, (bot) and (top) representations are the same. +// Other values are as follows: +// - (zero) : The variable has same truth value across threads (similar to integers). +// - (one) : The variable is true for at most one thread. +// - (negone) : The variable is false for at most one thread. + +namespace uncoalesced_analysis { + +enum MultiplierValueType { + BOT, + ZERO, + ONE, + NEGONE, + TOP +}; + +} + +using namespace uncoalesced_analysis; + +class MultiplierValue : public PointerAbstractValue { + public: + MultiplierValue() : t_(MultiplierValueType::BOT), isBool_(false) {} + + MultiplierValue(MultiplierValueType t, bool isBool = false) + : t_(t), isBool_(isBool) {} + + // Merge values; returns the least value that supersedes both values. + MultiplierValue join(const MultiplierValue& v) const; + + // Binary operations + friend MultiplierValue operator+(const MultiplierValue& v1, const MultiplierValue& v2); + friend MultiplierValue operator*(const MultiplierValue& v1, const MultiplierValue& v2); + friend MultiplierValue operator-(const MultiplierValue& v); + friend MultiplierValue operator&&(const MultiplierValue& v1, const MultiplierValue& v2); + friend MultiplierValue operator||(const MultiplierValue& v1, const MultiplierValue& v2); + // Abstract value for predicate (v1 == v2). + friend MultiplierValue eq(const MultiplierValue& v1, const MultiplierValue& v2); + // Abstract value for predicate (v1 != v2).
+ friend MultiplierValue neq(const MultiplierValue& v1, const MultiplierValue& v2); + friend bool operator==(const MultiplierValue& v1, const MultiplierValue& v2); + friend bool operator!=(const MultiplierValue& v1, const MultiplierValue& v2); + + // Getters and setters. + MultiplierValueType getType() const { return t_; } + bool isBoolean() const { return isBool_; } + + // Pretty printing + std::string getString() const; + + // Test to check correctness. + static bool testMultiplierValue(); + + private: + // Helper functions. + int getIntValue() const; + static MultiplierValue getMultiplierValue(int x, bool b); + + // The type of value. + MultiplierValueType t_; + + // Is this a boolean value or an integer value? + bool isBool_; +}; + +MultiplierValue operator+(const MultiplierValue& v1, const MultiplierValue& v2); +MultiplierValue operator*(const MultiplierValue& v1, const MultiplierValue& v2); +MultiplierValue operator-(const MultiplierValue& v); +MultiplierValue operator&&(const MultiplierValue& v1, const MultiplierValue& v2); +MultiplierValue operator||(const MultiplierValue& v1, const MultiplierValue& v2); +MultiplierValue eq(const MultiplierValue& v1, const MultiplierValue& v2); +MultiplierValue neq(const MultiplierValue& v1, const MultiplierValue& v2); +bool operator==(const MultiplierValue& v1, const MultiplierValue& v2); +bool operator!=(const MultiplierValue& v1, const MultiplierValue& v2); + +#endif /* MultiplierValue.h */ diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/PointerAbstractValue.h b/llvm/lib/Transforms/UncoalescedAnalysis/PointerAbstractValue.h new file mode 100644 index 0000000000000..d9b5cdc5bf5fa --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/PointerAbstractValue.h @@ -0,0 +1,37 @@ +#ifndef POINTER_ABSTRACT_VALUE_H +#define POINTER_ABSTRACT_VALUE_H + +#include "AbstractValue.h" + +// This class defines that an abstract value that distinguishes address (lvalue) +// of a variable from the value (rvalue) of the variable. 
Hence, it keeps a flag +// `isAddressType_' to track whether the abstract value represents the address or the +// value of the variable. +template +class PointerAbstractValue : public AbstractValue { + public: + PointerAbstractValue() : isAddressType_(false) {} + + virtual ~PointerAbstractValue() = 0; + + bool isAddressType() const { return isAddressType_; } + + void setAddressType() { isAddressType_ = true; } + + void setValueType() { isAddressType_ = false; } + + virtual std::string getString() const = 0; + + // Returns a merge of this value with value v. + virtual T join(const T& v) const = 0; + + private: + // Does this value represent the address (lvalue) of the variable or the + // value (rvalue) of the variable? + bool isAddressType_; +}; + +template +PointerAbstractValue::~PointerAbstractValue() {} + +#endif /* PointerAbstractValue.h */ diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp new file mode 100644 index 0000000000000..9830913a46ace --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp @@ -0,0 +1,419 @@ +#include "UncoalescedAnalysis.h" +#include "llvm/Demangle/Demangle.h" +#include +#include + +#define DEBUG_TYPE "uncoalesced-analysis" + +using namespace llvm; + +// Searches FunctionArgumentValues_ for argument values of F_. If found, returns +// the values, otherwise returns (zero) for all arguments. +GPUState UncoalescedAnalysis::BuildInitialState() const { + std::map argMap; + // Check if argument values already exist. + if (FunctionArgumentValues_ && + FunctionArgumentValues_->find(F_) != FunctionArgumentValues_->end()) { + argMap = FunctionArgumentValues_->at(F_); + } + GPUState st; + for (Function::const_arg_iterator argIt = F_->arg_begin(); + argIt != F_->arg_end(); argIt++) { + MultiplierValue v; + const Value* arg = &*argIt; + // Check if argument value exists. 
+ if (argMap.find(arg) == argMap.end()) { v = MultiplierValue(ZERO); } + else { v = MultiplierValue(argMap.at(arg)); } + // If argument is a pointer, set v to address type. + if (arg->getType()->isPointerTy()) { v.setAddressType(); } + st.setValue(arg, v); + } + return st; +} + +MultiplierValue UncoalescedAnalysis::getConstantExprValue(const Value* p) { + MultiplierValue v = MultiplierValue(BOT); + ConstantExpr *pe = const_cast(cast(p)); + // Handle inline getelementptr instruction. + GetElementPtrInst* gep = nullptr; + if (pe->getOpcode() == Instruction::GetElementPtr) { + LLVM_DEBUG(errs() << "Inline get elementptr found \n"); + gep = cast(pe->getAsInstruction()); + if (isa(gep->getPointerOperand())) { + pe = cast(gep->getPointerOperand()); + } + } + if (pe->getOpcode() == Instruction::AddrSpaceCast) { + LLVM_DEBUG(errs() << "Special memory space found \n"); + const AddrSpaceCastInst* asci = + cast(pe->getAsInstruction()); + // Shared memory / local data store (LDS) + if (asci->getSrcAddressSpace() == 3) { + v = MultiplierValue(BOT); + } + // Constant memory (never written) + else if (asci->getSrcAddressSpace() == 4) { + v = MultiplierValue(ZERO); + } + delete asci; + } + if (gep) delete gep; + return v; +} + + +size_t UncoalescedAnalysis::getTypeSize(const Type *ty, const DataLayout &DL) const { + // Note (jobailey): This used to be memoized (likely because you + // might have needed to drill down trough nested pointer / array + // types previously). However, this doesn't *seem* necessary w/ + // opaque pointer types, since we're directly grabbing the type + // from load & store instructions. 
+ return DL.getTypeAllocSize(const_cast(ty)); +} + +GPUState UncoalescedAnalysis::ExecuteInstruction( + const Instruction* I, GPUState st) { + if (isa(I)) { + const BinaryOperator* BO = cast(I); + const Value* in1 = BO->getOperand(0); + const Value* in2 = BO->getOperand(1); + // Get values + MultiplierValue v1, v2; + v1 = st.getValue(in1); + v2 = st.getValue(in2); + // Apply operation to get the resultant value. + MultiplierValue v; + auto op = BO->getOpcode(); + switch (op) { + case Instruction::URem: + case Instruction::SRem: + case Instruction::AShr: + case Instruction::LShr: + v = v1; + break; + case Instruction::Add: + v = v1 + v2; + break; + case Instruction::Sub: + v = v1 + (- v2); + break; + case Instruction::Shl: + case Instruction::Mul: + v = v1 * v2; + break; + case Instruction::UDiv: + case Instruction::SDiv: + v = v1 * v2; + break; + case Instruction::Or: + v = v1 || v2; + break; + case Instruction::And: + v = v1 && v2; + break; + case Instruction::Xor: + v = (v1 && (-v2)) || (v2 && (-v1)); + break; + default: + v = MultiplierValue(TOP); + break; + } + st.setValue(BO, v); + + } else if (isa(I)) { + st.setValue(I, st.getValue(I->getOperand(0))); + + } else if (isa(I)) { + const CallInst *CI = cast(I); + if (CI->isInlineAsm()) { + st.setValue(CI, MultiplierValue(TOP)); + } else { + // If function has no name, return! 
+ Function *calledF = CI->getCalledFunction(); + if (!calledF || !calledF->hasName()) { + st.setValue(CI, MultiplierValue(TOP)); + } else { + std::string name = demangle(calledF->getName().str()); + if (!name.compare("__HIP_Coordinates<__HIP_ThreadIdx>::__X::operator unsigned int() const")) { + st.setValue(CI, MultiplierValue(ONE)); + } else if ( + !name.compare("__HIP_Coordinates<__HIP_ThreadIdx>::__Y::operator unsigned int() const") || + !name.compare("__HIP_Coordinates<__HIP_ThreadIdx>::__Z::operator unsigned int() const") || + !name.compare("__HIP_Coordinates<__HIP_BlockDim>::__X::operator unsigned int() const") || + !name.compare("__HIP_Coordinates<__HIP_BlockDim>::__Z::operator unsigned int() const") || + !name.compare("__HIP_Coordinates<__HIP_BlockDim>::__Y::operator unsigned int() const") || + !name.compare("__HIP_Coordinates<__HIP_BlockIdx>::__X::operator unsigned int() const") || + !name.compare("__HIP_Coordinates<__HIP_BlockIdx>::__Y::operator unsigned int() const") || + !name.compare("__HIP_Coordinates<__HIP_BlockIdx>::__Z::operator unsigned int() const") || + !name.compare("__HIP_Coordinates<__HIP_GridDim>::__X::operator unsigned int() const") || + !name.compare("__HIP_Coordinates<__HIP_GridDim>::__Y::operator unsigned int() const") || + !name.compare("__HIP_Coordinates<__HIP_GridDim>::__Z::operator unsigned int() const") + ) { + + st.setValue(CI, MultiplierValue(ZERO)); + } else if (!name.compare("memcpy(void*, void const*, unsigned long)")) { + // ***** Handling special case of copy between data structures. ***** + Value* dstOperand = CI->getArgOperand(0); + if (isa(dstOperand)) { + // Get actual operand from the bitcast instruction. + dstOperand = cast(dstOperand)->getOperand(0); + } + Value* srcOperand = CI->getArgOperand(1); + if (isa(srcOperand)) { + // Get actual operand from the bitcast instruction.
+ srcOperand = cast(srcOperand)->getOperand(0); + } + st.setValue(dstOperand, st.getValue(srcOperand)); + } else { + st.setValue(CI, MultiplierValue(TOP)); + } + } + // If calledF is known (direct call), is not a declaration and FunctionArgumentValues_ is not + // nullptr, create a call context consisting of mapping from arguments + // to their abstract values and merge it with the existing call context + // for calledF. This represents the values that flow during the call into + // the arguments of calledF. + if (calledF && !calledF->isDeclaration() && FunctionArgumentValues_) { + std::map argMap; + // Check if argument values exist in the map. + if (FunctionArgumentValues_->find(calledF) != + FunctionArgumentValues_->end()) { + argMap = FunctionArgumentValues_->at(calledF); + } + // Iterate over arguments and update argMap. + unsigned i = 0; + for (auto argIt = calledF->arg_begin(); + argIt != calledF->arg_end() && i < CI->arg_size(); + argIt++) { + const Value* arg = &*argIt; + MultiplierValue v = st.getValue(CI->getArgOperand(i)); + if (argMap.find(arg) == argMap.end()) { argMap[arg] = v; } + else { argMap[arg] = v.join(argMap[arg]); } + ++i; + } + (*FunctionArgumentValues_)[calledF] = argMap; // Store the merged context, replacing any previous entry. + + // Print called arguments. + LLVM_DEBUG(errs() << "Called function " << calledF->getName() + << " with args ("); + for (auto argIt = calledF->arg_begin(); + argIt != calledF->arg_end(); argIt++) { + LLVM_DEBUG(errs() << argMap[&*argIt].getString() << ", "); + } + LLVM_DEBUG(errs() << ")\n"); + } + } + + } else if (isa(I)) { + const AllocaInst *AI = cast(I); + // If the allocated type is a pointer, the initial value of the + // pointer is assumed to be threadID-independent and initialized to + // (zero) value.
+ if (AI->getAllocatedType()->isPointerTy()) { + st.setValue(AI, MultiplierValue(ZERO)); + } + + } else if (isa(I)) { + const LoadInst *LI = cast(I); + const Value* p = LI->getPointerOperand(); + MultiplierValue v = st.getValue(p); + // HACK to identify inline getelementptr + if(isa(p)) { + v = getConstantExprValue(p); + } + // Detect uncoalesced accesses. + size_t size = getTypeSize(LI->getType(), + LI->getModule()->getDataLayout()); + if (v.isAddressType() && (st.getNumThreads().getType() == TOP) && + ((size > 4 && (v.getType() == ONE || v.getType() == NEGONE)) || + (v.getType() == TOP))) { + UncoalescedAccesses_.insert(LI); + LLVM_DEBUG(errs() << "UNCOALESCED ACCESS FOUND in access at "); + LLVM_DEBUG(cast(LI)->getDebugLoc().print(errs())); + LLVM_DEBUG(errs() << " in \n " << *LI << "\n\n"); + } + // if p stores address, return value(p) * (incr 0) + // else return value(p). + if (v.isAddressType()) { v = v * MultiplierValue(ZERO); } + // If I is a pointer, v corresponds to the address of a variable. + // NOTICE: We implicitly assume that all global variables/arrays + // will be sent as a pointer argument to the kernel function. + // Hence, only loads on such variables are set to address type. + if (LI->getType()->isPointerTy()) { v.setAddressType(); } + +// // Handling arrays. +// if (st.isArray(p)) { +// // Register LI to be the 0th index into array p. +// st.registerArrayIndex(LI, p, 0); +// } + st.setValue(LI, v); + + } else if (isa(I)) { + const GetElementPtrInst* GEPI = cast(I); + const Value* p = GEPI->getPointerOperand(); + // The resultant variable has value that is sum of the value of the + // pointer variable + values of the index variables. 
+ MultiplierValue v = st.getValue(p); + for (unsigned i = 0; i < GEPI->getNumIndices(); i++) { + const Value* idx = GEPI->getOperand(i+1); + v = v + st.getValue(idx); + } + + if (isa(p)) { + v = getConstantExprValue(p); + } + + // Set v to address type if p stores address, since v corresponds to the + // offset increment in address stored in pointer p. + if (st.getValue(p).isAddressType()) v.setAddressType(); + +// // Handling arrays. +// // If p is an array pointer and GEPI indexes into p via a constant, +// // register GEPI to be an element of array for p. +// if (isa(GEPI->getOperand(1))) { +// int offset = cast(GEPI->getOperand(1))->getSExtValue(); +// const auto* ty = cast(p->getType())->getElementType(); +// if (ty->isArrayTy() && offset == 0) { +// if (isa(GEPI->getOperand(2))) { +// offset = cast(GEPI->getOperand(2))->getSExtValue(); +// st.registerArrayIndex(GEPI, p, offset); +// } +// } else if (st.getArrayIndex(p)) { +// const auto* idx = st.getArrayIndex(p); +// st.registerArrayIndex(GEPI, idx->first/*array*/, idx->second/*offset*/ +// + offset); +// } +// } + st.setValue(GEPI, v); + + } else if (isa(I)) { + const StoreInst *SI = cast(I); + const Value* p = SI->getPointerOperand(); + MultiplierValue v = st.getValue(p); + + // Detect uncoalesced accesses. + size_t size = getTypeSize(SI->getValueOperand()->getType(), + SI->getModule()->getDataLayout()); + if (v.isAddressType() && (st.getNumThreads().getType() == TOP) && + ((size > 4 && (v.getType() == ONE || v.getType() == NEGONE)) || + (v.getType() == TOP))) { + UncoalescedAccesses_.insert(SI); + LLVM_DEBUG(errs() << "UNCOALESCED ACCESS FOUND in access at "); + LLVM_DEBUG(cast(SI)->getDebugLoc().print(errs())); + LLVM_DEBUG(errs() << " in \n " << *SI << "\n\n"); + } + + const Value* val = SI->getValueOperand(); +// // Handling arrays. +// // If val is an array-index, duplicate the array represented by val to p. +// // Assumption: The original array is not updated via the new index p. 
+// if (st.getArrayIndex(val)) { +// const auto* idx = st.getArrayIndex(val); +// if (idx->second/*offset*/ == 0) { +// st.duplicateArray(idx->first/*array*/, p); +// } +// } else + if (!v.isAddressType()) { + // Store value only if p is not address type and not an array. + v = st.getValue(val); + // Set it to value type explicitly. + v.setValueType(); + st.setValue(p, v); + } + + } else if(isa(I)) { + const PHINode *PHI = cast(I); + // Check if the dominator instruction for the PHI node is a conditional + // and if the conditional is threadId-independent (i.e. the branch condition + // is constant across threads). If so, set the output to the max of incoming + // values. Otherwise, the output of PHI node might be a non-linear function + // of thread ID and hence, is assigned the value (unknown). + const BasicBlock *domBlock + = DT_->getNode(const_cast(I->getParent()))->getIDom()->getBlock(); + LLVM_DEBUG(errs() << "...Dominator Instruction for PHI node:" + << "\n " << *domBlock->getTerminator() << "\n"); + + if (isa(domBlock->getTerminator())) { + const BranchInst *BI = cast(domBlock->getTerminator()); + // Dominating branch statement found! + if (BI->isConditional() && + st.getValue(BI->getCondition()).getType() == ZERO) { + // Branch is threadId-independent. + MultiplierValue v = st.getValue(PHI); + for(unsigned i = 0; i < PHI->getNumIncomingValues(); i++) { + v = v.join(st.getValue(PHI->getIncomingValue(i))); + } + st.setValue(PHI, v); + } else { + st.setValue(PHI, MultiplierValue(TOP)); + } + } else { + st.setValue(PHI, MultiplierValue(TOP)); + } + + } else if (isa(I)) { + const CmpInst* CI = cast(I); + const Value* in1 = CI->getOperand(0); + const Value* in2 = CI->getOperand(1); + const CmpInst::Predicate pred = CI->getPredicate(); + // Compute the abstract value of the predicate. 
+ MultiplierValue v; + if (pred == CmpInst::ICMP_EQ) { + v = eq(st.getValue(in1), st.getValue(in2)); + } else if (pred == CmpInst::ICMP_NE) { + v = neq(st.getValue(in1), st.getValue(in2)); + } else { + v = MultiplierValue(TOP); + } + st.setValue(CI, v); + + } else if (isa(I)) { + const BranchInst* BI = cast(I); + if (BI->isConditional()) { + const Value* cond = BI->getCondition(); + const BasicBlock* nb1 = BI->getSuccessor(0); + const BasicBlock* nb2 = BI->getSuccessor(1); + GPUState st1 = st; + GPUState st2 = st; + // Get the abstract value for branch condition. + MultiplierValue v = st.getValue(cond); + // Compute number of threads on the two branches. + st1.setNumThreads(v && st.getNumThreads()); + st2.setNumThreads((- v) && st.getNumThreads()); + // Add new items to the buffer. + AddBlockToExecute(nb1, st1); + AddBlockToExecute(nb2, st2); + } else { + const BasicBlock* nb = BI->getSuccessor(0); + AddBlockToExecute(nb, st); + } + + } else if (I->isTerminator()) { + // Add next blocks. + for (unsigned i = 0; i < I->getNumSuccessors(); i++) { + const BasicBlock *nb = I->getSuccessor(i); + AddBlockToExecute(nb, st); + } + } + return st; +} + +void UncoalescedAnalysis::BuildAnalysisInfo(GPUState st) { + UncoalescedAccesses_.clear(); + + LLVM_DEBUG(errs() << "-------------- computing uncoalesced accesses ------------------\n"); + errs() << "Function: " << F_->getName() << "\n"; + initialState_ = st; + entryBlock_ = &F_->getEntryBlock(); + Execute(); + + // Print uncoalesced accesses found by the analysis. 
+ errs() << " Uncoalesced accesses: #" << UncoalescedAccesses_.size() << "\n"; + for (auto it = UncoalescedAccesses_.begin(), ite = UncoalescedAccesses_.end(); + it != ite; ++it) { + errs() << " -- "; + (*it)->getDebugLoc().print(errs()); + errs() << "\n"; + } + errs() << "\n"; +} diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.h b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.h new file mode 100644 index 0000000000000..99d55ceacabce --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.h @@ -0,0 +1,78 @@ +#ifndef UNCOALESCED_ACCESS_ANALYSIS_H +#define UNCOALESCED_ACCESS_ANALYSIS_H + +#include "AbstractExecutionEngine.h" +#include "MultiplierValue.h" +#include "GPUState.h" + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/AbstractCallSite.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +using namespace llvm; + +// Class to compute dependences of variables on thread ID and hence, +// the uncoalesced accesses. +class UncoalescedAnalysis + : public AbstractExecutionEngine { + public: + UncoalescedAnalysis(const Function* F, const DominatorTree* DomTree) + : F_(F), DT_(DomTree), FunctionArgumentValues_(nullptr) {} + + UncoalescedAnalysis( + const Function* F, const DominatorTree* DomTree, + std::map>* FunctionArgumentValues) + : F_(F), DT_(DomTree), FunctionArgumentValues_(FunctionArgumentValues) {} + + // Getters + const Function* getFunction() const { return F_; } + const std::set& getUncoalescedAccesses() const { + return UncoalescedAccesses_; + } + + // Builds initial GPU state for the function. + GPUState BuildInitialState() const; + + // Builds analysis information for the function given the initial state. 
+ void BuildAnalysisInfo(GPUState st); + + // Implements execution of different instructions on the abstract state. + GPUState ExecuteInstruction(const Instruction* I, GPUState st); + + private: + // Returns type size in bytes for a Type ty. + size_t getTypeSize(const Type *ty, + const DataLayout &DL) const; + + // Handles special cases where pointer is a constant expr. + MultiplierValue getConstantExprValue(const Value* p); + + std::set UncoalescedAccesses_; + + // Function being analyzed for uncoalesced accesses. + const Function *F_; + + // Dominator Tree Information. + const DominatorTree* DT_; + + // Function to argument values map (initialized by merging different call + // contexts of the function). + // E.g., if there is a call to F, say x = F(x1, x2) and abstract values of + // x1 and x2 are v1 and v2, then the following mapping + // F -> (arg0 -> v1, arg1 -> v2) + // is added to the map. + std::map>* + FunctionArgumentValues_; +}; + +#endif /* UncoalescedAnalysis.h */ diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysisPass.cpp b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysisPass.cpp new file mode 100644 index 0000000000000..622646c8f58bd --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysisPass.cpp @@ -0,0 +1,27 @@ +#include "UncoalescedAnalysisPass.h" + +using namespace llvm; + +bool UncoalescedAnalysisPass::runOnFunction(Function &F) { + // MultiplierValue::testMultiplierValue(); + // GPUState::testGPUState(); + + auto &DomTree = getAnalysis().getDomTree(); + UncoalescedAnalysis UA(&F, &DomTree); + errs() << "Analysis Results: \n"; + GPUState st = UA.BuildInitialState(); + UA.BuildAnalysisInfo(st); + UncoalescedAccesses_ = UA.getUncoalescedAccesses(); + return false; +} + +char UncoalescedAnalysisPass::ID = 0; +/* +INITIALIZE_PASS_BEGIN(UncoalescedAnalysisPass, "uncoalesced-analysis", "Pass to generate uncoalesced access analysis for GPU programs", + true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(UncoalescedAnalysisPass, "uncoalesced-analysis", "Pass to generate uncoalesced access analysis for GPU programs", + true, true) +*/ +static RegisterPass +Y("uncoalesced-analysis", "Pass to detect uncoalesced accesses in gpu programs."); diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysisPass.h b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysisPass.h new file mode 100644 index 0000000000000..be79e38cae21b --- /dev/null +++ b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysisPass.h @@ -0,0 +1,62 @@ +//===- UncoalescedAnalysisPass.h - Analysis to identify uncoalesced accesses in GPU programs -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass does an intra-procedural dataflow analysis to detect potentially +// uncoalesced load/store accesses in GPU programs. It uses an abstract- +// interpretation based analysis to compute the dependence of accessed indices +// on threadID (a unique identifier for threads). If the dependence is +// non-linear or a large linear function, the access is labelled as uncoalesced. +// It assumes that all the initial function arguments are independent of +// threadID. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_UNCOALESCED_ANALYSIS_PASS_H +#define LLVM_UNCOALESCED_ANALYSIS_PASS_H + +#include "MultiplierValue.h" +#include "UncoalescedAnalysis.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include + +namespace llvm { + +struct UncoalescedAnalysisPass : public FunctionPass { + std::set UncoalescedAccesses_; + + public: + static char ID; // Pass identification, replacement for typeid + UncoalescedAnalysisPass() : FunctionPass(ID) {} + + // Analysis for Uncoalesced Accesses. + bool runOnFunction(Function &F) override; + + const std::set& getUncoalescedAccesses() const { + return UncoalescedAccesses_; + } + + // We don't modify the program, so we preserve all analyses. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesAll(); + } +}; + +} + +#endif /* UncoalescedAnalysisPass.h */ From 7056885ea4dc0370e3e89f45ad914b638d17f819 Mon Sep 17 00:00:00 2001 From: Jonathan Bailey Date: Tue, 10 May 2022 07:41:49 -0700 Subject: [PATCH 2/3] Added hooks to run interprocedural analysis with clang instead of opt. This forces us to use the old pass manager w/ clang, however. We will probably want to transition to the new pass manager in the future. 
--- .../InterprocUncoalescedAnalysisPass.cpp | 8 ++++++++ .../UncoalescedAnalysis/UncoalescedAnalysis.cpp | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.cpp b/llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.cpp index 151f9b9b915ac..28458e5e21389 100644 --- a/llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.cpp +++ b/llvm/lib/Transforms/UncoalescedAnalysis/InterprocUncoalescedAnalysisPass.cpp @@ -1,4 +1,6 @@ #include "InterprocUncoalescedAnalysisPass.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" #define DEBUG_TYPE "uncoalesced-analysis" @@ -46,3 +48,9 @@ bool InterproceduralUncoalescedAnalysisPass::runOnModule(Module &M) { char InterproceduralUncoalescedAnalysisPass::ID = 0; static RegisterPass Y("interproc-uncoalesced-analysis", "Interprocedural analysis to detect uncoalesced accesses in gpu programs."); + +// This lets us run the pass with Clang. 
+static void registerInterproceduralUncoalescedAnalysisPass(const PassManagerBuilder &builder, legacy::PassManagerBase &manager) { + manager.add(new InterproceduralUncoalescedAnalysisPass()); +} +static RegisterStandardPasses RegisterInterproceduralUncoalescedAnalysisPass(PassManagerBuilder::EP_ModuleOptimizerEarly, registerInterproceduralUncoalescedAnalysisPass); diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp index 9830913a46ace..0dbb7f9b78401 100644 --- a/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp +++ b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp @@ -402,7 +402,7 @@ void UncoalescedAnalysis::BuildAnalysisInfo(GPUState st) { UncoalescedAccesses_.clear(); LLVM_DEBUG(errs() << "-------------- computing uncoalesced accesses ------------------\n"); - errs() << "Function: " << F_->getName() << "\n"; + errs() << "Function: " << demangle(F_->getName().str()) << "\n"; initialState_ = st; entryBlock_ = &F_->getEntryBlock(); Execute(); From 67d280de61987ebfced2107dd76a99fd1407c55f Mon Sep 17 00:00:00 2001 From: Corbin Robeck <13821049+CRobeck@users.noreply.github.com> Date: Thu, 26 May 2022 16:20:43 -0400 Subject: [PATCH 3/3] Update llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp Co-authored-by: Jonathan R. 
Madsen --- .../UncoalescedAnalysis/UncoalescedAnalysis.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp index 0dbb7f9b78401..0eda4d0d77541 100644 --- a/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp +++ b/llvm/lib/Transforms/UncoalescedAnalysis/UncoalescedAnalysis.cpp @@ -137,17 +137,8 @@ GPUState UncoalescedAnalysis::ExecuteInstruction( if (!name.compare("__HIP_Coordinates<__HIP_ThreadIdx>::__X::operator unsigned int() const")) { st.setValue(CI, MultiplierValue(ONE)); } else if ( - !name.compare("__HIP_Coordinates<__HIP_ThreadIdx>::__Y::operator unsigned int() const") || - !name.compare("__HIP_Coordinates<__HIP_ThreadIdx>::__Z::operator unsigned int() const") || - !name.compare("__HIP_Coordinates<__HIP_BlockDim>::__X::operator unsigned int() const") || - !name.compare("__HIP_Coordinates<__HIP_BlockDim>::__Z::operator unsigned int() const") || - !name.compare("__HIP_Coordinates<__HIP_BlockDim>::__Y::operator unsigned int() const") || - !name.compare("__HIP_Coordinates<__HIP_BlockIdx>::__X::operator unsigned int() const") || - !name.compare("__HIP_Coordinates<__HIP_BlockIdx>::__Y::operator unsigned int() const") || - !name.compare("__HIP_Coordinates<__HIP_BlockIdx>::__Z::operator unsigned int() const") || - !name.compare("__HIP_Coordinates<__HIP_GridDim>::__X::operator unsigned int() const") || - !name.compare("__HIP_Coordinates<__HIP_GridDim>::__Y::operator unsigned int() const") || - !name.compare("__HIP_Coordinates<__HIP_GridDim>::__Z::operator unsigned int() const") + std::regex_match(name, std::regex{ "__HIP_Coordinates<__HIP_ThreadIdx>::__(Y|Z)::operator unsigned int\\(\\) const" }) || + std::regex_match(name, std::regex{ "__HIP_Coordinates<__HIP_(BlockDim|BlockIdx|GridDim)>::__(X|Y|Z)::operator unsigned int\\(\\) const" }) ) { st.setValue(CI, MultiplierValue(ZERO));