diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..0da57d5 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,66 @@ +language: c++ +sudo: false +dist: trusty +cache: + apt: true + ccache: true + directories: + - $HOME/.ccache + - $TRAVIS_BUILD_DIR/llvm-build +os: linux +env: + - UNIT_TESTS=true + LLVM_VERSION=4.0.0 + BOOST_VERSION=default + ENABLE_MEMCHECK=true + USE_CCACHE=1 + CCACHE_COMPRESS=1 + CCACHE_MAXSIZE=1000M + CCACHE_CPP2=1 + TIMEOUT='30m' + +compiler: clang + +addons: + apt: + sources: &sources + - llvm-toolchain-trusty-4.0 + - ubuntu-toolchain-r-test + - sourceline: 'ppa:ppsspp/cmake' + packages: + - clang-4.0 + - cmake + - ccache + - coreutils + +before_install: + - export CXX="ccache clang++-4.0" && export CC="ccache clang-4.0"; + - mkdir -p llvm && mkdir -p llvm-build + - wget http://releases.llvm.org/${LLVM_VERSION}/llvm-${LLVM_VERSION}.src.tar.xz -O llvm.tar.xz && tar xf llvm.tar.xz -C llvm --strip-components=1 + - wget http://releases.llvm.org/${LLVM_VERSION}/cfe-${LLVM_VERSION}.src.tar.xz -O clang.tar.xz && mkdir -p llvm/tools/clang && tar xf clang.tar.xz -C llvm/tools/clang --strip-components=1 + - ln -s `pwd` llvm/lib/Target/Epiphany + - patch -d llvm -p 1 < LLVM_Epiphany.patch && patch -d llvm -p 1 < CLANG_Epiphany.patch + - echo "max_size = 2.0G" > $HOME/.ccache/ccache.conf + +install: + - cd llvm-build && pwd + - if [ ! 
-f "CMakeCache.txt" ]; then cmake -DLLVM_TARGETS_TO_BUILD=Epiphany \ + -DLLVM_TOOL_XCODE_TOOLCHAIN_BUILD=OFF \ + -DLLVM_TOOL_LTO_BUILD=OFF \ + -DLLVM_TOOL_LLVM_GO_BUILD=OFF \ + -DLLVM_TOOL_LLVM_C_TEST_BUILD=OFF \ + -DLLVM_TOOL_LLI_BUILD=OFF \ + -DLLVM_TOOL_LLD_BUILD=OFF \ + -DLLVM_TOOL_GOLD_BUILD=OFF \ + -DLLVM_POLLY_BUILD=OFF \ + -DLLVM_INCLUDE_GO_TESTS=OFF \ + -DLLVM_INCLUDE_DOCS=OFF \ + -DLLVM_ENABLE_LTO=OFF \ + -DLLVM_TOOL_LLVM_LTO_BUILD=OFF \ + -DLLVM_TOOL_LLVM_LTO2_BUILD=OFF \ + -DLLVM_TOOL_YAML2OBJ_BUILD=OFF \ + -DLLVM_TOOL_MSBUILD_BUILD=OFF \ + -DLLVM_TOOL_LLVM_CXXDUMP_BUILD=OFF \ + -DLLVM_INCLUDE_TESTS=OFF ../llvm; fi + - timeout -s SIGTERM $TIMEOUT make -j2; + ccache -s diff --git a/.travis.yml.bak b/.travis.yml.bak new file mode 100644 index 0000000..d18c7bc --- /dev/null +++ b/.travis.yml.bak @@ -0,0 +1,67 @@ +language: c++ +sudo: false +dist: trusty +cache: + apt: true + ccache: true + directories: + - $HOME/.ccache + - $HOME/build/Epiphany/llvm + - $HOME/build/Epiphany/llvm-build +os: linux +env: + - UNIT_TESTS=true + LLVM_VERSION=4.0.0 + BOOST_VERSION=default + ENABLE_MEMCHECK=true + USE_CCACHE=1 + CCACHE_COMPRESS=1 + CCACHE_MAXSIZE=1000M + CCACHE_CPP2=1 + TIMEOUT='30m' + +compiler: clang + +addons: + apt: + sources: &sources + - llvm-toolchain-trusty-4.0 + - ubuntu-toolchain-r-test + - sourceline: 'ppa:ppsspp/cmake' + packages: + - clang-4.0 + - cmake + - ccache + - coreutils + +before_install: + - export CXX="ccache clang++-4.0" && export CC="ccache clang-4.0"; + - mkdir -p llvm && mkdir -p llvm-build + - wget http://releases.llvm.org/${LLVM_VERSION}/llvm-${LLVM_VERSION}.src.tar.xz -O llvm.tar.xz && tar xf llvm.tar.xz -C llvm --strip-components=1 + - wget http://releases.llvm.org/${LLVM_VERSION}/cfe-${LLVM_VERSION}.src.tar.xz -O clang.tar.xz && mkdir -p llvm/tools/clang && tar xf clang.tar.xz -C llvm/tools/clang --strip-components=1 + - ln -s `pwd` llvm/lib/Target/Epiphany + - patch -d llvm -p 1 < LLVM_Epiphany.patch && patch -d llvm -p 1 < 
CLANG_Epiphany.patch + - echo "max_size = 2.0G" > $HOME/.ccache/ccache.conf + +install: + - cd llvm-build + - if [ ! -f "CMakeCache.txt" ]; then cmake -DLLVM_TARGETS_TO_BUILD=Epiphany \ + -DLLVM_TOOL_XCODE_TOOLCHAIN_BUILD=OFF \ + -DLLVM_TOOL_LTO_BUILD=OFF \ + -DLLVM_TOOL_LLVM_GO_BUILD=OFF \ + -DLLVM_TOOL_LLVM_C_TEST_BUILD=OFF \ + -DLLVM_TOOL_LLI_BUILD=OFF \ + -DLLVM_TOOL_LLD_BUILD=OFF \ + -DLLVM_TOOL_GOLD_BUILD=OFF \ + -DLLVM_POLLY_BUILD=OFF \ + -DLLVM_INCLUDE_GO_TESTS=OFF \ + -DLLVM_INCLUDE_DOCS=OFF \ + -DLLVM_ENABLE_LTO=OFF \ + -DLLVM_TOOL_LLVM_LTO_BUILD=OFF \ + -DLLVM_TOOL_LLVM_LTO2_BUILD=OFF \ + -DLLVM_TOOL_YAML2OBJ_BUILD=OFF \ + -DLLVM_TOOL_MSBUILD_BUILD=OFF \ + -DLLVM_TOOL_LLVM_CXXDUMP_BUILD=OFF \ + -DLLVM_INCLUDE_TESTS=OFF ../llvm; fi + - timeout -s SIGTERM $TIMEOUT make -j2; + ccache -s diff --git a/AsmParser/EpiphanyAsmParser.cpp b/AsmParser/EpiphanyAsmParser.cpp index aee942f..9ed6aa1 100644 --- a/AsmParser/EpiphanyAsmParser.cpp +++ b/AsmParser/EpiphanyAsmParser.cpp @@ -25,7 +25,7 @@ namespace { k_Token } Kind; - public: + public: EpiphanyOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} struct Token { @@ -57,13 +57,13 @@ namespace { SMLoc StartLoc, EndLoc; - public: + public: void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(getReg())); } - void addExpr(MCInst &Inst, const MCExpr *Expr) const{ + void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediate when possible. Null MCExpr = 0. 
if (Expr == 0) Inst.addOperand(MCOperand::createImm(0)); @@ -76,7 +76,7 @@ namespace { void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCExpr *Expr = getImm(); - addExpr(Inst,Expr); + addExpr(Inst, Expr); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -85,7 +85,7 @@ namespace { Inst.addOperand(MCOperand::createReg(getMemBase())); const MCExpr *Expr = getMemOff(); - addExpr(Inst,Expr); + addExpr(Inst, Expr); } void addIdxMemOperands(MCInst &Inst, unsigned N) const { @@ -96,9 +96,13 @@ namespace { } bool isReg() const { return Kind == k_Register; } + bool isImm() const { return Kind == k_Immediate; } + bool isToken() const { return Kind == k_Token; } + bool isMem() const { return Kind == k_Memory; } + bool isIdxMem() const { return Kind == k_IdxMemory; } bool isConstantImm() const { @@ -106,26 +110,37 @@ namespace { } // Additional templates for different int sizes - template bool isConstantUImm() const { + template + bool isConstantUImm() const { return isConstantImm() && isUInt(getConstantImm() - Offset); } - template bool isSImm() const { + + template + bool isSImm() const { return isConstantImm() ? isInt(getConstantImm()) : isImm(); } - template bool isUImm() const { + + template + bool isUImm() const { return isConstantImm() ? isUInt(getConstantImm()) : isImm(); } - template bool isAnyImm() const { + + template + bool isAnyImm() const { return isConstantImm() ? 
(isInt(getConstantImm()) || - isUInt(getConstantImm())) - : isImm(); + isUInt(getConstantImm())) + : isImm(); } - template bool isConstantSImm() const { + + template + bool isConstantSImm() const { return isConstantImm() && isInt(getConstantImm() - Offset); } - template bool isConstantUImmRange() const { + + template + bool isConstantUImmRange() const { return isConstantImm() && getConstantImm() >= Bottom && - getConstantImm() <= Top; + getConstantImm() <= Top; } StringRef getToken() const { @@ -178,8 +193,8 @@ namespace { } /// Internal constructor for register kinds - static std::unique_ptr CreateReg(unsigned RegNum, SMLoc S, - SMLoc E) { + static std::unique_ptr CreateReg(unsigned RegNum, SMLoc S, + SMLoc E) { auto Op = make_unique(k_Register); Op->Reg.RegNum = RegNum; Op->StartLoc = S; @@ -196,7 +211,7 @@ namespace { } static std::unique_ptr CreateMem(unsigned Base, const MCExpr *Off, - SMLoc S, SMLoc E) { + SMLoc S, SMLoc E) { auto Op = make_unique(k_Memory); Op->Mem.Base = Base; Op->Mem.Off = Off; @@ -206,7 +221,7 @@ namespace { } static std::unique_ptr CreateIdxMem(unsigned Base, unsigned Offset, - SMLoc S, SMLoc E) { + SMLoc S, SMLoc E) { auto Op = make_unique(k_IdxMemory); Op->IdxMem.Base = Base; Op->IdxMem.Offset = Offset; @@ -216,9 +231,10 @@ namespace { } /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } + SMLoc getStartLoc() const override { return StartLoc; } + /// getEndLoc - Get the location of the last token of this operand. 
- SMLoc getEndLoc() const { return EndLoc; } + SMLoc getEndLoc() const override { return EndLoc; } void printReg(raw_ostream &OS) const { OS << "Register: " << Reg.RegNum << "\n"; @@ -233,14 +249,15 @@ namespace { } void printMem(raw_ostream &OS) const { - OS << "Memory addr: base " << Mem.Base << ", offset "; Mem.Off->dump(); + OS << "Memory addr: base " << Mem.Base << ", offset "; + Mem.Off->dump(); } void printIdxMem(raw_ostream &OS) const { OS << "Indexed memory addr: base " << IdxMem.Base << ", offset " << IdxMem.Offset << "\n"; } - void print(raw_ostream &OS) const { + void print(raw_ostream &OS) const override { if (isReg()) printReg(OS); if (isImm()) @@ -259,70 +276,70 @@ namespace { // Some ops may need expansion bool EpiphanyAsmParser::needsExpansion(MCInst &Inst) { - switch(Inst.getOpcode()) { + switch (Inst.getOpcode()) { default: return false; } } void EpiphanyAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl &Instructions){ - switch(Inst.getOpcode()) { + SmallVectorImpl &Instructions) { + switch (Inst.getOpcode()) { + default: + break; } } //@1 } //@2 { bool EpiphanyAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, - MCStreamer &Out, - uint64_t &ErrorInfo, - bool MatchingInlineAsm) { + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, - MatchingInlineAsm); + MatchingInlineAsm); switch (MatchResult) { - default: + default: break; - case Match_Success: - { - if (needsExpansion(Inst)) { - SmallVector Instructions; - expandInstruction(Inst, IDLoc, Instructions); - for(unsigned i =0; i < Instructions.size(); i++){ - Out.EmitInstruction(Instructions[i], getSTI()); - } - } else { - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, getSTI()); + case Match_Success: { + if (needsExpansion(Inst)) { + SmallVector Instructions; + expandInstruction(Inst, IDLoc, 
Instructions); + for (unsigned i = 0; i < Instructions.size(); i++) { + Out.EmitInstruction(Instructions[i], getSTI()); } - return false; + } else { + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst, getSTI()); } + return false; + } //@2 } - case Match_MissingFeature: + case Match_MissingFeature: Error(IDLoc, "instruction requires a CPU feature not currently enabled"); return true; - case Match_InvalidOperand: - { - SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { - if (ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); - - ErrorLoc = ((EpiphanyOperand &)*Operands[ErrorInfo]).getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - } - - return Error(ErrorLoc, "invalid operand for instruction"); + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((EpiphanyOperand &) *Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } - case Match_MnemonicFail: + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: return Error(IDLoc, "invalid instruction"); -} -return true; + } + return true; } // Get register by number -unsigned EpiphanyAsmParser::getReg(int RC,int RegNo) { +unsigned EpiphanyAsmParser::getReg(int RC, int RegNo) { return *(getContext().getRegisterInfo()->getRegClass(RC).begin() + RegNo); } @@ -344,14 +361,14 @@ int EpiphanyAsmParser::tryParseRegister(StringRef Mnemonic) { } else if (Tok.is(AsmToken::Integer)) // In some cases we might even get pure integer RegNum = matchRegisterByNumber(static_cast(Tok.getIntVal()), - Mnemonic.lower()); + Mnemonic.lower()); else llvm_unreachable(strcat("Can't parse register: ", Mnemonic.data())); return RegNum; } bool EpiphanyAsmParser::tryParseRegisterOperand(OperandVector &Operands, - StringRef Mnemonic){ + StringRef Mnemonic) { SMLoc S = Parser.getTok().getLoc(); int RegNo = 
-1; @@ -361,13 +378,13 @@ bool EpiphanyAsmParser::tryParseRegisterOperand(OperandVector &Operands, return true; Operands.push_back(EpiphanyOperand::CreateReg(RegNo, S, - Parser.getTok().getLoc())); + Parser.getTok().getLoc())); Parser.Lex(); // Eat register token. return false; } bool EpiphanyAsmParser::ParseOperand(OperandVector &Operands, - StringRef Mnemonic) { + StringRef Mnemonic) { DEBUG(dbgs() << "ParseOperand\n"); // Check if the current operand has a custom associated parser, if so, try to // custom parse the operand, or fallback to the general approach. @@ -387,39 +404,37 @@ bool EpiphanyAsmParser::ParseOperand(OperandVector &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::RBrac: - case AsmToken::LBrac: - { - // Just add brackets into op list and continue processing the register - SMLoc S = Parser.getTok().getLoc(); - StringRef string = Parser.getTok().getString(); - Operands.push_back(EpiphanyOperand::CreateToken(string, S)); - Parser.Lex(); // Eat the bracket + case AsmToken::LBrac: { + // Just add brackets into op list and continue processing the register + SMLoc S = Parser.getTok().getLoc(); + StringRef string = Parser.getTok().getString(); + Operands.push_back(EpiphanyOperand::CreateToken(string, S)); + Parser.Lex(); // Eat the bracket + return false; + } + case AsmToken::Identifier: { + // try if it is register + SMLoc S = Parser.getTok().getLoc(); + // parse register operand + if (!tryParseRegisterOperand(Operands, Mnemonic)) { return false; } - case AsmToken::Identifier: - { - // try if it is register - SMLoc S = Parser.getTok().getLoc(); - // parse register operand - if (!tryParseRegisterOperand(Operands, Mnemonic)) { - return false; - } - // maybe it is a symbol reference - StringRef Identifier; - if (Parser.parseIdentifier(Identifier)) - return true; + // maybe it is a symbol reference + StringRef Identifier; + if (Parser.parseIdentifier(Identifier)) + return true; - SMLoc E = 
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); + MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); - // Otherwise create a symbol ref. - const MCExpr *Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, - getContext()); + // Otherwise create a symbol ref. + const MCExpr *Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, + getContext()); - Operands.push_back(EpiphanyOperand::CreateImm(Res, S, E)); - return false; - } + Operands.push_back(EpiphanyOperand::CreateImm(Res, S, E)); + return false; + } case AsmToken::Hash: // Integers start with hash, strip it Parser.Lex(); @@ -427,30 +442,28 @@ bool EpiphanyAsmParser::ParseOperand(OperandVector &Operands, case AsmToken::Minus: case AsmToken::Plus: case AsmToken::Integer: - case AsmToken::String: - { - // quoted label names - const MCExpr *IdVal; - SMLoc S = Parser.getTok().getLoc(); - if (getParser().parseExpression(IdVal)) - return true; - SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(EpiphanyOperand::CreateImm(IdVal, S, E)); - return false; - } - case AsmToken::Percent: - { - // it is a symbol reference or constant expression - const MCExpr *IdVal; - SMLoc S = Parser.getTok().getLoc(); // start location of the operand - if (parseRelocOperand(IdVal)) - return true; + case AsmToken::String: { + // quoted label names + const MCExpr *IdVal; + SMLoc S = Parser.getTok().getLoc(); + if (getParser().parseExpression(IdVal)) + return true; + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(EpiphanyOperand::CreateImm(IdVal, S, E)); + return false; + } + case AsmToken::Percent: { + // it is a symbol reference or constant expression + const MCExpr *IdVal; + SMLoc S = Parser.getTok().getLoc(); // start location of the operand + if 
(parseRelocOperand(IdVal)) + return true; - SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(EpiphanyOperand::CreateImm(IdVal, S, E)); - return false; - } // case AsmToken::Percent + Operands.push_back(EpiphanyOperand::CreateImm(IdVal, S, E)); + return false; + } // case AsmToken::Percent } // switch(getLexer().getKind()) return true; } @@ -458,12 +471,12 @@ bool EpiphanyAsmParser::ParseOperand(OperandVector &Operands, ///@evaluateRelocExpr // This function parses expressions like %high(imm) and transforms them to reloc const MCExpr *EpiphanyAsmParser::evaluateRelocExpr(const MCExpr *Expr, - StringRef RelocStr) { + StringRef RelocStr) { EpiphanyMCExpr::EpiphanyExprKind Kind = - StringSwitch(RelocStr) - .Case("high", EpiphanyMCExpr::CEK_HIGH) - .Case("low", EpiphanyMCExpr::CEK_LOW) - .Default(EpiphanyMCExpr::CEK_None); + StringSwitch(RelocStr) + .Case("high", EpiphanyMCExpr::CEK_HIGH) + .Case("low", EpiphanyMCExpr::CEK_LOW) + .Default(EpiphanyMCExpr::CEK_None); assert(Kind != EpiphanyMCExpr::CEK_None); return EpiphanyMCExpr::create(Kind, Expr, getContext()); @@ -501,7 +514,7 @@ bool EpiphanyAsmParser::parseRelocOperand(const MCExpr *&Res) { } else break; } - if (getParser().parseParenExpression(IdVal,EndLoc)) + if (getParser().parseParenExpression(IdVal, EndLoc)) return true; while (getLexer().getKind() == AsmToken::RParen) @@ -515,19 +528,19 @@ bool EpiphanyAsmParser::parseRelocOperand(const MCExpr *&Res) { } bool EpiphanyAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) { + SMLoc &EndLoc) { StartLoc = Parser.getTok().getLoc(); RegNo = tryParseRegister(""); EndLoc = Parser.getTok().getLoc(); - return (RegNo == (unsigned)-1); + return (RegNo == (unsigned) -1); } bool EpiphanyAsmParser::parseMemOffset(const MCExpr *&Res) { SMLoc S; - switch(getLexer().getKind()) { + switch (getLexer().getKind()) { default: return 
true; case AsmToken::Integer: @@ -549,7 +562,6 @@ OperandMatchResultTy EpiphanyAsmParser::parseMemOperand( bool isIndex = true; const MCExpr *IdVal = 0; - unsigned offsetReg = 0; SMLoc S; if (Parser.getTok().isNot(AsmToken::LBrac)) { @@ -560,7 +572,7 @@ OperandMatchResultTy EpiphanyAsmParser::parseMemOperand( // first operand is the register S = Parser.getTok().getLoc(); - if (tryParseRegisterOperand(Operands,"")) { + if (tryParseRegisterOperand(Operands, "")) { Error(Parser.getTok().getLoc(), "unexpected token in mem operand, expected register"); return MatchOperand_ParseFail; } @@ -579,11 +591,11 @@ OperandMatchResultTy EpiphanyAsmParser::parseMemOperand( Error(Parser.getTok().getLoc(), "unexpected token in mem operand, expected immediate"); return MatchOperand_ParseFail; } - } else if (tryParseRegisterOperand(Operands,"")) { + } else if (tryParseRegisterOperand(Operands, "")) { // If not - it should be register Error(Parser.getTok().getLoc(), "unexpected token in mem operand, expected register or immediate"); return MatchOperand_ParseFail; - } + } if (Parser.getTok().isNot(AsmToken::RBrac)) { Error(Parser.getTok().getLoc(), "unexpected token in mem operand, expected RBrac"); return MatchOperand_ParseFail; @@ -608,7 +620,7 @@ OperandMatchResultTy EpiphanyAsmParser::parseMemOperand( Error(Parser.getTok().getLoc(), "unexpected token in mem operand, expected immediate"); return MatchOperand_ParseFail; } - } else if (tryParseRegisterOperand(Operands,"")) { + } else if (tryParseRegisterOperand(Operands, "")) { // If not - it should be register Error(Parser.getTok().getLoc(), "unexpected token in mem operand, expected register or immediate"); return MatchOperand_ParseFail; @@ -650,7 +662,7 @@ OperandMatchResultTy EpiphanyAsmParser::parseMemOperand( } bool EpiphanyAsmParser::parseMathOperation(StringRef Name, SMLoc NameLoc, - OperandVector &Operands) { + OperandVector &Operands) { // split the format size_t Start = Name.find('.'), Next = Name.rfind('.'); StringRef 
Format1 = Name.slice(Start, Next); @@ -700,7 +712,7 @@ bool EpiphanyAsmParser::parseMathOperation(StringRef Name, SMLoc NameLoc, bool EpiphanyAsmParser:: ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - OperandVector &Operands) { + OperandVector &Operands) { // Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); @@ -802,6 +814,7 @@ bool EpiphanyAsmParser::parseSetNoMacroDirective() { Parser.Lex(); // Consume the EndOfStatement return false; } + bool EpiphanyAsmParser::parseDirectiveSet() { // get next token @@ -870,5 +883,6 @@ extern "C" void LLVMInitializeEpiphanyAsmParser() { #define GET_REGISTER_MATCHER #define GET_MATCHER_IMPLEMENTATION + #include "EpiphanyGenAsmMatcher.inc" diff --git a/CLANG_Epiphany.patch b/CLANG_Epiphany.patch new file mode 100644 index 0000000..88a2ab1 --- /dev/null +++ b/CLANG_Epiphany.patch @@ -0,0 +1,538 @@ +diff -Naur -x '.*.swp' cfe-4.0.0.src/include/clang/Basic/Attr.td llvm-4.0.0.src/tools/clang/include/clang/Basic/Attr.td +--- cfe-4.0.0.src/include/clang/Basic/Attr.td 2017-01-09 06:12:14.000000000 +0200 ++++ llvm-4.0.0.src/tools/clang/include/clang/Basic/Attr.td 2017-06-08 15:34:13.648007750 +0300 +@@ -259,6 +259,7 @@ + } + def TargetARM : TargetArch<["arm", "thumb", "armeb", "thumbeb"]>; + def TargetMips : TargetArch<["mips", "mipsel"]>; ++def TargetEpiphany : TargetArch<["epiphany"]>; + def TargetMSP430 : TargetArch<["msp430"]>; + def TargetX86 : TargetArch<["x86"]>; + def TargetAnyX86 : TargetArch<["x86", "x86_64"]>; +@@ -950,6 +951,16 @@ + let Documentation = [MSABIDocs]; + } + ++def EpiphanyInterrupt : InheritableAttr, TargetSpecificAttr { ++ // NOTE: If you add any additional spellings, ARMInterrupt's, MipsInterrupt's ++ // and AnyX86Interrupt's spellings must match. 
++ let Spellings = [GNU<"interrupt">]; ++ let Args = [UnsignedArgument<"Number">]; ++ let ParseKind = "Interrupt"; ++ let HasCustomParsing = 1; ++ let Documentation = [Undocumented]; ++} ++ + def MSP430Interrupt : InheritableAttr, TargetSpecificAttr { + // NOTE: If you add any additional spellings, ARMInterrupt's, MipsInterrupt's + // and AnyX86Interrupt's spellings must match. +diff -Naur -x '.*.swp' cfe-4.0.0.src/include/clang/Basic/BuiltinsEpiphany.def llvm-4.0.0.src/tools/clang/include/clang/Basic/BuiltinsEpiphany.def +--- cfe-4.0.0.src/include/clang/Basic/BuiltinsEpiphany.def 1970-01-01 02:00:00.000000000 +0200 ++++ llvm-4.0.0.src/tools/clang/include/clang/Basic/BuiltinsEpiphany.def 2017-06-08 14:59:21.989176817 +0300 +@@ -0,0 +1,19 @@ ++// BuiltinsEpiphany.def - Epiphany builtin function database -*- C++ -*-// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++/// ++/// \file ++/// \brief This file defines the Epiphany-specific builtin function database. ++/// Users of this file must define the BUILTIN macro to make use of this ++/// information. ++/// ++//===----------------------------------------------------------------------===// ++ ++// The format of this database matches clang/Basic/Builtins.def. 
++ ++#undef BUILTIN +diff -Naur -x '.*.swp' cfe-4.0.0.src/include/clang/Basic/TargetBuiltins.h llvm-4.0.0.src/tools/clang/include/clang/Basic/TargetBuiltins.h +--- cfe-4.0.0.src/include/clang/Basic/TargetBuiltins.h 2016-10-05 01:29:49.000000000 +0300 ++++ llvm-4.0.0.src/tools/clang/include/clang/Basic/TargetBuiltins.h 2017-06-08 14:57:58.274511215 +0300 +@@ -150,6 +150,16 @@ + }; + } + ++ /// \brief Epiphany builtins ++ namespace Epiphany { ++ enum { ++ LastTIBuiltin = clang::Builtin::FirstTSBuiltin-1, ++#define BUILTIN(ID, TYPE, ATTRS) BI##ID, ++#include "clang/Basic/BuiltinsEpiphany.def" ++ LastTSBuiltin ++ }; ++ } ++ + /// \brief MIPS builtins + namespace Mips { + enum { +diff -Naur -x '.*.swp' cfe-4.0.0.src/include/clang/Driver/Options.td llvm-4.0.0.src/tools/clang/include/clang/Driver/Options.td +--- cfe-4.0.0.src/include/clang/Driver/Options.td 2017-01-12 21:26:54.000000000 +0200 ++++ llvm-4.0.0.src/tools/clang/include/clang/Driver/Options.td 2017-06-08 14:09:40.718628023 +0300 +@@ -92,6 +92,8 @@ + Group; + def m_aarch64_Features_Group : OptionGroup<"">, + Group; ++def m_epiphany_Features_Group : OptionGroup<"">, ++ Group; + def m_ppc_Features_Group : OptionGroup<"">, + Group; + def m_wasm_Features_Group : OptionGroup<"">, +diff -Naur -x '.*.swp' cfe-4.0.0.src/lib/Basic/Targets.cpp llvm-4.0.0.src/tools/clang/lib/Basic/Targets.cpp +--- cfe-4.0.0.src/lib/Basic/Targets.cpp 2017-01-19 02:10:50.000000000 +0200 ++++ llvm-4.0.0.src/tools/clang/lib/Basic/Targets.cpp 2017-06-08 16:52:14.316009993 +0300 +@@ -8435,6 +8435,115 @@ + } + }; + ++class EpiphanyTargetInfo : public TargetInfo { ++ public: ++ EpiphanyTargetInfo(const llvm::Triple &Triple, const TargetOptions &) ++ : TargetInfo(Triple) { ++ TLSSupported = false; ++ PointerWidth = 32; ++ PointerAlign = 32; ++ IntWidth = 32; ++ IntAlign = 32; ++ LongWidth = 32; ++ LongAlign = 32; ++ LongLongWidth = 64; ++ LongLongAlign = 64; ++ HalfWidth = 16; ++ HalfAlign = 16; ++ FloatWidth = 32; ++ FloatAlign = 32; ++ 
FloatFormat = &llvm::APFloat::IEEEsingle(); ++ DoubleWidth = 64; ++ DoubleAlign = 64; ++ DoubleFormat = &llvm::APFloat::IEEEdouble(); ++ SizeType = UnsignedInt; ++ PtrDiffType = SignedInt; ++ IntPtrType = SignedInt; ++ Char16Type = UnsignedShort; ++ Char32Type = UnsignedInt; ++ ++ resetDataLayout("e-p:32:32-i8:8-i16:16-i32:32-i64:64-v32:32-v64:64" ++ "-f32:32-f64:64-n32-S64"); ++ } ++ ++ bool setCPU(const std::string &Name) override { ++ return llvm::StringSwitch<bool>(Name) ++ .Case("E16", true) ++ .Default(false); ++ } ++ ++ ArrayRef<const char *> getGCCRegNames() const override { ++ static const char * const GCCRegNames[] = { ++ // GPR ++ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", ++ "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", ++ "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", ++ "r31", "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", "r40", ++ "r41", "r42", "r43", "r44", "r45", "r46", "r47", "r48", "r49", "r50", ++ "r51", "r52", "r53", "r54", "r55", "r56", "r57", "r58", "r59", "r60", ++ "r61", "r62", "r63", ++ // eCore regs ++ "CONFIG", "STATUS", "PC", "DEBUGSTATUS", "LC", "LS", "LE", "IRET", "IMASK", ++ "ILAT", "ILATST", "ILATCL", "IPEND", "CTIMER0", "CTIMER1", "FSTATUS", "DEBUGCMD", ++ // DMA ++ "DMA0CONFIG", "DMA0STRIDE", "DMA0COUNT", "DMA0SRCADDR", "DMA0DSTADDR", "DMA0AUTO0", ++ "DMA0AUTO1", "DMA0STATUS", "DMA1CONFIG", "DMA1STRIDE", "DMA1COUNT", "DMA1SRCADDR", ++ "DMA1DSTADDR", "DMA1AUTO0", "DMA1AUTO1", "DMA1STATUS", ++ // Memprotection ++ "MEMSTATUS", "MEMPROTECT", ++ // Mesh config ++ "MESHCONFIG", "COREID", "MULTICAST", "RESETCORE", "CMESHROUTE", "XMESHROUTE", "RMESHROUTE", ++ }; ++ return llvm::makeArrayRef(GCCRegNames); ++ } ++ ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override { ++ static const TargetInfo::GCCRegAlias RegAliases[] = { ++ {{"a1"}, "r0"}, {{"a2"}, "r1"}, {{"a3"}, "r2"}, {{"a4"}, "r3"}, {{"v1"}, "r4"}, ++ {{"v2"}, "r5"}, {{"v3"}, "r6"}, {{"v4"}, "r7"}, {{"v5"}, "r8"}, {{"sb"}, "r9"}, ++ 
{{"sl"}, "r10"}, {{"v8"}, "r11"}, {{"ip"}, "r12"}, {{"sp"}, "r13"}, ++ {{"lr"}, "r14"}, {{"fp"}, "r15"}, {{"zero"}, "r31"}, ++ }; ++ return llvm::makeArrayRef(RegAliases); ++ } ++ ++ bool hasInt128Type() const override { ++ return false; ++ } ++ ++ void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override { ++ DefineStd(Builder, "epiphany", Opts); ++ } ++ ++ ArrayRef getTargetBuiltins() const override { ++ return None; ++ } ++ ++ const char *getClobbers() const override { ++ return ""; ++ } ++ ++ BuiltinVaListKind getBuiltinVaListKind() const override { ++ return TargetInfo::VoidPtrBuiltinVaList; ++ } ++ ++ bool hasFeature(StringRef Feature) const override { ++ return llvm::StringSwitch(Feature) ++ .Case("epiphany", true) ++ .Default(false); ++ } ++ ++ bool validateAsmConstraint(const char *&Name, ++ TargetInfo::ConstraintInfo &Info) const override { ++ switch (*Name) { ++ default: ++ return false; ++ case 'r': // CPU registers. ++ Info.setAllowsRegister(); ++ return true; ++ } ++ } ++ ++}; + + // AVR Target + class AVRTargetInfo : public TargetInfo { +@@ -8559,6 +8668,9 @@ + case llvm::Triple::lanai: + return new LanaiTargetInfo(Triple, Opts); + ++ case llvm::Triple::epiphany: ++ return new EpiphanyTargetInfo(Triple, Opts); ++ + case llvm::Triple::aarch64: + if (Triple.isOSDarwin()) + return new DarwinAArch64TargetInfo(Triple, Opts); +diff -Naur -x '.*.swp' cfe-4.0.0.src/lib/CodeGen/TargetInfo.cpp llvm-4.0.0.src/tools/clang/lib/CodeGen/TargetInfo.cpp +--- cfe-4.0.0.src/lib/CodeGen/TargetInfo.cpp 2017-01-05 02:20:51.000000000 +0200 ++++ llvm-4.0.0.src/tools/clang/lib/CodeGen/TargetInfo.cpp 2017-06-08 13:43:19.425982716 +0300 +@@ -6946,6 +6946,123 @@ + } + + //===----------------------------------------------------------------------===// ++// Epiphany ABI Implementation ++//===----------------------------------------------------------------------===// ++ ++namespace { ++ class EpiphanyABIInfo : public ABIInfo { ++ public: ++ 
EpiphanyABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} ++ ++ private: ++ ABIArgInfo classifyReturnType(QualType RetTy) const; ++ ABIArgInfo classifyArgumentType(QualType RetTy) const; ++ ++ void computeInfo(CGFunctionInfo &FI) const override; ++ ++ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const override; ++ ++ }; ++ ++ class EpiphanyTargetCodeGenInfo : public TargetCodeGenInfo { ++ public: ++ EpiphanyTargetCodeGenInfo(CodeGenTypes &CGT) ++ :TargetCodeGenInfo(new EpiphanyABIInfo(CGT)) {} ++ ++ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { ++ return 13; ++ }; ++ }; ++} ++ ++void EpiphanyABIInfo::computeInfo(CGFunctionInfo &FI) const { ++ if (!getCXXABI().classifyReturnType(FI)) ++ FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); ++ for (auto &I : FI.arguments()) ++ I.info = classifyArgumentType(I.type); ++} ++ ++ABIArgInfo EpiphanyABIInfo::classifyArgumentType(QualType Ty) const { ++ if (!isAggregateTypeForABI(Ty)) { ++ // Treat an enum type as its underlying type. ++ if (const EnumType *EnumTy = Ty->getAs()) ++ Ty = EnumTy->getDecl()->getIntegerType(); ++ ++ return (Ty->isPromotableIntegerType() ? ++ ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); ++ } ++ ++ // Ignore empty records. ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return ABIArgInfo::getIgnore(); ++ ++ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) ++ return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); ++ ++ uint64_t Size = getContext().getTypeSize(Ty); ++ if (Size > 64) ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/true); ++ // Pass in the smallest viable integer type. 
++ else if (Size > 32) ++ return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); ++ else if (Size > 16) ++ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); ++ else if (Size > 8) ++ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); ++ else ++ return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); ++} ++ ++ABIArgInfo EpiphanyABIInfo::classifyReturnType(QualType RetTy) const { ++ if (RetTy->isVoidType()) ++ return ABIArgInfo::getIgnore(); ++ ++ // Large vector types should be returned via memory. ++ if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 64) ++ return getNaturalAlignIndirect(RetTy); ++ ++ if (!isAggregateTypeForABI(RetTy)) { ++ // Treat an enum type as its underlying type. ++ if (const EnumType *EnumTy = RetTy->getAs()) ++ RetTy = EnumTy->getDecl()->getIntegerType(); ++ ++ return (RetTy->isPromotableIntegerType() ? ++ ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); ++ } ++ ++ if (isEmptyRecord(getContext(), RetTy, true)) ++ return ABIArgInfo::getIgnore(); ++ ++ // Aggregates <= 8 bytes are returned in r0; other aggregates ++ // are returned indirectly. ++ uint64_t Size = getContext().getTypeSize(RetTy); ++ if (Size <= 64) { ++ // Return in the smallest viable integer type. ++ if (Size <= 8) ++ return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); ++ if (Size <= 16) ++ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); ++ if (Size <= 32) ++ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); ++ return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); ++ } ++ ++ return getNaturalAlignIndirect(RetTy, /*ByVal=*/true); ++} ++ ++Address EpiphanyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const { ++ // FIXME: Someone needs to audit that this handle alignment correctly. 
++ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, ++ getContext().getTypeInfoInChars(Ty), ++ CharUnits::fromQuantity(4), ++ /*AllowHigherAlign*/ true); ++} ++ ++ ++ ++//===----------------------------------------------------------------------===// + // Hexagon ABI Implementation + //===----------------------------------------------------------------------===// + +@@ -8509,6 +8626,8 @@ + return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); + case llvm::Triple::amdgcn: + return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); ++ case llvm::Triple::epiphany: ++ return SetCGInfo(new EpiphanyTargetCodeGenInfo(Types)); + case llvm::Triple::sparc: + return SetCGInfo(new SparcV8TargetCodeGenInfo(Types)); + case llvm::Triple::sparcv9: +diff -Naur -x '.*.swp' cfe-4.0.0.src/lib/Driver/Driver.cpp llvm-4.0.0.src/tools/clang/lib/Driver/Driver.cpp +--- cfe-4.0.0.src/lib/Driver/Driver.cpp 2017-01-05 07:20:27.000000000 +0200 ++++ llvm-4.0.0.src/tools/clang/lib/Driver/Driver.cpp 2017-06-08 12:16:34.483504158 +0300 +@@ -3764,6 +3764,9 @@ + case llvm::Triple::wasm64: + TC = new toolchains::WebAssembly(*this, Target, Args); + break; ++ case llvm::Triple::epiphany: ++ TC = new toolchains::EpiphanyToolChain(*this, Target, Args); ++ break; + case llvm::Triple::avr: + TC = new toolchains::AVRToolChain(*this, Target, Args); + break; +diff -Naur -x '.*.swp' cfe-4.0.0.src/lib/Driver/ToolChains.cpp llvm-4.0.0.src/tools/clang/lib/Driver/ToolChains.cpp +--- cfe-4.0.0.src/lib/Driver/ToolChains.cpp 2017-01-10 23:13:08.000000000 +0200 ++++ llvm-4.0.0.src/tools/clang/lib/Driver/ToolChains.cpp 2017-06-08 13:50:53.792158271 +0300 +@@ -5332,6 +5332,13 @@ + return Res; + } + ++/// Epiphany Toolchain ++EpiphanyToolChain::EpiphanyToolChain(const Driver &D, const llvm::Triple & Triple, const ArgList &Args) ++ : Generic_ELF(D, Triple, Args) {} ++Tool *EpiphanyToolChain::buildLinker() const { ++ return new tools::epiphany::Linker(*this); ++} ++ + /// AVR Toolchain + 
AVRToolChain::AVRToolChain(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args) +diff -Naur -x '.*.swp' cfe-4.0.0.src/lib/Driver/ToolChains.h llvm-4.0.0.src/tools/clang/lib/Driver/ToolChains.h +--- cfe-4.0.0.src/lib/Driver/ToolChains.h 2017-01-05 18:52:29.000000000 +0200 ++++ llvm-4.0.0.src/tools/clang/lib/Driver/ToolChains.h 2017-06-08 13:42:29.908373929 +0300 +@@ -1371,6 +1371,15 @@ + SanitizerMask getSupportedSanitizers() const override; + }; + ++class LLVM_LIBRARY_VISIBILITY EpiphanyToolChain : public Generic_ELF { ++ protected: ++ Tool *buildLinker() const override; ++ public: ++ EpiphanyToolChain(const Driver &D, const llvm::Triple &Triple, ++ const llvm::opt::ArgList &Args); ++ bool IsIntegratedAssemblerDefault() const override { return true; } ++}; ++ + class LLVM_LIBRARY_VISIBILITY AVRToolChain : public Generic_ELF { + protected: + Tool *buildLinker() const override; +diff -Naur -x '.*.swp' cfe-4.0.0.src/lib/Driver/Tools.cpp llvm-4.0.0.src/tools/clang/lib/Driver/Tools.cpp +--- cfe-4.0.0.src/lib/Driver/Tools.cpp 2017-02-21 21:11:22.000000000 +0200 ++++ llvm-4.0.0.src/tools/clang/lib/Driver/Tools.cpp 2017-06-08 15:11:33.944107522 +0300 +@@ -770,6 +770,7 @@ + return false; + + case llvm::Triple::hexagon: ++ case llvm::Triple::epiphany: + case llvm::Triple::ppc64le: + case llvm::Triple::systemz: + case llvm::Triple::xcore: +@@ -1865,6 +1866,13 @@ + return ""; + } + ++static std::string getEpiphanyTargetCPU(const ArgList &Args) { ++ if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { ++ return A->getValue(); ++ } ++ return "E16"; ++} ++ + static std::string getLanaiTargetCPU(const ArgList &Args) { + if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { + return A->getValue(); +@@ -2139,6 +2147,9 @@ + return "hexagon" + + toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); + ++ case llvm::Triple::epiphany: ++ return getEpiphanyTargetCPU(Args); ++ + case llvm::Triple::lanai: + return getLanaiTargetCPU(Args); + +@@ -2526,6 +2537,16 @@ + 
} + } + ++void Clang::AddEpiphanyTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ // Default to "hidden" visibility. ++ if (!Args.hasArg(options::OPT_fvisibility_EQ, ++ options::OPT_fvisibility_ms_compat)) { ++ CmdArgs.push_back("-fvisibility"); ++ CmdArgs.push_back("hidden"); ++ } ++} ++ + void Clang::AddWebAssemblyTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + // Default to "hidden" visibility. +@@ -2685,6 +2706,12 @@ + Features.push_back("+reserve-x18"); + } + ++static void getEpiphanyTargetFeatures(const ArgList &Args, ++ std::vector &Features) { ++ handleTargetFeaturesGroup(Args, Features, ++ options::OPT_m_epiphany_Features_Group); ++} ++ + static void getHexagonTargetFeatures(const ArgList &Args, + std::vector &Features) { + handleTargetFeaturesGroup(Args, Features, +@@ -2763,6 +2790,8 @@ + case llvm::Triple::hexagon: + getHexagonTargetFeatures(Args, Features); + break; ++ case llvm::Triple::epiphany: ++ getEpiphanyTargetFeatures(Args, Features); + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + getWebAssemblyTargetFeatures(Args, Features); +@@ -4783,6 +4812,10 @@ + AddHexagonTargetArgs(Args, CmdArgs); + break; + ++ case llvm::Triple::epiphany: ++ AddEpiphanyTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + AddWebAssemblyTargetArgs(Args, CmdArgs); +@@ -7718,6 +7751,23 @@ + CmdArgs, Inputs)); + } + // Hexagon tools end. 
++// ++ ++void epiphany::Linker::ConstructJob(Compilation &C, const JobAction &JA, ++ const InputInfo &Output, ++ const InputInfoList &Inputs, ++ const ArgList &Args, ++ const char *LinkingOutput) const { ++ ++ std::string Linker = getToolChain().GetProgramPath(getShortName()); ++ ArgStringList CmdArgs; ++ AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); ++ CmdArgs.push_back("-o"); ++ CmdArgs.push_back(Output.getFilename()); ++ C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Linker), ++ CmdArgs, Inputs)); ++} ++// Epiphany tools end. + + void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, +diff -Naur -x '.*.swp' cfe-4.0.0.src/lib/Driver/Tools.h llvm-4.0.0.src/tools/clang/lib/Driver/Tools.h +--- cfe-4.0.0.src/lib/Driver/Tools.h 2017-01-05 07:20:27.000000000 +0200 ++++ llvm-4.0.0.src/tools/clang/lib/Driver/Tools.h 2017-06-08 15:02:53.107846950 +0300 +@@ -74,6 +74,8 @@ + llvm::opt::ArgStringList &CmdArgs) const; + void AddR600TargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; ++ void AddEpiphanyTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddSparcTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddSystemZTargetArgs(const llvm::opt::ArgList &Args, +@@ -264,6 +266,19 @@ + }; + } // end namespace hexagon. + ++namespace epiphany { ++class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool { ++public: ++ Linker(const ToolChain &TC) : GnuTool("epiphany::Linker", "e-ld", TC) {} ++ bool isLinkJob() const override { return true; } ++ bool hasIntegratedCPP() const override { return false; } ++ void ConstructJob(Compilation &C, const JobAction &JA, ++ const InputInfo &Output, const InputInfoList &Inputs, ++ const llvm::opt::ArgList &TCArgs, ++ const char *LinkingOutput) const override; ++}; ++} // end namespace epiphany. 
++ + namespace amdgpu { + + class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool { diff --git a/CMakeLists.txt b/CMakeLists.txt index 5cd5e3c..5ba43c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,20 +14,22 @@ tablegen(LLVM EpiphanyGenAsmWriter.inc -gen-asm-writer) add_public_tablegen_target(EpiphanyCommonTableGen) add_llvm_target(EpiphanyCodeGen - EpiphanyAsmPrinter.cpp - EpiphanyFpuConfigPass.cpp - EpiphanyFrameLowering.cpp - EpiphanyISelLowering.cpp - EpiphanyISelDAGToDAG.cpp - EpiphanyInstrInfo.cpp - EpiphanyLoadStoreOptimizer.cpp - EpiphanyMachineFunction.cpp - EpiphanyMCInstLower.cpp - EpiphanyRegisterInfo.cpp - EpiphanySubtarget.cpp - EpiphanyTargetMachine.cpp - EpiphanyTargetObjectFile.cpp - ) + EpiphanyAsmPrinter.cpp + EpiphanyFpuConfigPass.cpp + EpiphanyFrameLowering.cpp + EpiphanyISelLowering.cpp + EpiphanyISelDAGToDAG.cpp + EpiphanyInstrInfo.cpp + EpiphanyLoadStoreOptimizer.cpp + EpiphanyVregLoadStoreOptimizer.cpp + EpiphanyMachineFunction.cpp + EpiphanyMCInstLower.cpp + EpiphanyRegisterInfo.cpp + EpiphanySubtarget.cpp + EpiphanyTargetMachine.cpp + EpiphanyTargetObjectFile.cpp + EpiphanyTargetTransformInfo.cpp + ) add_subdirectory(MCTargetDesc) add_subdirectory(TargetInfo) diff --git a/Epiphany.h b/Epiphany.h index ff23c4a..8a8900b 100644 --- a/Epiphany.h +++ b/Epiphany.h @@ -44,6 +44,7 @@ namespace llvm { FunctionPass *createEpiphanyFpuConfigPass(); FunctionPass *createEpiphanyLoadStoreOptimizationPass(); + FunctionPass *createEpiphanyVregLoadStoreOptimizationPass(); } // end namespace llvm; diff --git a/Epiphany.td b/Epiphany.td index 70e791b..36e4754 100644 --- a/Epiphany.td +++ b/Epiphany.td @@ -23,7 +23,9 @@ include "llvm/Target/Target.td" include "EpiphanySchedule.td" -def : Processor<"E16", EpiphanyGenericItineraries, []>; +/*def : Processor<"E16", EpiphanyGenericItineraries, []>;*/ + +def CPU_E16: ProcessorModel<"E16", EpiphanyModel, []>; //===----------------------------------------------------------------------===// // Register File 
Description diff --git a/EpiphanyAsmPrinter.cpp b/EpiphanyAsmPrinter.cpp index 369277d..66814ee 100644 --- a/EpiphanyAsmPrinter.cpp +++ b/EpiphanyAsmPrinter.cpp @@ -269,6 +269,7 @@ void EpiphanyAsmPrinter::printOperand(const MachineInstr *MI, int opNum, case EpiphanyII::MO_PCREL32: O << "%pcrel("; break; + default:break; } switch (MO.getType()) { diff --git a/EpiphanyFpuConfigPass.cpp b/EpiphanyFpuConfigPass.cpp index 539f328..bc3be1a 100644 --- a/EpiphanyFpuConfigPass.cpp +++ b/EpiphanyFpuConfigPass.cpp @@ -23,8 +23,8 @@ using namespace llvm; char EpiphanyFpuConfigPass::ID = 0; -INITIALIZE_PASS_BEGIN(EpiphanyFpuConfigPass, "epiphany_fpu_config", "Epiphany FPU/IALU2 Config", false, false); -INITIALIZE_PASS_END(EpiphanyFpuConfigPass, "epiphany_fpu_config", "Epiphany FPU/IALU2 Config", false, false); +INITIALIZE_PASS_BEGIN(EpiphanyFpuConfigPass, "epiphany_fpu_config", "Epiphany FPU/IALU2 Config", false, false) +INITIALIZE_PASS_END(EpiphanyFpuConfigPass, "epiphany_fpu_config", "Epiphany FPU/IALU2 Config", false, false) void EpiphanyFpuConfigPass::insertConfigInst(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, MachineRegisterInfo &MRI, const EpiphanySubtarget &ST, unsigned frameIdx) { @@ -58,12 +58,12 @@ bool EpiphanyFpuConfigPass::runOnMachineFunction(MachineFunction &MF) { Epiphany::IMSUBrr_r16, Epiphany::IMSUBrr_r32}; // Prepare binary flag and regs - bool hasFPU; - bool hasIALU2; + bool hasFPU = false; + bool hasIALU2 = false; // Step 1: Loop over all of the basic blocks to find the first FPU instruction - for(MachineFunction::iterator it = MF.begin(), E = MF.end(); it != E; ++it) { - MachineBasicBlock *MBB = &*it; + for (auto &it : MF) { + MachineBasicBlock *MBB = ⁢ // Loop over all instructions search for FPU instructions for(MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE; ++MBBI) { MachineInstr *MI = &*MBBI; @@ -94,7 +94,7 @@ bool EpiphanyFpuConfigPass::runOnMachineFunction(MachineFunction &MF) { MF.insert(MF.begin(), 
MBB); MBB->addSuccessor(Front); // Add function live-ins so that they'll be defined in every path - for (MachineRegisterInfo::livein_iterator LB = MRI.livein_begin(), LE = MRI.livein_end(); LB != LE; ++LB) { + for (auto LB = MRI.livein_begin(), LE = MRI.livein_end(); LB != LE; ++LB) { MBB->addLiveIn(LB->first); } // Create config regs for both cases @@ -144,8 +144,8 @@ bool EpiphanyFpuConfigPass::runOnMachineFunction(MachineFunction &MF) { // FIXME: config based on on successors and first use std::vector lastState(blockCount, PRED_START); if (hasFPU && hasIALU2) { - for(MachineFunction::iterator it = MF.begin(), E = MF.end(); it != E; ++it) { - MachineBasicBlock *MBB = &*it; + for (auto &it : MF) { + MachineBasicBlock *MBB = ⁢ int blockNumber = MBB->getNumber(); // Loop over all instructions search for FPU instructions for(MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE; ++MBBI) { @@ -171,7 +171,7 @@ bool EpiphanyFpuConfigPass::runOnMachineFunction(MachineFunction &MF) { } // Propagate current flag state to all successors, setting state to MIXED if they // already have some other state except start - for (MachineBasicBlock::succ_iterator MBBI = MBB->succ_begin(), MBBE = MBB->succ_end(); MBBI != MBBE; ++MBBI) { + for (auto MBBI = MBB->succ_begin(), MBBE = MBB->succ_end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock *successor = *MBBI; int succNumber = successor->getNumber(); if (lastState[succNumber] == PRED_START) { @@ -186,8 +186,8 @@ bool EpiphanyFpuConfigPass::runOnMachineFunction(MachineFunction &MF) { // Step 4 - resolving loops and predeccessors // Run on every block until we hit the first IALU/FPU inst and check ALL predeccessors if (hasFPU && hasIALU2) { - for(MachineFunction::iterator it = MF.begin(), E = MF.end(); it != E; ++it) { - MachineBasicBlock *MBB = &*it; + for (auto &it : MF) { + MachineBasicBlock *MBB = ⁢ int blockNumber = MBB->getNumber(); // Loop over all instructions search for FPU instructions 
for(MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE; ++MBBI) { @@ -201,7 +201,7 @@ bool EpiphanyFpuConfigPass::runOnMachineFunction(MachineFunction &MF) { bool isIALU2 = std::find(std::begin(opcodesIALU2), std::end(opcodesIALU2), MI->getOpcode()) != std::end(opcodesIALU2); if (isFPU || isIALU2) { // Check all predeccessors - for (MachineBasicBlock::pred_iterator PBBI = MBB->pred_begin(), PBBE = MBB->pred_end(); PBBI != PBBE; ++PBBI) { + for (auto PBBI = MBB->pred_begin(), PBBE = MBB->pred_end(); PBBI != PBBE; ++PBBI) { MachineBasicBlock *pred = *PBBI; int predNumber = pred->getNumber(); // Remember than now we can be, for example, in mixed state diff --git a/EpiphanyFrameLowering.cpp b/EpiphanyFrameLowering.cpp index 06575bc..faabf7e 100644 --- a/EpiphanyFrameLowering.cpp +++ b/EpiphanyFrameLowering.cpp @@ -68,22 +68,16 @@ void EpiphanyFrameLowering::emitPrologue(MachineFunction &MF, assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); - EpiphanyMachineFunctionInfo *FI = MF.getInfo(); - const EpiphanyInstrInfo &TII = - *static_cast(STI.getInstrInfo()); - const EpiphanyRegisterInfo &RegInfo = - *static_cast(STI.getRegisterInfo()); + const EpiphanyInstrInfo &TII = *STI.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - EpiphanyABIInfo ABI = STI.getABI(); unsigned SP = Epiphany::SP; unsigned LR = Epiphany::LR; unsigned FP = Epiphany::FP; unsigned STRi64_pmd = Epiphany::STRi64_pmd; unsigned ADDri_r32 = Epiphany::ADDri_r32; - const TargetRegisterClass *RC = &Epiphany::GPR32RegClass; unsigned CFIIndex; // First, compute final stack size. 
@@ -98,7 +92,6 @@ void EpiphanyFrameLowering::emitPrologue(MachineFunction &MF, if (StackSize == 0 && !MFI.adjustsStack()) return; const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - MachineLocation DstML, SrcML; // Create label for prologue MCSymbol *FrameLabel = MF.getContext().createTempSymbol(); @@ -126,7 +119,7 @@ void EpiphanyFrameLowering::emitPrologue(MachineFunction &MF, const std::vector &CSI = MFI.getCalleeSavedInfo(); // Spill all callee-saves - if (CSI.size()) { + if (!CSI.empty()) { // Find the instruction past the last instruction that saves a callee-saved // register to the stack. for (unsigned i = 0; i < CSI.size(); ++i) @@ -135,9 +128,9 @@ void EpiphanyFrameLowering::emitPrologue(MachineFunction &MF, // Iterate over list of callee-saved registers and emit .cfi_offset // directives. DEBUG(dbgs() << "\nCallee-saved regs spilled in prologue\n"); - for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { - int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()) - getOffsetOfLocalArea(); - unsigned Reg = I->getReg(); + for (auto I : CSI) { + int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()) - getOffsetOfLocalArea(); + unsigned Reg = I.getReg(); // Reg is in CPURegs. 
DEBUG(dbgs() << Reg << "\n"); CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(FrameLabel, MRI->getDwarfRegNum(Reg, true), Offset)); @@ -153,15 +146,10 @@ void EpiphanyFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo &MFI = MF.getFrameInfo(); - EpiphanyMachineFunctionInfo *FI = MF.getInfo(); - const EpiphanyInstrInfo &TII = - *static_cast(STI.getInstrInfo()); - const EpiphanyRegisterInfo &RegInfo = - *static_cast(STI.getRegisterInfo()); + const EpiphanyInstrInfo &TII = *STI.getInstrInfo(); DebugLoc dl = MBBI->getDebugLoc(); - EpiphanyABIInfo ABI = STI.getABI(); unsigned SP = Epiphany::SP; unsigned LR = Epiphany::LR; unsigned LDRi64 = Epiphany::LDRi64; @@ -179,7 +167,7 @@ void EpiphanyFrameLowering::emitEpilogue(MachineFunction &MF, // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Restore old LR and FP from SP + offset - BuildMI(MBB, MBBI, dl, TII.get(LDRi64), LR).addReg(SP).addImm(StackSize).setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, dl, TII.get(LDRi64), LR).addReg(SP).addImm(StackSize).setMIFlag(MachineInstr::FrameDestroy); } // Adjust stack. 
@@ -195,7 +183,7 @@ int EpiphanyFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); if (hasFP(MF)) { - const EpiphanyRegisterInfo *RegInfo = static_cast( + const auto *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); FrameReg = RegInfo->getFrameRegister(MF); //return MFI.getObjectOffset(FI) + 16; @@ -220,8 +208,7 @@ void EpiphanyFrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { //@determineCalleeSaves-body TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - EpiphanyMachineFunctionInfo *FI = MF.getInfo(); - const EpiphanyRegisterInfo *RegInfo = static_cast(MF.getSubtarget().getRegisterInfo()); + const auto *RegInfo = static_cast(MF.getSubtarget().getRegisterInfo()); DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; for (int Reg = SavedRegs.find_first(); Reg != -1; @@ -242,54 +229,104 @@ bool EpiphanyFrameLowering::hasReservedCallFrame(const MachineFunction &MF) cons !MFI.hasVarSizedObjects(); } -// Spill callee-saved regs to stack -bool EpiphanyFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { - MachineFunction *MF = MBB.getParent(); - const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); +/// Assign callee-saved regs to frame indexes +/// +/// \param MF Machine function +/// \param TRI Register info +/// \param CSI Callee-save regs info +/// \return True if success +bool EpiphanyFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, std::vector &CSI) const { + // TODO: Probably this method can be moved completely into reimplemented determineCalleeSaves() + // as it not only assigns but also redefines paired regs + if (CSI.empty()) + return true; // Early exit if no callee saved registers are modified! 
- // Debug output - DebugLoc DL; - if (MI != MBB.end()) { - DL = MI->getDebugLoc(); - } + MachineFrameInfo &MFI = MF.getFrameInfo(); - DEBUG(dbgs() << "\nCallee-saved regs in the current block:\n"; - for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) { - TRI->dumpReg(I->getReg()); - }); - - for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) { - // Add the callee-saved register as live-in. - // It's killed at the spill, unless the register is LR and return address - // is taken. - unsigned Reg = I->getReg(); - bool IsRAAndRetAddrIsTaken = (Reg == Epiphany::LR) && MF->getFrameInfo().isReturnAddressTaken(); - if (!IsRAAndRetAddrIsTaken) { - MBB.addLiveIn(Reg); + unsigned NumFixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSpillSlots = getCalleeSavedSpillSlots(NumFixedSpillSlots); + + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. + for (auto CS = CSI.begin(); CS != CSI.end(); ++CS) { + unsigned Reg = CS->getReg(); + if (Reg == Epiphany::LR) { + DEBUG(dbgs() << "Erasing LR from CSI, it will be handled by prologue/epilogue inserters\n"); + CSI.erase(CS--); + continue; } - // Insert the spill to the stack frame. - bool IsKill = !IsRAAndRetAddrIsTaken; + int FrameIdx; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, I->getFrameIdx(), RC, TRI); + if (TRI->hasReservedSpillSlot(MF, Reg, FrameIdx)) { + CS->setFrameIdx(FrameIdx); + continue; + } + + // Check to see if this physreg must be spilled to a particular stack slot on this target. 
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; + const TargetFrameLowering::SpillSlot *LastFixedSlot = FixedSlot + NumFixedSpillSlots; + while (FixedSlot != LastFixedSlot && FixedSlot->Reg != Reg) { + ++FixedSlot; + } + + if (FixedSlot != LastFixedSlot) { + // Spill it to the stack where we must and bail out + FrameIdx = MFI.CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + CS->setFrameIdx(FrameIdx); + continue; + } else { + // Nope, just spill it anywhere convenient. + unsigned Align = RC->getAlignment(); + unsigned StackAlign = getStackAlignment(); + + // Check if this index can be paired + unsigned sra = 0, srb = 0; + auto Next = CS; + Next++; + if (Next != CSI.end()) { + unsigned CurrentReg = CS->getReg(); + unsigned NextReg = Next->getReg(); + // Getting target class + const TargetRegisterClass *TRC = TRI->getMinimalPhysRegClass(CurrentReg) == &Epiphany::GPR32RegClass || + TRI->getMinimalPhysRegClass(CurrentReg) == &Epiphany::GPR16RegClass + ? &Epiphany::GPR64RegClass + : &Epiphany::FPR64RegClass; + // Check if we can find superreg for paired regs + sra = TRI->getMatchingSuperReg(CurrentReg, Epiphany::isub_lo, TRC); + srb = TRI->getMatchingSuperReg(NextReg, Epiphany::isub_hi, TRC); + if ((!sra || !srb) || sra != srb) { + srb = TRI->getMatchingSuperReg(CurrentReg, Epiphany::isub_hi, TRC); + sra = TRI->getMatchingSuperReg(NextReg, Epiphany::isub_lo, TRC); + } + + // Check if pair was formed + if ((sra && srb) && sra == srb) { + // Remove subregs and set superreg as Callee-saved + CSI.erase(CS--, ++Next); + CSI.emplace_back(sra); + continue; + } + } + + // If unable to pair for some reason - just assign to the next frame index + Align = std::min(Align, StackAlign); + FrameIdx = MFI.CreateStackObject(RC->getSize(), Align, true); + CS->setFrameIdx(FrameIdx); + } } return true; } -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. 
This is true if the function has variable sized allocas, -// if it needs dynamic stack realignment, if frame pointer elimination is -// disabled, or if the frame address is taken. +// hasFP - Returns true if the specified function should have a dedicated frame +// pointer register. bool EpiphanyFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); DEBUG( - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - dbgs() << "\nMax alignment = " << MFI.getMaxAlignment() << "\n"; - dbgs() << "Current alignment = " << TFI->getStackAlignment() << "\n"; if (MF.getTarget().Options.DisableFramePointerElim(MF)) { dbgs() << "\nHas FP: DisableFramePointerElim set\n"; } @@ -301,16 +338,23 @@ bool EpiphanyFrameLowering::hasFP(const MachineFunction &MF) const { } if (MFI.isFrameAddressTaken()) { dbgs() << "\nHas FP: Frame address taken\n"; - }); + };); - return (MF.getTarget().Options.DisableFramePointerElim(MF) || + return (MF.getTarget().Options.DisableFramePointerElim(MF) || TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken()); } -// Eliminate pseudo ADJCALLSTACKUP/ADJCALLSTACKDOWN instructions -// See EpiphanyInstrInfo.td and EpiphanyInstrInfo.cpp +/// Set local frame max alignment to 8, used by EpiphanyLoadStoreOptimizer +void EpiphanyFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setLocalFrameMaxAlign(8); +} + +/// Eliminate pseudo ADJCALLSTACKUP/ADJCALLSTACKDOWN instructions +/// See EpiphanyInstrInfo.td and EpiphanyInstrInfo.cpp MachineBasicBlock::iterator EpiphanyFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { unsigned SP = Epiphany::SP; diff --git a/EpiphanyFrameLowering.h b/EpiphanyFrameLowering.h index bcf08e7..2fda5c6 100644 --- 
a/EpiphanyFrameLowering.h +++ b/EpiphanyFrameLowering.h @@ -24,30 +24,40 @@ namespace llvm { class EpiphanySubtarget; class EpiphanyFrameLowering : public TargetFrameLowering { - protected: - const EpiphanySubtarget &STI; + protected: + const EpiphanySubtarget &STI; - public: - explicit EpiphanyFrameLowering(const EpiphanySubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16), STI(sti) {} + public: + explicit EpiphanyFrameLowering(const EpiphanySubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0, 8), STI(sti) {} - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; - int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, const TargetRegisterInfo *TRI) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; - bool hasFP(const MachineFunction &MF) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; - bool hasReservedCallFrame(const MachineFunction &MF) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; - MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, - 
MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; + // Callee-saved regs spill-restore + bool assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, std::vector &CSI) const override; + + /// Returns true if the specified function should have a dedicated frame + /// pointer register. This is true if the function has variable sized allocas, + /// if it needs dynamic stack realignment, if frame pointer elimination is + /// disabled, or if the frame address is taken. + bool hasFP(const MachineFunction &MF) const override; + + bool hasReservedCallFrame(const MachineFunction &MF) const override; + + MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; }; diff --git a/EpiphanyISelDAGToDAG.cpp b/EpiphanyISelDAGToDAG.cpp index 9337856..acdf0f2 100644 --- a/EpiphanyISelDAGToDAG.cpp +++ b/EpiphanyISelDAGToDAG.cpp @@ -131,8 +131,6 @@ bool EpiphanyDAGToDAGISel::trySelect(SDNode *Node) { // Instruction Selection not handled by the auto-generated // tablegen selection should be handled here. 
/// - EVT NodeTy = Node->getValueType(0); - unsigned MultOpc; switch(Opcode) { default: break; diff --git a/EpiphanyISelLowering.cpp b/EpiphanyISelLowering.cpp index 6c9c7f0..d7d7d96 100644 --- a/EpiphanyISelLowering.cpp +++ b/EpiphanyISelLowering.cpp @@ -73,13 +73,19 @@ EpiphanyTargetLowering::EpiphanyTargetLowering(const EpiphanyTargetMachine &TM, : TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) { // Set up the register classes - addRegisterClass(MVT::i32, &Epiphany::GPR16RegClass); - addRegisterClass(MVT::i32, &Epiphany::GPR32RegClass); - addRegisterClass(MVT::f32, &Epiphany::FPR32RegClass); - addRegisterClass(MVT::i64, &Epiphany::GPR64RegClass); - addRegisterClass(MVT::f64, &Epiphany::FPR64RegClass); - - //- Set .align 2 + addRegisterClass(MVT::i32, &Epiphany::GPR16RegClass); + addRegisterClass(MVT::i32, &Epiphany::GPR32RegClass); +// addRegisterClass(MVT::v2i16, &Epiphany::GPR32RegClass); +// addRegisterClass(MVT::v4i8, &Epiphany::GPR32RegClass); + addRegisterClass(MVT::f32, &Epiphany::FPR32RegClass); + addRegisterClass(MVT::i64, &Epiphany::GPR64RegClass); +// addRegisterClass(MVT::v2i32, &Epiphany::GPR64RegClass); + addRegisterClass(MVT::f64, &Epiphany::FPR64RegClass); + + // Max atomic instruction size is 64 for load/store instruction + setMaxAtomicSizeInBitsSupported(64); + + //- Set function alignment to 2 bytes // It will emit .align 2 later setMinFunctionAlignment(STI.stackAlignment()); @@ -106,6 +112,19 @@ EpiphanyTargetLowering::EpiphanyTargetLowering(const EpiphanyTargetMachine &TM, setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + // Legalize some vector stores and loads +// for (MVT VT : MVT::vector_valuetypes()) { +// ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); +// } + +// ValueTypeActions.setTypeAction(MVT::v4i8, TypeScalarizeVector); +// ValueTypeActions.setTypeAction(MVT::v2i16, TypeLegal); +// setOperationAction(ISD::LOAD, MVT::v2i16, Legal); +// 
setOperationAction(ISD::STORE, MVT::v2i16, Legal); +// ValueTypeActions.setTypeAction(MVT::v2i32, TypeLegal); +// setOperationAction(ISD::LOAD, MVT::v2i32, Legal); +// setOperationAction(ISD::STORE, MVT::v2i32, Legal); + for (MVT VT : MVT::fp_valuetypes()) { setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); @@ -174,7 +193,10 @@ EpiphanyTargetLowering::EpiphanyTargetLowering(const EpiphanyTargetMachine &TM, setOperationAction(ISD::SETCC, Ty, Custom); setOperationAction(ISD::SELECT, Ty, Custom); } + setOperationAction(ISD::ADDE, MVT::i32, Custom); + setOperationAction(ISD::SUBE, MVT::i32, Custom); setOperationAction(ISD::BRCOND, MVT::i32, Custom); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); @@ -185,7 +207,12 @@ EpiphanyTargetLowering::EpiphanyTargetLowering(const EpiphanyTargetMachine &TM, setOperationAction(ISD::SUB, MVT::i64, Custom); setOperationAction(ISD::SUBC, MVT::i64, Custom); - // Just expand all conversions, as they're getting on the nerves +// setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom); +// setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); +// setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom); +// setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom); + + // Just expand all custom versions, as they're getting on the nerves for (MVT VT : MVT::all_valuetypes()) { setOperationAction(ISD::FP_TO_UINT, VT, Custom); setOperationAction(ISD::FP_TO_SINT, VT, Custom); @@ -258,19 +285,31 @@ SDValue EpiphanyTargetLowering::LowerOperation(SDValue Op, case ISD::SUBC: return LowerSub64(Op, DAG); break; + case ISD::ADDE: + return LowerAdde(Op, DAG); + break; + case ISD::SUBE: + return LowerSube(Op, DAG); + break; + case ISD::BUILD_VECTOR: + return LowerBuildVector(Op, DAG); + break; + case ISD::EXTRACT_VECTOR_ELT: + 
return LowerExtractVectorElt(Op, DAG); + break; } return SDValue(); } -static SDValue createGPR64(SelectionDAG &DAG, SDValue High, SDValue Low) { +static SDValue createGPR64(SelectionDAG &DAG, SDValue Low, SDValue High, MVT VT = MVT::i64) { SDLoc DL(High.getNode()); SDValue RegClass = DAG.getTargetConstant(Epiphany::GPR64RegClassID, DL, MVT::i32); SDValue SubRegHi = DAG.getTargetConstant(Epiphany::isub_hi, DL, MVT::i32); SDValue SubRegLo = DAG.getTargetConstant(Epiphany::isub_lo, DL, MVT::i32); const SDValue Ops[] = { RegClass, High, SubRegHi, Low, SubRegLo }; - return SDValue(DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::i64, Ops), 0); + return SDValue(DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops), 0); } //===----------------------------------------------------------------------===// @@ -299,6 +338,64 @@ SDValue EpiphanyTargetLowering::LowerFastDiv(SDValue Op, SelectionDAG &DAG) cons return DAG.getNode(ISD::FMUL, DL, MVT::f32, Divisor.first, LHS, Divisor.second); } +SDValue EpiphanyTargetLowering::LowerSube(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + + // Get operands + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Flag = Op.getOperand(2); + + // Required constants + SDValue CarryZero = DAG.getConstant(0, DL, MVT::i32); + SDValue CarryOne = DAG.getConstant(1, DL, MVT::i32); + SDValue MaxRegValue = DAG.getConstant(0x7FFFFFFF, DL, MVT::i32); + SDValue Condition = DAG.getConstant(::EpiphanyCC::COND_GTEU, DL, MVT::i32); + SDValue STATUS = DAG.getRegister(Epiphany::STATUS, MVT::i32); + + // Required VTs + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Glue); + + // Instructions + SDValue Carry = DAG.getNode(Epiphany::MOVCC, DL, VTs, CarryOne, CarryZero, Condition, STATUS, Flag); + SDValue Result = DAG.getNode(ISD::SUBC, DL, VTs, LHS, RHS, Carry.getValue(1)); + SDValue ResultCarry = DAG.getNode(Epiphany::MOVCC, DL, VTs, CarryOne, CarryZero, Condition, Result.getValue(1)); + SDValue AddedCarry = 
DAG.getNode(ISD::SUBC, DL, VTs, Result.getValue(0), Carry, ResultCarry.getValue(1)); + SDValue LastCarry = DAG.getNode(Epiphany::MOVCC, DL, VTs, CarryOne, CarryZero, Condition, AddedCarry.getValue(1)); + SDValue FinalCarry = DAG.getNode(ISD::OR, DL, MVT::i32, ResultCarry, LastCarry); + SDValue SetFlag = DAG.getNode(ISD::ADDC, DL, VTs, FinalCarry, MaxRegValue, LastCarry.getValue(1)); + return DAG.getNode(ISD::SUBC, DL, VTs, AddedCarry, CarryZero, STATUS, SetFlag.getValue(1)); +} + +SDValue EpiphanyTargetLowering::LowerAdde(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + + // Get operands + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Flag = Op.getOperand(2); + + // Required constants + SDValue CarryZero = DAG.getConstant(0, DL, MVT::i32); + SDValue CarryOne = DAG.getConstant(1, DL, MVT::i32); + SDValue MaxRegValue = DAG.getConstant(0x7FFFFFFF, DL, MVT::i32); + SDValue Condition = DAG.getConstant(::EpiphanyCC::COND_GTEU, DL, MVT::i32); + SDValue STATUS = DAG.getRegister(Epiphany::STATUS, MVT::i32); + + // Required VTs + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Glue); + + // Instructions + SDValue Carry = DAG.getNode(EpiphanyISD::MOVCC, DL, VTs, CarryOne, CarryZero, Condition, STATUS, Flag); + SDValue Result = DAG.getNode(ISD::ADDC, DL, VTs, LHS, RHS, Carry.getValue(1)); + SDValue ResultCarry = DAG.getNode(EpiphanyISD::MOVCC, DL, VTs, CarryOne, CarryZero, Condition, STATUS, Result.getValue(1)); + SDValue AddedCarry = DAG.getNode(ISD::ADDC, DL, VTs, Result.getValue(0), Carry, ResultCarry.getValue(1)); + SDValue LastCarry = DAG.getNode(EpiphanyISD::MOVCC, DL, VTs, CarryOne, CarryZero, Condition, STATUS, AddedCarry.getValue(1)); + SDValue FinalCarry = DAG.getNode(ISD::OR, DL, MVT::i32, ResultCarry, LastCarry); + SDValue SetFlag = DAG.getNode(ISD::ADDC, DL, VTs, FinalCarry, MaxRegValue, LastCarry.getValue(1)); + return DAG.getNode(ISD::ADDC, DL, VTs, AddedCarry, CarryZero, STATUS, SetFlag.getValue(1)); +} + SDValue 
EpiphanyTargetLowering::LowerAdd64(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -316,7 +413,7 @@ SDValue EpiphanyTargetLowering::LowerAdd64(SDValue Op, SelectionDAG &DAG) const SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Glue); SDValue Low = DAG.getNode(ISD::ADDC, DL, VTs, LHS_l, RHS_l); SDValue High = DAG.getNode(ISD::ADDE, DL, VTs, LHS_h, RHS_h, Low.getValue(1)); - return createGPR64(DAG, High, Low); + return createGPR64(DAG, Low, High, MVT::i64); } SDValue EpiphanyTargetLowering::LowerSub64(SDValue Op, SelectionDAG &DAG) const { @@ -336,7 +433,7 @@ SDValue EpiphanyTargetLowering::LowerSub64(SDValue Op, SelectionDAG &DAG) const SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Glue); SDValue Low = DAG.getNode(ISD::SUBC, DL, VTs, LHS_l, RHS_l); SDValue High = DAG.getNode(ISD::SUBE, DL, VTs, LHS_h, RHS_h, Low.getValue(1)); - return createGPR64(DAG, High, Low); + return createGPR64(DAG, Low, High, MVT::i64); } SDValue EpiphanyTargetLowering::LowerIntToFp(SDValue Op, SelectionDAG &DAG) const { @@ -563,7 +660,7 @@ MachineBasicBlock *EpiphanyTargetLowering::EmitInstrWithCustomInserter(MachineIn MachineBasicBlock *EpiphanyTargetLowering::emitBrCC(MachineInstr &MI, MachineBasicBlock *MBB) const { // We can have 3 cases - GT, LT and EQ (and their unsigned versions). 
- // LT is converted to GT by swapping comparison operands + // LT is converted to GTE by swapping comparison operands // EQ does not have the first comparison, we simply jump out if high subregs are not equal // LowCmpBB is needed because of the MBB elimination mechanism (CMP is not a terminator) @@ -584,7 +681,6 @@ MachineBasicBlock *EpiphanyTargetLowering::emitBrCC(MachineInstr &MI, MachineBas const BasicBlock *LLVM_BB = MBB->getBasicBlock(); DebugLoc DL = MI.getDebugLoc(); MachineFunction::iterator It = ++MBB->getIterator(); - const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Get Operands @@ -602,16 +698,16 @@ MachineBasicBlock *EpiphanyTargetLowering::emitBrCC(MachineInstr &MI, MachineBas swap = false; break; case ::EpiphanyCC::COND_LTE: - CondCode = ::EpiphanyCC::COND_GTE; + CondCode = ::EpiphanyCC::COND_GT; break; case ::EpiphanyCC::COND_LTU: - CondCode = ::EpiphanyCC::COND_GTU; + CondCode = ::EpiphanyCC::COND_GTEU; break; case ::EpiphanyCC::COND_LTEU: - CondCode = ::EpiphanyCC::COND_GTEU; + CondCode = ::EpiphanyCC::COND_GTU; break; case ::EpiphanyCC::COND_LT: - CondCode = ::EpiphanyCC::COND_GT; + CondCode = ::EpiphanyCC::COND_GTE; break; } @@ -669,7 +765,7 @@ bool EpiphanyTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) SDValue EpiphanyTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - GlobalAddressSDNode *GA = cast(Op); + auto *GA = cast(Op); if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); @@ -687,8 +783,7 @@ SDValue EpiphanyTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG SDValue EpiphanyTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - BlockAddressSDNode *BA = cast(Op); - + auto *BA = cast(Op); const BlockAddress *BV = BA->getBlockAddress(); int64_t Offset = BA->getOffset(); auto PTY = getPointerTy(DAG.getDataLayout()); @@ -702,11 +797,11 @@ SDValue 
EpiphanyTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) SDValue EpiphanyTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - GlobalAddressSDNode *GA = cast(Op); + auto *GA = cast(Op); const GlobalValue *GV = GA->getGlobal(); EVT PTY = getPointerTy(DAG.getDataLayout()); const EpiphanyRegisterInfo *TRI = Subtarget.getRegisterInfo(); - EpiphanyMachineFunctionInfo *FI = DAG.getMachineFunction().getInfo(); + auto *FI = DAG.getMachineFunction().getInfo(); // Get TLS model TLSModel::Model model = getTargetMachine().getTLSModel(GV); @@ -714,7 +809,6 @@ SDValue EpiphanyTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG & if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { // General Dynamic and Local Dynamic TLS Model. SDValue Argument = DAG.getRegister(FI->getGlobalBaseReg(), MVT::i32); - SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PTY, 0); unsigned PtrSize = PTY.getSizeInBits(); IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); @@ -759,7 +853,7 @@ SDValue EpiphanyTargetLowering::LowerExternalSymbol(SDValue Op, SDValue EpiphanyTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - ConstantPoolSDNode *CP = cast(Op); + auto *CP = cast(Op); EVT PTY = Op.getValueType(); // Get constant pool address @@ -850,7 +944,7 @@ SDValue EpiphanyTargetLowering::LowerBrCC(SDValue Op, SelectionDAG &DAG) const { // Use integer sub to set the flag, see GCC Soft-Float Library Routines SDVTList VTs = DAG.getVTList(Flag.getValueType(), MVT::i32); Flag = DAG.getNode(EpiphanyISD::CMP, DL, VTs, Flag, DAG.getConstant(0, DL, MVT::i32)); - CCode = ConvertCC(Cond, DL, Flag, swap); + CCode = ConvertCC(DAG.getCondCode(getUnsignedToSigned(Cond)), DL, Flag, swap); } // Prepare conditional move @@ -908,6 +1002,7 @@ SDValue EpiphanyTargetLowering::LowerSelectCC(SDValue Op, SelectionDAG &DAG) con // Use integer sub to set the flag, see GCC Soft-Float Library 
Routines SDVTList VTs = DAG.getVTList(Flag.getValueType(), MVT::i32); Flag = DAG.getNode(EpiphanyISD::CMP, DL, VTs, Flag, DAG.getConstant(0, DL, MVT::i32)); + Cond = DAG.getCondCode(getUnsignedToSigned(Cond)); } // Get condition code @@ -999,6 +1094,7 @@ SDValue EpiphanyTargetLowering::LowerSetCC(SDValue Op, SelectionDAG &DAG) const // Use integer sub to set the flag, see GCC Soft-Float Library Routines SDVTList VTs = DAG.getVTList(Flag.getValueType(), MVT::i32); Flag = DAG.getNode(EpiphanyISD::CMP, DL, VTs, Flag, DAG.getConstant(0, DL, MVT::i32)); + Cond = DAG.getCondCode(getUnsignedToSigned(Cond)); } // Get condition code @@ -1032,7 +1128,38 @@ SDValue EpiphanyTargetLowering::LowerFpRound(SDValue Op, SelectionDAG &DAG) cons return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /* isSigned = */ false, DL).first; } +SDValue EpiphanyTargetLowering::LowerBuildVector(SDValue Op, SelectionDAG &DAG) const { + MVT VT = Op.getSimpleValueType(); + if (VT == MVT::v2i32) { + return createGPR64(DAG, Op.getOperand(0), Op.getOperand(1), Op.getSimpleValueType()); + } else if (VT == MVT::v2i16) { + SDLoc DL(Op); + SDValue MovLow = DAG.getNode(EpiphanyISD::MOV, DL, Op.getValueType(), Op.getOperand(1)); + return DAG.getNode(EpiphanyISD::MOVT, DL, Op.getValueType(), MovLow, Op.getOperand(0)); + } + + llvm_unreachable(("Unable to build vector, type unimplemented" + Op.getValueType().getEVTString()).c_str()); +} + +SDValue EpiphanyTargetLowering::LowerExtractVectorElt(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = Op.getOperand(0).getSimpleValueType(); + ConstantSDNode *IndexNode = dyn_cast(Op.getOperand(1)); + if (VT == MVT::v2i32) { + int Index = IndexNode->getZExtValue() == 0 ? 
Epiphany::isub_lo : Epiphany::isub_hi; + return DAG.getTargetExtractSubreg(Index, DL, Op.getValueType(), Op.getOperand(0)); + } else if (VT == MVT::v2i16) { + if (IndexNode->getZExtValue() == 0) { + SDValue Shift = DAG.getConstant(16, DL, MVT::i32); + return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Op.getOperand(0), Shift); + } else { + SDValue Mask = DAG.getConstant(0xffff, DL, MVT::i32); + return DAG.getNode(ISD::AND, DL, Op.getValueType(), Op.getOperand(0), Mask); + } + } + llvm_unreachable(("Unable to build vector, type unimplemented" + Op.getValueType().getEVTString()).c_str()); +} //===----------------------------------------------------------------------===// // Inline asm parsing @@ -1063,8 +1190,6 @@ static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, std::pair EpiphanyTargetLowering:: parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { - const TargetRegisterInfo *TRI = - Subtarget.getRegisterInfo(); const TargetRegisterClass *RC; StringRef Prefix; unsigned long long Reg; @@ -1236,7 +1361,7 @@ EpiphanyTargetLowering::LowerReturn(SDValue Chain, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); - EpiphanyCC EpiphanyCCInfo(CallConv, ABI.IsE16(), + EpiphanyCC EpiphanyCCInfo(CallConv, ABI.IsE16(), CCInfo); // Analyze return values. 
@@ -1437,14 +1562,14 @@ EpiphanyTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl(Callee)) { + if (auto *G = dyn_cast(Callee)) { DEBUG(dbgs() << "\nArgument is a global value"); const GlobalValue *GV = G->getGlobal(); SDValue AddrLow = DAG.getTargetGlobalAddress(GV, DL, PTY, 0, EpiphanyII::MO_LOW); SDValue AddrHigh = DAG.getTargetGlobalAddress(GV, DL, PTY, 0, EpiphanyII::MO_HIGH); Callee = DAG.getNode(EpiphanyISD::MOV, DL, PTY, AddrLow); Callee = DAG.getNode(EpiphanyISD::MOVT, DL, PTY, Callee, AddrHigh); - } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + } else if (auto *S = dyn_cast(Callee)) { DEBUG(dbgs() << "\nArgument is an external symbol"); const char *Sym = S->getSymbol(); SDValue AddrLow = DAG.getTargetExternalSymbol(Sym, PTY, EpiphanyII::MO_LOW); @@ -1570,11 +1695,6 @@ void EpiphanyTargetLowering::EpiphanyCC::analyzeReturn(const SmallVectorImpl } } -void EpiphanyTargetLowering::EpiphanyCC::analyzeCallResult(const SmallVectorImpl &Ins, bool IsSoftFloat, - const SDNode *CallNode, const Type *RetTy) const { - analyzeReturn(Ins, IsSoftFloat, CallNode, RetTy); -} - void EpiphanyTargetLowering::EpiphanyCC::analyzeReturn(const SmallVectorImpl &Outs, bool IsSoftFloat, const Type *RetTy) const { analyzeReturn(Outs, IsSoftFloat, nullptr, RetTy); diff --git a/EpiphanyISelLowering.h b/EpiphanyISelLowering.h index 1bc31d6..b66a193 100644 --- a/EpiphanyISelLowering.h +++ b/EpiphanyISelLowering.h @@ -106,11 +106,7 @@ namespace llvm { EpiphanyCC(CallingConv::ID CallConv, bool IsE16, CCState &Info, SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv); - void analyzeCallResult(const SmallVectorImpl &Ins, - bool IsSoftFloat, const SDNode *CallNode, - const Type *RetTy) const; - - void analyzeReturn(const SmallVectorImpl &Outs, + void analyzeReturn(const SmallVectorImpl &Outs, bool IsSoftFloat, const Type *RetTy) const; const CCState &getCCInfo() const { return CCInfo; } @@ -157,6 +153,8 @@ namespace llvm { SDValue 
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBuildVector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExtractVectorElt(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFpExtend(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFpRound(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFpToInt(SDValue Op, SelectionDAG &DAG) const; @@ -169,6 +167,8 @@ namespace llvm { SDValue LowerBrCond(SDValue Op, SelectionDAG &DAG) const; SDValue LowerAdd64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSub64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerAdde(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSube(SDValue Op, SelectionDAG &DAG) const; // Custom inserters MachineBasicBlock *emitBrCC(MachineInstr &MI, MachineBasicBlock *MBB) const; diff --git a/EpiphanyInstrInfo.cpp b/EpiphanyInstrInfo.cpp index 7076bf0..616d6d1 100644 --- a/EpiphanyInstrInfo.cpp +++ b/EpiphanyInstrInfo.cpp @@ -113,8 +113,6 @@ bool EpiphanyInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock // Handle unconditional branches. if (I->getOpcode() == Epiphany::BNONE32) { - UnCondBrIter = I; - // If modification is not allowed if (!AllowModify) { TBB = I->getOperand(0).getMBB(); @@ -135,7 +133,6 @@ bool EpiphanyInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock TBB = nullptr; I->eraseFromParent(); I = MBB.end(); - UnCondBrIter = MBB.end(); DEBUG(MBB.getParent()->dump();); continue; } @@ -149,7 +146,7 @@ bool EpiphanyInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock if (I->getOpcode() != Epiphany::BCC) { continue; } - EpiphanyCC::CondCodes BranchCode = static_cast(I->getOperand(1).getImm()); + auto BranchCode = static_cast(I->getOperand(1).getImm()); // Working from the bottom, handle the first conditional branch. 
if (Cond.empty()) { @@ -169,7 +166,7 @@ bool EpiphanyInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock if (TBB != I->getOperand(0).getMBB()) return true; - EpiphanyCC::CondCodes OldBranchCode = (EpiphanyCC::CondCodes)Cond[0].getImm(); + auto OldBranchCode = (EpiphanyCC::CondCodes)Cond[0].getImm(); // If the conditions are the same, we can leave them alone. if (OldBranchCode == BranchCode) continue; @@ -247,7 +244,7 @@ unsigned EpiphanyInstrInfo::insertBranch(MachineBasicBlock &MBB, bool EpiphanyInstrInfo::reverseBranchCondition(SmallVectorImpl &Cond) const { assert(Cond.size() == 1 && "More than 1 condition"); - EpiphanyCC::CondCodes CC = static_cast(Cond[0].getImm()); + auto CC = static_cast(Cond[0].getImm()); switch(CC) { default: llvm_unreachable("Wrong branch condition code!"); @@ -344,10 +341,7 @@ bool EpiphanyInstrInfo::isSchedulingBoundary(const MachineInstr &MI, if (MI.getDesc().isTerminator() || MI.isPosition()) return true; - if (MI.isInlineAsm()) - return true; - - return false; + return MI.isInlineAsm(); } @@ -359,17 +353,19 @@ bool EpiphanyInstrInfo::isSchedulingBoundary(const MachineInstr &MI, // touch volatiles or load/stores that have a hint to avoid pair formation. bool EpiphanyInstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const { // If this is a volatile load/store, don't mess with it. - if (MI.hasOrderedMemoryRef()) + if (MI.hasOrderedMemoryRef()) { + DEBUG(dbgs() << "Volatile load/store, skipping\n"); return false; + } // Make sure this is a reg+imm (as opposed to an address reloc). - assert(MI.getOperand(1).isReg() && "Expected a reg operand."); + assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) && "Expected a reg operand."); if (!MI.getOperand(2).isImm()) return false; // Can't merge/pair if the instruction modifies the base register. // e.g., ldr r0, [r0] - unsigned BaseReg = MI.getOperand(1).getReg(); + unsigned BaseReg = MI.getOperand(1).isReg() ? 
MI.getOperand(1).getReg() : Epiphany::FP; const TargetRegisterInfo *TRI = &getRegisterInfo(); if (MI.modifiesRegister(BaseReg, TRI)) return false; @@ -470,7 +466,7 @@ void EpiphanyInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *Rd, const TargetRegisterInfo *TRI) const { DebugLoc DL; // Get instruction, for stack slots (FP/SP) we can only use 32-bit instructions - unsigned Opc; + unsigned Opc = 0; // Choose instruction if (Epiphany::GPR16RegClass.hasSubClassEq(Rd)) { Opc = Epiphany::STRi32_r32; @@ -511,7 +507,7 @@ void EpiphanyInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *Rd, const TargetRegisterInfo *TRI) const { DebugLoc DL; // Choose instruction - unsigned Opc; + unsigned Opc = 0; if (Epiphany::GPR16RegClass.hasSubClassEq(Rd)) { Opc = Epiphany::LDRi32_r32; } else if (Epiphany::GPR32RegClass.hasSubClassEq(Rd)) { @@ -595,9 +591,9 @@ bool EpiphanyInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, } // Get the memory operands - MachineSDNode *MachineLoad1 = dyn_cast(Load1); - MachineSDNode *MachineLoad2 = dyn_cast(Load2); - assert(MachineLoad1 && MachineLoad1); + auto *MachineLoad1 = dyn_cast(Load1); + auto *MachineLoad2 = dyn_cast(Load2); + assert(MachineLoad1 && MachineLoad2); MachineMemOperand *MemOp1 = extractMemOp(MachineLoad1); MachineMemOperand *MemOp2 = extractMemOp(MachineLoad2); diff --git a/EpiphanyInstrInfo.td b/EpiphanyInstrInfo.td index e5f5098..5420232 100644 --- a/EpiphanyInstrInfo.td +++ b/EpiphanyInstrInfo.td @@ -121,11 +121,18 @@ multiclass bitconvert_32 { def : Pat <(a (bitconvert (b GPR32:$src))), (a GPR32:$src)>; } +multiclass bitconvert_64 { + def : Pat <(b (bitconvert (a GPR64:$src))), + (b GPR64:$src)>; + def : Pat <(a (bitconvert (b GPR64:$src))), + (a GPR64:$src)>; +} // Bit convert vector types. 
defm : bitconvert_32; defm : bitconvert_32; defm : bitconvert_32; +defm : bitconvert_64; //===----------------------------------------------------------------------===// // Load/store instructions @@ -174,28 +181,46 @@ multiclass StorePostM { } // Load -defm LDRi8 : LoadM, LoadPreM, LoadPostM; -defm LDRi16 : LoadM, LoadPreM, LoadPostM; -defm LDRi32 : LoadM, LoadPreM, LoadPostM; +let mayLoad = 1 in { + defm LDRi8: LoadM, LoadPreM, LoadPostM; + defm LDRi16: LoadM, LoadPreM, LoadPostM; + defm LDRi32: LoadM, LoadPreM, LoadPostM; + def LDRf32: LoadDisp32<0, FPR32, load, LS_word, f32>; + def LDRi64: LoadDisp32<0, GPR64, load, LS_dword, i64>; + def LDRi64_pmd: LoadPmd32<0, GPR64, load, LS_dword, i64>; + def LDRv2i16: LoadDisp32<0, GPR32, load, LS_word, v2i16>; + def LDRv2i32: LoadDisp32<0, GPR64, load, LS_dword, v2i32>; + def LDRv4i16: LoadDisp32<0, GPR64, load, LS_dword, v4i16>; + def LDRf64: LoadDisp32<0, FPR64, load, LS_dword, f64>; +} // Store -defm STRi8 : StoreM, StorePreM, StorePostM; -defm STRi16 : StoreM, StorePreM, StorePostM; -defm STRi32 : StoreM, StorePreM, StorePostM; - -def LDRf32 : LoadDisp32<0, FPR32, load, LS_word, f32>; -def STRf32 : StoreDisp32<0, FPR32, store, LS_word, f32>; - -def LDRi64 : LoadDisp32<0, GPR64, load, LS_dword, i64>; -def LDRi64_pmd : LoadPmd32<0, GPR64, load, LS_dword, i64>; -def STRi64 : StoreDisp32<0, GPR64, store, LS_dword, i64>; -def STRi64_pmd : StorePmd32<0, GPR64, store, LS_dword, i64>; +let mayStore = 1 in { + defm STRi8: StoreM, StorePreM, StorePostM; + defm STRi16: StoreM, StorePreM, StorePostM; + defm STRi32: StoreM, StorePreM, StorePostM; + def STRf32: StoreDisp32<0, FPR32, store, LS_word, f32>; + def STRi64: StoreDisp32<0, GPR64, store, LS_dword, i64>; + def STRi64_pmd: StorePmd32<0, GPR64, store, LS_dword, i64>; + def STRv2i16: StoreDisp32<0, GPR32, store, LS_word, v2i16>; + def STRv2i32: StoreDisp32<0, GPR64, store, LS_dword, v2i32>; + def STRv4i16: StoreDisp32<0, GPR64, store, LS_dword, v4i16>; + def STRf64: 
StoreDisp32<0, FPR64, store, LS_dword, f64>; +} -def LDRv2i32 : LoadDisp32<0, GPR64, load, LS_dword, v2i32>; -def STRv2i32 : StoreDisp32<0, GPR64, store, LS_dword, v2i32>; +// atomic_load addr -> load addr +def : Pat<(i32 (atomic_load_8 (addr11 (i32 GPR32:$Rn), (i32 imm:$imm)))), (LDRi8_r32 GPR32:$Rn, imm:$imm)>; +def : Pat<(i32 (atomic_load_16 (addr11 (i32 GPR32:$Rn), (i32 imm:$imm)))), (LDRi16_r32 GPR32:$Rn, imm:$imm)>; +def : Pat<(i32 (atomic_load_32 (addr11 (i32 GPR32:$Rn), (i32 imm:$imm)))), (LDRi32_r32 GPR32:$Rn, imm:$imm)>; +def : Pat<(v2i32 (atomic_load_64 (addr11 (i32 GPR32:$Rn), (i32 imm:$imm)))), (LDRi64 GPR32:$Rn, imm:$imm)>; +def : Pat<(i64 (atomic_load_64 (addr11 (i32 GPR32:$Rn), (i32 imm:$imm)))), (LDRv2i32 GPR32:$Rn, imm:$imm)>; -def LDRf64 : LoadDisp32<0, FPR64, load, LS_dword, f64>; -def STRf64 : StoreDisp32<0, FPR64, store, LS_dword, f64>; +// atomic_store val, addr -> store val, addr +def : Pat<(atomic_store_8 (addr11 (i32 GPR32:$Rn), (i32 imm:$imm)), (i32 GPR32:$Rd)), (STRi8_r32 GPR32:$Rd, GPR32:$Rn, imm:$imm)>; +def : Pat<(atomic_store_16 (addr11 (i32 GPR32:$Rn), (i32 imm:$imm)), (i32 GPR32:$Rd)), (STRi16_r32 GPR32:$Rd, GPR32:$Rn, imm:$imm)>; +def : Pat<(atomic_store_32 (addr11 (i32 GPR32:$Rn), (i32 imm:$imm)), (i32 GPR32:$Rd)), (STRi32_r32 GPR32:$Rd, GPR32:$Rn, imm:$imm)>; +def : Pat<(atomic_store_64 (addr11 (i32 GPR32:$Rn), (i32 imm:$imm)), (v2i32 GPR64:$Rd)), (STRv2i32 GPR64:$Rd, GPR32:$Rn, imm:$imm)>; +def : Pat<(atomic_store_64 (addr11 (i32 GPR32:$Rn), (i32 imm:$imm)), (i64 GPR64:$Rd)), (STRi64 GPR64:$Rd, GPR32:$Rn, imm:$imm)>; //===----------------------------------------------------------------------===// // Arithmetic operations with registers @@ -212,10 +237,17 @@ multiclass SimpleMath opcode16, bits<7> opcode32, string instr_asm, SDNo // COPY is used as REG_SEQUENCE does not accept multioutput functions (e.g. 
those defining STATUS reg) multiclass SimpleMath64 { - def _r64 : Pat<(i64 (OpNode GPR64:$Rn, GPR64:$Rm)), - (REG_SEQUENCE GPR64, - (i32 (COPY (!cast(NAME # _r32) (LoReg GPR64:$Rn), (LoReg GPR64:$Rm)))), isub_lo, - (i32 (COPY (!cast(NAME # _r32) (HiReg GPR64:$Rn), (HiReg GPR64:$Rm)))), isub_hi)>; + def _r64 : Pat<(i64 (OpNode GPR64:$Rn, GPR64:$Rm)), + (REG_SEQUENCE GPR64, + (i32 (COPY (!cast(NAME # _r32) (LoReg GPR64:$Rn), (LoReg GPR64:$Rm)))), isub_lo, + (i32 (COPY (!cast(NAME # _r32) (HiReg GPR64:$Rn), (HiReg GPR64:$Rm)))), isub_hi)>; +} + +multiclass SimpleMath_v2i32 { + def _v2i32 : Pat<(v2i32 (OpNode (v2i32 GPR64:$Rn), (v2i32 GPR64:$Rm))), + (REG_SEQUENCE GPR64, + (i32 (COPY (!cast(NAME # _r32) (LoReg GPR64:$Rn), (LoReg GPR64:$Rm)))), isub_lo, + (i32 (COPY (!cast(NAME # _r32) (HiReg GPR64:$Rn), (HiReg GPR64:$Rm)))), isub_hi)>; } let isCommutable = 1 in { @@ -232,14 +264,15 @@ let isCompare = 1 in { defm CMPrr : SimpleMath<0b0111010, 0b0111111, "sub", CMP >; } -defm ANDrr : SimpleMath64; -defm ORRrr : SimpleMath64; -defm EORrr : SimpleMath64; -defm SUBrr : SimpleMath<0b0111010, 0b0111111, "sub", sub >; +defm ADDrr : SimpleMath_v2i32; +defm ANDrr : SimpleMath64, SimpleMath_v2i32; +defm ORRrr : SimpleMath64, SimpleMath_v2i32; +defm EORrr : SimpleMath64, SimpleMath_v2i32; +defm SUBrr : SimpleMath<0b0111010, 0b0111111, "sub", sub >, SimpleMath_v2i32; defm SUBCrr : SimpleMath<0b0111010, 0b0111111, "sub", subc>; -defm ASRrr : SimpleMath<0b1101010, 0b1101111, "asr", sra >; -defm LSRrr : SimpleMath<0b1001010, 0b1001111, "lsr", srl >; -defm LSLrr : SimpleMath<0b0101010, 0b0101111, "lsl", shl >; +defm ASRrr : SimpleMath<0b1101010, 0b1101111, "asr", sra >, SimpleMath_v2i32; +defm LSRrr : SimpleMath<0b1001010, 0b1001111, "lsr", srl >, SimpleMath_v2i32; +defm LSLrr : SimpleMath<0b0101010, 0b0101111, "lsl", shl >, SimpleMath_v2i32; // Complex math: f32 multiclass FPMath opcode16, bits<7> opcode32, string instr_asm, SDNode OpNode> { @@ -270,7 +303,7 @@ multiclass Ialu2Math_2 
opcode16, bits<7> opcode32, string instr_asm, SDN def _r16 : ComplexMath2_16rr; def _r32 : ComplexMath2_32rr; } -let Defs = [STATUS], AddedComplexity = 1 in { +let Defs = [STATUS] in { let isAdd = 1 in { defm IADDrr : Ialu2Math<0b0000111, 0b0001111, "iadd", add>; } @@ -280,6 +313,8 @@ let Defs = [STATUS], AddedComplexity = 1 in { defm IMSUBrr : Ialu2Math_2<0b1000111, 0b1001111, "imsub", sub>; } +defm IMULrr : SimpleMath_v2i32; + //===----------------------------------------------------------------------===// // Integer arithmetic operations with immediates //===----------------------------------------------------------------------===// @@ -393,7 +428,7 @@ def SDT_MOVT : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def SDT_MOVCC : SDTypeProfile<1, 4, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def MOV : SDNode<"EpiphanyISD::MOV", SDT_MOV>; def MOVT : SDNode<"EpiphanyISD::MOVT", SDT_MOVT>; -def MOVCCsd : SDNode<"EpiphanyISD::MOVCC", SDT_MOVCC, [SDNPOptInGlue]>; +def MOVCCsd : SDNode<"EpiphanyISD::MOVCC", SDT_MOVCC, [SDNPOptInGlue, SDNPOutGlue]>; def : Pat<(i32 (MOV i32immSExt16:$imm)), (MOVi32ri i32:$imm)>; def : Pat<(i32 (MOV i32immSExt32:$imm)), (MOVTi32ri (MOVi32ri (LO16 i32:$imm)), (HI16 i32:$imm))>; def : Pat<(i32 (MOV tglobaladdr:$dst)), (MOVi32ri tglobaladdr:$dst)>; @@ -407,7 +442,6 @@ def : Pat<(i32 (MOVT GPR32:$Rd, tblockaddress:$dst)), (MOVTi32ri GPR32:$Rd, tb def : Pat<(i32 (MOVT GPR32:$Rd, tconstpool:$dst)), (MOVTi32ri GPR32:$Rd, tconstpool:$dst)>; def : Pat<(i32 (MOVT GPR32:$Rd, tglobaltlsaddr:$dst)), (MOVTi32ri GPR32:$Rd, tglobaltlsaddr:$dst)>; - let Constraints = "$src = $Rd", Uses = [STATUS] in { def MOVCC : MovCond32rr<(outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$src, cc:$cc), [(set GPR32:$Rd, (MOVCCsd (i32 GPR32:$Rn), (i32 GPR32:$src), (i32 i32immSExt32:$cc), STATUS))]>; @@ -484,11 +518,11 @@ let isCall = 1, Defs = [LR], hasDelaySlot = 0, isBarrier = 0 in { // Extended Addsub i32, exploiting the fact that MOVGTEU depend only on the carry def : 
Pat<(adde GPR32:$Rn, simm11:$imm), - (ADDrr_r32 (ADDri_r32 GPR32:$Rn, simm11:$imm), (MOVCC (MOVi32ri 1), (MOVi32ri 0), COND_GT.Code))>; + (ADDCrr_r32 (ADDCri_r32 GPR32:$Rn, simm11:$imm), (MOVCC (MOVi32ri 1), (MOVi32ri 0), COND_GT.Code))>; def : Pat<(sube GPR32:$Rn, simm11:$imm), (SUBrr_r32 (SUBri_r32 GPR32:$Rn, simm11:$imm), (MOVCC (MOVi32ri 1), (MOVi32ri 0), COND_LT.Code))>; def : Pat<(adde (i32 GPR32:$Rn), (i32 GPR32:$Rm)), - (ADDrr_r32 (ADDrr_r32 GPR32:$Rn, GPR32:$Rm), (MOVCC (MOVi32ri 1), (MOVi32ri 0), COND_GT.Code))>; + (ADDCrr_r32 (ADDCrr_r32 GPR32:$Rn, GPR32:$Rm), (MOVCC (MOVi32ri 1), (MOVi32ri 0), COND_GT.Code))>; def : Pat<(sube (i32 GPR32:$Rn), (i32 GPR32:$Rm)), (SUBrr_r32 (SUBrr_r32 GPR32:$Rn, GPR32:$Rm), (MOVCC (MOVi32ri 1), (MOVi32ri 0), COND_LT.Code))>; @@ -539,3 +573,4 @@ def : Pat<(i64 (or (shl (i64 (anyext (i32 GPR32:$sub_hi))), (i32 32)), (i64 (zex def : Pat<(i64 (or (i64 (zext (i32 GPR32:$sub_lo))), (shl (i64 (anyext (i32 GPR32:$sub_hi))), (i32 32)))), (REG_SEQUENCE GPR64, $sub_hi, isub_hi, $sub_lo, isub_lo)>; } + diff --git a/EpiphanyLoadStoreOptimizer.cpp b/EpiphanyLoadStoreOptimizer.cpp index c6fc377..4687025 100644 --- a/EpiphanyLoadStoreOptimizer.cpp +++ b/EpiphanyLoadStoreOptimizer.cpp @@ -6,10 +6,26 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file contains a pass that performs load / store related peephole -// optimizations. This pass should be run after register allocation. -// +/// +/// \file +/// This file contains a pass that performs load / store related peephole +/// optimizations. This pass should be run after register allocation. 
+/// +/// Flow: +/// * Split MachineFunction (MF) into MachineBasicBlocks (MBB) +/// * For each MBB look through instructions trying to find the next pairable one (see isPairableLoadStoreInst) +/// * If found pairable instruction, check if it has any flags preventing pairing +/// * If no such flags found, try to find matching paired instruction +/// * Take a couple of next instructions, find instruction with the same opcode, and run a couple of checks +/// * Check alignment, reg base, check if reg is not modified +/// * For real regs, try to find super-reg +/// * For real regs, check order +/// * For reg-based (not frame-based) offsets check base alignment (frame SHOULD be 8-byte aligned) +/// * If all green, try to pair regs +/// * For virtual regs, create reg sequence. If frame-based - merge based on stack growth direction and move +/// frame object into fixed local stack area +/// * For virtual regs, just swap with the super-reg +/// //===----------------------------------------------------------------------===// #include "EpiphanyLoadStoreOptimizer.h" @@ -26,41 +42,76 @@ static cl::opt LdStLimit("epiphany-load-store-scan-limit", cl::init(20 char EpiphanyLoadStoreOptimizer::ID = 0; -// Check if the instruction is on the promotable list +/// \brief Returns true if this instruction should be considered for pairing +/// +/// \param MI Machine instruction to check +/// +/// \return true if this instruction should be considered for pairing static bool isPairableLoadStoreInst(MachineInstr &MI) { unsigned inst[] = { - Epiphany::STRi16_r16, - Epiphany::STRi16_r32, - Epiphany::STRi32_r16, - Epiphany::STRi32_r32, - Epiphany::LDRi32_r16, - Epiphany::LDRi32_r32 + Epiphany::STRi32_r16, + Epiphany::STRi32_r32, + Epiphany::STRf32, + Epiphany::LDRi32_r16, + Epiphany::LDRi32_r32, + Epiphany::LDRf32 }; unsigned Opc = MI.getOpcode(); return std::find(std::begin(inst), std::end(inst), Opc) != std::end(inst); } -static int getMemScale(MachineInstr &MI) { - switch (MI.getOpcode()) { 
+static unsigned int getMemScale(unsigned Opc) { + switch (Opc) { default: llvm_unreachable("Opcode has unknown scale!"); + case Epiphany::STRi8_r16: + case Epiphany::STRi8_r32: + case Epiphany::LDRi8_r16: + case Epiphany::LDRi8_r32: + return 1; case Epiphany::STRi16_r16: case Epiphany::STRi16_r32: + case Epiphany::LDRi16_r16: + case Epiphany::LDRi16_r32: return 2; case Epiphany::STRi32_r16: case Epiphany::STRi32_r32: case Epiphany::LDRi32_r16: case Epiphany::LDRi32_r32: + case Epiphany::STRf32: + case Epiphany::LDRf32: return 4; + case Epiphany::STRi64: + case Epiphany::LDRi64: + case Epiphany::STRf64: + case Epiphany::LDRf64: + return 8; } } -// Pair opcodes, e.g. STRi64_r32 for STRi32_r32 +static unsigned int getMemScale(MachineInstr &MI) { + return getMemScale(MI.getOpcode()); +} + +/// Returns correct instruction alignment. For Epiphany it is equal to memory scale +static unsigned int getAlignment(MachineInstr &MI) { + return getMemScale(MI); +} + +static unsigned int getAlignment(unsigned Opc) { + return getMemScale(Opc); +} + +/// Return paired opcode for the provided one, e.g. 
STRi64_r32 for STRi32_r32 static unsigned getMatchingPairOpcode(unsigned Opc) { - switch(Opc) { + switch (Opc) { default: llvm_unreachable("Opcode has no pairwise equivalent"); break; + case Epiphany::STRi8_r16: + return Epiphany::STRi16_r16; + case Epiphany::STRi8_r32: + return Epiphany::STRi16_r32; case Epiphany::STRi16_r16: return Epiphany::STRi32_r16; case Epiphany::STRi16_r32: @@ -68,58 +119,77 @@ static unsigned getMatchingPairOpcode(unsigned Opc) { case Epiphany::STRi32_r16: case Epiphany::STRi32_r32: return Epiphany::STRi64; + case Epiphany::LDRi8_r16: + return Epiphany::LDRi16_r16; + case Epiphany::LDRi8_r32: + return Epiphany::LDRi16_r32; + case Epiphany::LDRi16_r16: + return Epiphany::LDRi32_r16; + case Epiphany::LDRi16_r32: + return Epiphany::LDRi32_r32; case Epiphany::LDRi32_r16: case Epiphany::LDRi32_r32: return Epiphany::LDRi64; + case Epiphany::STRf32: + return Epiphany::STRf64; + case Epiphany::LDRf32: + return Epiphany::LDRf64; } } -// Convert the byte-offset used by unscaled into an "element" offset used -// by the scaled pair load/store instructions. -static bool inBoundsForPair(int Offset) { +/// Convert the byte-offset used by unscaled into an "element" offset used +/// by the scaled pair load/store instructions. 
+static bool inBoundsForPair(int64_t Offset) { // Well, in fact if the op is in bounds for any kind of store/load - it will be in bound for pairing return true; - //return Offset <= 256 && Offset >= -256; } -// Get register for the store +/// Get register for the store/load machine operand static const MachineOperand &getRegOperand(const MachineInstr &MI) { return MI.getOperand(0); } -// Get base for the store +/// Get base for the store/load machine operand static const MachineOperand &getBaseOperand(const MachineInstr &MI) { return MI.getOperand(1); } -// Get offset for the store +/// Get offset for the store/load machine operand static const MachineOperand &getOffsetOperand(const MachineInstr &MI) { return MI.getOperand(2); } -// Returns true if FirstMI and MI are candidates for merging or pairing. -// Otherwise, returns false. -static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, - LoadStoreFlags &Flags, const EpiphanyInstrInfo *TII) { +/// Returns true if we need to use offset, false if frame index should be used +static bool baseIsFrameIndex(const MachineInstr &FirstMI, const MachineInstr &SecondMI) { + return getBaseOperand(FirstMI).isFI() && getBaseOperand(SecondMI).isFI(); +} + +/// Returns true if FirstMI and MI are candidates for merging or pairing. +/// Otherwise, returns false. +static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &SecondMI, LoadStoreFlags &Flags) { // If this is volatile not a candidate. - if (MI.hasOrderedMemoryRef()) + if (SecondMI.hasOrderedMemoryRef()) return false; // We should have already checked FirstMI for pair suppression and volatility. assert(!FirstMI.hasOrderedMemoryRef() && - "FirstMI shouldn't get here if either of these checks are true."); + "FirstMI shouldn't get here if either of these checks are true."); unsigned OpcA = FirstMI.getOpcode(); - unsigned OpcB = MI.getOpcode(); + unsigned OpcB = SecondMI.getOpcode(); // Opcodes match: nothing more to check. 
- return OpcA == OpcB; + if (OpcA != OpcB) { + return false; + } + + return true; } /// trackRegDefsUses - Remember what registers the specified instruction uses /// and modifies. static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs, - BitVector &UsedRegs, const TargetRegisterInfo *TRI) { + BitVector &UsedRegs, const TargetRegisterInfo *TRI) { for (const MachineOperand &MO : MI.operands()) { if (MO.isRegMask()) ModifiedRegs.setBitsNotInMask(MO.getRegMask()); @@ -131,20 +201,223 @@ static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs, continue; if (MO.isDef()) { // WZR/XZR are not modified even when used as a destination register. - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - ModifiedRegs.set(*AI); + if (!TRI->isVirtualRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, /* includeSelf = */ true); AI.isValid(); ++AI) { + ModifiedRegs.set(*AI); + } + } else { + ModifiedRegs.set(TRI->virtReg2Index(Reg)); + } } else { assert(MO.isUse() && "Reg operand not a def and not a use?!?"); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedRegs.set(*AI); + if (!TRI->isVirtualRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, /* includeSelf = */ true); AI.isValid(); ++AI) { + UsedRegs.set(*AI); + } + } else { + UsedRegs.set(TRI->virtReg2Index(Reg)); + } + } + } +} + +/// Keeps track on which frame indexes were used between two candidates to merge +static void trackFrameIdxs(const MachineInstr &MI, BitVector &ModifiedFrameIdxs, BitVector &UsedFrameIdxs) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isFI()) { + if (MI.mayStore()) { + ModifiedFrameIdxs.set(MO.getIndex()); + } else { + UsedFrameIdxs.set(MO.getIndex()); + } + } + } +} + +/// \brief Returns true if the alignment for specified regs and their offsets is good for pairing. 
+/// Only applicable when the frame is finalized +/// +/// \param FirstMI First instruction to check +/// \param SecondMI Second instruction to check +/// +/// \return true if alignment is ok for pairing +bool EpiphanyLoadStoreOptimizer::isAlignmentCorrect(MachineInstr &FirstMI, MachineInstr &SecondMI) { + // Resolve target reg class + unsigned MainReg = getRegOperand(FirstMI).getReg(); + unsigned PairedReg = getRegOperand(SecondMI).getReg(); + int64_t MainOffset = getOffsetOperand(FirstMI).getImm() ; + int64_t PairedOffset = getOffsetOperand(SecondMI).getImm(); + + // Check that base alignment matches paired opcode alignment + int PairedAlignment = getAlignment(getMatchingPairOpcode(FirstMI.getOpcode())); + if (getBaseOperand(FirstMI).getReg() != Epiphany::FP) { + // Only applicable when we are dealing with non-FP-based offset, as frame is 8-byte aligned + MachineInstr::mmo_iterator FirstMMOI = FirstMI.memoperands_begin(); + MachineMemOperand FirstMO = **FirstMMOI; + MachineInstr::mmo_iterator SecondMMOI = SecondMI.memoperands_begin(); + MachineMemOperand SecondMO = **SecondMMOI; + if (FirstMO.getBaseAlignment() != PairedAlignment && SecondMO.getBaseAlignment() != PairedAlignment) { + DEBUG(dbgs() << "Base alignment out, skipping\n"); + return false; + } + + // Check if at least one instruction is aligned to the paired opcode alignment + if ((MainOffset % PairedAlignment != 0) && (PairedOffset % PairedAlignment) != 0) { + DEBUG(dbgs() << "Offsets alignment out, skipping\n"); + return false; + } + } + + // If regs are already defined - check alignment based on regs order + if (!TRI->isVirtualRegister(MainReg)) { + // Machine reg checks + // Offset stride -1 for FI as stack grows down + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MainReg) == &Epiphany::GPR32RegClass ? 
+ &Epiphany::GPR64RegClass : &Epiphany::FPR64RegClass; + + // Determine which offset should be higher + unsigned sra = TRI->getMatchingSuperReg(MainReg, Epiphany::isub_lo, RC); + unsigned srb = TRI->getMatchingSuperReg(PairedReg, Epiphany::isub_hi, RC); + int64_t HighOffset = PairedOffset; + int64_t LowOffset = MainOffset; + if ((!sra || !srb) || (sra != srb)) { + sra = TRI->getMatchingSuperReg(PairedReg, Epiphany::isub_lo, RC); + srb = TRI->getMatchingSuperReg(MainReg, Epiphany::isub_hi, RC); + HighOffset = MainOffset; + LowOffset = PairedOffset; + } + + // Can't form super reg + if (!(sra && srb) || (sra != srb)) { + return false; + } + + // Low reg offset should be always lower than high reg offset, and it should be aligned to the paired opcode align + if (LowOffset >= HighOffset || (LowOffset % getAlignment(getMatchingPairOpcode(FirstMI.getOpcode())) != 0)) { + return false; + } + } + return true; +} + +/// \brief Returns true if specified regs can form a super reg. +/// Only applicable for real machine registers, not vregs +/// +/// \param MainReg First reg to check for pairing +/// \param PairedReg Second reg to check for pairing +/// +/// \return true if regs can be paired +bool EpiphanyLoadStoreOptimizer::canFormSuperReg(unsigned MainReg, unsigned PairedReg) { + // Resolve target reg class + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MainReg) == &Epiphany::GPR32RegClass ? 
+ &Epiphany::GPR64RegClass : &Epiphany::FPR64RegClass; + + unsigned sra = TRI->getMatchingSuperReg(MainReg, Epiphany::isub_lo, RC); + unsigned srb = TRI->getMatchingSuperReg(PairedReg, Epiphany::isub_hi, RC); + if ((!sra || !srb) || (sra != srb)) { + sra = TRI->getMatchingSuperReg(PairedReg, Epiphany::isub_lo, RC); + srb = TRI->getMatchingSuperReg(MainReg, Epiphany::isub_hi, RC); + } + return !(!(sra && srb) || (sra != srb)); +} + +/// Checks if two load/store instructions have similar base, and their +/// offsets differ by some fixed stride +static bool isBaseAndOffsetCorrect(unsigned MainBase, unsigned PairBase, int64_t MainOffset, + int64_t PairOffset, int OffsetStride) { + return (MainBase == PairBase && + ((MainOffset == PairOffset + OffsetStride) || (MainOffset + OffsetStride == PairOffset))); +} + +/// Cleans register kill flags before merge +/// +/// Can have two cases based on \p MergeForward value: +/// If merging backward +/// \code +/// STRi32 %r0, ... +/// USE %r1 +/// STRi32 kill %r1 ; need to clear kill flag when moving STRi32 upwards +/// \endcode +/// +/// If merging forward +/// \code +/// STRi32 %r1, ... 
+/// USE kill %r1 ; need to clear kill flag when moving STRi32 downwards +/// STRi32 %r0 +/// \endcode +void EpiphanyLoadStoreOptimizer::cleanKillFlags(MachineOperand RegOp0, MachineOperand RegOp1, + MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, + bool MergeForward) { + if (!MergeForward) { + // Clear kill flags on store if moving backward + RegOp0.setIsKill(false); + RegOp1.setIsKill(false); + } else { + // Clear kill flags on store if moving forward + unsigned Reg = getRegOperand(*I).getReg(); + for (MachineInstr &MI : make_range(std::next(I), Paired)) + MI.clearRegisterKills(Reg, TRI); + } +} + +/// \brief Merge two real reg-based 32-bit load/store instructions into a single 64-bit one +/// +/// \param PairedOp Wide store/load operation opcode +/// \param OffsetImm Offset to use +/// \param RegOp0 Reg operand from the first paired store/load +/// \param RegOp1 Reg operand from the second paired store/load +/// \param I Iterator pointing at the first paired store/load +/// \param Paired Iterator pointing at the second paired store/load +/// \param Flags Store/load flags +/// +/// \return Builder result +MachineInstrBuilder EpiphanyLoadStoreOptimizer::mergeRegInsns(unsigned PairedOp, int64_t OffsetImm, + MachineOperand RegOp0, MachineOperand RegOp1, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Paired, + const LoadStoreFlags &Flags) { + + MachineInstrBuilder MIB; + bool MergeForward = Flags.getMergeForward(); + // Insert our new paired instruction after whichever of the paired + // instructions MergeForward indicates. + MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; + unsigned PairedReg = RegOp0.getReg(); + const MachineOperand &PairedBase = getBaseOperand(*Paired); + const MachineOperand &MainBase = getBaseOperand(*I); + const MachineOperand &BaseRegOp = MergeForward ? 
PairedBase : MainBase; + DebugLoc DL = I->getDebugLoc(); + + MachineBasicBlock *MBB = I->getParent(); + if (PairedOp == Epiphany::STRi64 || PairedOp == Epiphany::LDRi64) { + const TargetRegisterClass *RC = &Epiphany::GPR64RegClass; + unsigned sreg = TRI->getMatchingSuperReg(PairedReg, Epiphany::isub_hi, RC); + if (!sreg) { + sreg = TRI->getMatchingSuperReg(PairedReg, Epiphany::isub_lo, RC); } + MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(PairedOp)) + .addReg(sreg) + .addOperand(BaseRegOp) + .addImm(OffsetImm) + .setMemRefs(I->mergeMemRefsWith(*Paired)); + } else { + // Standard 32-bit reg + MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(PairedOp)) + .addOperand(RegOp0) + .addOperand(BaseRegOp) + .addImm(OffsetImm) + .setMemRefs(I->mergeMemRefsWith(*Paired)); } + + DEBUG(dbgs() << "\t"); + DEBUG(((MachineInstr *) MIB)->print(dbgs())); + return MIB; } +/// Merges two n-bit load/store instructions into a single 2*n-bit one MachineBasicBlock::iterator EpiphanyLoadStoreOptimizer::mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, - const LoadStoreFlags &Flags) { + MachineBasicBlock::iterator Paired, const LoadStoreFlags &Flags) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -155,18 +428,16 @@ EpiphanyLoadStoreOptimizer::mergePairedInsns(MachineBasicBlock::iterator I, ++NextI; unsigned Opc = I->getOpcode(); - int OffsetStride = getMemScale(*I); bool MergeForward = Flags.getMergeForward(); - // Insert our new paired instruction after whichever of the paired - // instructions MergeForward indicates. - MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; // Also based on MergeForward is from where we copy the base register operand // so we get the flags compatible with the input code. - const MachineOperand &BaseRegOp = - MergeForward ? 
getBaseOperand(*Paired) : getBaseOperand(*I); - int Offset = getOffsetOperand(*I).getImm(); - int PairedOffset = getOffsetOperand(*Paired).getImm(); + const MachineOperand &BaseRegOp = MergeForward ? getBaseOperand(*Paired) : getBaseOperand(*I); + int64_t Offset = getOffsetOperand(*I).getImm(); + int64_t PairedOffset = getOffsetOperand(*Paired).getImm(); + // Offset stride is 1 frame index or 1 instruction memory size. Sign depends on stack growth direction + int OffsetStride = getMemScale(*I); + OffsetStride = StackGrowsDown ? OffsetStride : -OffsetStride; // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI, *Rt2MI; @@ -177,61 +448,25 @@ EpiphanyLoadStoreOptimizer::mergePairedInsns(MachineBasicBlock::iterator I, RtMI = &*I; Rt2MI = &*Paired; } - int OffsetImm = getOffsetOperand(*RtMI).getImm(); + int64_t OffsetImm = getOffsetOperand(*RtMI).getImm(); // Construct the new instruction. - MachineInstrBuilder MIB; DebugLoc DL = I->getDebugLoc(); - MachineBasicBlock *MBB = I->getParent(); MachineOperand RegOp0 = getRegOperand(*RtMI); MachineOperand RegOp1 = getRegOperand(*Rt2MI); + // Kill flags may become invalid when moving stores for pairing. if (RegOp0.isUse()) { - if (!MergeForward) { - // Clear kill flags on store if moving upwards. Example: - // STRi32 %r0, ... - // USE %r1 - // STRi32 kill %r1 ; need to clear kill flag when moving STRi32 upwards - RegOp0.setIsKill(false); - RegOp1.setIsKill(false); - } else { - // Clear kill flags of the first stores register. Example: - // STRi32 %r1, ... 
- // USE kill %r1 ; need to clear kill flag when moving STRi32 downwards - // STRi32 %r0 - unsigned Reg = getRegOperand(*I).getReg(); - for (MachineInstr &MI : make_range(std::next(I), Paired)) - MI.clearRegisterKills(Reg, TRI); - } - } - unsigned PairedOp = getMatchingPairOpcode(Opc); - if (PairedOp == Epiphany::STRi64 || PairedOp == Epiphany::LDRi64) { - // Reg with subs - unsigned PairedReg = TRI->getMatchingSuperReg(RegOp0.getReg(), Epiphany::isub_lo, &Epiphany::GPR64RegClass); - if (!PairedReg) { - PairedReg = TRI->getMatchingSuperReg(RegOp1.getReg(), Epiphany::isub_lo, &Epiphany::GPR64RegClass); - } - MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(PairedOp)) - .addReg(PairedReg) - .addOperand(BaseRegOp) - .addImm(OffsetImm) - .setMemRefs(I->mergeMemRefsWith(*Paired)); - } else { - // Standard 32-bit reg - MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(PairedOp)) - .addOperand(RegOp0) - .addOperand(BaseRegOp) - .addImm(OffsetImm) - .setMemRefs(I->mergeMemRefsWith(*Paired)); + cleanKillFlags(RegOp0, RegOp1, I, Paired, MergeForward); } - (void)MIB; - - DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); + DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n\t"); DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); + DEBUG(dbgs() << "\t"); DEBUG(Paired->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); + DEBUG(dbgs() << " with instruction:\n"); + unsigned PairedOp = getMatchingPairOpcode(Opc); + unsigned PairedReg = RegOp0.getReg(); + mergeRegInsns(PairedOp, OffsetImm, RegOp0, RegOp1, I, Paired, Flags); DEBUG(dbgs() << "\n"); // Erase the old instructions. @@ -244,18 +479,19 @@ EpiphanyLoadStoreOptimizer::mergePairedInsns(MachineBasicBlock::iterator I, /// Scan the instructions looking for a load/store that can be combined with the /// current instruction into a wider equivalent or a load/store pair. 
MachineBasicBlock::iterator -EpiphanyLoadStoreOptimizer::findMatchingInst(MachineBasicBlock::iterator I, - LoadStoreFlags &Flags, unsigned Limit) { +EpiphanyLoadStoreOptimizer::findMatchingInst(MachineBasicBlock::iterator I, + LoadStoreFlags &Flags, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr &FirstMI = *I; ++MBBI; bool MayLoad = FirstMI.mayLoad(); + // Get first instruction reg data unsigned Reg = getRegOperand(FirstMI).getReg(); - unsigned BaseReg = getBaseOperand(FirstMI).getReg(); - int Offset = getOffsetOperand(FirstMI).getImm(); - int OffsetStride = getMemScale(FirstMI); + unsigned RegIdx = Reg; + unsigned BaseReg = getBaseOperand(FirstMI).isReg() ? getBaseOperand(FirstMI).getReg() : Epiphany::FP; + unsigned BaseRegIdx = BaseReg; // Track which registers have been modified and used between the first insn // (inclusive) and the second insn. @@ -267,71 +503,58 @@ EpiphanyLoadStoreOptimizer::findMatchingInst(MachineBasicBlock::iterator I, for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr &MI = *MBBI; - // Don't count transient instructions towards the search limit since there // may be different numbers of them if e.g. debug information is present. if (!MI.isTransient()) ++Count; - if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) && + if (areCandidatesToMergeOrPair(FirstMI, MI, Flags) && getOffsetOperand(MI).isImm()) { assert(MI.mayLoadOrStore() && "Expected memory operation."); + // Get second instruction reg data + unsigned MIReg = getRegOperand(MI).getReg(); + unsigned MIRegIdx = MIReg; + unsigned MIBaseReg = getBaseOperand(MI).isReg() ? getBaseOperand(MI).getReg() : Epiphany::FP; + // Get offsets + int64_t Offset = getOffsetOperand(FirstMI).getImm(); + int64_t MIOffset = getOffsetOperand(MI).getImm(); + // If we've found another instruction with the same opcode, check to see // if regs, base and offset are compatible with our starting instruction. 
// These instructions all have scaled immediate operands, so we just // check for +1/-1. Make sure to check the new instruction offset is // actually an immediate and not a symbolic reference destined for // a relocation. - unsigned MIBaseReg = getBaseOperand(MI).getReg(); - int MIOffset = getOffsetOperand(MI).getImm(); - if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || - (Offset + OffsetStride == MIOffset))) { - // First, check pairity for 64-bit pairing - unsigned Opc = MI.getOpcode(); - if (Opc == Epiphany::LDRi32_r32 || Opc == Epiphany::LDRi32_r32 || - Opc == Epiphany::STRi32_r32 || Opc == Epiphany::STRi32_r32) { - unsigned MIReg = getRegOperand(MI).getReg(); - unsigned sra = TRI->getMatchingSuperReg (Reg, Epiphany::isub_lo, &Epiphany::GPR64RegClass); - unsigned srb = TRI->getMatchingSuperReg (MIReg, Epiphany::isub_hi, &Epiphany::GPR64RegClass); - if( (!sra || !srb) || (sra != srb)) { - sra = TRI->getMatchingSuperReg (MIReg, Epiphany::isub_lo, &Epiphany::GPR64RegClass); - srb = TRI->getMatchingSuperReg (Reg, Epiphany::isub_hi, &Epiphany::GPR64RegClass); - } - // If we can't find corresponding superreg for both - out - if (!(sra && srb) || (sra != srb)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - MemInsns.push_back(&MI); - continue; - } + int OffsetStride = getMemScale(FirstMI); + if (isBaseAndOffsetCorrect(BaseReg, MIBaseReg, Offset, MIOffset, OffsetStride)) { + DEBUG(dbgs() << "Checking instruction "; MI.dump()); + + // First, check register parity + if (!canFormSuperReg(Reg, MIReg)) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + MemInsns.push_back(&MI); + DEBUG(dbgs() << "Can't find matching superreg\n"); + continue; } // Check if the alignment is correct - if (Opc == Epiphany::LDRi32_r32 || Opc == Epiphany::LDRi32_r32 || - Opc == Epiphany::STRi32_r32 || Opc == Epiphany::STRi32_r32) { - unsigned MIReg = getRegOperand(MI).getReg(); - unsigned sra = TRI->getMatchingSuperReg (Reg, Epiphany::isub_lo, 
&Epiphany::GPR64RegClass); - unsigned srb = TRI->getMatchingSuperReg (MIReg, Epiphany::isub_hi, &Epiphany::GPR64RegClass); - int HighOffset = MIOffset; - int LowOffset = Offset; - if( (!sra || !srb) || (sra != srb)) { - HighOffset = Offset; - LowOffset = MIOffset; - } - - // High reg offset should be always lower than low reg offset, and it should be double-aligned - if ((LowOffset > HighOffset) || (LowOffset % 8 != 0)) { - continue; - } + if (!isAlignmentCorrect(FirstMI, MI)) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + MemInsns.push_back(&MI); + DEBUG(dbgs() << "Can't be paired due to alignment\n"); + continue; } // Get the left-lowest offset - int MinOffset = Offset < MIOffset ? Offset : MIOffset; - // If the resultant immediate offset of merging these + int64_t MinOffset = Offset < MIOffset ? Offset : MIOffset; + + // If the resultant immediate offset of merging these // instructions is out of range for // a pairwise instruction, bail and keep looking. if (!inBoundsForPair(MinOffset)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); + DEBUG(dbgs() << "Out of bound for pairing\n"); continue; } // If the destination register of the loads is the same register, bail @@ -340,6 +563,7 @@ EpiphanyLoadStoreOptimizer::findMatchingInst(MachineBasicBlock::iterator I, if (MayLoad && Reg == getRegOperand(MI).getReg()) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); + DEBUG(dbgs() << "Can't merge into same reg\n"); continue; } @@ -347,18 +571,20 @@ EpiphanyLoadStoreOptimizer::findMatchingInst(MachineBasicBlock::iterator I, // the two instructions and none of the instructions between the second // and first alias with the second, we can combine the second into the // first. 
- if (!ModifiedRegs[getRegOperand(MI).getReg()] && - !(MI.mayLoad() && UsedRegs[getRegOperand(MI).getReg()])) { - Flags.setMergeForward(false); - return MBBI; + if (!ModifiedRegs[MIRegIdx]) { + if (!(MI.mayLoad() && UsedRegs[MIRegIdx])) { + Flags.setMergeForward(false); + return MBBI; + } + } else { + DEBUG(dbgs() << "Proposed paired reg was modified, will try to merge forward\n"); } // Likewise, if the Rt of the first instruction is not modified or used // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. - if (!ModifiedRegs[getRegOperand(FirstMI).getReg()] && - !(MayLoad && UsedRegs[getRegOperand(FirstMI).getReg()])) { + if (!ModifiedRegs[RegIdx] && !(MayLoad && UsedRegs[RegIdx])) { Flags.setMergeForward(true); return MBBI; } @@ -376,8 +602,8 @@ EpiphanyLoadStoreOptimizer::findMatchingInst(MachineBasicBlock::iterator I, trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); // Otherwise, if the base register is modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg]) + // return early. Should only happen when dealing with real registers + if (ModifiedRegs[BaseRegIdx]) return E; // Update list of instructions that read/write memory. @@ -392,38 +618,48 @@ EpiphanyLoadStoreOptimizer::findMatchingInst(MachineBasicBlock::iterator I, bool EpiphanyLoadStoreOptimizer::tryToPairLoadStoreInst(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; MachineBasicBlock::iterator E = MI.getParent()->end(); - DEBUG(dbgs() << "\nTrying to pair instruction: "; MI.print(dbgs());); + DEBUG(dbgs() << "\nTrying to pair instruction: "; + MI.print(dbgs());); - if (!TII->isCandidateToMergeOrPair(MI)) + if (!TII->isCandidateToMergeOrPair(MI)) { + DEBUG(dbgs() << "Not a candidate for merging\n"); return false; + } // Early exit if the offset is not possible to match. 
(6 bits of positive // range, plus allow an extra one in case we find a later insn that matches // with Offset-1) - int Offset = getOffsetOperand(MI).getImm(); + int64_t Offset = getOffsetOperand(MI).getImm(); int OffsetStride = 1; // Allow one more for offset. if (Offset > 0) Offset -= OffsetStride; - if (!inBoundsForPair(Offset)) + if (!inBoundsForPair(Offset)) { + DEBUG(dbgs() << "Out of bounds for pairing\n"); return false; + } // Look ahead up to LdStLimit instructions for a pairable instruction. LoadStoreFlags Flags; MachineBasicBlock::iterator Paired = - findMatchingInst(MBBI, Flags, LdStLimit); + findMatchingInst(MBBI, Flags, LdStLimit); if (Paired != E) { ++NumPairCreated; // Keeping the iterator straight is a pain, so we let the merge routine tell // us what the next instruction is after it's done mucking about. MBBI = mergePairedInsns(MBBI, Paired, Flags); return true; + } else { + DEBUG(dbgs() << "Unable to find matching instruction\n"); } return false; } - - +/// \brief Runs optimizer for the given MBB. 
+/// +/// \param MBB Machine basic block to optimize +/// +/// \return true if the block was modified bool EpiphanyLoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { bool Modified = false; @@ -435,7 +671,7 @@ bool EpiphanyLoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { // ; becomes // strd r0, [fp] for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MBBI != E;) { + MBBI != E;) { if (isPairableLoadStoreInst(*MBBI) && tryToPairLoadStoreInst(MBBI)) Modified = true; else @@ -446,8 +682,8 @@ bool EpiphanyLoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { } -INITIALIZE_PASS_BEGIN(EpiphanyLoadStoreOptimizer, "epiphany-ls-opt", "Epiphany Load Store Optimization", false, false); -INITIALIZE_PASS_END(EpiphanyLoadStoreOptimizer, "epiphany-ls-opt", "Epiphany Load Store Optimization", false, false); +INITIALIZE_PASS_BEGIN(EpiphanyLoadStoreOptimizer, "epiphany-ls-opt", "Epiphany Load Store Optimization", false, false) +INITIALIZE_PASS_END(EpiphanyLoadStoreOptimizer, "epiphany-ls-opt", "Epiphany Load Store Optimization", false, false) bool EpiphanyLoadStoreOptimizer::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "\nRunning Epiphany Load/Store Optimization Pass\n"); @@ -455,18 +691,34 @@ bool EpiphanyLoadStoreOptimizer::runOnMachineFunction(MachineFunction &Fn) { return false; Subtarget = &static_cast(Fn.getSubtarget()); - TII = static_cast(Subtarget->getInstrInfo()); + TII = Subtarget->getInstrInfo(); TRI = Subtarget->getRegisterInfo(); + TFI = Subtarget->getFrameLowering(); + MFI = &Fn.getFrameInfo(); + MRI = &Fn.getRegInfo(); + MF = &Fn; + + // Get stack growth direction + StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + LastLocalBlockOffset = StackGrowsDown ? -4 : 4; // Resize the modified and used register bitfield trackers. We do this once // per function and then clear the bitfield each time we optimize a load or // store. 
- ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); + ModifiedRegs.resize(MRI->getNumVirtRegs() + TRI->getNumRegs()); + UsedRegs.resize(MRI->getNumVirtRegs() + TRI->getNumRegs()); bool Modified = false; - for (auto &MBB : Fn) + for (auto &MBB : Fn) { Modified |= optimizeBlock(MBB); + if (Modified) { + MFI->setUseLocalStackAllocationBlock(true); + } + } + + // Adjust local frame block size + int64_t LocalFrameSize = StackGrowsDown ? -LastLocalBlockOffset - 4 : LastLocalBlockOffset - 4; + MFI->setLocalFrameSize(LocalFrameSize); return Modified; } diff --git a/EpiphanyLoadStoreOptimizer.h b/EpiphanyLoadStoreOptimizer.h index d98047b..18f17c7 100644 --- a/EpiphanyLoadStoreOptimizer.h +++ b/EpiphanyLoadStoreOptimizer.h @@ -31,45 +31,72 @@ #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { - void initializeEpiphanyLoadStoreOptimizerPass(PassRegistry&); + void initializeEpiphanyLoadStoreOptimizerPass(PassRegistry &); typedef struct LoadStoreFlags { // If a matching instruction is found, MergeForward is set to true if the // merge is to remove the first instruction and replace the second with // a pair-wise insn, and false if the reverse is true. bool MergeForward; + LoadStoreFlags() : MergeForward(false) {} void setMergeForward(bool V = true) { MergeForward = V; } + bool getMergeForward() const { return MergeForward; } } LoadStoreFlags; class EpiphanyLoadStoreOptimizer : public MachineFunctionPass { - private: - const EpiphanyInstrInfo *TII; - const TargetRegisterInfo *TRI; - const EpiphanySubtarget *Subtarget; - // Track which registers have been modified and used. 
- BitVector ModifiedRegs, UsedRegs; - bool optimizeBlock(MachineBasicBlock &MBB); - bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI); - bool tryToPairLoadStoreInst(MachineBasicBlock::iterator &MBBI); - MachineBasicBlock::iterator findMatchingInst(MachineBasicBlock::iterator I, - LoadStoreFlags &Flags, unsigned Limit); - MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, const LoadStoreFlags &Flags); - public: - static char ID; - EpiphanyLoadStoreOptimizer() : MachineFunctionPass(ID) { - initializeEpiphanyLoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const { - return "Epiphany Load/Store Optimization Pass"; - } - bool runOnMachineFunction(MachineFunction &MF); + private: + const EpiphanyInstrInfo *TII; + const TargetRegisterInfo *TRI; + const EpiphanySubtarget *Subtarget; + const EpiphanyFrameLowering *TFI; + MachineFunction *MF; + MachineRegisterInfo *MRI; + MachineFrameInfo *MFI; + // Track which registers have been modified and used. 
+ BitVector ModifiedRegs, UsedRegs; + bool StackGrowsDown; + int64_t LastLocalBlockOffset = -4; + + bool optimizeBlock(MachineBasicBlock &MBB); + + bool tryToPairLoadStoreInst(MachineBasicBlock::iterator &MBBI); + + bool isAlignmentCorrect(MachineInstr &FirstMI, MachineInstr &SecondMI); + + bool canFormSuperReg(unsigned MainReg, unsigned PairedReg); + + MachineBasicBlock::iterator findMatchingInst(MachineBasicBlock::iterator I, + LoadStoreFlags &Flags, unsigned Limit); + + MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Paired, const LoadStoreFlags &Flags); + + MachineInstrBuilder mergeRegInsns(unsigned PairedOp, int64_t OffsetImm, + MachineOperand RegOp0, MachineOperand RegOp1, + MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, + const LoadStoreFlags &Flags); + + void cleanKillFlags(MachineOperand RegOp0, MachineOperand RegOp1, + MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, + bool MergeForward); + + public: + static char ID; + + EpiphanyLoadStoreOptimizer() : MachineFunctionPass(ID) { + initializeEpiphanyLoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const { + return "Epiphany Load/Store Optimization Pass"; + } + + bool runOnMachineFunction(MachineFunction &MF); }; } // namespace llvm diff --git a/EpiphanyMachineFunction.h b/EpiphanyMachineFunction.h index ddce0d6..b6219eb 100644 --- a/EpiphanyMachineFunction.h +++ b/EpiphanyMachineFunction.h @@ -43,9 +43,9 @@ class EpiphanyMachineFunctionInfo : public MachineFunctionInfo { CallsEhReturn(false), CallsEhDwarf(false), HasFpuInst(false), - HasIalu2Inst(false), + GlobalBaseReg(0), EmitNOAT(false), - GlobalBaseReg(0) + HasIalu2Inst(false) {} ~EpiphanyMachineFunctionInfo(); diff --git a/EpiphanyRegisterInfo.cpp b/EpiphanyRegisterInfo.cpp index 3498ae4..2df1ea3 100644 --- a/EpiphanyRegisterInfo.cpp +++ b/EpiphanyRegisterInfo.cpp @@ -33,43 +33,43 @@ using namespace llvm; #include 
"EpiphanyGenRegisterInfo.inc" EpiphanyRegisterInfo::EpiphanyRegisterInfo(const EpiphanySubtarget &ST) - : EpiphanyGenRegisterInfo(Epiphany::LR), Subtarget(ST) {} - - //===----------------------------------------------------------------------===// - // Callee Saved Registers methods - //===----------------------------------------------------------------------===// - /// Epiphany Callee Saved Registers - // In EpiphanyCallConv.td, - // def CSR32 : CalleeSavedRegs<(add V1, V2, V3, V4, V5, SB, SL, FP, LR, R15)>; - // llc create CSR32_SaveList and CSR32_RegMask from above defined. - const MCPhysReg * - EpiphanyRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - return CSR32_SaveList; - } + : EpiphanyGenRegisterInfo(Epiphany::LR), Subtarget(ST) {} + + //===----------------------------------------------------------------------===// + // Callee Saved Registers methods + //===----------------------------------------------------------------------===// + /// Epiphany Callee Saved Registers + // In EpiphanyCallConv.td, + // def CSR32 : CalleeSavedRegs<(add V1, V2, V3, V4, V5, SB, SL, FP, LR, R15)>; + // llc create CSR32_SaveList and CSR32_RegMask from above defined. 
+ const MCPhysReg * + EpiphanyRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + return CSR32_SaveList; + } const uint32_t* EpiphanyRegisterInfo::getCallPreservedMask(const MachineFunction &MF, - CallingConv::ID) const { - return CSR32_RegMask; + CallingConv::ID) const { + return CSR32_RegMask; } // pure virtual method BitVector EpiphanyRegisterInfo::getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - // Stack base, limit and pointer - Reserved.set(Epiphany::SB); - Reserved.set(Epiphany::SL); - Reserved.set(Epiphany::SP); - // Frame pointer - Reserved.set(Epiphany::FP); - // Link register - Reserved.set(Epiphany::LR); - // Constants - Reserved.set(Epiphany::R28); - Reserved.set(Epiphany::R29); - Reserved.set(Epiphany::R30); - Reserved.set(Epiphany::ZERO); - Reserved.set(Epiphany::STATUS); + BitVector Reserved(getNumRegs()); + // Stack base, limit and pointer + Reserved.set(Epiphany::SB); + Reserved.set(Epiphany::SL); + Reserved.set(Epiphany::SP); + // Frame pointer + Reserved.set(Epiphany::FP); + // Link register + Reserved.set(Epiphany::LR); + // Constants + Reserved.set(Epiphany::R28); + Reserved.set(Epiphany::R29); + Reserved.set(Epiphany::R30); + Reserved.set(Epiphany::ZERO); + Reserved.set(Epiphany::STATUS); // 64 bit with same subregs Reserved.set(Epiphany::D4); @@ -80,7 +80,7 @@ BitVector EpiphanyRegisterInfo::getReservedRegs(const MachineFunction &MF) const Reserved.set(Epiphany::D14); Reserved.set(Epiphany::D15); - return Reserved; + return Reserved; } @@ -91,59 +91,57 @@ BitVector EpiphanyRegisterInfo::getReservedRegs(const MachineFunction &MF) const // direct reference. 
void EpiphanyRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator MBBI, int SPAdj, - unsigned FIOperandNum, RegScavenger *RS) const { - MachineInstr &MI = *MBBI; - MachineFunction &MF = *MI.getParent()->getParent(); - MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned FIOperandNum, RegScavenger *RS) const { + MachineInstr &MI = *MBBI; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo &MFI = MF.getFrameInfo(); const EpiphanyFrameLowering *FL = getFrameLowering(MF); - EpiphanyMachineFunctionInfo *FI = MF.getInfo(); - - unsigned i = 0; - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - - DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n"; - errs() << "<--------->\n" << MI); - - int FrameIndex = MI.getOperand(i).getIndex(); - uint64_t stackSize = MF.getFrameInfo().getStackSize(); - int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex); - - DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" - << "spOffset : " << spOffset << "\n" - << "stackSize : " << stackSize << "\n"); - - const std::vector &CSI = MFI.getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - // The following stack frame objects are always referenced relative to $sp: - // 1. Outgoing arguments. - // 2. Pointer to dynamically allocated stack space. - // 3. Locations for callee-saved registers. - // Everything else is referenced relative to whatever register - // getFrameRegister() returns. 
- unsigned FrameReg = getFrameRegister(MF); + EpiphanyMachineFunctionInfo *FI = MF.getInfo(); + + unsigned i = 0; + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n"; + errs() << "<--------->\n" << MI); + + int FrameIndex = MI.getOperand(i).getIndex(); + uint64_t stackSize = MF.getFrameInfo().getStackSize(); + int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex); + + DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" + << "spOffset : " << spOffset << "\n" + << "stackSize : " << stackSize << "\n"); + + const std::vector &CSI = MFI.getCalleeSavedInfo(); + + if (CSI.size()) { + CSI[0].getFrameIdx(); + CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg = getFrameRegister(MF); if (FrameIndex >= 0) { if (hasBasePointer(MF)) { FrameReg = getBaseRegister(); } else if (needsStackRealignment(MF)) { - FrameReg = Epiphany::SP; + FrameReg = Epiphany::SP; } } - // Calculate final offset. In fact, just an spOffset is good to use here, + // Calculate final offset. In fact, just an spOffset is good to use here, // but with the offset from FP - int64_t Offset; - Offset = spOffset; + int64_t Offset; + Offset = spOffset; if (FrameReg == Epiphany::SP) { Offset += stackSize; // Skip saved FP/LR if we have calls @@ -151,27 +149,34 @@ eliminateFrameIndex(MachineBasicBlock::iterator MBBI, int SPAdj, Offset += 8; } } - Offset += MI.getOperand(i+1).getImm(); - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - - // If MI is not a debug value, make sure Offset fits in the 16-bit immediate - // field. 
- if (!MI.isDebugValue() && !isInt<16>(Offset)) { - assert("(!MI.isDebugValue() && !isInt<16>(Offset))"); - } + //Offset += spOffset > 0 ? MI.getOperand(i+1).getImm() : - MI.getOperand(i+1).getImm(); + Offset += MI.getOperand(i+1).getImm(); + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + // If MI is not a debug value, make sure Offset fits in the 16-bit immediate + // field. + if (!MI.isDebugValue() && !isInt<16>(Offset)) { + assert("(!MI.isDebugValue() && !isInt<16>(Offset))"); + } - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset); } bool EpiphanyRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return true; + return true; } bool EpiphanyRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return true; + return true; +} + +const TargetRegisterClass * +EpiphanyRegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + return &Epiphany::GPR32RegClass; } bool EpiphanyRegisterInfo::hasBasePointer(const MachineFunction &MF) const { @@ -192,17 +197,37 @@ unsigned EpiphanyRegisterInfo::getBaseRegister() const { // Returns current frame register: FP or SP depending if FramePointer is set unsigned EpiphanyRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - return TFI->hasFP(MF) ? (Epiphany::FP) : (Epiphany::SP); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + return TFI->hasFP(MF) ? 
(Epiphany::FP) : (Epiphany::SP); } const TargetRegisterClass * EpiphanyRegisterInfo::GPR32(unsigned Size) const { - return &Epiphany::GPR32RegClass; + return &Epiphany::GPR32RegClass; } const TargetRegisterClass * EpiphanyRegisterInfo::GPR16(unsigned Size) const { - return &Epiphany::GPR16RegClass; + return &Epiphany::GPR16RegClass; } +unsigned EpiphanyRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { + switch (RC->getID()) { + default: + return 8; + case Epiphany::GPR32RegClassID: + case Epiphany::FPR32RegClassID: + return 55; // We currently have 9 reserved regs + case Epiphany::GPR16RegClassID: + case Epiphany::FPR16RegClassID: + return 8; + case Epiphany::GPR64RegClassID: + case Epiphany::FPR64RegClassID: + case Epiphany::FPR64_with_isub_lo_in_FPR32RegClassID: + return 26; // We currently have 6 reserved double regs + } +} + +unsigned EpiphanyRegisterInfo::getRegUnitWeight(unsigned RegUnit) const { + return 1; +} diff --git a/EpiphanyRegisterInfo.h b/EpiphanyRegisterInfo.h index 8c5f8d8..d520941 100644 --- a/EpiphanyRegisterInfo.h +++ b/EpiphanyRegisterInfo.h @@ -46,6 +46,11 @@ class EpiphanyRegisterInfo : public EpiphanyGenRegisterInfo { bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; + unsigned getRegUnitWeight(unsigned RegUnit) const override; + + const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, unsigned Kind) const override; + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *Rs = nullptr) const override; diff --git a/EpiphanyRegisterInfo.td b/EpiphanyRegisterInfo.td index ac53f80..2150ed6 100644 --- a/EpiphanyRegisterInfo.td +++ b/EpiphanyRegisterInfo.td @@ -18,9 +18,9 @@ class EpiphanyReg enc, string n> : Register { } let Namespace = "Epiphany" in { - def isub_lo : SubRegIndex<32, 32>; - def isub_hi : 
SubRegIndex<32, 0>; - def subreg_overflow : SubRegIndex<1, 0>; + def isub_lo : SubRegIndex<32, 0>; + def isub_hi : SubRegIndex<32, 32>; + def subreg_overflow : SubRegIndex<0, 1>; } class EpiphanyReg64 enc, string n, list subregs> : RegisterWithSubRegs { @@ -203,10 +203,6 @@ def GPR32 : RegisterClass<"Epiphany", [i32, v4i8, v2i16], 32, (add // Caller-saved GPR (sequence "R%u", 32, 63))> { - // Reordering - putting GPR16 regs to the end to lower register pressure - let AltOrders = [(add (sub GPR32, R0, R1, R2, R3, R4, R5, R6, R7, R8), R0, R1, R2, R3, R4, R5, R6, R7, R8)]; - let AltOrderSelect = [{ return 1; }]; - let AllocationPriority = 2; let Size = 32; let CopyCost = 1; @@ -215,7 +211,7 @@ def GPR32 : RegisterClass<"Epiphany", [i32, v4i8, v2i16], 32, (add def FPR32 : RegisterClass<"Epiphany", [f32], 32, (add GPR32)>; // 64 bit -def GPR64 : RegisterClass<"Epiphany", [i64,v2i32], 64, (add (sequence "D%u", 0, 31))> { +def GPR64 : RegisterClass<"Epiphany", [i64,v2i32, v4i16], 64, (add (sequence "D%u", 0, 31))> { let CopyCost = 4; let Size = 64; } diff --git a/EpiphanySchedule.td b/EpiphanySchedule.td index f1c635f..725345b 100644 --- a/EpiphanySchedule.td +++ b/EpiphanySchedule.td @@ -6,6 +6,24 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// +// +// Epiphany pipeline is shown below +// +---------+------+------------------------+----------------------------------------------------------------------+ +// | STEP NO | STEP | NAME | ACTION | +// |---------|------|------------------------|----------------------------------------------------------------------| +// | 1 | FE | Fetch Address | Fetch addr sent to instr memory | +// | 2 | IM | Instruction Mem Access | Instr returns from core mem | +// | 3 | DE | Decode | Decode instr | +// | 4 | RA | Reg Access | Read operands from regs | +// | 5 | E1 | Execution | Load/store address calc Reg read for store Status flag set Branching | +// | 6 | E2 | Execution | Load complete | +// | 7 | E3 | Execution | FP result written in trunc mode | +// | 8 | E4 | Execution | FP result in round-to-near mode | +// +---------+------+------------------------+----------------------------------------------------------------------+ +// +// Epiphany can dual-issue when FPU/IALU2 is used. +// In addition, it might be a good idea to group stores and loads to widen them. 
+// //===----------------------------------------------------------------------===// // Functional units across Epiphany chips sets @@ -30,6 +48,11 @@ def E2_2 : FuncUnit; def E3_2 : FuncUnit; def E4_2 : FuncUnit; +// LoadStore grouping bypass +def LoadStoreBypass : Bypass; +// IALU2 bypass +def IaluBypass : Bypass; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for Epiphany (p58, epiphany_arch_ref.pdf) //===----------------------------------------------------------------------===// @@ -53,23 +76,23 @@ def EpiphanyGenericItineraries : ProcessorItineraries<[FE, IM, DE, RA, E1, E2, E // IALU2 instructions take steps FE-E1, 1 cycle per step // Read on cycle 3, result at cycle 4 (after issue) - InstrItinData, - InstrStage<1, [IM_2]>, - InstrStage<1, [DE_2]>, - InstrStage<1, [RA_2]>, - InstrStage<1, [E1_2]>], + InstrItinData, + InstrStage<1, [IM]>, + InstrStage<1, [DE]>, + InstrStage<1, [RA]>, + InstrStage<1, [E1]>], [4, 3]>, // FPU instructions take steps FE-E4, 1 cycle per step // Read on cycle 3, result at cycle 7 (after issue) - InstrItinData, - InstrStage<1, [IM_2]>, - InstrStage<1, [DE_2]>, - InstrStage<1, [RA_2]>, - InstrStage<1, [E1_2]>, - InstrStage<1, [E2_2]>, - InstrStage<1, [E3_2]>, - InstrStage<1, [E4_2]>], + InstrItinData, + InstrStage<1, [IM]>, + InstrStage<1, [DE]>, + InstrStage<1, [RA]>, + InstrStage<1, [E1]>, + InstrStage<1, [E2]>, + InstrStage<1, [E3]>, + InstrStage<1, [E4]>], [7, 3]>, // LOAD instructions take steps FE-E2, 1 cycle per step @@ -109,3 +132,12 @@ def EpiphanyGenericItineraries : ProcessorItineraries<[FE, IM, DE, RA, E1, E2, E InstrStage<4, [E1]>], [4, 3]> ]>; + +def EpiphanyModel : SchedMachineModel { + let IssueWidth = 1; // At max we can dual-issue, but let's keep 1 for now + let Itineraries = EpiphanyGenericItineraries; + let LoadLatency = 2; + let CompleteModel = 0; + let MispredictPenalty = 0; + let PostRAScheduler = 1; +} diff --git 
a/EpiphanyTargetMachine.cpp b/EpiphanyTargetMachine.cpp index c82ad7b..ae05099 100644 --- a/EpiphanyTargetMachine.cpp +++ b/EpiphanyTargetMachine.cpp @@ -14,18 +14,31 @@ //===----------------------------------------------------------------------===// #include "EpiphanyTargetMachine.h" -#include "Epiphany.h" #include "EpiphanyISelDAGToDAG.h" -#include "EpiphanySubtarget.h" #include "EpiphanyTargetObjectFile.h" +#include "EpiphanyTargetTransformInfo.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Vectorize.h" using namespace llvm; +static cl::opt EnableSROA( + "epiphany-sroa", + cl::desc("Run SROA after promote alloca pass"), + cl::ReallyHidden, + cl::init(true)); + +static cl::opt EnableLSOpt( + "epiphany-lsopt", + cl::desc("Run Epiphany Load/Store Optimization Pass"), + cl::ReallyHidden, + cl::init(true)); + #define DEBUG_TYPE "epiphany" extern "C" void LLVMInitializeEpiphanyTarget() { @@ -51,6 +64,9 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU, // Minimal alignment for E16 is byte Ret += "-i8:8-i16:16-i32:32-i64:64"; + + // Vector alignment is better to keep at dword for wide loads/stores + Ret += "-v32:64-v64:64"; // 32 and 64 bit floats should have natural alignment Ret += "-f32:32-f64:64"; @@ -97,6 +113,8 @@ class EpiphanyPassConfig : public TargetPassConfig { bool addILPOpts() override; bool addInstSelector() override; + void addIRPasses() override; + void addCodeGenPrepare() override; void addPreRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; @@ -111,6 +129,15 @@ TargetPassConfig *EpiphanyTargetMachine::createPassConfig(PassManagerBase &PM) { return new EpiphanyPassConfig(this, PM); } +void EpiphanyPassConfig::addIRPasses() { + 
addPass(createAtomicExpandPass(&getEpiphanyTargetMachine())); + if (EnableSROA && (TM->getOptLevel() != CodeGenOpt::None)) { + addPass(createSROAPass()); + } + + TargetPassConfig::addIRPasses(); +} + bool EpiphanyPassConfig::addILPOpts() { addPass(&EarlyIfConverterID); //if (EnableMachineCombinerPass) @@ -124,8 +151,16 @@ bool EpiphanyPassConfig::addInstSelector() { return false; } +void EpiphanyPassConfig::addCodeGenPrepare() { + TargetPassConfig::addCodeGenPrepare(); + + addPass(createLoadStoreVectorizerPass()); +} + void EpiphanyPassConfig::addPreRegAlloc() { addPass(&LiveVariablesID, false); + if (EnableLSOpt && TM->getOptLevel() != CodeGenOpt::None) + addPass(createEpiphanyVregLoadStoreOptimizationPass()); } void EpiphanyPassConfig::addPreSched2() { @@ -133,5 +168,13 @@ void EpiphanyPassConfig::addPreSched2() { } void EpiphanyPassConfig::addPreEmitPass() { - addPass(createEpiphanyLoadStoreOptimizationPass()); + if (EnableLSOpt && TM->getOptLevel() != CodeGenOpt::None) + addPass(createEpiphanyLoadStoreOptimizationPass()); } + +TargetIRAnalysis EpiphanyTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(EpiphanyTTIImpl(this, F)); + }); +} + diff --git a/EpiphanyTargetMachine.h b/EpiphanyTargetMachine.h index 8a94861..e2f3389 100644 --- a/EpiphanyTargetMachine.h +++ b/EpiphanyTargetMachine.h @@ -18,6 +18,7 @@ #include "MCTargetDesc/EpiphanyABIInfo.h" #include "EpiphanySubtarget.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetFrameLowering.h" @@ -52,9 +53,11 @@ class EpiphanyTargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + TargetIRAnalysis getTargetIRAnalysis() override; const EpiphanyABIInfo &getABI() const { return ABI; } - //const DataLayout *getDataLayout() const { return &DL; } + const DataLayout 
*getDataLayout() const { return &DL; } }; } // namespace llvm diff --git a/EpiphanyTargetObjectFile.cpp b/EpiphanyTargetObjectFile.cpp index 96cc215..7e0771c 100644 --- a/EpiphanyTargetObjectFile.cpp +++ b/EpiphanyTargetObjectFile.cpp @@ -13,16 +13,8 @@ #include "EpiphanyTargetObjectFile.h" -#include "EpiphanySubtarget.h" -#include "EpiphanyTargetMachine.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/EpiphanyTargetTransformInfo.cpp b/EpiphanyTargetTransformInfo.cpp new file mode 100644 index 0000000..41caaea --- /dev/null +++ b/EpiphanyTargetTransformInfo.cpp @@ -0,0 +1,78 @@ +//===-- EpiphanyTargetTransformInfo.cpp - Epiphany specific TTI pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// \file +// This file implements a TargetTransformInfo analysis pass specific to the +// Epiphany target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. 
+// +//===----------------------------------------------------------------------===// + +#include "EpiphanyTargetTransformInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +unsigned EpiphanyTTIImpl::getNumberOfRegisters(bool Vec) { + if (Vec) + return 32; // Only even regs + + return 64; +} + +unsigned EpiphanyTTIImpl::getRegisterBitWidth(bool Vector) { + return Vector ? 64 : 32; +} + +unsigned EpiphanyTTIImpl::getMinVectorRegisterBitWidth() { + return 32; +} + +unsigned EpiphanyTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { + return 64; +} + +void EpiphanyTTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + UP.Threshold = 64; // 8 * min hw loop, assuming inst const = 1 + UP.MaxCount = 8; + UP.Partial = true; +} + +unsigned EpiphanyTTIImpl::getMaxInterleaveFactor(unsigned VF) { + return 1; +} + +// FIXME: setting all costs to 1 for now +int EpiphanyTTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, + ArrayRef Args) { return 1; } +int EpiphanyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { return 1; } +int EpiphanyTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) { return 1; } +int EpiphanyTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) { return 1; } +int EpiphanyTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, + bool VariableMask, unsigned Alignment) { return 1; } +int 
EpiphanyTTIImpl::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, + const SCEV *Ptr) { return 1; } +unsigned EpiphanyTTIImpl::getCFInstrCost(unsigned Opcode) { return 1; } + diff --git a/EpiphanyTargetTransformInfo.h b/EpiphanyTargetTransformInfo.h new file mode 100644 index 0000000..9f75902 --- /dev/null +++ b/EpiphanyTargetTransformInfo.h @@ -0,0 +1,92 @@ +//===-- EpiphanyTargetTransformInfo.h - Epiphany specific TTI ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines a TargetTransformInfo::Concept conforming object specific to the +/// Epiphany target machine. It uses the target's detailed information to +/// provide more precise answers to certain TTI queries, while letting the +/// target independent and default TTI implementations handle the rest.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_EPIPHANY_EPIPHANYTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_EPIPHANY_EPIPHANYTARGETTRANSFORMINFO_H + +#include "Epiphany.h" +#include "EpiphanyTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class EpiphanyTargetLowering; + +class EpiphanyTTIImpl final : public BasicTTIImplBase { + typedef BasicTTIImplBase BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const EpiphanySubtarget *ST; + const EpiphanyTargetLowering *TLI; + + const EpiphanySubtarget *getST() const { return ST; } + const EpiphanyTargetLowering *getTLI() const { return TLI; } + + + static inline int getFullRateInstrCost() { + return TargetTransformInfo::TCC_Basic; + } + +public: + explicit EpiphanyTTIImpl(const EpiphanyTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), + ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + bool hasBranchDivergence() { return true; } + + void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + return TTI::PSK_FastHardware; + } + + unsigned getNumberOfRegisters(bool Vector); + unsigned getRegisterBitWidth(bool Vector); + unsigned getMinVectorRegisterBitWidth(); + unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; + unsigned getMaxInterleaveFactor(unsigned VF); + + // Instruction costs + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); + int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); + int getGatherScatterOpCost(unsigned Opcode, 
Type *DataTy, Value *Ptr, + bool VariableMask, unsigned Alignment); + int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, + const SCEV *Ptr); + int getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, + ArrayRef Args = ArrayRef()); + + unsigned getCFInstrCost(unsigned Opcode); + + unsigned getVectorSplitCost() { return 0; } +}; + +} // end namespace llvm + +#endif diff --git a/EpiphanyVregLoadStoreOptimizer.cpp b/EpiphanyVregLoadStoreOptimizer.cpp new file mode 100644 index 0000000..ec9810f --- /dev/null +++ b/EpiphanyVregLoadStoreOptimizer.cpp @@ -0,0 +1,840 @@ +//=- EpiphanyVregLoadStoreOptimizer.cpp - Epiphany load/store opt. pass -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains a pass that performs load / store related peephole +/// optimizations. This pass should be run before register allocation.
+/// +/// Flow: +/// * Split MachineFunction (MF) into MachineBasicBlocks (MBB) +/// * For each MBB look through instructions trying to find the next pairable one (see isPairableLoadStoreInst) +/// * If found pairable instruction, check if it has any flags preventing pairing +/// * If no such flags found, try to find matching paired instruction +/// * Take a couple of next instructions, find instruction with the same opcode, and run a couple of checks +/// * Check alignment, reg base, check if reg is not modified +/// * For real regs, try to find super-reg +/// * For real regs, check order +/// * For reg-based (not frame-based) offsets check base alignment (frame SHOULD be 8-byte aligned) +/// * If all green, try to pair regs +/// * For virtual regs, create reg sequence. If frame-based - merge based on stack growth direction and move +/// frame object into fixed local stack area +/// * For real regs, just swap with the super-reg +/// +//===----------------------------------------------------------------------===// + +#include "EpiphanyVregLoadStoreOptimizer.h" + +using namespace llvm; + +#define DEBUG_TYPE "epiphany_vreg_ls_opt" + +STATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); + +// The LdStLimit limits how far we search for load/store pairs.
+static cl::opt LdStLimit("epiphany-vreg-load-store-scan-limit", cl::init(20), cl::Hidden); + + +char EpiphanyVregLoadStoreOptimizer::ID = 0; + +/// \brief Returns true if this instruction should be considered for pairing +/// +/// \param MI Machine instruction to check +/// +/// \return true if this instruction should be considered for pairing +static bool isPairableLoadStoreInst(MachineInstr &MI) { + unsigned inst[] = { + Epiphany::STRi32_r16, + Epiphany::STRi32_r32, + Epiphany::STRf32, + Epiphany::LDRi32_r16, + Epiphany::LDRi32_r32, + Epiphany::LDRf32 + }; + unsigned Opc = MI.getOpcode(); + return std::find(std::begin(inst), std::end(inst), Opc) != std::end(inst); +} + +static unsigned int getMemScale(unsigned Opc) { + switch (Opc) { + default: + llvm_unreachable("Opcode has unknown scale!"); + case Epiphany::STRi8_r16: + case Epiphany::STRi8_r32: + case Epiphany::LDRi8_r16: + case Epiphany::LDRi8_r32: + return 1; + case Epiphany::STRi16_r16: + case Epiphany::STRi16_r32: + case Epiphany::LDRi16_r16: + case Epiphany::LDRi16_r32: + return 2; + case Epiphany::STRi32_r16: + case Epiphany::STRi32_r32: + case Epiphany::LDRi32_r16: + case Epiphany::LDRi32_r32: + case Epiphany::STRf32: + case Epiphany::LDRf32: + return 4; + case Epiphany::STRi64: + case Epiphany::LDRi64: + case Epiphany::STRf64: + case Epiphany::LDRf64: + return 8; + } +} + +static unsigned int getMemScale(MachineInstr &MI) { + return getMemScale(MI.getOpcode()); +} + +/// Returns correct instruction alignment. For Epiphany it is equal to memory scale +static unsigned int getAlignment(MachineInstr &MI) { + return getMemScale(MI); +} + +static unsigned int getAlignment(unsigned Opc) { + return getMemScale(Opc); +} + +/// Return paired opcode for the provided one, e.g. 
STRi64 for STRi32_r32
+static unsigned getMatchingPairOpcode(unsigned Opc) {
+  switch (Opc) {
+  // 8-bit accesses widen to 16-bit ones of the same register flavour.
+  case Epiphany::STRi8_r16:  return Epiphany::STRi16_r16;
+  case Epiphany::STRi8_r32:  return Epiphany::STRi16_r32;
+  case Epiphany::LDRi8_r16:  return Epiphany::LDRi16_r16;
+  case Epiphany::LDRi8_r32:  return Epiphany::LDRi16_r32;
+
+  // 16-bit accesses widen to 32-bit ones of the same register flavour.
+  case Epiphany::STRi16_r16: return Epiphany::STRi32_r16;
+  case Epiphany::STRi16_r32: return Epiphany::STRi32_r32;
+  case Epiphany::LDRi16_r16: return Epiphany::LDRi32_r16;
+  case Epiphany::LDRi16_r32: return Epiphany::LDRi32_r32;
+
+  // Both 32-bit integer flavours share a single 64-bit opcode.
+  case Epiphany::STRi32_r16:
+  case Epiphany::STRi32_r32: return Epiphany::STRi64;
+  case Epiphany::LDRi32_r16:
+  case Epiphany::LDRi32_r32: return Epiphany::LDRi64;
+
+  // Floating point.
+  case Epiphany::STRf32:     return Epiphany::STRf64;
+  case Epiphany::LDRf32:     return Epiphany::LDRf64;
+
+  default:
+    llvm_unreachable("Opcode has no pairwise equivalent");
+  }
+}
+
+/// Returns true if \p Offset is usable by a paired load/store instruction.
+/// Every offset is currently accepted.
+static bool inBoundsForPair(int64_t Offset) {
+  // Original author's note: an offset that is in bounds for any kind of
+  // store/load is in bounds for pairing as well, so nothing is rejected.
+  return true;
+}
+
+/// Returns the data register operand (operand 0) of a store/load.
+static const MachineOperand &getRegOperand(const MachineInstr &MI) {
+  return MI.getOperand(0);
+}
+
+/// Returns the base operand (operand 1) of a store/load.
+static const MachineOperand &getBaseOperand(const MachineInstr &MI) {
+  return MI.getOperand(1);
+}
+
+/// Mutable variant of the base operand accessor.
+static MachineOperand &getBaseOperand(MachineInstr &MI) {
+  return MI.getOperand(1);
+}
+
+/// Returns the offset operand (operand 2) of a store/load.
+static const MachineOperand &getOffsetOperand(const MachineInstr &MI) {
+  return MI.getOperand(2);
+}
+
+/// Mutable variant of the offset operand accessor.
+static MachineOperand &getOffsetOperand(MachineInstr &MI) {
+  return MI.getOperand(2);
+}
+
+/// Returns true if the base operands of both instructions are frame indexes,
+/// false if at least one of them is not.
+static bool baseIsFrameIndex(const MachineInstr &FirstMI, const MachineInstr &SecondMI) {
+  return getBaseOperand(FirstMI).isFI() && getBaseOperand(SecondMI).isFI();
+}
+
+/// Returns true if \p FirstMI and \p SecondMI are candidates for pairing.
+///
+/// As a side effect \p Flags records whether both instructions are based on
+/// frame indexes; this only happens once the opcode check has passed.
+static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &SecondMI,
+                                       LoadStoreFlags &Flags, const EpiphanyInstrInfo *TII,
+                                       const MachineFrameInfo *MFI) {
+  // An ordered (e.g. volatile) access is never a candidate.
+  if (SecondMI.hasOrderedMemoryRef())
+    return false;
+
+  // The caller is expected to have filtered FirstMI already.
+  assert(!FirstMI.hasOrderedMemoryRef() &&
+         "FirstMI shouldn't get here if either of these checks are true.");
+
+  // Only instructions with the very same opcode can be paired.
+  if (FirstMI.getOpcode() != SecondMI.getOpcode())
+    return false;
+
+  const bool BothFrameBased = baseIsFrameIndex(FirstMI, SecondMI);
+  Flags.setBasedOnVirtualFI(BothFrameBased);
+  if (!BothFrameBased)
+    return true;
+
+  // Frame-index based: each stack object must be exactly as large as the
+  // access that touches it.
+  const int FirstBase = getBaseOperand(FirstMI).getIndex();
+  const int SecondBase = getBaseOperand(SecondMI).getIndex();
+  const bool SizesMatch = MFI->getObjectSize(FirstBase) == getMemScale(FirstMI) &&
+                          MFI->getObjectSize(SecondBase) == getMemScale(SecondMI);
+  if (!SizesMatch) {
+    DEBUG(dbgs() << "Object sizes not equal to their mem scales, skipping\n");
+    return false;
+  }
+
+  return true;
+}
+
+/// trackRegDefsUses - Remember what registers the specified instruction uses
+/// and modifies.
+///
+/// NOTE(review): virtual registers are stored under virtReg2Index(Reg) and
+/// physical ones under their raw number, so both appear to share one index
+/// range in the bit vectors - confirm whether that overlap is intended.
+static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs,
+                             BitVector &UsedRegs, const TargetRegisterInfo *TRI) {
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isRegMask())
+      ModifiedRegs.setBitsNotInMask(MO.getRegMask());
+
+    if (!MO.isReg() || !MO.getReg())
+      continue;
+
+    const unsigned Reg = MO.getReg();
+    const unsigned Idx = TRI->isVirtualRegister(Reg) ? TRI->virtReg2Index(Reg) : Reg;
+    if (MO.isDef()) {
+      ModifiedRegs.set(Idx);
+      continue;
+    }
+
+    assert(MO.isUse() && "Reg operand not a def and not a use?!?");
+    UsedRegs.set(Idx);
+  }
+}
+
+/// Keeps track on which frame indexes were used between two candidates to merge.
+///
+/// Every frame index operand of a storing instruction is recorded as modified,
+/// the ones of any other instruction as used.
+///
+/// NOTE(review): the index is used as a bit position as is - confirm that
+/// negative (fixed object) frame indexes cannot reach this point.
+static void trackFrameIdxs(const MachineInstr &MI, BitVector &ModifiedFrameIdxs,
+                           BitVector &UsedFrameIdxs, const MachineFrameInfo *MFI) {
+  BitVector &Touched = MI.mayStore() ? ModifiedFrameIdxs : UsedFrameIdxs;
+  for (const MachineOperand &MO : MI.operands())
+    if (MO.isFI())
+      Touched.set(MO.getIndex());
+}
+
+/// \brief Returns true if the alignment for specified regs and their offsets is good for pairing.
+/// Only applicable when the frame is finalized
+///
+/// For frame-index based candidates both stack objects must be at least as
+/// aligned as the single access. For register based candidates the base has
+/// to be suitably aligned and the offset that the merged instruction will
+/// carry has to be a multiple of the paired access alignment.
+///
+/// \param FirstMI First instruction to check
+/// \param SecondMI Second instruction to check
+/// \param UsingVirtualFI True if both instructions are based on frame indexes (object alignment is
+///        checked), false if a register base with an immediate offset is used
+///
+/// \return true if alignment is ok for pairing
+bool EpiphanyVregLoadStoreOptimizer::isAlignmentCorrect(MachineInstr &FirstMI, MachineInstr &SecondMI,
+                                                        bool UsingVirtualFI) {
+  if (UsingVirtualFI) {
+    // Check if both ops have correct alignment required
+    const int MainFI = getBaseOperand(FirstMI).getIndex();
+    const int PairedFI = getBaseOperand(SecondMI).getIndex();
+    return MFI->getObjectAlignment(MainFI) >= getAlignment(FirstMI) &&
+           MFI->getObjectAlignment(PairedFI) >= getAlignment(SecondMI);
+  }
+
+  // UsingVirtualFI is false as soon as ONE base is not a frame index, so the
+  // other one still may be. Such a mix cannot be proven adjacent, and calling
+  // getReg() on a frame index operand is invalid - refuse to pair.
+  const MachineOperand &MainBase = getBaseOperand(FirstMI);
+  const MachineOperand &PairedBase = getBaseOperand(SecondMI);
+  if (!MainBase.isReg() || !PairedBase.isReg()) {
+    DEBUG(dbgs() << "Mixed frame index and register bases, skipping\n");
+    return false;
+  }
+
+  const int64_t MainOffset = getOffsetOperand(FirstMI).getImm();
+  const int64_t PairedOffset = getOffsetOperand(SecondMI).getImm();
+
+  // Check that base alignment matches paired opcode alignment
+  const int64_t PairedAlignment = getAlignment(getMatchingPairOpcode(FirstMI.getOpcode()));
+  if (MainBase.getReg() != Epiphany::FP) {
+    // Only applicable when we are dealing with non-FP-based offset, as frame is 8-byte aligned
+    if (FirstMI.memoperands_empty() || SecondMI.memoperands_empty()) {
+      // Nothing to derive the base alignment from, so it cannot be proven.
+      DEBUG(dbgs() << "No memory operands to check base alignment, skipping\n");
+      return false;
+    }
+    const MachineMemOperand *FirstMO = *FirstMI.memoperands_begin();
+    const MachineMemOperand *SecondMO = *SecondMI.memoperands_begin();
+    // NOTE(review): this rejects only when BOTH base alignments differ from
+    // the paired alignment, and it uses != rather than <, so an over-aligned
+    // base is rejected as well. Kept as is - confirm the intent.
+    if (FirstMO->getBaseAlignment() != PairedAlignment && SecondMO->getBaseAlignment() != PairedAlignment) {
+      DEBUG(dbgs() << "Base alignment out, skipping\n");
+      return false;
+    }
+  }
+
+  // The merged instruction carries exactly one of the two offsets. Pick it the
+  // same way mergePairedInsns does and require that very offset to be aligned:
+  // of two adjacent word offsets one is always a multiple of the paired
+  // alignment, so checking for "at least one aligned" never rejects anything.
+  const int Scale = static_cast<int>(getMemScale(FirstMI));
+  const int OffsetStride = StackGrowsDown ? Scale : -Scale;
+  const int64_t MergedOffset = (MainOffset == PairedOffset + OffsetStride) ? PairedOffset : MainOffset;
+  if (MergedOffset % PairedAlignment != 0) {
+    DEBUG(dbgs() << "Offsets alignment out, skipping\n");
+    return false;
+  }
+
+  return true;
+}
+
+/// Checks if two load/store instructions have the same base, and their
+/// offsets differ by exactly \p OffsetStride in either direction
+static bool isBaseAndOffsetCorrect(unsigned MainBase, unsigned PairBase, int64_t MainOffset,
+                                   int64_t PairOffset, int OffsetStride) {
+  if (MainBase != PairBase)
+    return false;
+  return MainOffset == PairOffset + OffsetStride || MainOffset + OffsetStride == PairOffset;
+}
+
+/// Cleans register kill flags before merge
+///
+/// Can have two cases based on \p MergeForward value:
+/// If merging backward
+/// \code
+///   STRi32 %r0, ...
+///   USE %r1
+///   STRi32 kill %r1  ; need to clear kill flag when moving STRi32 upwards
+/// \endcode
+///
+/// If merging forward
+/// \code
+///   STRi32 %r1, ...
+///   USE kill %r1   ; need to clear kill flag when moving STRi32 downwards
+///   STRi32 %r0
+/// \endcode
+///
+/// \param RegOp0 Copy of the register operand of the first merged half
+/// \param RegOp1 Copy of the register operand of the second merged half
+/// \param I Iterator pointing at the first paired store
+/// \param Paired Iterator pointing at the second paired store
+/// \param MergeForward True if the pair is created at the position of \p Paired
+void EpiphanyVregLoadStoreOptimizer::cleanKillFlags(MachineOperand RegOp0, MachineOperand RegOp1,
+                                                    MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired,
+                                                    bool MergeForward) {
+  if (!MergeForward) {
+    // NOTE(review): RegOp0/RegOp1 are passed BY VALUE, so the two calls below
+    // only change local copies and nothing outside this function sees them.
+    // Making them effective needs reference parameters in the class
+    // declaration and at the call site.
+    RegOp0.setIsKill(false);
+    RegOp1.setIsKill(false);
+    return;
+  }
+
+  // The register of the first instruction will be read further down, at the
+  // position of Paired, so no instruction in between may kill it.
+  const unsigned Reg = getRegOperand(*I).getReg();
+  for (MachineInstr &MI : make_range(std::next(I), Paired))
+    MI.clearRegisterKills(Reg, TRI);
+}
+
+/// \brief Merges two virtual reg-based 32-bit load/store instructions into a single 64-bit one.
+///
+/// Both instructions must address their stack objects through frame indexes.
+/// The object of \p I is grown to the size and alignment of \p PairedOp, every
+/// frame index operand in the function that referred to the object of
+/// \p Paired is redirected to it, and the object of \p Paired is removed.
+///
+/// \param PairedOp Wide store/load operation opcode
+/// \param I Iterator pointing at the first paired store/load
+/// \param Paired Iterator pointing at the second paired store/load
+/// \param Flags Store/load flags
+///
+/// \return Builder of the last instruction that was inserted
+MachineInstrBuilder EpiphanyVregLoadStoreOptimizer::mergeFrameBasedInsns(unsigned PairedOp,
+                                                                         MachineBasicBlock::iterator I,
+                                                                         MachineBasicBlock::iterator Paired,
+                                                                         const LoadStoreFlags &Flags) {
+  MachineInstrBuilder MIB;
+  bool MergeForward = Flags.getMergeForward();
+  // The new instructions are inserted in front of whichever of the paired
+  // instructions MergeForward indicates.
+  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+
+  const MachineOperand &MainReg = getRegOperand(*I);
+  const MachineOperand &PairedReg = getRegOperand(*Paired);
+  const MachineOperand &MainBase = getBaseOperand(*I);
+  // Both frame indexes are still needed after I and Paired have been erased,
+  // when their operands are gone - copy the plain values out right away.
+  const int MainFI = MainBase.getIndex();
+  const int PairedFI = getBaseOperand(*Paired).getIndex();
+  const int64_t OffsetImm = getOffsetOperand(*I).getImm();
+
+  // Resolve target reg class
+  const TargetRegisterClass *RC = MRI->getRegClass(MainReg.getReg()) == &Epiphany::GPR32RegClass
+                                  ? &Epiphany::GPR64RegClass
+                                  : &Epiphany::FPR64RegClass;
+
+  // Get insertion parameters
+  unsigned parentReg = MRI->createVirtualRegister(RC);
+  DebugLoc DL = I->getDebugLoc();
+  MachineBasicBlock *MBB = I->getParent();
+
+  // Insert reg sequence
+  if (TII->get(PairedOp).mayStore()) {
+    // In terms of store - create regsequence before storing
+    const MCInstrDesc &RegSeq = TII->get(TargetOpcode::REG_SEQUENCE);
+    MIB = BuildMI(*MBB, InsertionPoint, DL, RegSeq, parentReg)
+        .addReg(MainReg.getReg())
+        .addImm(Epiphany::isub_lo)
+        .addReg(PairedReg.getReg())
+        .addImm(Epiphany::isub_hi);
+    DEBUG(dbgs() << "\t");
+    DEBUG(((MachineInstr *) MIB)->print(dbgs()));
+  }
+
+  // Insert paired instruction
+  unsigned flags = TII->get(PairedOp).mayLoad() ? RegState::Define : MainReg.getTargetFlags();
+  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(PairedOp))
+      .addReg(parentReg, flags)
+      .addOperand(MainBase)
+      .addImm(OffsetImm)
+      .setMemRefs(I->mergeMemRefsWith(*Paired));
+  DEBUG(dbgs() << "\t");
+  DEBUG(((MachineInstr *) MIB)->print(dbgs()));
+
+  if (TII->get(PairedOp).mayLoad()) {
+    // In terms of load - issue two copy instruction for vregs we had
+    const MCInstrDesc &Copy = TII->get(TargetOpcode::COPY);
+    MIB = BuildMI(*MBB, InsertionPoint, DL, Copy, MainReg.getReg())
+        .addReg(parentReg, /* flags = */ 0, Epiphany::isub_lo);
+    DEBUG(dbgs() << "\t");
+    DEBUG(((MachineInstr *) MIB)->print(dbgs()));
+    MIB = BuildMI(*MBB, InsertionPoint, DL, Copy, PairedReg.getReg())
+        .addReg(parentReg, /* flags = */ 0, Epiphany::isub_hi);
+    DEBUG(dbgs() << "\t");
+    DEBUG(((MachineInstr *) MIB)->print(dbgs()));
+  }
+
+  // Erasing old instructions. No operand reference taken above may be used
+  // past this point.
+  I->eraseFromParent();
+  Paired->eraseFromParent();
+
+  // Adjust alignment and update frame index and offset in all loads/stores related
+  MFI->setObjectAlignment(MainFI, getAlignment(PairedOp));
+  MFI->setObjectSize(MainFI, getMemScale(PairedOp));
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &MI : MBB) {
+      for (MachineOperand &MO : MI.operands()) {
+        if (MO.isFI() && MO.getIndex() == PairedFI) {
+          DEBUG(dbgs() << "Changing instruction\n\t";
+                MI.print(dbgs()));
+          MO.setIndex(MainFI);
+          // NOTE(review): the offset is overwritten with a fixed 4 and operand
+          // 2 is assumed to be an immediate for every user of the frame index
+          // - confirm that only 32-bit loads/stores can get here.
+          getOffsetOperand(MI).setImm(4);
+          DEBUG(dbgs() << "To\n\t";
+                MI.print(dbgs()));
+        }
+      }
+    }
+  }
+  MFI->RemoveStackObject(PairedFI);
+
+  return MIB;
+}
+
+/// \brief Emits the wide load/store replacing two register based load/stores.
+///
+/// For a store a REG_SEQUENCE combining both data registers is created first;
+/// for a load the wide result is split back with two sub-register COPYs. The
+/// original instructions are NOT erased here, the caller does that.
+///
+/// \param PairedOp Wide store/load operation opcode
+/// \param Paired Iterator pointing at the second paired store/load
+/// \param I Iterator pointing at the first paired store/load
+/// \param Flags Store/load flags
+/// \param OffsetImm Immediate offset of the wide instruction
+/// \param RegOp0 Data register operand that becomes the isub_lo half
+/// \param RegOp1 Data register operand that becomes the isub_hi half
+void EpiphanyVregLoadStoreOptimizer::mergeRegBasedInsns(unsigned int PairedOp, MachineBasicBlock::iterator Paired,
+                                                        MachineBasicBlock::iterator I,
+                                                        const LoadStoreFlags &Flags,
+                                                        int64_t OffsetImm,
+                                                        MachineOperand RegOp0,
+                                                        MachineOperand RegOp1) {
+  MachineInstrBuilder MIB;
+  bool MergeForward = Flags.getMergeForward();
+  // The new instructions are inserted in front of whichever of the paired
+  // instructions MergeForward indicates.
+  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+
+  // Also based on MergeForward is from where we copy the base register operand
+  // so we get the flags compatible with the input code.
+  // TODO: Hacky as hell, to be rewritten completely using fixed stack
+  const MachineOperand &BaseRegOp = MergeForward ? getBaseOperand(*Paired) : getBaseOperand(*I);
+  // The instruction that donates the base operand is the argument of the
+  // memory operand merge, the other one is the receiver.
+  const auto MemRefsOrder = MergeForward
+                            ? I->mergeMemRefsWith(*Paired)
+                            : Paired->mergeMemRefsWith(*I);
+
+  // Resolve target reg class
+  const TargetRegisterClass *RC = MRI->getRegClass(RegOp0.getReg()) == &Epiphany::GPR32RegClass ?
+                                  &Epiphany::GPR64RegClass : &Epiphany::FPR64RegClass;
+
+  // Get insertion parameters
+  unsigned parentReg = MRI->createVirtualRegister(RC);
+  DebugLoc DL = I->getDebugLoc();
+  MachineBasicBlock *MBB = I->getParent();
+
+  // Insert reg sequence
+  if (TII->get(PairedOp).mayStore()) {
+    // In terms of store - create regsequence before storing
+    const MCInstrDesc &RegSeq = TII->get(TargetOpcode::REG_SEQUENCE);
+    MIB = BuildMI(*MBB, InsertionPoint, DL, RegSeq, parentReg)
+        .addReg(RegOp0.getReg())
+        .addImm(Epiphany::isub_lo)
+        .addReg(RegOp1.getReg())
+        .addImm(Epiphany::isub_hi);
+    DEBUG(dbgs() << "\t");
+    DEBUG(((MachineInstr *) MIB)->print(dbgs()));
+  }
+
+  // Insert paired instruction
+  unsigned flags = TII->get(PairedOp).mayLoad() ? RegState::Define : RegOp0.getTargetFlags();
+  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(PairedOp))
+      .addReg(parentReg, flags)
+      .addOperand(BaseRegOp)
+      .addImm(OffsetImm)
+      .setMemRefs(MemRefsOrder);
+  DEBUG(dbgs() << "\t");
+  DEBUG(((MachineInstr *) MIB)->print(dbgs()));
+
+  if (TII->get(PairedOp).mayLoad()) {
+    // In terms of load - issue two copy instruction for vregs we had
+    const MCInstrDesc &Copy = TII->get(TargetOpcode::COPY);
+    MIB = BuildMI(*MBB, InsertionPoint, DL, Copy, RegOp0.getReg())
+        .addReg(parentReg, /* flags = */ 0, Epiphany::isub_lo);
+    DEBUG(dbgs() << "\t");
+    DEBUG(((MachineInstr *) MIB)->print(dbgs()));
+    MIB = BuildMI(*MBB, InsertionPoint, DL, Copy, RegOp1.getReg())
+        .addReg(parentReg, /* flags = */ 0, Epiphany::isub_hi);
+    DEBUG(dbgs() << "\t");
+    DEBUG(((MachineInstr *) MIB)->print(dbgs()));
+  }
+}
+
+
+/// Merges two n-bit load/store instructions into a single 2*n-bit one
+///
+/// \param I Iterator pointing at the first paired store/load
+/// \param Paired Iterator pointing at the second paired store/load
+/// \param Flags Flags filled in by findMatchingInst
+///
+/// \return Iterator to the instruction the block scan should continue with
+MachineBasicBlock::iterator
+EpiphanyVregLoadStoreOptimizer::mergePairedInsns(MachineBasicBlock::iterator I,
+                                                 MachineBasicBlock::iterator Paired, const LoadStoreFlags &Flags) {
+  MachineBasicBlock::iterator NextI = I;
+  ++NextI;
+  // If NextI is the second of the two instructions to be merged, we need
+  // to skip one further. Either way we merge will invalidate the iterator,
+  // and we don't need to scan the new instruction, as it's a pairwise
+  // instruction, which we're not considering for further action anyway.
+  if (NextI == Paired)
+    ++NextI;
+
+  unsigned Opc = I->getOpcode();
+  unsigned PairedOp = getMatchingPairOpcode(Opc);
+  DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n\t");
+  DEBUG(I->print(dbgs()));
+  DEBUG(dbgs() << "\t");
+  DEBUG(Paired->print(dbgs()));
+  DEBUG(dbgs() << " with instruction:\n");
+  if (Flags.isBasedOnVirtualFI()) {
+    // Erases I and Paired on its own.
+    mergeFrameBasedInsns(PairedOp, I, Paired, Flags);
+  } else {
+    bool MergeForward = Flags.getMergeForward();
+    const int64_t Offset = getOffsetOperand(*I).getImm();
+    const int64_t PairedOffset = getOffsetOperand(*Paired).getImm();
+    const int Scale = static_cast<int>(getMemScale(*I));
+    const int OffsetStride = StackGrowsDown ? Scale : -Scale;
+
+    // Which register is Rt and which is Rt2 depends on the offset order.
+    MachineInstr *RtMI, *Rt2MI;
+    if (Offset == PairedOffset + OffsetStride) {
+      RtMI = &*Paired;
+      Rt2MI = &*I;
+    } else {
+      RtMI = &*I;
+      Rt2MI = &*Paired;
+    }
+    // The wide instruction takes the offset of the Rt instruction.
+    int64_t OffsetImm = getOffsetOperand(*RtMI).getImm();
+    // Construct the new instruction.
+    MachineOperand RegOp0 = getRegOperand(*RtMI);
+    MachineOperand RegOp1 = getRegOperand(*Rt2MI);
+
+    // Kill flags may become invalid when moving stores for pairing.
+    if (RegOp0.isUse()) {
+      cleanKillFlags(RegOp0, RegOp1, I, Paired, MergeForward);
+    }
+
+    mergeRegBasedInsns(PairedOp, Paired, I, Flags, OffsetImm,
+                       RegOp0, RegOp1);
+
+    // Erase the old instructions.
+    I->eraseFromParent();
+    Paired->eraseFromParent();
+  }
+  DEBUG(dbgs() << "\n");
+
+  return NextI;
+}
+
+/// Scan the instructions looking for a load/store that can be combined with the
+/// current instruction into a wider equivalent or a load/store pair.
+///
+/// \param I Iterator pointing at the first instruction of the wanted pair
+/// \param Flags Receives the frame-index mode and the merge direction
+/// \param Limit Maximum number of non-transient instructions to look at
+///
+/// \return Iterator to the matching instruction, or the end of the block
+MachineBasicBlock::iterator
+EpiphanyVregLoadStoreOptimizer::findMatchingInst(MachineBasicBlock::iterator I,
+                                                 LoadStoreFlags &Flags, unsigned Limit) {
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator MBBI = I;
+  MachineInstr &FirstMI = *I;
+  ++MBBI;
+
+  bool MayLoad = FirstMI.mayLoad();
+  // Get first instruction reg data
+  // NOTE(review): virtReg2Index is applied without a virtual register check,
+  // here and for the candidate below - presumably data registers are always
+  // virtual at this point; confirm.
+  unsigned Reg = getRegOperand(FirstMI).getReg();
+  unsigned RegIdx = TRI->virtReg2Index(Reg);
+  // A base that is not a register (i.e. a frame index) is treated as FP.
+  unsigned BaseReg = getBaseOperand(FirstMI).isReg() ? getBaseOperand(FirstMI).getReg() : Epiphany::FP;
+  unsigned BaseRegIdx = TRI->isVirtualRegister(BaseReg) ? TRI->virtReg2Index(BaseReg) : BaseReg;
+
+  // Track which registers have been modified and used between the first insn
+  // (inclusive) and the second insn.
+  ModifiedRegs.reset();
+  UsedRegs.reset();
+  ModifiedFrameIdxs.reset();
+  UsedFrameIdxs.reset();
+
+  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+    MachineInstr &MI = *MBBI;
+    // Don't count transient instructions towards the search limit since there
+    // may be different numbers of them if e.g. debug information is present.
+    if (!MI.isTransient())
+      ++Count;
+
+    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII, MFI) &&
+        getOffsetOperand(MI).isImm()) {
+      assert(MI.mayLoadOrStore() && "Expected memory operation.");
+      // Get second instruction reg data
+      unsigned MIReg = getRegOperand(MI).getReg();
+      unsigned MIRegIdx = TRI->virtReg2Index(MIReg);
+      unsigned MIBaseReg = getBaseOperand(MI).isReg() ? getBaseOperand(MI).getReg() : Epiphany::FP;
+      // Get offsets. In frame-index mode the "offsets" are the frame indexes.
+      bool UsingVirtualFI = Flags.isBasedOnVirtualFI();
+      int64_t Offset = UsingVirtualFI ? getBaseOperand(FirstMI).getIndex() : getOffsetOperand(FirstMI).getImm();
+      int64_t MIOffset = UsingVirtualFI ? getBaseOperand(MI).getIndex() : getOffsetOperand(MI).getImm();
+
+      // If we've found another instruction with the same opcode, check to see
+      // if regs, base and offset are compatible with our starting instruction.
+      // Frame indexes have to be neighbours (+1/-1), immediate offsets have to
+      // differ by the memory scale of the access.
+      int OffsetStride = UsingVirtualFI ? 1 : getMemScale(FirstMI);
+      if (isBaseAndOffsetCorrect(BaseReg, MIBaseReg, Offset, MIOffset, OffsetStride)) {
+        DEBUG(dbgs() << "Checking instruction "; MI.dump());
+
+        // Check if the alignment is correct
+        if (!isAlignmentCorrect(FirstMI, MI, UsingVirtualFI)) {
+          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+          if (UsingVirtualFI) {
+            trackFrameIdxs(MI, ModifiedFrameIdxs, UsedFrameIdxs, MFI);
+          }
+          DEBUG(dbgs() << "Can't be paired due to alignment\n");
+          continue;
+        }
+
+        // If the destination register of the loads is the same register, bail
+        // and keep looking: both halves of the wide load cannot be copied into
+        // one and the same register.
+        if (MayLoad && Reg == getRegOperand(MI).getReg()) {
+          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+          if (UsingVirtualFI) {
+            trackFrameIdxs(MI, ModifiedFrameIdxs, UsedFrameIdxs, MFI);
+          }
+          DEBUG(dbgs() << "Can't merge into same reg\n");
+          continue;
+        }
+
+        // An index that already belongs to a pair may only be paired with the
+        // very same partner again.
+        // NOTE(review): in register mode the recorded values are immediate
+        // offsets, not frame indexes, yet they share this list - confirm that
+        // mixing the two is intended.
+        bool failed = false;
+        for (const auto &IdxPair : PairedIdxs) {
+          if ((Offset == IdxPair.first && MIOffset != IdxPair.second) ||
+              (Offset == IdxPair.second && MIOffset != IdxPair.first) ||
+              (MIOffset == IdxPair.first && Offset != IdxPair.second) ||
+              (MIOffset == IdxPair.second && Offset != IdxPair.first)) {
+            failed = true;
+            break;
+          }
+        }
+        if (failed) {
+          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+          if (UsingVirtualFI) {
+            trackFrameIdxs(MI, ModifiedFrameIdxs, UsedFrameIdxs, MFI);
+          }
+          DEBUG(dbgs() << "Can't merge as frame idx is already paired\n");
+          continue;
+        }
+
+        // If the Rt of the second instruction was not modified or used between
+        // the two instructions and none of the instructions between the second
+        // and first alias with the second, we can combine the second into the
+        // first.
+        if (!ModifiedRegs[MIRegIdx]) {
+          if (!(MI.mayLoad() && UsedRegs[MIRegIdx])) {
+            if (!UsingVirtualFI || !UsedFrameIdxs[MIOffset]) {
+              PairedIdxs.push_back(std::make_pair(Offset, MIOffset));
+              Flags.setMergeForward(false);
+              return MBBI;
+            }
+          }
+        } else {
+          DEBUG(dbgs() << "Proposed paired reg was modified, will try to merge forward\n");
+        }
+
+        // Likewise, if the Rt of the first instruction is not modified or used
+        // between the two instructions and none of the instructions between the
+        // first and the second alias with the first, we can combine the first
+        // into the second.
+        if (!ModifiedRegs[RegIdx] && !(MayLoad && UsedRegs[RegIdx])) {
+          if (!UsingVirtualFI || !UsedFrameIdxs[Offset]) {
+            PairedIdxs.push_back(std::make_pair(Offset, MIOffset));
+            Flags.setMergeForward(true);
+            return MBBI;
+          }
+        }
+        // Unable to combine these instructions due to interference in between.
+        // Keep looking.
+      }
+    }
+
+    // If the instruction wasn't a matching load or store. Stop searching if we
+    // encounter a call instruction that might modify memory.
+    if (MI.isCall())
+      return E;
+
+    // Update modified / uses register lists.
+    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+    trackFrameIdxs(MI, ModifiedFrameIdxs, UsedFrameIdxs, MFI);
+
+    // Otherwise, if the base register is modified, we have no match, so return early.
+    if (ModifiedRegs[BaseRegIdx])
+      return E;
+  }
+  return E;
+}
+
+// Find loads and stores that can be merged into a single load or store pair
+// instruction.
+//
+// On success MBBI is advanced to the instruction the scan has to continue
+// with and true is returned; otherwise MBBI is left untouched.
+bool EpiphanyVregLoadStoreOptimizer::tryToPairLoadStoreInst(MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock::iterator E = MI.getParent()->end();
+  DEBUG(dbgs() << "\nTrying to pair instruction: ";
+        MI.print(dbgs()););
+
+  if (!TII->isCandidateToMergeOrPair(MI)) {
+    DEBUG(dbgs() << "Not a candidate for merging\n");
+    return false;
+  }
+
+  // Early exit if the offset is not possible to match. One extra step is
+  // allowed in case a later insn matches with Offset-1.
+  int64_t Offset = getOffsetOperand(MI).getImm();
+  int OffsetStride = 1;
+  // Allow one more for offset.
+  if (Offset > 0)
+    Offset -= OffsetStride;
+  if (!inBoundsForPair(Offset)) {
+    DEBUG(dbgs() << "Out of bounds for pairing\n");
+    return false;
+  }
+
+  // Look ahead up to LdStLimit instructions for a pairable instruction.
+  LoadStoreFlags Flags;
+  MachineBasicBlock::iterator Paired =
+      findMatchingInst(MBBI, Flags, LdStLimit);
+  if (Paired != E) {
+    ++NumPairCreated;
+    // Keeping the iterator straight is a pain, so we let the merge routine tell
+    // us what the next instruction is after it's done mucking about.
+    MBBI = mergePairedInsns(MBBI, Paired, Flags);
+    return true;
+  } else {
+    DEBUG(dbgs() << "Unable to find matching instruction\n");
+  }
+  return false;
+}
+
+/// \brief Runs optimizer for the given MBB.
+///
+/// \param MBB Machine basic block to optimize
+///
+/// \return true if the block was modified
+bool EpiphanyVregLoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  // Find loads and stores that can be merged into a single load or store
+  // pair instruction.
+  // e.g.,
+  //   str r0, [fp]
+  //   str r1, [fp, #1]
+  //   ; becomes
+  //   strd r0, [fp]
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+       MBBI != E;) {
+    // A successful pairing already moved MBBI forward.
+    if (isPairableLoadStoreInst(*MBBI) && tryToPairLoadStoreInst(MBBI))
+      Modified = true;
+    else
+      ++MBBI;
+  }
+
+  return Modified;
+}
+
+
+INITIALIZE_PASS_BEGIN(EpiphanyVregLoadStoreOptimizer, "epiphany-vreg-ls-opt", "Epiphany Vreg Load Store Optimization", false, false)
+INITIALIZE_PASS_END(EpiphanyVregLoadStoreOptimizer, "epiphany-vreg-ls-opt", "Epiphany Vreg Load Store Optimization", false, false)
+
+/// \brief Pass entry point: pairs loads/stores in every block of \p Fn.
+///
+/// \param Fn Machine function to optimize
+///
+/// \return true if the function was modified
+bool EpiphanyVregLoadStoreOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+  DEBUG(dbgs() << "\nRunning Vreg Epiphany Load/Store Optimization Pass\n");
+  if (skipFunction(*Fn.getFunction()))
+    return false;
+
+  Subtarget = &static_cast<const EpiphanySubtarget &>(Fn.getSubtarget());
+  TII = Subtarget->getInstrInfo();
+  TRI = Subtarget->getRegisterInfo();
+  TFI = Subtarget->getFrameLowering();
+  MFI = &Fn.getFrameInfo();
+  MRI = &Fn.getRegInfo();
+  MF = &Fn;
+
+  // Get stack growth direction
+  StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+  LastLocalBlockOffset = StackGrowsDown ? -4 : 4;
+
+  // Frame indexes and offsets are only meaningful inside one function, so
+  // pairs recorded for a previous function must not block pairing in this one.
+  PairedIdxs.clear();
+
+  // Resize the modified and used register bitfield trackers. We do this once
+  // per function and then clear the bitfield each time we optimize a load or
+  // store.
+  ModifiedRegs.resize(MRI->getNumVirtRegs() + TRI->getNumRegs());
+  UsedRegs.resize(MRI->getNumVirtRegs() + TRI->getNumRegs());
+  ModifiedFrameIdxs.resize(MFI->getNumObjects());
+  UsedFrameIdxs.resize(MFI->getNumObjects());
+  ObjectMapped.reset();
+  ObjectMapped.resize(MFI->getNumObjects());
+
+  bool Modified = false;
+  for (auto &MBB : Fn) {
+    Modified |= optimizeBlock(MBB);
+    if (Modified) {
+      MFI->setUseLocalStackAllocationBlock(true);
+    }
+  }
+
+  // Adjust local frame block size
+  // NOTE(review): nothing in this file changes LastLocalBlockOffset after the
+  // initialisation above, so this evaluates to 0 - confirm that is intended.
+  int64_t LocalFrameSize = StackGrowsDown ? -LastLocalBlockOffset - 4 : LastLocalBlockOffset - 4;
+  MFI->setLocalFrameSize(LocalFrameSize);
+
+  return Modified;
+}
+
+/// createEpiphanyVregLoadStoreOptimizationPass - returns an instance of the
+/// load / store optimization pass.
+FunctionPass *llvm::createEpiphanyVregLoadStoreOptimizationPass() {
+  return new EpiphanyVregLoadStoreOptimizer();
+}
diff --git a/EpiphanyVregLoadStoreOptimizer.h b/EpiphanyVregLoadStoreOptimizer.h
new file mode 100644
index 0000000..96b3060
--- /dev/null
+++ b/EpiphanyVregLoadStoreOptimizer.h
@@ -0,0 +1,103 @@
+//===-------------------EpiphanyVregLoadStoreOptimizer.h------------------===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_EPIPHANY_EPIPHANYVREGLSOPASS_H
+#define LLVM_LIB_TARGET_EPIPHANY_EPIPHANYVREGLSOPASS_H
+
+#include "Epiphany.h"
+#include "EpiphanyConfig.h"
+#include "EpiphanyMachineFunction.h"
+#include "EpiphanySubtarget.h"
+#include "EpiphanyTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+  void initializeEpiphanyVregLoadStoreOptimizerPass(PassRegistry&);
+
+  struct LoadStoreFlags {
+    // If a matching instruction is found, MergeForward is set to true if the
+    // merge is to remove the first instruction and replace the second with
+    // a pair-wise insn, and false if the reverse is true.
+    bool MergeForward;
+    bool BasedOnVirtualFI; // True if both candidates use a frame index as their base
+    LoadStoreFlags() : MergeForward(false), BasedOnVirtualFI(true) {}
+
+    void setMergeForward(bool V = true) { MergeForward = V; }
+    bool getMergeForward() const { return MergeForward; }
+    void setBasedOnVirtualFI(bool V = true) { BasedOnVirtualFI = V; }
+    bool isBasedOnVirtualFI() const { return BasedOnVirtualFI; }
+  };
+
+  /// Machine function pass that merges two adjacent load/store instructions into one wide one.
+  class EpiphanyVregLoadStoreOptimizer : public MachineFunctionPass {
+
+  private:
+    const EpiphanyInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    const EpiphanySubtarget *Subtarget;
+    const EpiphanyFrameLowering *TFI;
+    MachineFunction *MF;
+    MachineRegisterInfo *MRI;
+    MachineFrameInfo *MFI;
+    // Track which registers and frame indexes have been modified and used.
+    BitVector ModifiedRegs, UsedRegs, ModifiedFrameIdxs, UsedFrameIdxs, ObjectMapped;
+    SmallVector<std::pair<int64_t, int64_t>, 128> PairedIdxs;
+    bool StackGrowsDown;
+    int64_t LastLocalBlockOffset = -4;
+
+    bool optimizeBlock(MachineBasicBlock &MBB);
+
+    bool tryToPairLoadStoreInst(MachineBasicBlock::iterator &MBBI);
+    bool isAlignmentCorrect(MachineInstr &FirstMI, MachineInstr &SecondMI, bool UsingVirtualFI);
+
+    MachineBasicBlock::iterator findMatchingInst(MachineBasicBlock::iterator I,
+                                                 LoadStoreFlags &Flags, unsigned Limit);
+
+    MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I,
+                                                 MachineBasicBlock::iterator Paired, const LoadStoreFlags &Flags);
+    MachineInstrBuilder mergeFrameBasedInsns(unsigned PairedOp,
+                                             MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired,
+                                             const LoadStoreFlags &Flags);
+
+    void cleanKillFlags(MachineOperand RegOp0, MachineOperand RegOp1,
+                        MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired,
+                        bool MergeForward);
+  public:
+    static char ID;
+    EpiphanyVregLoadStoreOptimizer() : MachineFunctionPass(ID) {
+      initializeEpiphanyVregLoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+    }
+
+    StringRef getPassName() const override {
+      return "Epiphany Vreg Load/Store Optimization Pass";
+    }
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
+    void mergeRegBasedInsns(unsigned int PairedOp, MachineBasicBlock::iterator Paired,
+                            MachineBasicBlock::iterator I, const LoadStoreFlags &Flags, int64_t OffsetImm,
+                            MachineOperand RegOp0, MachineOperand RegOp1);
+  };
+
+} // namespace llvm
+
+
+#endif
diff --git a/InstPrinter/EpiphanyInstPrinter.cpp b/InstPrinter/EpiphanyInstPrinter.cpp index 4bb2fd7..d5fb4d5 100644 --- a/InstPrinter/EpiphanyInstPrinter.cpp +++ b/InstPrinter/EpiphanyInstPrinter.cpp @@ -16,12 +16,7 @@ #include "MCTargetDesc/EpiphanyMCExpr.h" #include "EpiphanyInstrInfo.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" + using namespace llvm; #define DEBUG_TYPE "asm-printer" diff --git a/LLVMBuild.txt b/LLVMBuild.txt index 954976f..d96d23b 100644 --- a/LLVMBuild.txt +++ b/LLVMBuild.txt @@ -29,12 +29,15 @@ has_asmparser = 1 type = Library name = EpiphanyCodeGen parent = Epiphany -required_libraries = AsmPrinter CodeGen Core MC +required_libraries = Analysis AsmPrinter CodeGen Core IPO MC EpiphanyAsmPrinter EpiphanyDesc EpiphanyInfo + Scalar SelectionDAG Support Target + TransformUtils + Vectorize add_to_library_groups = Epiphany diff --git a/LLVM_Epiphany.patch b/LLVM_Epiphany.patch index ddf3dd6..c2973a8 100644 --- a/LLVM_Epiphany.patch +++ b/LLVM_Epiphany.patch @@ -1,3 +1,15 @@ +diff -Naur llvm-4.0.0.src.orig/cmake/config-ix.cmake llvm-4.0.0.src/cmake/config-ix.cmake +--- llvm-4.0.0.src.orig/cmake/config-ix.cmake 2017-01-07 01:16:00.000000000 +0200 ++++ llvm-4.0.0.src/cmake/config-ix.cmake 2017-06-08 16:32:55.230427324 +0300 +@@ -374,6 +374,8 @@ + set(LLVM_NATIVE_ARCH Mips) + elseif (LLVM_NATIVE_ARCH MATCHES "xcore") + set(LLVM_NATIVE_ARCH XCore) ++elseif (LLVM_NATIVE_ARCH MATCHES "epiphany") ++ set(LLVM_NATIVE_ARCH EPIPHANY) +elseif
(LLVM_NATIVE_ARCH MATCHES "msp430") + set(LLVM_NATIVE_ARCH MSP430) + elseif (LLVM_NATIVE_ARCH MATCHES "hexagon") diff -Naur llvm-4.0.0.src.orig/CMakeLists.txt llvm-4.0.0.src/CMakeLists.txt --- llvm-4.0.0.src.orig/CMakeLists.txt 2017-01-13 00:12:41.000000000 +0200 +++ llvm-4.0.0.src/CMakeLists.txt 2017-03-16 12:26:19.549648332 +0200 @@ -189,3 +201,13 @@ diff -Naur llvm-4.0.0.src.orig/lib/Target/LLVMBuild.txt llvm-4.0.0.src/lib/Targe Lanai Hexagon MSP430 +diff -Naur llvm-4.0.0.src.orig/utils/llvm-build/llvmbuild/componentinfo.pyc llvm-4.0.0.src/utils/llvm-build/llvmbuild/componentinfo.pyc +\ No newline at end of file +diff -Naur llvm-4.0.0.src.orig/utils/llvm-build/llvmbuild/configutil.pyc llvm-4.0.0.src/utils/llvm-build/llvmbuild/configutil.pyc +\ No newline at end of file +diff -Naur llvm-4.0.0.src.orig/utils/llvm-build/llvmbuild/__init__.pyc llvm-4.0.0.src/utils/llvm-build/llvmbuild/__init__.pyc +\ No newline at end of file +diff -Naur llvm-4.0.0.src.orig/utils/llvm-build/llvmbuild/main.pyc llvm-4.0.0.src/utils/llvm-build/llvmbuild/main.pyc +\ No newline at end of file +diff -Naur llvm-4.0.0.src.orig/utils/llvm-build/llvmbuild/util.pyc llvm-4.0.0.src/utils/llvm-build/llvmbuild/util.pyc +\ No newline at end of file diff --git a/TargetInfo/EpiphanyTargetInfo.cpp b/TargetInfo/EpiphanyTargetInfo.cpp index 079c8f3..fcb91a6 100644 --- a/TargetInfo/EpiphanyTargetInfo.cpp +++ b/TargetInfo/EpiphanyTargetInfo.cpp @@ -12,6 +12,5 @@ using namespace llvm; Target llvm::TheEpiphanyTarget; extern "C" void LLVMInitializeEpiphanyTargetInfo() { - RegisterTarget X(TheEpiphanyTarget, "epiphany", "Epiphany"); -} \ No newline at end of file + RegisterTarget X(TheEpiphanyTarget, "epiphany", "Epiphany"); +}