From c6fa8fb91dc42f9e0ed80f44bc62b145aa39b301 Mon Sep 17 00:00:00 2001 From: Rafael Auler Date: Wed, 24 Jul 2019 14:03:43 -0700 Subject: [PATCH] Support instrumentation via runtime library Summary: To allow the development of future instrumentation work, this patch adds support in BOLT for linking arbitrary libraries into the binary processed by BOLT. We use the ORC relocation handling mechanism for that. With this support, this patch also moves code programmatically generated in X86 assembly language by X86MCPlusBuilder to C code written in a new library called bolt_rt. Change CMake to support this library as an external project in the same way as clang does with compiler_rt. This library is installed in the lib/ folder relative to BOLT root installation and by default instrumentation will look for the library at that location to finish processing the binary with instrumentation. Reviewed By: maksfb Differential Revision: D16572013 fbshipit-source-id: ed9ae63969f --- CMakeLists.txt | 23 +++ runtime/CMakeLists.txt | 12 ++ runtime/instr.cpp | 139 +++++++++++++ src/CMakeLists.txt | 5 +- src/ExecutableFileMemoryManager.cpp | 10 +- src/ExecutableFileMemoryManager.h | 8 + src/MCPlusBuilder.h | 22 -- src/Passes/Instrumentation.cpp | 74 +++---- src/Passes/Instrumentation.h | 36 ---- src/RewriteInstance.cpp | 145 +++++++++++-- src/RewriteInstance.h | 13 +- src/Target/X86/X86MCPlusBuilder.cpp | 305 ---------------------------- src/llvm-bolt.cpp | 19 +- 13 files changed, 379 insertions(+), 432 deletions(-) create mode 100644 runtime/CMakeLists.txt create mode 100644 runtime/instr.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ae2f324..4f5664d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,29 @@ +include(ExternalProject) + set(BOLT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(BOLT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_CXX_STANDARD 14) +ExternalProject_Add(bolt_rt + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime" + STAMP_DIR 
${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps + BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins + CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} + -DCMAKE_INSTALL_PREFIX=${LLVM_BINARY_DIR} + # You might want to set this to True if actively developing bolt_rt, otherwise + # cmake will not rebuild it after source code changes + BUILD_ALWAYS True + ) + +install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins/cmake_install.cmake \)" + COMPONENT bolt_rt) + +add_llvm_install_targets(install-bolt_rt + DEPENDS bolt_rt + COMPONENT bolt_rt) + add_subdirectory(src) add_subdirectory(test) diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt new file mode 100644 index 0000000..39f1d62 --- /dev/null +++ b/runtime/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 3.1.0) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +project(libbolt_rt_project) + +add_library(bolt_rt STATIC + instr.cpp + ) + +install(TARGETS bolt_rt DESTINATION lib) diff --git a/runtime/instr.cpp b/runtime/instr.cpp new file mode 100644 index 0000000..3744dd3 --- /dev/null +++ b/runtime/instr.cpp @@ -0,0 +1,139 @@ +//===-- instr.cpp -----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// This file contains code that is linked to the final binary with a function +// that is called at program exit to dump instrumented data collected during +// execution. +// +//===----------------------------------------------------------------------===// +// +// BOLT runtime instrumentation library. 
+// +//===----------------------------------------------------------------------===// + +#include <cstdint> + +// All extern declarations here need to be defined by BOLT itself. + +// Counters inserted by instrumentation, incremented during runtime when +// points of interest (locations) in the program are reached. +extern uint64_t __bolt_instr_locations[]; +// Number of counters. +extern uint32_t __bolt_instr_num_locs; +// String table with function names. +extern char __bolt_instr_strings[]; +// Filename to dump data to. +extern char __bolt_instr_filename[]; + +// A location is a function name plus offset. Function name needs to be +// retrieved from the string table and is stored as an index to this table. +typedef struct _Location { + uint32_t FunctionName; + uint32_t Offset; +} Location; + +// An edge description defines an instrumented edge in the program, fully +// identified by where the jump is located and its destination. +typedef struct _EdgeDescription { + Location From; + Location To; +} EdgeDescription; + +extern EdgeDescription __bolt_instr_descriptions[]; + +// Declare some syscall wrappers we use throughout this code to avoid linking +// against system libc. 
+static uint64_t +myopen(const char *pathname, + uint64_t flags, + uint64_t mode) { + uint64_t ret; + __asm__ __volatile__ ( + "movq $2, %%rax\n" + "syscall" + : "=a"(ret) + : "D"(pathname), "S"(flags), "d"(mode) + : "cc", "rcx", "r11", "memory"); + return ret; +} + +static uint64_t mywrite(uint64_t fd, const void *buf, uint64_t count) { + uint64_t ret; + __asm__ __volatile__ ( + "movq $1, %%rax\n" + "syscall\n" + : "=a"(ret) + : "D"(fd), "S"(buf), "d"(count) + : "cc", "rcx", "r11", "memory"); + return ret; +} + +static int myclose(uint64_t fd) { + uint64_t ret; + __asm__ __volatile__ ( + "movq $3, %%rax\n" + "syscall\n" + : "=a"(ret) + : "D"(fd) + : "cc", "rcx", "r11", "memory"); + return ret; +} + +static char *intToStr(char *OutBuf, uint32_t Num, uint32_t Base) { + const char *Chars = "0123456789abcdef"; + char Buf[20]; + char *Ptr = Buf; + while (Num) { + *Ptr++ = *(Chars + (Num % Base)); + Num /= Base; + } + if (Ptr == Buf) { + *OutBuf++ = '0'; + return OutBuf; + } + while (Ptr != Buf) { + *OutBuf++ = *--Ptr; + } + return OutBuf; +} + +static char *serializeLoc(char *OutBuf, uint32_t FuncStrIndex, + uint32_t Offset) { + *OutBuf++ = '1'; + *OutBuf++ = ' '; + char *Str = __bolt_instr_strings + FuncStrIndex; + while (*Str) { + *OutBuf++ = *Str++; + } + *OutBuf++ = ' '; + OutBuf = intToStr(OutBuf, Offset, 16); + *OutBuf++ = ' '; + return OutBuf; +} + +extern "C" void __bolt_instr_data_dump() { + uint64_t FD = myopen(__bolt_instr_filename, + /*flags=*/0x241 /*O_WRONLY|O_TRUNC|O_CREAT*/, + /*mode=*/0666); + + for (int I = 0, E = __bolt_instr_num_locs; I < E; ++I) { + char LineBuf[2000]; + char *Ptr = LineBuf; + uint32_t HitCount = __bolt_instr_locations[I]; + if (!HitCount) + continue; + + EdgeDescription *Desc = &__bolt_instr_descriptions[I]; + Ptr = serializeLoc(Ptr, Desc->From.FunctionName, Desc->From.Offset); + Ptr = serializeLoc(Ptr, Desc->To.FunctionName, Desc->To.Offset); + *Ptr++ = '0'; + *Ptr++ = ' '; + Ptr = intToStr(Ptr, HitCount, 10); + *Ptr++ = '\n'; + 
mywrite(FD, LineBuf, Ptr - LineBuf); + } + myclose(FD); +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d265e98..34b846a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,7 +62,7 @@ set(LLVM_LINK_COMPONENTS string(FIND "${LLVM_TARGETS_TO_BUILD}" "AArch64" POSITION) if (NOT ${POSITION} EQUAL -1) list(APPEND LLVM_LINK_COMPONENTS BOLTTargetAArch64) - set(BOLT_AArcb64 On) + set(BOLT_AArch64 On) endif() string(FIND "${LLVM_TARGETS_TO_BUILD}" "X86" POSITION) @@ -101,9 +101,10 @@ add_llvm_tool(llvm-bolt DEPENDS intrinsics_gen + bolt_rt ) -if (DEFINED BOLT_AArcb64) +if (DEFINED BOLT_AArch64) target_compile_definitions(llvm-bolt PRIVATE AARCH64_AVAILABLE) endif() diff --git a/src/ExecutableFileMemoryManager.cpp b/src/ExecutableFileMemoryManager.cpp index 9744821..e1fa410 100644 --- a/src/ExecutableFileMemoryManager.cpp +++ b/src/ExecutableFileMemoryManager.cpp @@ -13,7 +13,7 @@ #include "RewriteInstance.h" #undef DEBUG_TYPE -#define DEBUG_TYPE "bolt" +#define DEBUG_TYPE "efmm" using namespace llvm; using namespace object; @@ -30,7 +30,7 @@ uint8_t *ExecutableFileMemoryManager::allocateSection(intptr_t Size, bool IsCode, bool IsReadOnly) { // Register a debug section as a note section. - if (RewriteInstance::isDebugSection(SectionName)) { + if (!ObjectsLoaded && RewriteInstance::isDebugSection(SectionName)) { uint8_t *DataCopy = new uint8_t[Size]; auto &Section = BC.registerOrUpdateNoteSection(SectionName, DataCopy, @@ -52,6 +52,11 @@ uint8_t *ExecutableFileMemoryManager::allocateSection(intptr_t Size, } const auto Flags = BinarySection::getFlags(IsReadOnly, IsCode, true); + SmallVector Buf; + if (ObjectsLoaded > 0) + SectionName = (Twine(SectionName) + ".bolt.extra." 
+ Twine(ObjectsLoaded)) + .toStringRef(Buf); + auto &Section = BC.registerOrUpdateSection(SectionName, ELF::SHT_PROGBITS, Flags, @@ -94,6 +99,7 @@ uint8_t *ExecutableFileMemoryManager::recordNoteSection( bool ExecutableFileMemoryManager::finalizeMemory(std::string *ErrMsg) { DEBUG(dbgs() << "BOLT: finalizeMemory()\n"); + ++ObjectsLoaded; return SectionMemoryManager::finalizeMemory(ErrMsg); } diff --git a/src/ExecutableFileMemoryManager.h b/src/ExecutableFileMemoryManager.h index ca820df..fced00e 100644 --- a/src/ExecutableFileMemoryManager.h +++ b/src/ExecutableFileMemoryManager.h @@ -55,6 +55,14 @@ private: bool AllowStubs; public: + // Our linker's main purpose is to handle a single object file, created + // by RewriteInstance after reading the input binary and reordering it. + // After objects finish loading, we increment this. Therefore, whenever + // this is greater than zero, we are dealing with additional objects that + // will not be managed by BinaryContext but only exist to support linking + // user-supplied objects into the main input executable. + uint32_t ObjectsLoaded{0}; + /// [start memory address] -> [segment info] mapping. std::map SegmentMapInfo; diff --git a/src/MCPlusBuilder.h b/src/MCPlusBuilder.h index 7326706..e002f1b 100644 --- a/src/MCPlusBuilder.h +++ b/src/MCPlusBuilder.h @@ -1738,28 +1738,6 @@ public: llvm_unreachable("not implemented"); return BlocksVectorTy(); } - - /// Part of the runtime library for instrumented code, this runs at the end - /// of the process and writes the current instrumentation counters to a file - /// compatible with BOLT profile. 
\p Locs identifies the region in memory - /// where the counters are (\p NumLocs counters), \p Descriptions, the region - /// encoding information about each counter, which is the source of the branch - /// and the destination, \p Strings, the string table with function names used - /// in descriptions, \p FilenameSym, the profile file name to write to, - /// \p Chars, a 0 to F string used for printing hex/decimal numbers. - virtual MultiBlocksCode createInstrumentedDataDumpCode( - MCSymbol *Locs, - MCSymbol *Descriptions, - MCSymbol *Strings, - MCSymbol *FilenameSym, - MCSymbol *Spaces, - MCSymbol *Chars, - size_t NumLocs, - MCContext *Ctx - ) const { - llvm_unreachable("not implemented"); - return MultiBlocksCode(); - } }; } // namespace bolt diff --git a/src/Passes/Instrumentation.cpp b/src/Passes/Instrumentation.cpp index 1bb4f88..8d0b95b 100644 --- a/src/Passes/Instrumentation.cpp +++ b/src/Passes/Instrumentation.cpp @@ -235,49 +235,6 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) { outs() << "BOLT-INSTRUMENTER: Instrumented " << InstrumentationSites << " sites, " << InstrumentationSitesSavingFlags << " saving flags.\n"; - - createDumpFunction(BC); - - DEBUG(DumpFunction->dump()); -} - -void Instrumentation::createDumpFunction(BinaryContext &BC) { - DumpFunction = - BC.createInjectedBinaryFunction("BOLTInstrumentationDataDump"); - Locs = BC.Ctx->createTempSymbol("BOLTInstrumentationLocs", true); - DescriptionsSym = - BC.Ctx->createTempSymbol("BOLTInstrumentationDescriptions", true); - StringsSym = BC.Ctx->createTempSymbol("BOLTInstrumentationStrings", true); - FilenameSym = BC.Ctx->createTempSymbol("BOLTInstrumentationFilename", true); - Spaces = BC.Ctx->createTempSymbol("BOLTInstrumentationSpaces", true); - Chars = BC.Ctx->createTempSymbol("BOLTInstrumentationChars", true); - auto Code = BC.MIB->createInstrumentedDataDumpCode( - Locs, DescriptionsSym, StringsSym, FilenameSym, Spaces, Chars, - Labels.size(), &*BC.Ctx); - - std::vector> BBs; - for 
(auto &SymBlock : Code.Blocks) { - auto &Symbol = SymBlock.first; - auto &Block = SymBlock.second; - BBs.emplace_back(DumpFunction->createBasicBlock( - BinaryBasicBlock::INVALID_OFFSET, Symbol)); - BBs.back()->addInstructions(Block.begin(), Block.end()); - BBs.back()->setCFIState(0); - } - auto BBIter = BBs.begin(); - for (auto &Succ : Code.Successors) { - if (Succ) - (*BBIter)->addSuccessor(DumpFunction->getBasicBlockForLabel(Succ), 0, - 0); - auto NextBBIter = std::next(BBIter); - if (NextBBIter != BBs.end()) - (*BBIter)->addSuccessor(NextBBIter->get(), 0, 0); - ++BBIter; - } - DumpFunction->insertBasicBlocks(nullptr, std::move(BBs), - /*UpdateLayout=*/true, - /*UpdateCFIState=*/false); - DumpFunction->updateState(BinaryFunction::State::CFG_Finalized); } void Instrumentation::emitDescription( @@ -295,22 +252,45 @@ void Instrumentation::emit(BinaryContext &BC, MCStreamer &Streamer) { auto *Section = BC.Ctx->getELFSection(".bolt.instrumentation", ELF::SHT_PROGBITS, Flags); + + // All of the following symbols will be exported as globals to be used by the + // instrumentation runtime library to dump the instrumentation data to disk. 
+ // Label marking start of the memory region containing instrumentation + // counters, total vector size is Labels.size() 8-byte counters + MCSymbol *Locs = BC.Ctx->getOrCreateSymbol("__bolt_instr_locations"); + MCSymbol *NumLocs = BC.Ctx->getOrCreateSymbol("__bolt_instr_num_locs"); + // Start of the vector with descriptions (one CounterDescription for each + // counter), vector size is Labels.size() CounterDescription-sized elmts + MCSymbol *DescriptionsSym = + BC.Ctx->getOrCreateSymbol("__bolt_instr_descriptions"); + // Label identifying where our string table was emitted to + MCSymbol *StringsSym = BC.Ctx->getOrCreateSymbol("__bolt_instr_strings"); + /// File name where profile is going to be written to after target binary + /// finishes a run + MCSymbol *FilenameSym = BC.Ctx->getOrCreateSymbol("__bolt_instr_filename"); + Streamer.SwitchSection(Section); Streamer.EmitLabel(Locs); + Streamer.EmitSymbolAttribute(Locs, + MCSymbolAttr::MCSA_Global); for (const auto &Label : Labels) { Streamer.EmitLabel(Label); Streamer.emitFill(8, 0); } + Streamer.EmitLabel(NumLocs); + Streamer.EmitSymbolAttribute(NumLocs, + MCSymbolAttr::MCSA_Global); + Streamer.EmitIntValue(Labels.size(), /*Size=*/4); Streamer.EmitLabel(DescriptionsSym); + Streamer.EmitSymbolAttribute(DescriptionsSym, + MCSymbolAttr::MCSA_Global); for (const auto &Desc : Descriptions) { emitDescription(Desc, Streamer); } Streamer.EmitLabel(StringsSym); + Streamer.EmitSymbolAttribute(StringsSym, + MCSymbolAttr::MCSA_Global); Streamer.EmitBytes(StringTable); - Streamer.EmitLabel(Spaces); - Streamer.EmitBytes(" "); - Streamer.EmitLabel(Chars); - Streamer.EmitBytes("0123456789abcdef"); Streamer.EmitLabel(FilenameSym); Streamer.EmitBytes(opts::InstrumentationFilename); Streamer.emitFill(1, 0); diff --git a/src/Passes/Instrumentation.h b/src/Passes/Instrumentation.h index 2ef8201..dc282c5 100644 --- a/src/Passes/Instrumentation.h +++ b/src/Passes/Instrumentation.h @@ -53,12 +53,6 @@ public: /// Emit data structures that 
will be necessary during runtime (second step) void emit(BinaryContext &BC, MCStreamer &Streamer); - /// Access the function injected by the instrumentation pass necessary to - /// write profile to a file. This is only valid after instrumentation - /// finished (step 1). - BinaryFunction *getDumpFunction() const { - return DumpFunction; - } private: // Instrumented branch location information struct CounterDescription { @@ -68,10 +62,6 @@ private: uint32_t ToOffset; }; - /// Create a new injected function that will be needed at runtime to write - /// profile - void createDumpFunction(BinaryContext &BC); - /// Retrieve the string table index for the name of \p Function. We encode /// instrumented locations descriptions with the aid of a string table to /// manage memory of the instrumentation runtime in a more efficient way. @@ -126,32 +116,6 @@ private: /// Identify all counters used in runtime while instrumentation is running std::vector Labels; - - /// Label marking start of the memory region containing instrumentation - /// counters, total vector size is Labels.size() 8-byte counters - MCSymbol *Locs; - - /// Start of the vector with descriptions (one CounterDescription for each - /// counter), vector size is Labels.size() CounterDescription-sized elmts - MCSymbol *DescriptionsSym; - - /// Label identifying where our string table was emitted to - MCSymbol *StringsSym; - - /// File name where profile is going to written to after target binary - /// finishes a run - MCSymbol *FilenameSym; - - /// Label for a string containing 8 spaces used by the algorithm that writes - /// profile during conversion of integer to string. \p Chars stores ASCII - /// representation of numbers from 0 to F. 
- MCSymbol *Spaces; - MCSymbol *Chars; - - /// We keep a pointer to our injected function whose final address will be - /// needed later to patch the destructor routines in the binary to call us - /// upon end of execution - BinaryFunction *DumpFunction; }; } diff --git a/src/RewriteInstance.cpp b/src/RewriteInstance.cpp index 1766b07..962137b 100644 --- a/src/RewriteInstance.cpp +++ b/src/RewriteInstance.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" @@ -53,6 +54,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Casting.h" @@ -101,6 +103,13 @@ Instrument("instrument-experimental", cl::ZeroOrMore, cl::cat(BoltOptCategory)); +static cl::opt +RuntimeInstrumentationLib("runtime-instrumentation-lib", + cl::desc("specify file name of the runtime instrumentation library"), + cl::ZeroOrMore, + cl::init("libbolt_rt.a"), + cl::cat(BoltOptCategory)); + static cl::opt ForceToDataRelocations("force-data-relocations", cl::desc("force relocations to data sections to always be processed"), @@ -591,6 +600,15 @@ void check_error(std::error_code EC, StringRef Message) { report_error(Message, EC); } +void check_error(Error E, Twine Message) { + if (!E) + return; + handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EIB) { + llvm::errs() << "BOLT-ERROR: '" << Message << "': " << EIB.message() + << '\n'; + exit(1); + }); +} } } @@ -750,8 +768,8 @@ createBinaryContext(ELFObjectFileBase *File, DataReader &DR, RewriteInstance::RewriteInstance(ELFObjectFileBase *File, DataReader &DR, DataAggregator &DA, const int Argc, - const char *const 
*Argv) - : InputFile(File), Argc(Argc), Argv(Argv), DA(DA), + const char *const *Argv, StringRef ToolPath) + : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath), DA(DA), BC(createBinaryContext( File, DR, DWARFContext::create(*File, nullptr, @@ -1104,7 +1122,7 @@ void RewriteInstance::run() { if (opts::DiffOnly) return; runOptimizationPasses(); - emitSections(); + emitAndLink(); }; outs() << "BOLT-INFO: Target architecture: " @@ -2997,8 +3015,8 @@ std::vector singletonSet(T t) { } // anonymous namespace -void RewriteInstance::emitSections() { - NamedRegionTimer T("emitSections", "emit sections", TimerGroupName, +void RewriteInstance::emitAndLink() { + NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName, TimerGroupDesc, opts::TimeRewrite); std::error_code EC; @@ -3071,12 +3089,18 @@ void RewriteInstance::emitSections() { auto Resolver = orc::createLegacyLookupResolver( [&](const std::string &Name) -> JITSymbol { DEBUG(dbgs() << "BOLT: looking for " << Name << "\n"); + if (EFMM->ObjectsLoaded) { + return OLT->findSymbol(Name, false); + } if (auto *I = BC->getBinaryDataByName(Name)) { const uint64_t Address = I->isMoved() && !I->isJumpTable() ? 
I->getOutputAddress() : I->getAddress(); + DEBUG(dbgs() << "Resolved to address 0x" << Twine::utohexstr(Address) + << "\n"); return JITSymbol(Address, JITSymbolFlags()); } + DEBUG(dbgs() << "Resolved to address 0x0\n"); return JITSymbol(nullptr); }, [](Error Err) { cantFail(std::move(Err), "lookup failed"); }); @@ -3087,6 +3111,8 @@ void RewriteInstance::emitSections() { SSP.reset(new decltype(SSP)::element_type()); ES.reset(new decltype(ES)::element_type(*SSP)); + // Key for our main object created out of the input binary + auto K = ES->allocateVModule(); OLT.reset(new decltype(OLT)::element_type( *ES, [this, &Resolver](orc::VModuleKey Key) { @@ -3099,22 +3125,32 @@ void RewriteInstance::emitSections() { // Loaded notifier [&](orc::VModuleKey Key, const object::ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &) { - // Assign addresses to all sections. - mapFileSections(Key); + // Assign addresses to all sections. If key corresponds to the object + // created by ourselves, call our regular mapping function. If we are + // loading additional objects as part of runtime libraries for + // instrumentation, treat them as extra sections. + if (Key == K) { + mapFileSections(Key); + } else { + mapExtraSections(Key); + } }, // Finalized notifier [&](orc::VModuleKey Key) { // Update output addresses based on the new section map and - // layout. - updateOutputValues(FinalLayout); + // layout. Only do this for the object created by ourselves. + if (Key == K) + updateOutputValues(FinalLayout); })); OLT->setProcessAllSections(true); - auto K = ES->allocateVModule(); cantFail(OLT->addObject(K, std::move(ObjectMemBuffer))); - cantFail(OLT->emitAndFinalize(K)); + // Link instrumentation runtime library + if (opts::Instrument) + linkRuntime(); + // Once the code is emitted, we can rename function sections to actual // output sections and de-register sections used for emission. 
if (!BC->HasRelocations) { @@ -3140,6 +3176,58 @@ void RewriteInstance::emitSections() { TempOut->keep(); } +void RewriteInstance::linkRuntime() { + OLT->setProcessAllSections(false); + std::string Dir = llvm::sys::path::parent_path(ToolPath); + SmallString<128> P(Dir); + P = llvm::sys::path::parent_path(Dir); + llvm::sys::path::append(P, "lib", opts::RuntimeInstrumentationLib); + std::string LibPath = P.str(); + if (!llvm::sys::fs::exists(LibPath)) { + errs() << "BOLT-ERROR: instrumentation library not found: " << LibPath + << "\n"; + exit(1); + } + ErrorOr> MaybeBuf = + MemoryBuffer::getFile(LibPath, -1, false); + check_error(MaybeBuf.getError(), LibPath); + std::unique_ptr B = std::move(MaybeBuf.get()); + file_magic Magic = identify_magic(B->getBuffer()); + + if (Magic == file_magic::archive) { + Error Err = Error::success(); + object::Archive Archive(B.get()->getMemBufferRef(), Err); + for (auto &C : Archive.children(Err)) { + auto ChildKey = ES->allocateVModule(); + auto ChildBuf = + MemoryBuffer::getMemBuffer(cantFail(C.getMemoryBufferRef())); + cantFail(OLT->addObject(ChildKey, std::move(ChildBuf))); + cantFail(OLT->emitAndFinalize(ChildKey)); + } + check_error(std::move(Err), B->getBufferIdentifier()); + } else if (Magic == file_magic::elf_relocatable || + Magic == file_magic::elf_shared_object) { + auto K2 = ES->allocateVModule(); + cantFail(OLT->addObject(K2, std::move(B))); + cantFail(OLT->emitAndFinalize(K2)); + } else { + errs() << "BOLT-ERROR: unrecognized instrumentation library format: " + << LibPath << "\n"; + exit(1); + } + InstrumentationRuntimeStartAddress = + cantFail(OLT->findSymbol("__bolt_instr_data_dump", false).getAddress()); + if (!InstrumentationRuntimeStartAddress) { + errs() << "BOLT-ERROR: instrumentation library does not define " + "__bolt_instr_data_dump: " + << LibPath << "\n"; + exit(1); + } + outs() << "BOLT-INFO: output linked against instrumentation runtime " + "library, lib entry point is 0x" + << 
Twine::utohexstr(InstrumentationRuntimeStartAddress) << "\n"; +} + void RewriteInstance::emitFunctions(MCStreamer *Streamer) { auto emit = [&](const std::vector &Functions) { for (auto *Function : Functions) { @@ -3469,6 +3557,29 @@ void RewriteInstance::mapDataSections(orc::VModuleKey Key) { } } +void RewriteInstance::mapExtraSections(orc::VModuleKey Key) { + assert(BC->HasRelocations && "Unsupported in non-relocation mode"); + + for (auto &Section : BC->allocatableSections()) { + if (Section.getOutputAddress() || !Section.hasValidSectionID()) + continue; + NextAvailableAddress = + alignTo(NextAvailableAddress, Section.getAlignment()); + Section.setOutputAddress(NextAvailableAddress); + NextAvailableAddress += Section.getOutputSize(); + + DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName() + << " at 0x" << Twine::utohexstr(Section.getAllocAddress()) + << " to 0x" << Twine::utohexstr(Section.getOutputAddress()) + << '\n'); + + OLT->mapSectionAddress(Key, Section.getSectionID(), + Section.getOutputAddress()); + Section.setFileOffset( + getFileOffsetForAddress(Section.getOutputAddress())); + } +} + void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) { SectionPatchers[".note.stapsdt"] = llvm::make_unique(); auto *SDTNotePatcher = static_cast( @@ -4716,6 +4827,7 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile *File) { "error accessing dynamic table"); const Elf_Dyn *DTE = cantFail(Obj->dynamic_table_end(DynamicPhdr), "error accessing dynamic table"); + bool FiniFound = false; for (auto *DE = DTB; DE != DTE; ++DE) { auto NewDE = *DE; bool ShouldPatch = true; @@ -4735,8 +4847,8 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile *File) { // FIXME: Put the old FINI pointer as a tail call in the generated // dumper function if (opts::Instrument && DE->getTag() == ELF::DT_FINI) { - NewDE.d_un.d_ptr = - Instrumenter->getDumpFunction()->getOutputAddress(); + NewDE.d_un.d_ptr = InstrumentationRuntimeStartAddress; + FiniFound = true; } break; 
case ELF::DT_FLAGS: @@ -4758,6 +4870,13 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile *File) { } } + if (opts::Instrument && !FiniFound) { + errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic " + "section but instrumentation currently relies on patching " + "DT_FINI to write the profile.\n"; + exit(1); + } + if (BC->RequiresZNow && !ZNowSet) { errs() << "BOLT-ERROR: output binary requires immediate relocation " "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in " diff --git a/src/RewriteInstance.h b/src/RewriteInstance.h index 7ba954c..3de49cf 100644 --- a/src/RewriteInstance.h +++ b/src/RewriteInstance.h @@ -47,7 +47,8 @@ class RewriteInstanceDiff; class RewriteInstance { public: RewriteInstance(llvm::object::ELFObjectFileBase *File, DataReader &DR, - DataAggregator &DA, const int Argc, const char *const *Argv); + DataAggregator &DA, const int Argc, const char *const *Argv, + StringRef ToolPath); ~RewriteInstance(); /// Reset all state except for split hints. Used to run a second pass with @@ -97,7 +98,10 @@ public: /// Write code and data into an intermediary object file, map virtual to real /// addresses and link the object file, resolving all relocations and /// performing final relaxation. - void emitSections(); + void emitAndLink(); + + /// Link additional runtime code to support instrumentation. + void linkRuntime(); /// Emit function code. void emitFunctions(MCStreamer *Streamer); @@ -121,6 +125,7 @@ public: void mapCodeSections(orc::VModuleKey ObjectsHandle); void mapDataSections(orc::VModuleKey ObjectsHandle); void mapFileSections(orc::VModuleKey ObjectsHandle); + void mapExtraSections(orc::VModuleKey ObjectsHandle); /// Update output object's values based on the final \p Layout. void updateOutputValues(const MCAsmLayout &Layout); @@ -321,6 +326,7 @@ private: /// Command line args used to process binary. 
const int Argc; const char *const *Argv; + StringRef ToolPath; /// Holds our data aggregator in case user supplied a raw perf data file. DataAggregator &DA; @@ -355,6 +361,9 @@ private: uint64_t NewTextSegmentOffset{0}; uint64_t NewTextSegmentSize{0}; + /// Extra linking + uint64_t InstrumentationRuntimeStartAddress{0}; + /// Track next available address for new allocatable sections. uint64_t NextAvailableAddress{0}; diff --git a/src/Target/X86/X86MCPlusBuilder.cpp b/src/Target/X86/X86MCPlusBuilder.cpp index ba774dc..9822d7c 100644 --- a/src/Target/X86/X86MCPlusBuilder.cpp +++ b/src/Target/X86/X86MCPlusBuilder.cpp @@ -3424,311 +3424,6 @@ public: return Results; } - MultiBlocksCode createInstrumentedDataDumpCode( - MCSymbol *Locs, - MCSymbol *Descriptions, - MCSymbol *Strings, - MCSymbol *FilenameSym, - MCSymbol *Spaces, - MCSymbol *Chars, - size_t NumLocs, - MCContext *Ctx - ) const override { - std::vector* Code; - MultiBlocksCode Result; -#define INS(x, y) Code->emplace_back(MCInstBuilder(x).y) -#define INS_NOARGS(x) Code->emplace_back(MCInstBuilder(x)) -#define REG(x) addReg(x) -#define IMM(x) addImm(x) -#define EXPR(x) \ - addExpr(MCSymbolRefExpr::create(x, MCSymbolRefExpr::VK_None, *Ctx)) -#define NOREG addReg(X86::NoRegister) -#define BEGIN_BLOCK(x) \ - Result.Blocks.emplace_back(std::make_pair<>(x, std::vector())); \ - Code = &Result.Blocks.back().second; \ - Result.Successors.emplace_back(nullptr); -#define BEGIN_BLOCK_FALLTHROUGH \ - Result.Blocks.emplace_back( \ - std::make_pair<>(Ctx->createTempSymbol(), std::vector())); \ - Code = &Result.Blocks.back().second; \ - Result.Successors.emplace_back(nullptr); -#define SET_SUCC(x) Result.Successors.back() = x; - - using namespace llvm::X86; - - // I know, this got ridiculously large, we should have a better way to - // write our runtime library for instrumentation. 
At this point I think it - // is kind of nice to do not depend on loading an object on disk to link - // against the input binary, but in the future it may be inevitable. - - // String buffer allocated on stack to store data sent to write() - const uint32_t COPY_BUFFER_SIZE = 0x1000; - // These constants depend on the target OS - const uint32_t OPEN_MODE = 0666; - const uint32_t OPEN_FLAGS = 0x241; // O_WRONLY|O_TRUNC|O_CREAT - const uint32_t SYSCALL_WRITE = 1; - const uint32_t SYSCALL_OPEN = 2; - const uint32_t SYSCALL_CLOSE = 3; - MCSymbol *LoopBody1 = Ctx->createTempSymbol(); // for each instrumented br - MCSymbol *LoopBody1_1 = Ctx->createTempSymbol(); // for each src/dst - MCSymbol *LoopBody1_1_1 = Ctx->createTempSymbol(); // write func name - MCSymbol *LoopBody1_1_2 = Ctx->createTempSymbol(); // write offset in hex - MCSymbol *LoopBody1_2 = Ctx->createTempSymbol(); // write branch frequency - MCSymbol *Loop1End = Ctx->createTempSymbol(); - - BEGIN_BLOCK_FALLTHROUGH; // Start our prologue - // pushq %rbx - INS(PUSH64r, REG(RBX)); - // pushq %r12 - INS(PUSH64r, REG(R12)); - // pushq %r13 - INS(PUSH64r, REG(R13)); - // pushq %r14 - INS(PUSH64r, REG(R14)); - // pushq %r15 - INS(PUSH64r, REG(R15)); - // subq $0x1000, %rsp // Reserve space for write buffer - INS(SUB64ri32, REG(RSP).REG(RSP).IMM(COPY_BUFFER_SIZE)); - // leaq filename(%rip), %rdi - INS(LEA64r, REG(RDI).REG(RIP).IMM(0x1).NOREG.EXPR(FilenameSym).NOREG); - // movq $0x241, %rsi // O_WRONLY|O_TRUNC|O_CREAT - INS(MOV64ri32, REG(RSI).IMM(OPEN_FLAGS)); - // movq $0666, %rdx // mode - INS(MOV64ri32, REG(RDX).IMM(OPEN_MODE)); - // mov $0x2, %rax - INS(MOV64ri32, REG(RAX).IMM(SYSCALL_OPEN)); - // syscall - INS_NOARGS(SYSCALL); // open() - // movq %rax, %r13 - INS(MOV64rr, REG(R13).REG(RAX)); - // xorq %r14, %r14 // Induction variable for main loop - // // over all intrumentation counters - INS(XOR64rr, REG(R14).REG(R14).REG(R14)); - // leaq label1(%rip), %r11 // Load start of descriptions vector - INS(LEA64r, 
REG(R11).REG(RIP).IMM(1).NOREG.EXPR(Descriptions).NOREG); - - // loopbody1: // Main loop - BEGIN_BLOCK(LoopBody1); - // movq %rsp, %r15 // Reset write buffer pointer - INS(MOV64rr, REG(R15).REG(RSP)); - - // xorq %rbx, %rbx // Induction variable for our loop of 2 - // // iterations to read From description and - // // then To description - INS(XOR64rr, REG(RBX).REG(RBX).REG(RBX)); - // loopbody1_1: - BEGIN_BLOCK(LoopBody1_1); - // movb $0x31, (%r15) // Write '1' and a space before func name - INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x31)); - // incq %r15 - INS(INC64r, REG(R15).REG(R15)); - // movb $0x20, (%r15) - INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x20)); - // incq %r15 - INS(INC64r, REG(R15).REG(R15)); - // leaq strings(%rip), %r12 // Load string table base - INS(LEA64r, REG(R12).REG(RIP).IMM(0x1).NOREG.EXPR(Strings).NOREG); - // addl (%r11), %r12d // Add string index - INS(ADD32rm, REG(R12D).REG(R12D).REG(R11).IMM(1).NOREG.IMM(0).NOREG); - // addq $4, %r11 - INS(ADD64ri8, REG(R11).REG(R11).IMM(4)); - - // loopbody1_1_1: // Loop over counter description string - // // copying it to our write buffer - BEGIN_BLOCK(LoopBody1_1_1); - // mov (%r12), %ax - INS(MOV8rm, REG(AX).REG(R12).IMM(1).NOREG.IMM(0).NOREG); - // mov %ax, (%r15) - INS(MOV8mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(AX)); - // incq %r15 - INS(INC64r, REG(R15).REG(R15)); - // incq %r12 - INS(INC64r, REG(R12).REG(R12)); - // test %ax, %ax - INS(TEST8rr, REG(AX).REG(AX)); - // jnz loopbody1_1_1 - INS(JNE_4, EXPR(LoopBody1_1_1)); - SET_SUCC(LoopBody1_1_1); - - BEGIN_BLOCK_FALLTHROUGH; // Copy " " to write buffer -- empty - // // spaces that will be overwritten with the - // // offset value in hex, right to left - // decq %r15 - INS(DEC64r, REG(R15).REG(R15)); - // movq spaces(%rip), %rdx - INS(MOV64rm, REG(RDX).REG(RIP).IMM(1).NOREG.EXPR(Spaces).NOREG); - // movq %rdx, (%r15) - INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX)); - // addq $8, %r15 - INS(ADD64ri8, 
REG(R15).REG(R15).IMM(8)); - // movq %rdx, (%r15) - INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX)); - // addq $8, %r15 - INS(ADD64ri8, REG(R15).REG(R15).IMM(8)); - // movq %r15, %r12 - INS(MOV64rr, REG(R12).REG(R15)); - // decq %r15 - INS(DEC64r, REG(R15).REG(R15)); - // xorq %rax, %rax - INS(XOR64rr, REG(RAX).REG(RAX).REG(RAX)); - // movl (%r11), %eax - INS(MOV32rm, REG(EAX).REG(R11).IMM(1).NOREG.IMM(0).NOREG); - // addq $4, %r11 - INS(ADD64ri8, REG(R11).REG(R11).IMM(4)); - - // loopbody1_1_2: // Loop to print address in hexadecimal - BEGIN_BLOCK(LoopBody1_1_2); - // decq %r15 - INS(DEC64r, REG(R15).REG(R15)); - // xorq %rdx, %rdx - INS(XOR64rr, REG(RDX).REG(RDX).REG(RDX)); - // movq $0x10, %rsi - INS(MOV64ri32, REG(RSI).IMM(0x10)); - // divq %rsi - INS(DIV64r, REG(RSI)); - // leaq chars(%rip), %rdi - INS(LEA64r, REG(RDI).REG(RIP).IMM(1).NOREG.EXPR(Chars).NOREG); - // mov (%rdi, %rdx, 1), %cx - INS(MOV8rm, REG(CX).REG(RDI).IMM(1).REG(RDX).IMM(0).NOREG); - // mov %cx, (%r15) - INS(MOV8mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(CX)); - // testq %rax, %rax - INS(TEST64rr, REG(RAX).REG(RAX)); - // jnz loopbody1_1_2 - INS(JNE_4, EXPR(LoopBody1_1_2)); - SET_SUCC(LoopBody1_1_2); - - BEGIN_BLOCK_FALLTHROUGH; - // movq %r12, %r15 // Loop end (2 iteration loop for From/To) - INS(MOV64rr, REG(R15).REG(R12)); - // incq %rbx - INS(INC64r, REG(RBX).REG(RBX)); - // cmpq $2, %rbx - INS(CMP64ri8, REG(RBX).IMM(2)); - // jne loopbody1_1 - INS(JNE_4, EXPR(LoopBody1_1)); - SET_SUCC(LoopBody1_1); - - BEGIN_BLOCK_FALLTHROUGH; // Copy " " to write buffer -- empty - // // spaces that will be overwritten with the - // // counter value in decimal, right to left - // movb $0x30, (%r15) // Write '0' and a space before counter val - // // representing zero mispredictions - INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x30)); - // incq %r15 - INS(INC64r, REG(R15).REG(R15)); - // movb $0x20, (%r15) - INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x20)); - // incq %r15 - 
INS(INC64r, REG(R15).REG(R15)); - // movq spaces(%rip), %rdx - INS(MOV64rm, REG(RDX).REG(RIP).IMM(1).NOREG.EXPR(Spaces).NOREG); - // movq %rdx, (%r15) - INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX)); - // addq $8, %r15 - INS(ADD64ri8, REG(R15).REG(R15).IMM(8)); - // movq %rdx, (%r15) - INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX)); - // addq $8, %r15 - INS(ADD64ri8, REG(R15).REG(R15).IMM(8)); - // movq %r15, %r12 - INS(MOV64rr, REG(R12).REG(R15)); - // leaq count1(%rip), %rdx - INS(LEA64r, REG(RDX).REG(RIP).IMM(1).NOREG.EXPR(Locs).NOREG); - // movq (%rdx, %r14, 8), %rax // Load current instrumentation counter value - INS(MOV64rm, REG(RAX).REG(RDX).IMM(8).REG(R14).IMM(0).NOREG); - // testq $rax, %rax - INS(TEST64rr, REG(RAX).REG(RAX)); - // je loop1end - INS(JE_4, EXPR(Loop1End)); - SET_SUCC(Loop1End); - - // loopbody1_2: // Loop to print counter value in decimal - BEGIN_BLOCK(LoopBody1_2); - // decq %r15 - INS(DEC64r, REG(R15).REG(R15)); - // xorq %rdx, %rdx - INS(XOR64rr, REG(RDX).REG(RDX).REG(RDX)); - // movq $0xa, %rsi - INS(MOV64ri32, REG(RSI).IMM(0xa)); - // divq %rsi - INS(DIV64r, REG(RSI)); - // leaq chars(%rip), %rdi - INS(LEA64r, REG(RDI).REG(RIP).IMM(1).NOREG.EXPR(Chars).NOREG); - // mov (%rdi, %rdx, 1), %cx - INS(MOV8rm, REG(CX).REG(RDI).IMM(1).REG(RDX).IMM(0).NOREG); - // mov %cx, (%r15) - INS(MOV8mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(CX)); - // testq %rax, %rax - INS(TEST64rr, REG(RAX).REG(RAX)); - // jnz loopbody1_2 - INS(JNE_4, EXPR(LoopBody1_2)); - SET_SUCC(LoopBody1_2); - - BEGIN_BLOCK_FALLTHROUGH; // Flush write buffer to file - // movb $0xa, (%r12) // Put a '\n' at the end of write buffer - INS(MOV8mi, REG(R12).IMM(1).NOREG.IMM(0).NOREG.IMM(0xa)); - // incq %r12 - INS(INC64r, REG(R12).REG(R12)); - // movq %r13, %rdi - INS(MOV64rr, REG(RDI).REG(R13)); - // movq %rsp, %rsi - INS(MOV64rr, REG(RSI).REG(RSP)); - // movq %r12, %rdx - INS(MOV64rr, REG(RDX).REG(R12)); - // subq %rsp, %rdx - INS(SUB64rr, 
REG(RDX).REG(RDX).REG(RSP)); - // movq $0x1, %rax - INS(MOV64ri32, REG(RAX).IMM(SYSCALL_WRITE)); - // pushq %r11 - INS(PUSH64r, REG(R11)); - // syscall // write() - INS_NOARGS(SYSCALL); - // popq %r11 - INS(POP64r, REG(R11)); - - // loop1end: // Main loop header - BEGIN_BLOCK(Loop1End); - // incq %r14 - INS(INC64r, REG(R14).REG(R14)); - // cmp $0xXXXXXX, %r14 - INS(CMP64ri32, REG(R14).IMM(NumLocs)); - // jnz loopbody1 - INS(JNE_4, EXPR(LoopBody1)); - SET_SUCC(LoopBody1); - - BEGIN_BLOCK_FALLTHROUGH; // Finish by closing file and returning - // movq %r13, %rdi - INS(MOV64rr, REG(RDI).REG(R13)); - // mov $0x3, %eax // close() - INS(MOV32ri, REG(EAX).IMM(SYSCALL_CLOSE)); - // syscall - INS_NOARGS(SYSCALL); - // addq $0x1000, %rsp - INS(ADD64ri32, REG(RSP).REG(RSP).IMM(0x1000)); - // popq %r15 - INS(POP64r, REG(R15)); - // popq %r14 - INS(POP64r, REG(R14)); - // popq %r13 - INS(POP64r, REG(R13)); - // popq %r12 - INS(POP64r, REG(R12)); - // popq %rbx - INS(POP64r, REG(RBX)); - // ret - INS_NOARGS(RETQ); -#undef INS -#undef INS_NOARGS -#undef REG -#undef IMM -#undef EXPR -#undef NOREG -#undef BEGIN_BLOCK -#undef BEGIN_BLOCK_FALLTHROUGH - return Result; - } - }; } diff --git a/src/llvm-bolt.cpp b/src/llvm-bolt.cpp index ed943ad..8ecd792 100644 --- a/src/llvm-bolt.cpp +++ b/src/llvm-bolt.cpp @@ -18,6 +18,7 @@ #include "RewriteInstance.h" #include "llvm/Object/Binary.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Signals.h" @@ -229,6 +230,16 @@ void boltMode(int argc, char **argv) { } } +std::string GetExecutablePath(const char *Argv0) { + SmallString<128> ExecutablePath(Argv0); + // Do a PATH lookup if Argv0 isn't a valid path. 
+ if (!llvm::sys::fs::exists(ExecutablePath)) + if (llvm::ErrorOr P = + llvm::sys::findProgramByName(ExecutablePath)) + ExecutablePath = *P; + return ExecutablePath.str(); +} + int main(int argc, char **argv) { // Print a stack trace if we signal out. sys::PrintStackTraceOnErrorSignal(argv[0]); @@ -236,6 +247,8 @@ int main(int argc, char **argv) { llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + std::string ToolPath = GetExecutablePath(argv[0]); + // Initialize targets and assembly printers/parsers. llvm::InitializeAllTargetInfos(); llvm::InitializeAllTargetMCs(); @@ -309,7 +322,7 @@ int main(int argc, char **argv) { Binary &Binary = *BinaryOrErr.get().getBinary(); if (auto *e = dyn_cast(&Binary)) { - RewriteInstance RI(e, *DR.get(), *DA.get(), argc, argv); + RewriteInstance RI(e, *DR.get(), *DA.get(), argc, argv, ToolPath); RI.run(); } else { report_error(opts::InputFilename, object_error::invalid_file_type); @@ -342,8 +355,8 @@ int main(int argc, char **argv) { if (auto *ELFObj1 = dyn_cast(&Binary1)) { if (auto *ELFObj2 = dyn_cast(&Binary2)) { - RewriteInstance RI1(ELFObj1, *DR.get(), *DA.get(), argc, argv); - RewriteInstance RI2(ELFObj2, *DR2.get(), *DA.get(), argc, argv); + RewriteInstance RI1(ELFObj1, *DR.get(), *DA.get(), argc, argv, ToolPath); + RewriteInstance RI2(ELFObj2, *DR2.get(), *DA.get(), argc, argv, ToolPath); outs() << "BOLT-DIFF: *** Analyzing binary 1: " << opts::InputFilename << "\n"; outs() << "BOLT-DIFF: *** Binary 1 fdata: " << opts::InputDataFilename