Support instrumentation via runtime library
Summary: To allow the development of future instrumentation work, this patch adds support in BOLT for linking arbitrary libraries into the binary processed by BOLT. We use the ORC relocation handling mechanism for that. With this support, this patch also moves code programmatically generated in X86 assembly language by X86MCPlusBuilder to C code written in a new library called bolt_rt. Change CMake to support this library as an external project in the same way as clang does with compiler_rt. This library is installed in the lib/ folder relative to BOLT root installation and by default instrumentation will look for the library at that location to finish processing the binary with instrumentation. Reviewed By: maksfb Differential Revision: D16572013 fbshipit-source-id: ed9ae63969f
This commit is contained in:
parent
6a339b9949
commit
c6fa8fb91d
@ -1,6 +1,29 @@
|
||||
include(ExternalProject)
|
||||
|
||||
set(BOLT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
set(BOLT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
||||
ExternalProject_Add(bolt_rt
|
||||
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime"
|
||||
STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps
|
||||
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins
|
||||
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
|
||||
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
|
||||
-DCMAKE_INSTALL_PREFIX=${LLVM_BINARY_DIR}
|
||||
# BUILD_ALWAYS is enabled so that bolt_rt is rebuilt after source code changes;
|
||||
# without it, cmake will not rebuild the external project when its sources change
|
||||
BUILD_ALWAYS True
|
||||
)
|
||||
|
||||
install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins/cmake_install.cmake \)"
|
||||
COMPONENT bolt_rt)
|
||||
|
||||
add_llvm_install_targets(install-bolt_rt
|
||||
DEPENDS bolt_rt
|
||||
COMPONENT bolt_rt)
|
||||
|
||||
add_subdirectory(src)
|
||||
add_subdirectory(test)
|
||||
|
||||
12
runtime/CMakeLists.txt
Normal file
12
runtime/CMakeLists.txt
Normal file
@ -0,0 +1,12 @@
|
||||
cmake_minimum_required(VERSION 3.1.0)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

project(libbolt_rt_project)

# Static archive holding the instrumentation runtime that BOLT links into the
# binaries it processes.
add_library(bolt_rt STATIC
  instr.cpp
  )

# The runtime issues raw system calls precisely so that it does not depend on
# the system libc, so the compiler must not introduce references to hosted
# runtime support (stack-protector hooks, exception tables, RTTI) that nothing
# in the rewritten binary is guaranteed to resolve.
target_compile_options(bolt_rt PRIVATE
  -ffreestanding
  -fno-exceptions
  -fno-rtti
  -fno-stack-protector
  )

install(TARGETS bolt_rt DESTINATION lib)
|
||||
139
runtime/instr.cpp
Normal file
139
runtime/instr.cpp
Normal file
@ -0,0 +1,139 @@
|
||||
//===-- instr.cpp -----------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
// This file contains code that is linked to the final binary with a function
|
||||
// that is called at program exit to dump instrumented data collected during
|
||||
// execution.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// BOLT runtime instrumentation library.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// All extern declarations here need to be defined by BOLT itself.
|
||||
|
||||
// Counters inserted by instrumentation, incremented during runtime when
|
||||
// points of interest (locations) in the program are reached.
|
||||
extern uint64_t __bolt_instr_locations[];
|
||||
// Number of counters.
|
||||
extern uint32_t __bolt_instr_num_locs;
|
||||
// String table with function names.
|
||||
extern char __bolt_instr_strings[];
|
||||
// Filename to dump data to.
|
||||
extern char __bolt_instr_filename[];
|
||||
|
||||
/// A location is a function name plus an offset. The function name is not
/// stored inline: FunctionName is an index into the string table
/// (__bolt_instr_strings) from which the name is retrieved.
struct Location {
  uint32_t FunctionName; ///< Index of the function name in the string table.
  uint32_t Offset;       ///< Offset that, with the name, forms the location.
};

/// An edge description defines an instrumented edge in the program, fully
/// identified by where the jump is located (From) and its destination (To).
struct EdgeDescription {
  Location From;
  Location To;
};
|
||||
|
||||
extern EdgeDescription __bolt_instr_descriptions[];
|
||||
|
||||
// Declare some syscall wrappers we use throughout this code to avoid linking
|
||||
// against system libc.
|
||||
/// Issue system call number 2 (open) directly through the x86-64 `syscall`
/// instruction, bypassing libc.
///
/// \p pathname, \p flags and \p mode are handed to the kernel in rdi, rsi and
/// rdx respectively. Returns whatever value the kernel leaves in rax.
static uint64_t myopen(const char *pathname, uint64_t flags, uint64_t mode) {
  const uint64_t SyscallNumber = 2;
  uint64_t Result;
  // rcx and r11 are listed as clobbers because `syscall` overwrites them.
  __asm__ __volatile__("syscall"
                       : "=a"(Result)
                       : "a"(SyscallNumber), "D"(pathname), "S"(flags),
                         "d"(mode)
                       : "cc", "rcx", "r11", "memory");
  return Result;
}
|
||||
|
||||
/// Issue system call number 1 (write) directly through the x86-64 `syscall`
/// instruction, bypassing libc.
///
/// \p fd, \p buf and \p count are handed to the kernel in rdi, rsi and rdx
/// respectively. Returns whatever value the kernel leaves in rax.
static uint64_t mywrite(uint64_t fd, const void *buf, uint64_t count) {
  const uint64_t SyscallNumber = 1;
  uint64_t Result;
  // rcx and r11 are listed as clobbers because `syscall` overwrites them.
  __asm__ __volatile__("syscall"
                       : "=a"(Result)
                       : "a"(SyscallNumber), "D"(fd), "S"(buf), "d"(count)
                       : "cc", "rcx", "r11", "memory");
  return Result;
}
|
||||
|
||||
/// Issue system call number 3 (close) directly through the x86-64 `syscall`
/// instruction, bypassing libc.
///
/// \p fd is handed to the kernel in rdi. The value the kernel leaves in rax
/// is returned, narrowed to int.
static int myclose(uint64_t fd) {
  const uint64_t SyscallNumber = 3;
  uint64_t Result;
  // rcx and r11 are listed as clobbers because `syscall` overwrites them.
  __asm__ __volatile__("syscall"
                       : "=a"(Result)
                       : "a"(SyscallNumber), "D"(fd)
                       : "cc", "rcx", "r11", "memory");
  return static_cast<int>(Result);
}
|
||||
|
||||
/// Write the textual representation of \p Num in base \p Base to \p OutBuf,
/// most significant digit first, using lowercase letters for digits above 9.
/// Zero is written as a single '0'. No terminating NUL is written.
///
/// \p Base must be in the range [2, 16]; for any other value nothing is
/// written and \p OutBuf is returned unchanged. The caller must provide room
/// for up to 32 characters (the length of a 32-bit value in base 2).
///
/// \returns a pointer one past the last character written.
static char *intToStr(char *OutBuf, uint32_t Num, uint32_t Base) {
  // A base of 0 would divide by zero, a base of 1 would never terminate and a
  // base above 16 would index past the end of the digit table.
  if (Base < 2 || Base > 16)
    return OutBuf;

  const char *Chars = "0123456789abcdef";
  // Digits are produced least-significant first, so stage them here and copy
  // them out reversed. 32 entries cover the longest case: 32 bits in base 2.
  char Buf[32];
  char *Ptr = Buf;
  do {
    *Ptr++ = Chars[Num % Base];
    Num /= Base;
  } while (Num);

  while (Ptr != Buf)
    *OutBuf++ = *--Ptr;
  return OutBuf;
}
|
||||
|
||||
static char *serializeLoc(char *OutBuf, uint32_t FuncStrIndex,
|
||||
uint32_t Offset) {
|
||||
*OutBuf++ = '1';
|
||||
*OutBuf++ = ' ';
|
||||
char *Str = __bolt_instr_strings + FuncStrIndex;
|
||||
while (*Str) {
|
||||
*OutBuf++ = *Str++;
|
||||
}
|
||||
*OutBuf++ = ' ';
|
||||
OutBuf = intToStr(OutBuf, Offset, 16);
|
||||
*OutBuf++ = ' ';
|
||||
return OutBuf;
|
||||
}
|
||||
|
||||
extern "C" void __bolt_instr_data_dump() {
|
||||
uint64_t FD = myopen(__bolt_instr_filename,
|
||||
/*flags=*/0x241 /*O_WRONLY|O_TRUNC|O_CREAT*/,
|
||||
/*mode=*/0666);
|
||||
|
||||
for (int I = 0, E = __bolt_instr_num_locs; I < E; ++I) {
|
||||
char LineBuf[2000];
|
||||
char *Ptr = LineBuf;
|
||||
uint32_t HitCount = __bolt_instr_locations[I];
|
||||
if (!HitCount)
|
||||
continue;
|
||||
|
||||
EdgeDescription *Desc = &__bolt_instr_descriptions[I];
|
||||
Ptr = serializeLoc(Ptr, Desc->From.FunctionName, Desc->From.Offset);
|
||||
Ptr = serializeLoc(Ptr, Desc->To.FunctionName, Desc->To.Offset);
|
||||
*Ptr++ = '0';
|
||||
*Ptr++ = ' ';
|
||||
Ptr = intToStr(Ptr, HitCount, 10);
|
||||
*Ptr++ = '\n';
|
||||
mywrite(FD, LineBuf, Ptr - LineBuf);
|
||||
}
|
||||
myclose(FD);
|
||||
}
|
||||
@ -62,7 +62,7 @@ set(LLVM_LINK_COMPONENTS
|
||||
string(FIND "${LLVM_TARGETS_TO_BUILD}" "AArch64" POSITION)
|
||||
if (NOT ${POSITION} EQUAL -1)
|
||||
list(APPEND LLVM_LINK_COMPONENTS BOLTTargetAArch64)
|
||||
set(BOLT_AArcb64 On)
|
||||
set(BOLT_AArch64 On)
|
||||
endif()
|
||||
|
||||
string(FIND "${LLVM_TARGETS_TO_BUILD}" "X86" POSITION)
|
||||
@ -101,9 +101,10 @@ add_llvm_tool(llvm-bolt
|
||||
|
||||
DEPENDS
|
||||
intrinsics_gen
|
||||
bolt_rt
|
||||
)
|
||||
|
||||
if (DEFINED BOLT_AArcb64)
|
||||
if (DEFINED BOLT_AArch64)
|
||||
target_compile_definitions(llvm-bolt PRIVATE AARCH64_AVAILABLE)
|
||||
endif()
|
||||
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
#include "RewriteInstance.h"
|
||||
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "bolt"
|
||||
#define DEBUG_TYPE "efmm"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace object;
|
||||
@ -30,7 +30,7 @@ uint8_t *ExecutableFileMemoryManager::allocateSection(intptr_t Size,
|
||||
bool IsCode,
|
||||
bool IsReadOnly) {
|
||||
// Register a debug section as a note section.
|
||||
if (RewriteInstance::isDebugSection(SectionName)) {
|
||||
if (!ObjectsLoaded && RewriteInstance::isDebugSection(SectionName)) {
|
||||
uint8_t *DataCopy = new uint8_t[Size];
|
||||
auto &Section = BC.registerOrUpdateNoteSection(SectionName,
|
||||
DataCopy,
|
||||
@ -52,6 +52,11 @@ uint8_t *ExecutableFileMemoryManager::allocateSection(intptr_t Size,
|
||||
}
|
||||
|
||||
const auto Flags = BinarySection::getFlags(IsReadOnly, IsCode, true);
|
||||
SmallVector<char, 256> Buf;
|
||||
if (ObjectsLoaded > 0)
|
||||
SectionName = (Twine(SectionName) + ".bolt.extra." + Twine(ObjectsLoaded))
|
||||
.toStringRef(Buf);
|
||||
|
||||
auto &Section = BC.registerOrUpdateSection(SectionName,
|
||||
ELF::SHT_PROGBITS,
|
||||
Flags,
|
||||
@ -94,6 +99,7 @@ uint8_t *ExecutableFileMemoryManager::recordNoteSection(
|
||||
|
||||
/// Finalize memory for the object that has just been loaded by delegating to
/// SectionMemoryManager, after recording that one more object was processed.
/// The result of the base-class call is returned unchanged.
bool ExecutableFileMemoryManager::finalizeMemory(std::string *ErrMsg) {
  DEBUG(dbgs() << "BOLT: finalizeMemory()\n");
  // Count this object before delegating, so the counter is non-zero for every
  // object handled after the first one.
  ObjectsLoaded += 1;
  const bool Result = SectionMemoryManager::finalizeMemory(ErrMsg);
  return Result;
}
|
||||
|
||||
|
||||
@ -55,6 +55,14 @@ private:
|
||||
bool AllowStubs;
|
||||
|
||||
public:
|
||||
// Our linker's main purpose is to handle a single object file, created
|
||||
// by RewriteInstance after reading the input binary and reordering it.
|
||||
// After objects finish loading, we increment this. Therefore, whenever
|
||||
// this is greater than zero, we are dealing with additional objects that
|
||||
// will not be managed by BinaryContext but only exist to support linking
|
||||
// user-supplied objects into the main input executable.
|
||||
uint32_t ObjectsLoaded{0};
|
||||
|
||||
/// [start memory address] -> [segment info] mapping.
|
||||
std::map<uint64_t, SegmentInfo> SegmentMapInfo;
|
||||
|
||||
|
||||
@ -1738,28 +1738,6 @@ public:
|
||||
llvm_unreachable("not implemented");
|
||||
return BlocksVectorTy();
|
||||
}
|
||||
|
||||
/// Part of the runtime library for instrumented code, this runs at the end
|
||||
/// of the process and writes the current instrumentation counters to a file
|
||||
/// compatible with BOLT profile. \p Locs identifies the region in memory
|
||||
/// where the counters are (\p NumLocs counters), \p Descriptions, the region
|
||||
/// encoding information about each counter, which is the source of the branch
|
||||
/// and the destination, \p Strings, the string table with function names used
|
||||
/// in descriptions, \p FilenameSym, the profile file name to write to,
|
||||
/// \p Chars, a 0 to F string used for printing hex/decimal numbers.
|
||||
virtual MultiBlocksCode createInstrumentedDataDumpCode(
|
||||
MCSymbol *Locs,
|
||||
MCSymbol *Descriptions,
|
||||
MCSymbol *Strings,
|
||||
MCSymbol *FilenameSym,
|
||||
MCSymbol *Spaces,
|
||||
MCSymbol *Chars,
|
||||
size_t NumLocs,
|
||||
MCContext *Ctx
|
||||
) const {
|
||||
llvm_unreachable("not implemented");
|
||||
return MultiBlocksCode();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace bolt
|
||||
|
||||
@ -235,49 +235,6 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) {
|
||||
|
||||
outs() << "BOLT-INSTRUMENTER: Instrumented " << InstrumentationSites
|
||||
<< " sites, " << InstrumentationSitesSavingFlags << " saving flags.\n";
|
||||
|
||||
createDumpFunction(BC);
|
||||
|
||||
DEBUG(DumpFunction->dump());
|
||||
}
|
||||
|
||||
void Instrumentation::createDumpFunction(BinaryContext &BC) {
|
||||
DumpFunction =
|
||||
BC.createInjectedBinaryFunction("BOLTInstrumentationDataDump");
|
||||
Locs = BC.Ctx->createTempSymbol("BOLTInstrumentationLocs", true);
|
||||
DescriptionsSym =
|
||||
BC.Ctx->createTempSymbol("BOLTInstrumentationDescriptions", true);
|
||||
StringsSym = BC.Ctx->createTempSymbol("BOLTInstrumentationStrings", true);
|
||||
FilenameSym = BC.Ctx->createTempSymbol("BOLTInstrumentationFilename", true);
|
||||
Spaces = BC.Ctx->createTempSymbol("BOLTInstrumentationSpaces", true);
|
||||
Chars = BC.Ctx->createTempSymbol("BOLTInstrumentationChars", true);
|
||||
auto Code = BC.MIB->createInstrumentedDataDumpCode(
|
||||
Locs, DescriptionsSym, StringsSym, FilenameSym, Spaces, Chars,
|
||||
Labels.size(), &*BC.Ctx);
|
||||
|
||||
std::vector<std::unique_ptr<BinaryBasicBlock>> BBs;
|
||||
for (auto &SymBlock : Code.Blocks) {
|
||||
auto &Symbol = SymBlock.first;
|
||||
auto &Block = SymBlock.second;
|
||||
BBs.emplace_back(DumpFunction->createBasicBlock(
|
||||
BinaryBasicBlock::INVALID_OFFSET, Symbol));
|
||||
BBs.back()->addInstructions(Block.begin(), Block.end());
|
||||
BBs.back()->setCFIState(0);
|
||||
}
|
||||
auto BBIter = BBs.begin();
|
||||
for (auto &Succ : Code.Successors) {
|
||||
if (Succ)
|
||||
(*BBIter)->addSuccessor(DumpFunction->getBasicBlockForLabel(Succ), 0,
|
||||
0);
|
||||
auto NextBBIter = std::next(BBIter);
|
||||
if (NextBBIter != BBs.end())
|
||||
(*BBIter)->addSuccessor(NextBBIter->get(), 0, 0);
|
||||
++BBIter;
|
||||
}
|
||||
DumpFunction->insertBasicBlocks(nullptr, std::move(BBs),
|
||||
/*UpdateLayout=*/true,
|
||||
/*UpdateCFIState=*/false);
|
||||
DumpFunction->updateState(BinaryFunction::State::CFG_Finalized);
|
||||
}
|
||||
|
||||
void Instrumentation::emitDescription(
|
||||
@ -295,22 +252,45 @@ void Instrumentation::emit(BinaryContext &BC, MCStreamer &Streamer) {
|
||||
auto *Section = BC.Ctx->getELFSection(".bolt.instrumentation",
|
||||
ELF::SHT_PROGBITS,
|
||||
Flags);
|
||||
|
||||
// All of the following symbols will be exported as globals to be used by the
|
||||
// instrumentation runtime library to dump the instrumentation data to disk.
|
||||
// Label marking start of the memory region containing instrumentation
|
||||
// counters, total vector size is Labels.size() 8-byte counters
|
||||
MCSymbol *Locs = BC.Ctx->getOrCreateSymbol("__bolt_instr_locations");
|
||||
MCSymbol *NumLocs = BC.Ctx->getOrCreateSymbol("__bolt_instr_num_locs");
|
||||
// Start of the vector with descriptions (one CounterDescription for each
|
||||
// counter), vector size is Labels.size() CounterDescription-sized elmts
|
||||
MCSymbol *DescriptionsSym =
|
||||
BC.Ctx->getOrCreateSymbol("__bolt_instr_descriptions");
|
||||
// Label identifying where our string table was emitted to
|
||||
MCSymbol *StringsSym = BC.Ctx->getOrCreateSymbol("__bolt_instr_strings");
|
||||
/// File name where profile is going to be written to after target binary
|
||||
/// finishes a run
|
||||
MCSymbol *FilenameSym = BC.Ctx->getOrCreateSymbol("__bolt_instr_filename");
|
||||
|
||||
Streamer.SwitchSection(Section);
|
||||
Streamer.EmitLabel(Locs);
|
||||
Streamer.EmitSymbolAttribute(Locs,
|
||||
MCSymbolAttr::MCSA_Global);
|
||||
for (const auto &Label : Labels) {
|
||||
Streamer.EmitLabel(Label);
|
||||
Streamer.emitFill(8, 0);
|
||||
}
|
||||
Streamer.EmitLabel(NumLocs);
|
||||
Streamer.EmitSymbolAttribute(NumLocs,
|
||||
MCSymbolAttr::MCSA_Global);
|
||||
Streamer.EmitIntValue(Labels.size(), /*Size=*/4);
|
||||
Streamer.EmitLabel(DescriptionsSym);
|
||||
Streamer.EmitSymbolAttribute(DescriptionsSym,
|
||||
MCSymbolAttr::MCSA_Global);
|
||||
for (const auto &Desc : Descriptions) {
|
||||
emitDescription(Desc, Streamer);
|
||||
}
|
||||
Streamer.EmitLabel(StringsSym);
|
||||
Streamer.EmitSymbolAttribute(StringsSym,
|
||||
MCSymbolAttr::MCSA_Global);
|
||||
Streamer.EmitBytes(StringTable);
|
||||
Streamer.EmitLabel(Spaces);
|
||||
Streamer.EmitBytes(" ");
|
||||
Streamer.EmitLabel(Chars);
|
||||
Streamer.EmitBytes("0123456789abcdef");
|
||||
Streamer.EmitLabel(FilenameSym);
|
||||
Streamer.EmitBytes(opts::InstrumentationFilename);
|
||||
Streamer.emitFill(1, 0);
|
||||
|
||||
@ -53,12 +53,6 @@ public:
|
||||
/// Emit data structures that will be necessary during runtime (second step)
|
||||
void emit(BinaryContext &BC, MCStreamer &Streamer);
|
||||
|
||||
/// Access the function injected by the instrumentation pass necessary to
|
||||
/// write profile to a file. This is only valid after instrumentation
|
||||
/// finished (step 1).
|
||||
BinaryFunction *getDumpFunction() const {
|
||||
return DumpFunction;
|
||||
}
|
||||
private:
|
||||
// Instrumented branch location information
|
||||
struct CounterDescription {
|
||||
@ -68,10 +62,6 @@ private:
|
||||
uint32_t ToOffset;
|
||||
};
|
||||
|
||||
/// Create a new injected function that will be needed at runtime to write
|
||||
/// profile
|
||||
void createDumpFunction(BinaryContext &BC);
|
||||
|
||||
/// Retrieve the string table index for the name of \p Function. We encode
|
||||
/// instrumented locations descriptions with the aid of a string table to
|
||||
/// manage memory of the instrumentation runtime in a more efficient way.
|
||||
@ -126,32 +116,6 @@ private:
|
||||
|
||||
/// Identify all counters used in runtime while instrumentation is running
|
||||
std::vector<MCSymbol *> Labels;
|
||||
|
||||
/// Label marking start of the memory region containing instrumentation
|
||||
/// counters, total vector size is Labels.size() 8-byte counters
|
||||
MCSymbol *Locs;
|
||||
|
||||
/// Start of the vector with descriptions (one CounterDescription for each
|
||||
/// counter), vector size is Labels.size() CounterDescription-sized elmts
|
||||
MCSymbol *DescriptionsSym;
|
||||
|
||||
/// Label identifying where our string table was emitted to
|
||||
MCSymbol *StringsSym;
|
||||
|
||||
/// File name where profile is going to written to after target binary
|
||||
/// finishes a run
|
||||
MCSymbol *FilenameSym;
|
||||
|
||||
/// Label for a string containing 8 spaces used by the algorithm that writes
|
||||
/// profile during conversion of integer to string. \p Chars stores ASCII
|
||||
/// representation of numbers from 0 to F.
|
||||
MCSymbol *Spaces;
|
||||
MCSymbol *Chars;
|
||||
|
||||
/// We keep a pointer to our injected function whose final address will be
|
||||
/// needed later to patch the destructor routines in the binary to call us
|
||||
/// upon end of execution
|
||||
BinaryFunction *DumpFunction;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/BinaryFormat/Dwarf.h"
|
||||
#include "llvm/BinaryFormat/Magic.h"
|
||||
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
||||
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
|
||||
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
|
||||
@ -53,6 +54,7 @@
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Object/Archive.h"
|
||||
#include "llvm/Object/ObjectFile.h"
|
||||
#include "llvm/Object/SymbolicFile.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
@ -101,6 +103,13 @@ Instrument("instrument-experimental",
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
static cl::opt<std::string>
|
||||
RuntimeInstrumentationLib("runtime-instrumentation-lib",
|
||||
cl::desc("specify file name of the runtime instrumentation library"),
|
||||
cl::ZeroOrMore,
|
||||
cl::init("libbolt_rt.a"),
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
ForceToDataRelocations("force-data-relocations",
|
||||
cl::desc("force relocations to data sections to always be processed"),
|
||||
@ -591,6 +600,15 @@ void check_error(std::error_code EC, StringRef Message) {
|
||||
report_error(Message, EC);
|
||||
}
|
||||
|
||||
/// If \p E holds an error, print "BOLT-ERROR: '<Message>': <error text>" to
/// the error stream and terminate the process with exit code 1. Returns
/// normally when \p E is a success value.
void check_error(Error E, Twine Message) {
  if (E) {
    handleAllErrors(std::move(E), [&Message](const llvm::ErrorInfoBase &Info) {
      raw_ostream &OS = llvm::errs();
      OS << "BOLT-ERROR: '" << Message << "': " << Info.message() << '\n';
      exit(1);
    });
  }
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -750,8 +768,8 @@ createBinaryContext(ELFObjectFileBase *File, DataReader &DR,
|
||||
|
||||
RewriteInstance::RewriteInstance(ELFObjectFileBase *File, DataReader &DR,
|
||||
DataAggregator &DA, const int Argc,
|
||||
const char *const *Argv)
|
||||
: InputFile(File), Argc(Argc), Argv(Argv), DA(DA),
|
||||
const char *const *Argv, StringRef ToolPath)
|
||||
: InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath), DA(DA),
|
||||
BC(createBinaryContext(
|
||||
File, DR,
|
||||
DWARFContext::create(*File, nullptr,
|
||||
@ -1104,7 +1122,7 @@ void RewriteInstance::run() {
|
||||
if (opts::DiffOnly)
|
||||
return;
|
||||
runOptimizationPasses();
|
||||
emitSections();
|
||||
emitAndLink();
|
||||
};
|
||||
|
||||
outs() << "BOLT-INFO: Target architecture: "
|
||||
@ -2997,8 +3015,8 @@ std::vector<T> singletonSet(T t) {
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
void RewriteInstance::emitSections() {
|
||||
NamedRegionTimer T("emitSections", "emit sections", TimerGroupName,
|
||||
void RewriteInstance::emitAndLink() {
|
||||
NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName,
|
||||
TimerGroupDesc, opts::TimeRewrite);
|
||||
std::error_code EC;
|
||||
|
||||
@ -3071,12 +3089,18 @@ void RewriteInstance::emitSections() {
|
||||
auto Resolver = orc::createLegacyLookupResolver(
|
||||
[&](const std::string &Name) -> JITSymbol {
|
||||
DEBUG(dbgs() << "BOLT: looking for " << Name << "\n");
|
||||
if (EFMM->ObjectsLoaded) {
|
||||
return OLT->findSymbol(Name, false);
|
||||
}
|
||||
if (auto *I = BC->getBinaryDataByName(Name)) {
|
||||
const uint64_t Address = I->isMoved() && !I->isJumpTable()
|
||||
? I->getOutputAddress()
|
||||
: I->getAddress();
|
||||
DEBUG(dbgs() << "Resolved to address 0x" << Twine::utohexstr(Address)
|
||||
<< "\n");
|
||||
return JITSymbol(Address, JITSymbolFlags());
|
||||
}
|
||||
DEBUG(dbgs() << "Resolved to address 0x0\n");
|
||||
return JITSymbol(nullptr);
|
||||
},
|
||||
[](Error Err) { cantFail(std::move(Err), "lookup failed"); });
|
||||
@ -3087,6 +3111,8 @@ void RewriteInstance::emitSections() {
|
||||
|
||||
SSP.reset(new decltype(SSP)::element_type());
|
||||
ES.reset(new decltype(ES)::element_type(*SSP));
|
||||
// Key for our main object created out of the input binary
|
||||
auto K = ES->allocateVModule();
|
||||
OLT.reset(new decltype(OLT)::element_type(
|
||||
*ES,
|
||||
[this, &Resolver](orc::VModuleKey Key) {
|
||||
@ -3099,22 +3125,32 @@ void RewriteInstance::emitSections() {
|
||||
// Loaded notifier
|
||||
[&](orc::VModuleKey Key, const object::ObjectFile &Obj,
|
||||
const RuntimeDyld::LoadedObjectInfo &) {
|
||||
// Assign addresses to all sections.
|
||||
mapFileSections(Key);
|
||||
// Assign addresses to all sections. If key corresponds to the object
|
||||
// created by ourselves, call our regular mapping function. If we are
|
||||
// loading additional objects as part of runtime libraries for
|
||||
// instrumentation, treat them as extra sections.
|
||||
if (Key == K) {
|
||||
mapFileSections(Key);
|
||||
} else {
|
||||
mapExtraSections(Key);
|
||||
}
|
||||
},
|
||||
// Finalized notifier
|
||||
[&](orc::VModuleKey Key) {
|
||||
// Update output addresses based on the new section map and
|
||||
// layout.
|
||||
updateOutputValues(FinalLayout);
|
||||
// layout. Only do this for the object created by ourselves.
|
||||
if (Key == K)
|
||||
updateOutputValues(FinalLayout);
|
||||
}));
|
||||
|
||||
OLT->setProcessAllSections(true);
|
||||
auto K = ES->allocateVModule();
|
||||
cantFail(OLT->addObject(K, std::move(ObjectMemBuffer)));
|
||||
|
||||
cantFail(OLT->emitAndFinalize(K));
|
||||
|
||||
// Link instrumentation runtime library
|
||||
if (opts::Instrument)
|
||||
linkRuntime();
|
||||
|
||||
// Once the code is emitted, we can rename function sections to actual
|
||||
// output sections and de-register sections used for emission.
|
||||
if (!BC->HasRelocations) {
|
||||
@ -3140,6 +3176,58 @@ void RewriteInstance::emitSections() {
|
||||
TempOut->keep();
|
||||
}
|
||||
|
||||
void RewriteInstance::linkRuntime() {
|
||||
OLT->setProcessAllSections(false);
|
||||
std::string Dir = llvm::sys::path::parent_path(ToolPath);
|
||||
SmallString<128> P(Dir);
|
||||
P = llvm::sys::path::parent_path(Dir);
|
||||
llvm::sys::path::append(P, "lib", opts::RuntimeInstrumentationLib);
|
||||
std::string LibPath = P.str();
|
||||
if (!llvm::sys::fs::exists(LibPath)) {
|
||||
errs() << "BOLT-ERROR: instrumentation library not found: " << LibPath
|
||||
<< "\n";
|
||||
exit(1);
|
||||
}
|
||||
ErrorOr<std::unique_ptr<MemoryBuffer>> MaybeBuf =
|
||||
MemoryBuffer::getFile(LibPath, -1, false);
|
||||
check_error(MaybeBuf.getError(), LibPath);
|
||||
std::unique_ptr<MemoryBuffer> B = std::move(MaybeBuf.get());
|
||||
file_magic Magic = identify_magic(B->getBuffer());
|
||||
|
||||
if (Magic == file_magic::archive) {
|
||||
Error Err = Error::success();
|
||||
object::Archive Archive(B.get()->getMemBufferRef(), Err);
|
||||
for (auto &C : Archive.children(Err)) {
|
||||
auto ChildKey = ES->allocateVModule();
|
||||
auto ChildBuf =
|
||||
MemoryBuffer::getMemBuffer(cantFail(C.getMemoryBufferRef()));
|
||||
cantFail(OLT->addObject(ChildKey, std::move(ChildBuf)));
|
||||
cantFail(OLT->emitAndFinalize(ChildKey));
|
||||
}
|
||||
check_error(std::move(Err), B->getBufferIdentifier());
|
||||
} else if (Magic == file_magic::elf_relocatable ||
|
||||
Magic == file_magic::elf_shared_object) {
|
||||
auto K2 = ES->allocateVModule();
|
||||
cantFail(OLT->addObject(K2, std::move(B)));
|
||||
cantFail(OLT->emitAndFinalize(K2));
|
||||
} else {
|
||||
errs() << "BOLT-ERROR: unrecognized instrumentation library format: "
|
||||
<< LibPath << "\n";
|
||||
exit(1);
|
||||
}
|
||||
InstrumentationRuntimeStartAddress =
|
||||
cantFail(OLT->findSymbol("__bolt_instr_data_dump", false).getAddress());
|
||||
if (!InstrumentationRuntimeStartAddress) {
|
||||
errs() << "BOLT-ERROR: instrumentation library does not define "
|
||||
"__bolt_instr_data_dump: "
|
||||
<< LibPath << "\n";
|
||||
exit(1);
|
||||
}
|
||||
outs() << "BOLT-INFO: output linked against instrumentation runtime "
|
||||
"library, lib entry point is 0x"
|
||||
<< Twine::utohexstr(InstrumentationRuntimeStartAddress) << "\n";
|
||||
}
|
||||
|
||||
void RewriteInstance::emitFunctions(MCStreamer *Streamer) {
|
||||
auto emit = [&](const std::vector<BinaryFunction *> &Functions) {
|
||||
for (auto *Function : Functions) {
|
||||
@ -3469,6 +3557,29 @@ void RewriteInstance::mapDataSections(orc::VModuleKey Key) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Assign output addresses to sections that came from additionally loaded
/// objects. Every allocatable section that has a valid section ID and no
/// output address yet is placed at the next available address (rounded up to
/// the section's alignment), registered with the linking layer under \p Key,
/// and given the matching file offset.
void RewriteInstance::mapExtraSections(orc::VModuleKey Key) {
  assert(BC->HasRelocations && "Unsupported in non-relocation mode");

  for (auto &Section : BC->allocatableSections()) {
    // Leave alone sections that already have an address, as well as sections
    // without a valid section ID.
    const bool AlreadyMapped = Section.getOutputAddress() != 0;
    if (AlreadyMapped || !Section.hasValidSectionID())
      continue;

    const uint64_t AlignedAddress =
        alignTo(NextAvailableAddress, Section.getAlignment());
    NextAvailableAddress = AlignedAddress;
    Section.setOutputAddress(AlignedAddress);
    NextAvailableAddress += Section.getOutputSize();

    DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName()
                 << " at 0x" << Twine::utohexstr(Section.getAllocAddress())
                 << " to 0x" << Twine::utohexstr(Section.getOutputAddress())
                 << '\n');

    OLT->mapSectionAddress(Key, Section.getSectionID(),
                           Section.getOutputAddress());
    const auto FileOffset =
        getFileOffsetForAddress(Section.getOutputAddress());
    Section.setFileOffset(FileOffset);
  }
}
|
||||
|
||||
void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
|
||||
SectionPatchers[".note.stapsdt"] = llvm::make_unique<SimpleBinaryPatcher>();
|
||||
auto *SDTNotePatcher = static_cast<SimpleBinaryPatcher *>(
|
||||
@ -4716,6 +4827,7 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
|
||||
"error accessing dynamic table");
|
||||
const Elf_Dyn *DTE = cantFail(Obj->dynamic_table_end(DynamicPhdr),
|
||||
"error accessing dynamic table");
|
||||
bool FiniFound = false;
|
||||
for (auto *DE = DTB; DE != DTE; ++DE) {
|
||||
auto NewDE = *DE;
|
||||
bool ShouldPatch = true;
|
||||
@ -4735,8 +4847,8 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
|
||||
// FIXME: Put the old FINI pointer as a tail call in the generated
|
||||
// dumper function
|
||||
if (opts::Instrument && DE->getTag() == ELF::DT_FINI) {
|
||||
NewDE.d_un.d_ptr =
|
||||
Instrumenter->getDumpFunction()->getOutputAddress();
|
||||
NewDE.d_un.d_ptr = InstrumentationRuntimeStartAddress;
|
||||
FiniFound = true;
|
||||
}
|
||||
break;
|
||||
case ELF::DT_FLAGS:
|
||||
@ -4758,6 +4870,13 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
|
||||
}
|
||||
}
|
||||
|
||||
if (opts::Instrument && !FiniFound) {
|
||||
errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic "
|
||||
"section but instrumentation currently relies on patching "
|
||||
"DT_FINI to write the profile.\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (BC->RequiresZNow && !ZNowSet) {
|
||||
errs() << "BOLT-ERROR: output binary requires immediate relocation "
|
||||
"processing which depends on DT_FLAGS or DT_FLAGS_1 presence in "
|
||||
|
||||
@ -47,7 +47,8 @@ class RewriteInstanceDiff;
|
||||
class RewriteInstance {
|
||||
public:
|
||||
RewriteInstance(llvm::object::ELFObjectFileBase *File, DataReader &DR,
|
||||
DataAggregator &DA, const int Argc, const char *const *Argv);
|
||||
DataAggregator &DA, const int Argc, const char *const *Argv,
|
||||
StringRef ToolPath);
|
||||
~RewriteInstance();
|
||||
|
||||
/// Reset all state except for split hints. Used to run a second pass with
|
||||
@ -97,7 +98,10 @@ public:
|
||||
/// Write code and data into an intermediary object file, map virtual to real
|
||||
/// addresses and link the object file, resolving all relocations and
|
||||
/// performing final relaxation.
|
||||
void emitSections();
|
||||
void emitAndLink();
|
||||
|
||||
/// Link additional runtime code to support instrumentation.
|
||||
void linkRuntime();
|
||||
|
||||
/// Emit function code.
|
||||
void emitFunctions(MCStreamer *Streamer);
|
||||
@ -121,6 +125,7 @@ public:
|
||||
void mapCodeSections(orc::VModuleKey ObjectsHandle);
|
||||
void mapDataSections(orc::VModuleKey ObjectsHandle);
|
||||
void mapFileSections(orc::VModuleKey ObjectsHandle);
|
||||
void mapExtraSections(orc::VModuleKey ObjectsHandle);
|
||||
|
||||
/// Update output object's values based on the final \p Layout.
|
||||
void updateOutputValues(const MCAsmLayout &Layout);
|
||||
@ -321,6 +326,7 @@ private:
|
||||
/// Command line args used to process binary.
|
||||
const int Argc;
|
||||
const char *const *Argv;
|
||||
StringRef ToolPath;
|
||||
|
||||
/// Holds our data aggregator in case user supplied a raw perf data file.
|
||||
DataAggregator &DA;
|
||||
@ -355,6 +361,9 @@ private:
|
||||
uint64_t NewTextSegmentOffset{0};
|
||||
uint64_t NewTextSegmentSize{0};
|
||||
|
||||
/// Extra linking
|
||||
uint64_t InstrumentationRuntimeStartAddress{0};
|
||||
|
||||
/// Track next available address for new allocatable sections.
|
||||
uint64_t NextAvailableAddress{0};
|
||||
|
||||
|
||||
@ -3424,311 +3424,6 @@ public:
|
||||
return Results;
|
||||
}
|
||||
|
||||
/// Build the x86-64 code of a routine that dumps instrumentation counters to
/// a file, using raw syscalls only.
///
/// The emitted routine opens the file named by \p FilenameSym, then iterates
/// over \p NumLocs counters. For every counter it formats one text line in a
/// stack buffer: for each of the two endpoints (From, To) it writes "1 ",
/// the name read from the string table and the offset in hexadecimal; then
/// "0 " and the counter value in decimal, terminated by '\n'. Lines whose
/// counter is zero are not written. Finally the file is closed.
///
/// \param Locs         symbol of the array of 8-byte counters.
/// \param Descriptions symbol of the descriptions vector; the code reads a
///                     32-bit string index followed by a 32-bit offset,
///                     twice per counter.
/// \param Strings      symbol of the string table base.
/// \param FilenameSym  symbol of the output file name passed to open().
/// \param Spaces       symbol of 8 bytes of padding that are copied twice to
///                     pre-fill each numeric field.
/// \param Chars        symbol of the digit lookup table indexed by the
///                     remainder of the base-16 / base-10 division.
/// \param NumLocs      number of counters to dump.
/// \param Ctx          context used to create temporary labels and symbol
///                     reference expressions.
/// \returns the generated blocks. Result.Blocks holds (label, instructions)
///          pairs in layout order and Result.Successors holds, per block,
///          the conditional branch target or nullptr when the block only
///          falls through.
///
/// NOTE(review): the main loop is bottom-tested (incq/cmp/jne), so it looks
/// like \p NumLocs == 0 would still execute the body -- confirm callers never
/// pass 0.
/// NOTE(review): nothing emitted here bounds the copied name against
/// COPY_BUFFER_SIZE, and the result of open() is not checked.
MultiBlocksCode createInstrumentedDataDumpCode(
    MCSymbol *Locs,
    MCSymbol *Descriptions,
    MCSymbol *Strings,
    MCSymbol *FilenameSym,
    MCSymbol *Spaces,
    MCSymbol *Chars,
    size_t NumLocs,
    MCContext *Ctx
) const override {
  // Points at the instruction list of the block currently being filled; it
  // is set by the first BEGIN_BLOCK* below.
  std::vector<MCInst> *Code = nullptr;
  MultiBlocksCode Result;
#define INS(x, y) Code->emplace_back(MCInstBuilder(x).y)
#define INS_NOARGS(x) Code->emplace_back(MCInstBuilder(x))
#define REG(x) addReg(x)
#define IMM(x) addImm(x)
#define EXPR(x)                                                                \
  addExpr(MCSymbolRefExpr::create(x, MCSymbolRefExpr::VK_None, *Ctx))
#define NOREG addReg(X86::NoRegister)
#define BEGIN_BLOCK(x)                                                         \
  Result.Blocks.emplace_back(std::make_pair<>(x, std::vector<MCInst>()));     \
  Code = &Result.Blocks.back().second;                                         \
  Result.Successors.emplace_back(nullptr);
#define BEGIN_BLOCK_FALLTHROUGH                                                \
  Result.Blocks.emplace_back(                                                  \
      std::make_pair<>(Ctx->createTempSymbol(), std::vector<MCInst>()));      \
  Code = &Result.Blocks.back().second;                                         \
  Result.Successors.emplace_back(nullptr);
#define SET_SUCC(x) Result.Successors.back() = x;

  using namespace llvm::X86;

  // I know, this got ridiculously large, we should have a better way to
  // write our runtime library for instrumentation. At this point I think it
  // is kind of nice not to depend on loading an object on disk to link
  // against the input binary, but in the future it may be inevitable.

  // String buffer allocated on stack to store data sent to write()
  const uint32_t COPY_BUFFER_SIZE = 0x1000;
  // These constants depend on the target OS
  const uint32_t OPEN_MODE = 0666;
  const uint32_t OPEN_FLAGS = 0x241; // O_WRONLY|O_TRUNC|O_CREAT
  const uint32_t SYSCALL_WRITE = 1;
  const uint32_t SYSCALL_OPEN = 2;
  const uint32_t SYSCALL_CLOSE = 3;
  MCSymbol *LoopBody1 = Ctx->createTempSymbol();     // for each instrumented br
  MCSymbol *LoopBody1_1 = Ctx->createTempSymbol();   // for each src/dst
  MCSymbol *LoopBody1_1_1 = Ctx->createTempSymbol(); // write func name
  MCSymbol *LoopBody1_1_2 = Ctx->createTempSymbol(); // write offset in hex
  MCSymbol *LoopBody1_2 = Ctx->createTempSymbol();   // write branch frequency
  MCSymbol *Loop1End = Ctx->createTempSymbol();

  // Register roles in the emitted code:
  //   %r11 - cursor into the descriptions vector
  //   %r12 - string table cursor, later end of the current numeric field
  //   %r13 - value returned by open()
  //   %r14 - index of the current counter
  //   %r15 - write buffer cursor
  //   %rbx - From/To iteration counter

  BEGIN_BLOCK_FALLTHROUGH; // Start our prologue
  // pushq %rbx
  INS(PUSH64r, REG(RBX));
  // pushq %r12
  INS(PUSH64r, REG(R12));
  // pushq %r13
  INS(PUSH64r, REG(R13));
  // pushq %r14
  INS(PUSH64r, REG(R14));
  // pushq %r15
  INS(PUSH64r, REG(R15));
  // subq $0x1000, %rsp        // Reserve space for write buffer
  INS(SUB64ri32, REG(RSP).REG(RSP).IMM(COPY_BUFFER_SIZE));
  // leaq filename(%rip), %rdi
  INS(LEA64r, REG(RDI).REG(RIP).IMM(0x1).NOREG.EXPR(FilenameSym).NOREG);
  // movq $0x241, %rsi         // O_WRONLY|O_TRUNC|O_CREAT
  INS(MOV64ri32, REG(RSI).IMM(OPEN_FLAGS));
  // movq $0666, %rdx          // mode
  INS(MOV64ri32, REG(RDX).IMM(OPEN_MODE));
  // mov $0x2, %rax
  INS(MOV64ri32, REG(RAX).IMM(SYSCALL_OPEN));
  // syscall
  INS_NOARGS(SYSCALL); // open()
  // movq %rax, %r13
  INS(MOV64rr, REG(R13).REG(RAX));
  // xorq %r14, %r14           // Induction variable for main loop
  //                           // over all instrumentation counters
  INS(XOR64rr, REG(R14).REG(R14).REG(R14));
  // leaq label1(%rip), %r11   // Load start of descriptions vector
  INS(LEA64r, REG(R11).REG(RIP).IMM(1).NOREG.EXPR(Descriptions).NOREG);

  // loopbody1:                // Main loop
  BEGIN_BLOCK(LoopBody1);
  // movq %rsp, %r15           // Reset write buffer pointer
  INS(MOV64rr, REG(R15).REG(RSP));

  // xorq %rbx, %rbx           // Induction variable for our loop of 2
  //                           // iterations to read From description and
  //                           // then To description
  INS(XOR64rr, REG(RBX).REG(RBX).REG(RBX));
  // loopbody1_1:
  BEGIN_BLOCK(LoopBody1_1);
  // movb $0x31, (%r15)        // Write '1' and a space before func name
  INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x31));
  // incq %r15
  INS(INC64r, REG(R15).REG(R15));
  // movb $0x20, (%r15)
  INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x20));
  // incq %r15
  INS(INC64r, REG(R15).REG(R15));
  // leaq strings(%rip), %r12  // Load string table base
  INS(LEA64r, REG(R12).REG(RIP).IMM(0x1).NOREG.EXPR(Strings).NOREG);
  // addl (%r11), %r12d        // Add string index
  // NOTE(review): this is a 32-bit add on the register holding the string
  // table address -- confirm the table is always addressable in 32 bits.
  INS(ADD32rm, REG(R12D).REG(R12D).REG(R11).IMM(1).NOREG.IMM(0).NOREG);
  // addq $4, %r11
  INS(ADD64ri8, REG(R11).REG(R11).IMM(4));

  // loopbody1_1_1:            // Loop over counter description string
  //                           // copying it to our write buffer
  // NOTE(review): the 8-bit opcodes below are given the 16-bit register
  // names AX/CX (here and in the digit loops); presumably this relies on
  // them sharing an encoding with AL/CL -- confirm.
  BEGIN_BLOCK(LoopBody1_1_1);
  // mov (%r12), %ax
  INS(MOV8rm, REG(AX).REG(R12).IMM(1).NOREG.IMM(0).NOREG);
  // mov %ax, (%r15)
  INS(MOV8mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(AX));
  // incq %r15
  INS(INC64r, REG(R15).REG(R15));
  // incq %r12
  INS(INC64r, REG(R12).REG(R12));
  // test %ax, %ax             // The terminator is tested after it is copied
  INS(TEST8rr, REG(AX).REG(AX));
  // jnz loopbody1_1_1
  INS(JNE_4, EXPR(LoopBody1_1_1));
  SET_SUCC(LoopBody1_1_1);

  BEGIN_BLOCK_FALLTHROUGH; // Copy padding spaces to write buffer -- empty
                           // spaces that will be overwritten with the
                           // offset value in hex, right to left
  // decq %r15                 // Step back over the copied terminator
  INS(DEC64r, REG(R15).REG(R15));
  // movq spaces(%rip), %rdx
  INS(MOV64rm, REG(RDX).REG(RIP).IMM(1).NOREG.EXPR(Spaces).NOREG);
  // movq %rdx, (%r15)
  INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX));
  // addq $8, %r15
  INS(ADD64ri8, REG(R15).REG(R15).IMM(8));
  // movq %rdx, (%r15)
  INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX));
  // addq $8, %r15
  INS(ADD64ri8, REG(R15).REG(R15).IMM(8));
  // movq %r15, %r12           // Remember the end of the field
  INS(MOV64rr, REG(R12).REG(R15));
  // decq %r15
  INS(DEC64r, REG(R15).REG(R15));
  // xorq %rax, %rax
  INS(XOR64rr, REG(RAX).REG(RAX).REG(RAX));
  // movl (%r11), %eax
  INS(MOV32rm, REG(EAX).REG(R11).IMM(1).NOREG.IMM(0).NOREG);
  // addq $4, %r11
  INS(ADD64ri8, REG(R11).REG(R11).IMM(4));

  // loopbody1_1_2:            // Loop to print address in hexadecimal
  BEGIN_BLOCK(LoopBody1_1_2);
  // decq %r15
  INS(DEC64r, REG(R15).REG(R15));
  // xorq %rdx, %rdx
  INS(XOR64rr, REG(RDX).REG(RDX).REG(RDX));
  // movq $0x10, %rsi
  INS(MOV64ri32, REG(RSI).IMM(0x10));
  // divq %rsi
  INS(DIV64r, REG(RSI));
  // leaq chars(%rip), %rdi
  INS(LEA64r, REG(RDI).REG(RIP).IMM(1).NOREG.EXPR(Chars).NOREG);
  // mov (%rdi, %rdx, 1), %cx
  INS(MOV8rm, REG(CX).REG(RDI).IMM(1).REG(RDX).IMM(0).NOREG);
  // mov %cx, (%r15)
  INS(MOV8mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(CX));
  // testq %rax, %rax
  INS(TEST64rr, REG(RAX).REG(RAX));
  // jnz loopbody1_1_2
  INS(JNE_4, EXPR(LoopBody1_1_2));
  SET_SUCC(LoopBody1_1_2);

  BEGIN_BLOCK_FALLTHROUGH;
  // movq %r12, %r15           // Loop end (2 iteration loop for From/To)
  INS(MOV64rr, REG(R15).REG(R12));
  // incq %rbx
  INS(INC64r, REG(RBX).REG(RBX));
  // cmpq $2, %rbx
  INS(CMP64ri8, REG(RBX).IMM(2));
  // jne loopbody1_1
  INS(JNE_4, EXPR(LoopBody1_1));
  SET_SUCC(LoopBody1_1);

  BEGIN_BLOCK_FALLTHROUGH; // Copy padding spaces to write buffer -- empty
                           // spaces that will be overwritten with the
                           // counter value in decimal, right to left
  // movb $0x30, (%r15)        // Write '0' and a space before counter val
  //                           // representing zero mispredictions
  INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x30));
  // incq %r15
  INS(INC64r, REG(R15).REG(R15));
  // movb $0x20, (%r15)
  INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x20));
  // incq %r15
  INS(INC64r, REG(R15).REG(R15));
  // movq spaces(%rip), %rdx
  INS(MOV64rm, REG(RDX).REG(RIP).IMM(1).NOREG.EXPR(Spaces).NOREG);
  // movq %rdx, (%r15)
  INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX));
  // addq $8, %r15
  INS(ADD64ri8, REG(R15).REG(R15).IMM(8));
  // movq %rdx, (%r15)
  INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX));
  // addq $8, %r15
  INS(ADD64ri8, REG(R15).REG(R15).IMM(8));
  // movq %r15, %r12           // Remember the end of the field
  INS(MOV64rr, REG(R12).REG(R15));
  // leaq count1(%rip), %rdx
  INS(LEA64r, REG(RDX).REG(RIP).IMM(1).NOREG.EXPR(Locs).NOREG);
  // movq (%rdx, %r14, 8), %rax  // Load current instrumentation counter value
  INS(MOV64rm, REG(RAX).REG(RDX).IMM(8).REG(R14).IMM(0).NOREG);
  // testq %rax, %rax
  INS(TEST64rr, REG(RAX).REG(RAX));
  // je loop1end               // Nothing is written for a zero counter
  INS(JE_4, EXPR(Loop1End));
  SET_SUCC(Loop1End);

  // loopbody1_2:              // Loop to print counter value in decimal
  BEGIN_BLOCK(LoopBody1_2);
  // decq %r15
  INS(DEC64r, REG(R15).REG(R15));
  // xorq %rdx, %rdx
  INS(XOR64rr, REG(RDX).REG(RDX).REG(RDX));
  // movq $0xa, %rsi
  INS(MOV64ri32, REG(RSI).IMM(0xa));
  // divq %rsi
  INS(DIV64r, REG(RSI));
  // leaq chars(%rip), %rdi
  INS(LEA64r, REG(RDI).REG(RIP).IMM(1).NOREG.EXPR(Chars).NOREG);
  // mov (%rdi, %rdx, 1), %cx
  INS(MOV8rm, REG(CX).REG(RDI).IMM(1).REG(RDX).IMM(0).NOREG);
  // mov %cx, (%r15)
  INS(MOV8mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(CX));
  // testq %rax, %rax
  INS(TEST64rr, REG(RAX).REG(RAX));
  // jnz loopbody1_2
  INS(JNE_4, EXPR(LoopBody1_2));
  SET_SUCC(LoopBody1_2);

  BEGIN_BLOCK_FALLTHROUGH; // Flush write buffer to file
  // movb $0xa, (%r12)         // Put a '\n' at the end of write buffer
  INS(MOV8mi, REG(R12).IMM(1).NOREG.IMM(0).NOREG.IMM(0xa));
  // incq %r12
  INS(INC64r, REG(R12).REG(R12));
  // movq %r13, %rdi
  INS(MOV64rr, REG(RDI).REG(R13));
  // movq %rsp, %rsi
  INS(MOV64rr, REG(RSI).REG(RSP));
  // movq %r12, %rdx
  INS(MOV64rr, REG(RDX).REG(R12));
  // subq %rsp, %rdx           // Length = end of line - start of buffer
  INS(SUB64rr, REG(RDX).REG(RDX).REG(RSP));
  // movq $0x1, %rax
  INS(MOV64ri32, REG(RAX).IMM(SYSCALL_WRITE));
  // pushq %r11                // Keep the descriptions cursor across the
  //                           // syscall, which overwrites %r11
  INS(PUSH64r, REG(R11));
  // syscall                   // write()
  INS_NOARGS(SYSCALL);
  // popq %r11
  INS(POP64r, REG(R11));

  // loop1end:                 // Main loop header
  BEGIN_BLOCK(Loop1End);
  // incq %r14
  INS(INC64r, REG(R14).REG(R14));
  // cmp $0xXXXXXX, %r14
  INS(CMP64ri32, REG(R14).IMM(NumLocs));
  // jnz loopbody1
  INS(JNE_4, EXPR(LoopBody1));
  SET_SUCC(LoopBody1);

  BEGIN_BLOCK_FALLTHROUGH; // Finish by closing file and returning
  // movq %r13, %rdi
  INS(MOV64rr, REG(RDI).REG(R13));
  // mov $0x3, %eax            // close()
  INS(MOV32ri, REG(EAX).IMM(SYSCALL_CLOSE));
  // syscall
  INS_NOARGS(SYSCALL);
  // addq $0x1000, %rsp        // Release the write buffer reserved above
  INS(ADD64ri32, REG(RSP).REG(RSP).IMM(COPY_BUFFER_SIZE));
  // popq %r15
  INS(POP64r, REG(R15));
  // popq %r14
  INS(POP64r, REG(R14));
  // popq %r13
  INS(POP64r, REG(R13));
  // popq %r12
  INS(POP64r, REG(R12));
  // popq %rbx
  INS(POP64r, REG(RBX));
  // ret
  INS_NOARGS(RETQ);
#undef INS
#undef INS_NOARGS
#undef REG
#undef IMM
#undef EXPR
#undef NOREG
#undef BEGIN_BLOCK
#undef BEGIN_BLOCK_FALLTHROUGH
#undef SET_SUCC
  return Result;
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
#include "RewriteInstance.h"
|
||||
#include "llvm/Object/Binary.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
#include "llvm/Support/PrettyStackTrace.h"
|
||||
#include "llvm/Support/ManagedStatic.h"
|
||||
#include "llvm/Support/Signals.h"
|
||||
@ -229,6 +230,16 @@ void boltMode(int argc, char **argv) {
|
||||
}
|
||||
}
|
||||
|
||||
std::string GetExecutablePath(const char *Argv0) {
|
||||
SmallString<128> ExecutablePath(Argv0);
|
||||
// Do a PATH lookup if Argv0 isn't a valid path.
|
||||
if (!llvm::sys::fs::exists(ExecutablePath))
|
||||
if (llvm::ErrorOr<std::string> P =
|
||||
llvm::sys::findProgramByName(ExecutablePath))
|
||||
ExecutablePath = *P;
|
||||
return ExecutablePath.str();
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
// Print a stack trace if we signal out.
|
||||
sys::PrintStackTraceOnErrorSignal(argv[0]);
|
||||
@ -236,6 +247,8 @@ int main(int argc, char **argv) {
|
||||
|
||||
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
|
||||
|
||||
std::string ToolPath = GetExecutablePath(argv[0]);
|
||||
|
||||
// Initialize targets and assembly printers/parsers.
|
||||
llvm::InitializeAllTargetInfos();
|
||||
llvm::InitializeAllTargetMCs();
|
||||
@ -309,7 +322,7 @@ int main(int argc, char **argv) {
|
||||
Binary &Binary = *BinaryOrErr.get().getBinary();
|
||||
|
||||
if (auto *e = dyn_cast<ELFObjectFileBase>(&Binary)) {
|
||||
RewriteInstance RI(e, *DR.get(), *DA.get(), argc, argv);
|
||||
RewriteInstance RI(e, *DR.get(), *DA.get(), argc, argv, ToolPath);
|
||||
RI.run();
|
||||
} else {
|
||||
report_error(opts::InputFilename, object_error::invalid_file_type);
|
||||
@ -342,8 +355,8 @@ int main(int argc, char **argv) {
|
||||
|
||||
if (auto *ELFObj1 = dyn_cast<ELFObjectFileBase>(&Binary1)) {
|
||||
if (auto *ELFObj2 = dyn_cast<ELFObjectFileBase>(&Binary2)) {
|
||||
RewriteInstance RI1(ELFObj1, *DR.get(), *DA.get(), argc, argv);
|
||||
RewriteInstance RI2(ELFObj2, *DR2.get(), *DA.get(), argc, argv);
|
||||
RewriteInstance RI1(ELFObj1, *DR.get(), *DA.get(), argc, argv, ToolPath);
|
||||
RewriteInstance RI2(ELFObj2, *DR2.get(), *DA.get(), argc, argv, ToolPath);
|
||||
outs() << "BOLT-DIFF: *** Analyzing binary 1: " << opts::InputFilename
|
||||
<< "\n";
|
||||
outs() << "BOLT-DIFF: *** Binary 1 fdata: " << opts::InputDataFilename
|
||||
|
||||
Loading…
Reference in New Issue
Block a user