Support instrumentation via runtime library

Summary:
To allow the development of future instrumentation work, this
patch adds support in BOLT for linking arbitrary libraries into the
binary processed by BOLT. We use the ORC relocation handling mechanism for
that. With this support, this patch also moves code programmatically
generated in X86 assembly language by X86MCPlusBuilder to C code written
in a new library called bolt_rt. Change CMake to support this library as
an external project in the same way as clang does with compiler_rt. This
library is installed in the lib/ folder relative to BOLT root
installation and by default instrumentation will look for the library
at that location to finish processing the binary with instrumentation.

Reviewed By: maksfb

Differential Revision: D16572013

fbshipit-source-id: ed9ae63969f
This commit is contained in:
Rafael Auler 2019-07-24 14:03:43 -07:00 committed by Facebook Github Bot
parent 6a339b9949
commit c6fa8fb91d
13 changed files with 379 additions and 432 deletions

View File

@ -1,6 +1,29 @@
include(ExternalProject)
set(BOLT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(BOLT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(CMAKE_CXX_STANDARD 14)
ExternalProject_Add(bolt_rt
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime"
STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
-DCMAKE_INSTALL_PREFIX=${LLVM_BINARY_DIR}
# BUILD_ALWAYS is enabled so that cmake rebuilds bolt_rt after source code
# changes, which is what you want when actively developing bolt_rt
BUILD_ALWAYS True
)
install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins/cmake_install.cmake \)"
COMPONENT bolt_rt)
add_llvm_install_targets(install-bolt_rt
DEPENDS bolt_rt
COMPONENT bolt_rt)
add_subdirectory(src)
add_subdirectory(test)

12
runtime/CMakeLists.txt Normal file
View File

@ -0,0 +1,12 @@
# Standalone build script for the BOLT instrumentation runtime library.
cmake_minimum_required(VERSION 3.1.0)
# Build the runtime as strict C++11 without compiler extensions.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
project(libbolt_rt_project)
# The runtime is a static library built from a single source file.
add_library(bolt_rt STATIC
instr.cpp
)
# Install the library into lib/ under the install prefix.
install(TARGETS bolt_rt DESTINATION lib)

139
runtime/instr.cpp Normal file
View File

@ -0,0 +1,139 @@
//===-- instr.cpp -----------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
// This file contains code that is linked to the final binary with a function
// that is called at program exit to dump instrumented data collected during
// execution.
//
//===----------------------------------------------------------------------===//
//
// BOLT runtime instrumentation library.
//
//===----------------------------------------------------------------------===//
#include <cstdint>
// All extern declarations here need to be defined by BOLT itself.
// Counters inserted by instrumentation, incremented during runtime when
// points of interest (locations) in the program are reached. Each counter is
// a 64-bit value.
extern uint64_t __bolt_instr_locations[];
// Number of counters.
extern uint32_t __bolt_instr_num_locs;
// String table with function names. Entries are read as NUL-terminated
// strings starting at a given index into this table.
extern char __bolt_instr_strings[];
// Filename to dump data to.
extern char __bolt_instr_filename[];
// A location is a function name plus offset. Function name needs to be
// retrieved from the string table and is stored as an index to this table.
//
// Declared as a plain C++ struct: the previous C-style typedef used the tag
// `_Location`, and identifiers starting with an underscore followed by an
// uppercase letter are reserved for the implementation.
struct Location {
  uint32_t FunctionName; // Index into __bolt_instr_strings
  uint32_t Offset;       // Offset paired with the function name
};
static_assert(sizeof(Location) == 2 * sizeof(uint32_t),
              "Location must be two 32-bit fields without padding");
// An edge description defines an instrumented edge in the program, fully
// identified by where the jump is located and its destination.
//
// Declared as a plain C++ struct: the previous C-style typedef used the tag
// `_EdgeDescription`, and identifiers starting with an underscore followed by
// an uppercase letter are reserved for the implementation.
struct EdgeDescription {
  Location From; // Where the jump is located
  Location To;   // Destination of the jump
};
extern EdgeDescription __bolt_instr_descriptions[];
// Declare some syscall wrappers we use throughout this code to avoid linking
// against system libc.
// Issue the open system call (number 2) directly, without going through libc.
// Returns whatever the kernel leaves in rax.
static uint64_t myopen(const char *pathname, uint64_t flags, uint64_t mode) {
  // rax carries the syscall number on entry and the kernel's result on exit.
  uint64_t rax = 2;
  __asm__ __volatile__("syscall"
                       : "+a"(rax)
                       : "D"(pathname), "S"(flags), "d"(mode)
                       : "cc", "rcx", "r11", "memory");
  return rax;
}
// Issue the write system call (number 1) directly, without going through
// libc. Returns whatever the kernel leaves in rax.
static uint64_t mywrite(uint64_t fd, const void *buf, uint64_t count) {
  // rax carries the syscall number on entry and the kernel's result on exit.
  uint64_t rax = 1;
  __asm__ __volatile__("syscall\n"
                       : "+a"(rax)
                       : "D"(fd), "S"(buf), "d"(count)
                       : "cc", "rcx", "r11", "memory");
  return rax;
}
// Issue the close system call (number 3) directly, without going through
// libc. The kernel's result in rax is narrowed to int on return.
static int myclose(uint64_t fd) {
  // rax carries the syscall number on entry and the kernel's result on exit.
  uint64_t rax = 3;
  __asm__ __volatile__("syscall\n"
                       : "+a"(rax)
                       : "D"(fd)
                       : "cc", "rcx", "r11", "memory");
  return rax;
}
static char *intToStr(char *OutBuf, uint32_t Num, uint32_t Base) {
const char *Chars = "0123456789abcdef";
char Buf[20];
char *Ptr = Buf;
while (Num) {
*Ptr++ = *(Chars + (Num % Base));
Num /= Base;
}
if (Ptr == Buf) {
*OutBuf++ = '0';
return OutBuf;
}
while (Ptr != Buf) {
*OutBuf++ = *--Ptr;
}
return OutBuf;
}
static char *serializeLoc(char *OutBuf, uint32_t FuncStrIndex,
uint32_t Offset) {
*OutBuf++ = '1';
*OutBuf++ = ' ';
char *Str = __bolt_instr_strings + FuncStrIndex;
while (*Str) {
*OutBuf++ = *Str++;
}
*OutBuf++ = ' ';
OutBuf = intToStr(OutBuf, Offset, 16);
*OutBuf++ = ' ';
return OutBuf;
}
extern "C" void __bolt_instr_data_dump() {
uint64_t FD = myopen(__bolt_instr_filename,
/*flags=*/0x241 /*O_WRONLY|O_TRUNC|O_CREAT*/,
/*mode=*/0666);
for (int I = 0, E = __bolt_instr_num_locs; I < E; ++I) {
char LineBuf[2000];
char *Ptr = LineBuf;
uint32_t HitCount = __bolt_instr_locations[I];
if (!HitCount)
continue;
EdgeDescription *Desc = &__bolt_instr_descriptions[I];
Ptr = serializeLoc(Ptr, Desc->From.FunctionName, Desc->From.Offset);
Ptr = serializeLoc(Ptr, Desc->To.FunctionName, Desc->To.Offset);
*Ptr++ = '0';
*Ptr++ = ' ';
Ptr = intToStr(Ptr, HitCount, 10);
*Ptr++ = '\n';
mywrite(FD, LineBuf, Ptr - LineBuf);
}
myclose(FD);
}

View File

@ -62,7 +62,7 @@ set(LLVM_LINK_COMPONENTS
string(FIND "${LLVM_TARGETS_TO_BUILD}" "AArch64" POSITION)
if (NOT ${POSITION} EQUAL -1)
list(APPEND LLVM_LINK_COMPONENTS BOLTTargetAArch64)
set(BOLT_AArcb64 On)
set(BOLT_AArch64 On)
endif()
string(FIND "${LLVM_TARGETS_TO_BUILD}" "X86" POSITION)
@ -101,9 +101,10 @@ add_llvm_tool(llvm-bolt
DEPENDS
intrinsics_gen
bolt_rt
)
if (DEFINED BOLT_AArcb64)
if (DEFINED BOLT_AArch64)
target_compile_definitions(llvm-bolt PRIVATE AARCH64_AVAILABLE)
endif()

View File

@ -13,7 +13,7 @@
#include "RewriteInstance.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"
#define DEBUG_TYPE "efmm"
using namespace llvm;
using namespace object;
@ -30,7 +30,7 @@ uint8_t *ExecutableFileMemoryManager::allocateSection(intptr_t Size,
bool IsCode,
bool IsReadOnly) {
// Register a debug section as a note section.
if (RewriteInstance::isDebugSection(SectionName)) {
if (!ObjectsLoaded && RewriteInstance::isDebugSection(SectionName)) {
uint8_t *DataCopy = new uint8_t[Size];
auto &Section = BC.registerOrUpdateNoteSection(SectionName,
DataCopy,
@ -52,6 +52,11 @@ uint8_t *ExecutableFileMemoryManager::allocateSection(intptr_t Size,
}
const auto Flags = BinarySection::getFlags(IsReadOnly, IsCode, true);
SmallVector<char, 256> Buf;
if (ObjectsLoaded > 0)
SectionName = (Twine(SectionName) + ".bolt.extra." + Twine(ObjectsLoaded))
.toStringRef(Buf);
auto &Section = BC.registerOrUpdateSection(SectionName,
ELF::SHT_PROGBITS,
Flags,
@ -94,6 +99,7 @@ uint8_t *ExecutableFileMemoryManager::recordNoteSection(
bool ExecutableFileMemoryManager::finalizeMemory(std::string *ErrMsg) {
DEBUG(dbgs() << "BOLT: finalizeMemory()\n");
++ObjectsLoaded;
return SectionMemoryManager::finalizeMemory(ErrMsg);
}

View File

@ -55,6 +55,14 @@ private:
bool AllowStubs;
public:
// Our linker's main purpose is to handle a single object file, created
// by RewriteInstance after reading the input binary and reordering it.
// After objects finish loading, we increment this. Therefore, whenever
// this is greater than zero, we are dealing with additional objects that
// will not be managed by BinaryContext but only exist to support linking
// user-supplied objects into the main input executable.
uint32_t ObjectsLoaded{0};
/// [start memory address] -> [segment info] mapping.
std::map<uint64_t, SegmentInfo> SegmentMapInfo;

View File

@ -1738,28 +1738,6 @@ public:
llvm_unreachable("not implemented");
return BlocksVectorTy();
}
/// Part of the runtime library for instrumented code, this runs at the end
/// of the process and writes the current instrumentation counters to a file
/// compatible with BOLT profile. \p Locs identifies the region in memory
/// where the counters are (\p NumLocs counters), \p Descriptions, the region
/// encoding information about each counter, which is the source of the branch
/// and the destination, \p Strings, the string table with function names used
/// in descriptions, \p FilenameSym, the profile file name to write to,
/// \p Chars, a 0 to F string used for printing hex/decimal numbers.
virtual MultiBlocksCode createInstrumentedDataDumpCode(
MCSymbol *Locs,
MCSymbol *Descriptions,
MCSymbol *Strings,
MCSymbol *FilenameSym,
MCSymbol *Spaces,
MCSymbol *Chars,
size_t NumLocs,
MCContext *Ctx
) const {
llvm_unreachable("not implemented");
return MultiBlocksCode();
}
};
} // namespace bolt

View File

@ -235,49 +235,6 @@ void Instrumentation::runOnFunctions(BinaryContext &BC) {
outs() << "BOLT-INSTRUMENTER: Instrumented " << InstrumentationSites
<< " sites, " << InstrumentationSitesSavingFlags << " saving flags.\n";
createDumpFunction(BC);
DEBUG(DumpFunction->dump());
}
void Instrumentation::createDumpFunction(BinaryContext &BC) {
DumpFunction =
BC.createInjectedBinaryFunction("BOLTInstrumentationDataDump");
Locs = BC.Ctx->createTempSymbol("BOLTInstrumentationLocs", true);
DescriptionsSym =
BC.Ctx->createTempSymbol("BOLTInstrumentationDescriptions", true);
StringsSym = BC.Ctx->createTempSymbol("BOLTInstrumentationStrings", true);
FilenameSym = BC.Ctx->createTempSymbol("BOLTInstrumentationFilename", true);
Spaces = BC.Ctx->createTempSymbol("BOLTInstrumentationSpaces", true);
Chars = BC.Ctx->createTempSymbol("BOLTInstrumentationChars", true);
auto Code = BC.MIB->createInstrumentedDataDumpCode(
Locs, DescriptionsSym, StringsSym, FilenameSym, Spaces, Chars,
Labels.size(), &*BC.Ctx);
std::vector<std::unique_ptr<BinaryBasicBlock>> BBs;
for (auto &SymBlock : Code.Blocks) {
auto &Symbol = SymBlock.first;
auto &Block = SymBlock.second;
BBs.emplace_back(DumpFunction->createBasicBlock(
BinaryBasicBlock::INVALID_OFFSET, Symbol));
BBs.back()->addInstructions(Block.begin(), Block.end());
BBs.back()->setCFIState(0);
}
auto BBIter = BBs.begin();
for (auto &Succ : Code.Successors) {
if (Succ)
(*BBIter)->addSuccessor(DumpFunction->getBasicBlockForLabel(Succ), 0,
0);
auto NextBBIter = std::next(BBIter);
if (NextBBIter != BBs.end())
(*BBIter)->addSuccessor(NextBBIter->get(), 0, 0);
++BBIter;
}
DumpFunction->insertBasicBlocks(nullptr, std::move(BBs),
/*UpdateLayout=*/true,
/*UpdateCFIState=*/false);
DumpFunction->updateState(BinaryFunction::State::CFG_Finalized);
}
void Instrumentation::emitDescription(
@ -295,22 +252,45 @@ void Instrumentation::emit(BinaryContext &BC, MCStreamer &Streamer) {
auto *Section = BC.Ctx->getELFSection(".bolt.instrumentation",
ELF::SHT_PROGBITS,
Flags);
// All of the following symbols will be exported as globals to be used by the
// instrumentation runtime library to dump the instrumentation data to disk.
// Label marking start of the memory region containing instrumentation
// counters, total vector size is Labels.size() 8-byte counters
MCSymbol *Locs = BC.Ctx->getOrCreateSymbol("__bolt_instr_locations");
MCSymbol *NumLocs = BC.Ctx->getOrCreateSymbol("__bolt_instr_num_locs");
// Start of the vector with descriptions (one CounterDescription for each
// counter), vector size is Labels.size() CounterDescription-sized elmts
MCSymbol *DescriptionsSym =
BC.Ctx->getOrCreateSymbol("__bolt_instr_descriptions");
// Label identifying where our string table was emitted to
MCSymbol *StringsSym = BC.Ctx->getOrCreateSymbol("__bolt_instr_strings");
/// File name where profile is going to written to after target binary
/// finishes a run
MCSymbol *FilenameSym = BC.Ctx->getOrCreateSymbol("__bolt_instr_filename");
Streamer.SwitchSection(Section);
Streamer.EmitLabel(Locs);
Streamer.EmitSymbolAttribute(Locs,
MCSymbolAttr::MCSA_Global);
for (const auto &Label : Labels) {
Streamer.EmitLabel(Label);
Streamer.emitFill(8, 0);
}
Streamer.EmitLabel(NumLocs);
Streamer.EmitSymbolAttribute(NumLocs,
MCSymbolAttr::MCSA_Global);
Streamer.EmitIntValue(Labels.size(), /*Size=*/4);
Streamer.EmitLabel(DescriptionsSym);
Streamer.EmitSymbolAttribute(DescriptionsSym,
MCSymbolAttr::MCSA_Global);
for (const auto &Desc : Descriptions) {
emitDescription(Desc, Streamer);
}
Streamer.EmitLabel(StringsSym);
Streamer.EmitSymbolAttribute(StringsSym,
MCSymbolAttr::MCSA_Global);
Streamer.EmitBytes(StringTable);
Streamer.EmitLabel(Spaces);
Streamer.EmitBytes(" ");
Streamer.EmitLabel(Chars);
Streamer.EmitBytes("0123456789abcdef");
Streamer.EmitLabel(FilenameSym);
Streamer.EmitBytes(opts::InstrumentationFilename);
Streamer.emitFill(1, 0);

View File

@ -53,12 +53,6 @@ public:
/// Emit data structures that will be necessary during runtime (second step)
void emit(BinaryContext &BC, MCStreamer &Streamer);
/// Access the function injected by the instrumentation pass necessary to
/// write profile to a file. This is only valid after instrumentation
/// finished (step 1).
BinaryFunction *getDumpFunction() const {
return DumpFunction;
}
private:
// Instrumented branch location information
struct CounterDescription {
@ -68,10 +62,6 @@ private:
uint32_t ToOffset;
};
/// Create a new injected function that will be needed at runtime to write
/// profile
void createDumpFunction(BinaryContext &BC);
/// Retrieve the string table index for the name of \p Function. We encode
/// instrumented locations descriptions with the aid of a string table to
/// manage memory of the instrumentation runtime in a more efficient way.
@ -126,32 +116,6 @@ private:
/// Identify all counters used in runtime while instrumentation is running
std::vector<MCSymbol *> Labels;
/// Label marking start of the memory region containing instrumentation
/// counters, total vector size is Labels.size() 8-byte counters
MCSymbol *Locs;
/// Start of the vector with descriptions (one CounterDescription for each
/// counter), vector size is Labels.size() CounterDescription-sized elmts
MCSymbol *DescriptionsSym;
/// Label identifying where our string table was emitted to
MCSymbol *StringsSym;
/// File name where profile is going to written to after target binary
/// finishes a run
MCSymbol *FilenameSym;
/// Label for a string containing 8 spaces used by the algorithm that writes
/// profile during conversion of integer to string. \p Chars stores ASCII
/// representation of numbers from 0 to F.
MCSymbol *Spaces;
MCSymbol *Chars;
/// We keep a pointer to our injected function whose final address will be
/// needed later to patch the destructor routines in the binary to call us
/// upon end of execution
BinaryFunction *DumpFunction;
};
}

View File

@ -30,6 +30,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
@ -53,6 +54,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Casting.h"
@ -101,6 +103,13 @@ Instrument("instrument-experimental",
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
static cl::opt<std::string>
RuntimeInstrumentationLib("runtime-instrumentation-lib",
cl::desc("specify file name of the runtime instrumentation library"),
cl::ZeroOrMore,
cl::init("libbolt_rt.a"),
cl::cat(BoltOptCategory));
static cl::opt<bool>
ForceToDataRelocations("force-data-relocations",
cl::desc("force relocations to data sections to always be processed"),
@ -591,6 +600,15 @@ void check_error(std::error_code EC, StringRef Message) {
report_error(Message, EC);
}
/// If \p E holds an error, print it prefixed with \p Message and terminate
/// the process with exit code 1. Does nothing when \p E is a success value.
void check_error(Error E, Twine Message) {
  if (E) {
    auto ReportAndExit = [&](const llvm::ErrorInfoBase &Info) {
      llvm::errs() << "BOLT-ERROR: '" << Message << "': " << Info.message()
                   << '\n';
      exit(1);
    };
    handleAllErrors(std::move(E), ReportAndExit);
  }
}
}
}
@ -750,8 +768,8 @@ createBinaryContext(ELFObjectFileBase *File, DataReader &DR,
RewriteInstance::RewriteInstance(ELFObjectFileBase *File, DataReader &DR,
DataAggregator &DA, const int Argc,
const char *const *Argv)
: InputFile(File), Argc(Argc), Argv(Argv), DA(DA),
const char *const *Argv, StringRef ToolPath)
: InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath), DA(DA),
BC(createBinaryContext(
File, DR,
DWARFContext::create(*File, nullptr,
@ -1104,7 +1122,7 @@ void RewriteInstance::run() {
if (opts::DiffOnly)
return;
runOptimizationPasses();
emitSections();
emitAndLink();
};
outs() << "BOLT-INFO: Target architecture: "
@ -2997,8 +3015,8 @@ std::vector<T> singletonSet(T t) {
} // anonymous namespace
void RewriteInstance::emitSections() {
NamedRegionTimer T("emitSections", "emit sections", TimerGroupName,
void RewriteInstance::emitAndLink() {
NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName,
TimerGroupDesc, opts::TimeRewrite);
std::error_code EC;
@ -3071,12 +3089,18 @@ void RewriteInstance::emitSections() {
auto Resolver = orc::createLegacyLookupResolver(
[&](const std::string &Name) -> JITSymbol {
DEBUG(dbgs() << "BOLT: looking for " << Name << "\n");
if (EFMM->ObjectsLoaded) {
return OLT->findSymbol(Name, false);
}
if (auto *I = BC->getBinaryDataByName(Name)) {
const uint64_t Address = I->isMoved() && !I->isJumpTable()
? I->getOutputAddress()
: I->getAddress();
DEBUG(dbgs() << "Resolved to address 0x" << Twine::utohexstr(Address)
<< "\n");
return JITSymbol(Address, JITSymbolFlags());
}
DEBUG(dbgs() << "Resolved to address 0x0\n");
return JITSymbol(nullptr);
},
[](Error Err) { cantFail(std::move(Err), "lookup failed"); });
@ -3087,6 +3111,8 @@ void RewriteInstance::emitSections() {
SSP.reset(new decltype(SSP)::element_type());
ES.reset(new decltype(ES)::element_type(*SSP));
// Key for our main object created out of the input binary
auto K = ES->allocateVModule();
OLT.reset(new decltype(OLT)::element_type(
*ES,
[this, &Resolver](orc::VModuleKey Key) {
@ -3099,22 +3125,32 @@ void RewriteInstance::emitSections() {
// Loaded notifier
[&](orc::VModuleKey Key, const object::ObjectFile &Obj,
const RuntimeDyld::LoadedObjectInfo &) {
// Assign addresses to all sections.
mapFileSections(Key);
// Assign addresses to all sections. If key corresponds to the object
// created by ourselves, call our regular mapping function. If we are
// loading additional objects as part of runtime libraries for
// instrumentation, treat them as extra sections.
if (Key == K) {
mapFileSections(Key);
} else {
mapExtraSections(Key);
}
},
// Finalized notifier
[&](orc::VModuleKey Key) {
// Update output addresses based on the new section map and
// layout.
updateOutputValues(FinalLayout);
// layout. Only do this for the object created by ourselves.
if (Key == K)
updateOutputValues(FinalLayout);
}));
OLT->setProcessAllSections(true);
auto K = ES->allocateVModule();
cantFail(OLT->addObject(K, std::move(ObjectMemBuffer)));
cantFail(OLT->emitAndFinalize(K));
// Link instrumentation runtime library
if (opts::Instrument)
linkRuntime();
// Once the code is emitted, we can rename function sections to actual
// output sections and de-register sections used for emission.
if (!BC->HasRelocations) {
@ -3140,6 +3176,58 @@ void RewriteInstance::emitSections() {
TempOut->keep();
}
void RewriteInstance::linkRuntime() {
OLT->setProcessAllSections(false);
std::string Dir = llvm::sys::path::parent_path(ToolPath);
SmallString<128> P(Dir);
P = llvm::sys::path::parent_path(Dir);
llvm::sys::path::append(P, "lib", opts::RuntimeInstrumentationLib);
std::string LibPath = P.str();
if (!llvm::sys::fs::exists(LibPath)) {
errs() << "BOLT-ERROR: instrumentation library not found: " << LibPath
<< "\n";
exit(1);
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MaybeBuf =
MemoryBuffer::getFile(LibPath, -1, false);
check_error(MaybeBuf.getError(), LibPath);
std::unique_ptr<MemoryBuffer> B = std::move(MaybeBuf.get());
file_magic Magic = identify_magic(B->getBuffer());
if (Magic == file_magic::archive) {
Error Err = Error::success();
object::Archive Archive(B.get()->getMemBufferRef(), Err);
for (auto &C : Archive.children(Err)) {
auto ChildKey = ES->allocateVModule();
auto ChildBuf =
MemoryBuffer::getMemBuffer(cantFail(C.getMemoryBufferRef()));
cantFail(OLT->addObject(ChildKey, std::move(ChildBuf)));
cantFail(OLT->emitAndFinalize(ChildKey));
}
check_error(std::move(Err), B->getBufferIdentifier());
} else if (Magic == file_magic::elf_relocatable ||
Magic == file_magic::elf_shared_object) {
auto K2 = ES->allocateVModule();
cantFail(OLT->addObject(K2, std::move(B)));
cantFail(OLT->emitAndFinalize(K2));
} else {
errs() << "BOLT-ERROR: unrecognized instrumentation library format: "
<< LibPath << "\n";
exit(1);
}
InstrumentationRuntimeStartAddress =
cantFail(OLT->findSymbol("__bolt_instr_data_dump", false).getAddress());
if (!InstrumentationRuntimeStartAddress) {
errs() << "BOLT-ERROR: instrumentation library does not define "
"__bolt_instr_data_dump: "
<< LibPath << "\n";
exit(1);
}
outs() << "BOLT-INFO: output linked against instrumentation runtime "
"library, lib entry point is 0x"
<< Twine::utohexstr(InstrumentationRuntimeStartAddress) << "\n";
}
void RewriteInstance::emitFunctions(MCStreamer *Streamer) {
auto emit = [&](const std::vector<BinaryFunction *> &Functions) {
for (auto *Function : Functions) {
@ -3469,6 +3557,29 @@ void RewriteInstance::mapDataSections(orc::VModuleKey Key) {
}
}
/// Assign output addresses to allocatable sections that have a valid section
/// ID but no output address yet. Each such section is placed at the next
/// available address (aligned to the section's alignment), the mapping is
/// registered with OLT under \p Key, and the section's file offset is updated
/// to match.
void RewriteInstance::mapExtraSections(orc::VModuleKey Key) {
  assert(BC->HasRelocations && "Unsupported in non-relocation mode");

  for (auto &Section : BC->allocatableSections()) {
    // Only sections without an output address and with a valid ID are mapped.
    if (!Section.getOutputAddress() && Section.hasValidSectionID()) {
      const uint64_t AlignedAddress =
          alignTo(NextAvailableAddress, Section.getAlignment());
      Section.setOutputAddress(AlignedAddress);
      NextAvailableAddress = AlignedAddress + Section.getOutputSize();

      DEBUG(dbgs() << "BOLT: (extra) mapping " << Section.getName()
                   << " at 0x" << Twine::utohexstr(Section.getAllocAddress())
                   << " to 0x" << Twine::utohexstr(Section.getOutputAddress())
                   << '\n');

      OLT->mapSectionAddress(Key, Section.getSectionID(),
                             Section.getOutputAddress());
      Section.setFileOffset(
          getFileOffsetForAddress(Section.getOutputAddress()));
    }
  }
}
}
void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) {
SectionPatchers[".note.stapsdt"] = llvm::make_unique<SimpleBinaryPatcher>();
auto *SDTNotePatcher = static_cast<SimpleBinaryPatcher *>(
@ -4716,6 +4827,7 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
"error accessing dynamic table");
const Elf_Dyn *DTE = cantFail(Obj->dynamic_table_end(DynamicPhdr),
"error accessing dynamic table");
bool FiniFound = false;
for (auto *DE = DTB; DE != DTE; ++DE) {
auto NewDE = *DE;
bool ShouldPatch = true;
@ -4735,8 +4847,8 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
// FIXME: Put the old FINI pointer as a tail call in the generated
// dumper function
if (opts::Instrument && DE->getTag() == ELF::DT_FINI) {
NewDE.d_un.d_ptr =
Instrumenter->getDumpFunction()->getOutputAddress();
NewDE.d_un.d_ptr = InstrumentationRuntimeStartAddress;
FiniFound = true;
}
break;
case ELF::DT_FLAGS:
@ -4758,6 +4870,13 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
}
}
if (opts::Instrument && !FiniFound) {
errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic "
"section but instrumentation currently relies on patching "
"DT_FINI to write the profile.\n";
exit(1);
}
if (BC->RequiresZNow && !ZNowSet) {
errs() << "BOLT-ERROR: output binary requires immediate relocation "
"processing which depends on DT_FLAGS or DT_FLAGS_1 presence in "

View File

@ -47,7 +47,8 @@ class RewriteInstanceDiff;
class RewriteInstance {
public:
RewriteInstance(llvm::object::ELFObjectFileBase *File, DataReader &DR,
DataAggregator &DA, const int Argc, const char *const *Argv);
DataAggregator &DA, const int Argc, const char *const *Argv,
StringRef ToolPath);
~RewriteInstance();
/// Reset all state except for split hints. Used to run a second pass with
@ -97,7 +98,10 @@ public:
/// Write code and data into an intermediary object file, map virtual to real
/// addresses and link the object file, resolving all relocations and
/// performing final relaxation.
void emitSections();
void emitAndLink();
/// Link additional runtime code to support instrumentation.
void linkRuntime();
/// Emit function code.
void emitFunctions(MCStreamer *Streamer);
@ -121,6 +125,7 @@ public:
void mapCodeSections(orc::VModuleKey ObjectsHandle);
void mapDataSections(orc::VModuleKey ObjectsHandle);
void mapFileSections(orc::VModuleKey ObjectsHandle);
void mapExtraSections(orc::VModuleKey ObjectsHandle);
/// Update output object's values based on the final \p Layout.
void updateOutputValues(const MCAsmLayout &Layout);
@ -321,6 +326,7 @@ private:
/// Command line args used to process binary.
const int Argc;
const char *const *Argv;
StringRef ToolPath;
/// Holds our data aggregator in case user supplied a raw perf data file.
DataAggregator &DA;
@ -355,6 +361,9 @@ private:
uint64_t NewTextSegmentOffset{0};
uint64_t NewTextSegmentSize{0};
/// Extra linking
uint64_t InstrumentationRuntimeStartAddress{0};
/// Track next available address for new allocatable sections.
uint64_t NextAvailableAddress{0};

View File

@ -3424,311 +3424,6 @@ public:
return Results;
}
MultiBlocksCode createInstrumentedDataDumpCode(
MCSymbol *Locs,
MCSymbol *Descriptions,
MCSymbol *Strings,
MCSymbol *FilenameSym,
MCSymbol *Spaces,
MCSymbol *Chars,
size_t NumLocs,
MCContext *Ctx
) const override {
std::vector<MCInst>* Code;
MultiBlocksCode Result;
#define INS(x, y) Code->emplace_back(MCInstBuilder(x).y)
#define INS_NOARGS(x) Code->emplace_back(MCInstBuilder(x))
#define REG(x) addReg(x)
#define IMM(x) addImm(x)
#define EXPR(x) \
addExpr(MCSymbolRefExpr::create(x, MCSymbolRefExpr::VK_None, *Ctx))
#define NOREG addReg(X86::NoRegister)
#define BEGIN_BLOCK(x) \
Result.Blocks.emplace_back(std::make_pair<>(x, std::vector<MCInst>())); \
Code = &Result.Blocks.back().second; \
Result.Successors.emplace_back(nullptr);
#define BEGIN_BLOCK_FALLTHROUGH \
Result.Blocks.emplace_back( \
std::make_pair<>(Ctx->createTempSymbol(), std::vector<MCInst>())); \
Code = &Result.Blocks.back().second; \
Result.Successors.emplace_back(nullptr);
#define SET_SUCC(x) Result.Successors.back() = x;
using namespace llvm::X86;
// I know, this got ridiculously large, we should have a better way to
// write our runtime library for instrumentation. At this point I think it
// is kind of nice to do not depend on loading an object on disk to link
// against the input binary, but in the future it may be inevitable.
// String buffer allocated on stack to store data sent to write()
const uint32_t COPY_BUFFER_SIZE = 0x1000;
// These constants depend on the target OS
const uint32_t OPEN_MODE = 0666;
const uint32_t OPEN_FLAGS = 0x241; // O_WRONLY|O_TRUNC|O_CREAT
const uint32_t SYSCALL_WRITE = 1;
const uint32_t SYSCALL_OPEN = 2;
const uint32_t SYSCALL_CLOSE = 3;
MCSymbol *LoopBody1 = Ctx->createTempSymbol(); // for each instrumented br
MCSymbol *LoopBody1_1 = Ctx->createTempSymbol(); // for each src/dst
MCSymbol *LoopBody1_1_1 = Ctx->createTempSymbol(); // write func name
MCSymbol *LoopBody1_1_2 = Ctx->createTempSymbol(); // write offset in hex
MCSymbol *LoopBody1_2 = Ctx->createTempSymbol(); // write branch frequency
MCSymbol *Loop1End = Ctx->createTempSymbol();
BEGIN_BLOCK_FALLTHROUGH; // Start our prologue
// pushq %rbx
INS(PUSH64r, REG(RBX));
// pushq %r12
INS(PUSH64r, REG(R12));
// pushq %r13
INS(PUSH64r, REG(R13));
// pushq %r14
INS(PUSH64r, REG(R14));
// pushq %r15
INS(PUSH64r, REG(R15));
// subq $0x1000, %rsp // Reserve space for write buffer
INS(SUB64ri32, REG(RSP).REG(RSP).IMM(COPY_BUFFER_SIZE));
// leaq filename(%rip), %rdi
INS(LEA64r, REG(RDI).REG(RIP).IMM(0x1).NOREG.EXPR(FilenameSym).NOREG);
// movq $0x241, %rsi // O_WRONLY|O_TRUNC|O_CREAT
INS(MOV64ri32, REG(RSI).IMM(OPEN_FLAGS));
// movq $0666, %rdx // mode
INS(MOV64ri32, REG(RDX).IMM(OPEN_MODE));
// mov $0x2, %rax
INS(MOV64ri32, REG(RAX).IMM(SYSCALL_OPEN));
// syscall
INS_NOARGS(SYSCALL); // open()
// movq %rax, %r13
INS(MOV64rr, REG(R13).REG(RAX));
// xorq %r14, %r14 // Induction variable for main loop
// // over all intrumentation counters
INS(XOR64rr, REG(R14).REG(R14).REG(R14));
// leaq label1(%rip), %r11 // Load start of descriptions vector
INS(LEA64r, REG(R11).REG(RIP).IMM(1).NOREG.EXPR(Descriptions).NOREG);
// loopbody1: // Main loop
BEGIN_BLOCK(LoopBody1);
// movq %rsp, %r15 // Reset write buffer pointer
INS(MOV64rr, REG(R15).REG(RSP));
// xorq %rbx, %rbx // Induction variable for our loop of 2
// // iterations to read From description and
// // then To description
INS(XOR64rr, REG(RBX).REG(RBX).REG(RBX));
// loopbody1_1:
BEGIN_BLOCK(LoopBody1_1);
// movb $0x31, (%r15) // Write '1' and a space before func name
INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x31));
// incq %r15
INS(INC64r, REG(R15).REG(R15));
// movb $0x20, (%r15)
INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x20));
// incq %r15
INS(INC64r, REG(R15).REG(R15));
// leaq strings(%rip), %r12 // Load string table base
INS(LEA64r, REG(R12).REG(RIP).IMM(0x1).NOREG.EXPR(Strings).NOREG);
// addl (%r11), %r12d // Add string index
INS(ADD32rm, REG(R12D).REG(R12D).REG(R11).IMM(1).NOREG.IMM(0).NOREG);
// addq $4, %r11
INS(ADD64ri8, REG(R11).REG(R11).IMM(4));
// loopbody1_1_1: // Loop over counter description string
// // copying it to our write buffer
BEGIN_BLOCK(LoopBody1_1_1);
// mov (%r12), %ax
INS(MOV8rm, REG(AX).REG(R12).IMM(1).NOREG.IMM(0).NOREG);
// mov %ax, (%r15)
INS(MOV8mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(AX));
// incq %r15
INS(INC64r, REG(R15).REG(R15));
// incq %r12
INS(INC64r, REG(R12).REG(R12));
// test %ax, %ax
INS(TEST8rr, REG(AX).REG(AX));
// jnz loopbody1_1_1
INS(JNE_4, EXPR(LoopBody1_1_1));
SET_SUCC(LoopBody1_1_1);
BEGIN_BLOCK_FALLTHROUGH; // Copy " " to write buffer -- empty
// // spaces that will be overwritten with the
// // offset value in hex, right to left
// decq %r15
INS(DEC64r, REG(R15).REG(R15));
// movq spaces(%rip), %rdx
INS(MOV64rm, REG(RDX).REG(RIP).IMM(1).NOREG.EXPR(Spaces).NOREG);
// movq %rdx, (%r15)
INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX));
// addq $8, %r15
INS(ADD64ri8, REG(R15).REG(R15).IMM(8));
// movq %rdx, (%r15)
INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX));
// addq $8, %r15
INS(ADD64ri8, REG(R15).REG(R15).IMM(8));
// movq %r15, %r12
INS(MOV64rr, REG(R12).REG(R15));
// decq %r15
INS(DEC64r, REG(R15).REG(R15));
// xorq %rax, %rax
INS(XOR64rr, REG(RAX).REG(RAX).REG(RAX));
// movl (%r11), %eax
INS(MOV32rm, REG(EAX).REG(R11).IMM(1).NOREG.IMM(0).NOREG);
// addq $4, %r11
INS(ADD64ri8, REG(R11).REG(R11).IMM(4));
// loopbody1_1_2: // Loop to print address in hexadecimal
BEGIN_BLOCK(LoopBody1_1_2);
// decq %r15
INS(DEC64r, REG(R15).REG(R15));
// xorq %rdx, %rdx
INS(XOR64rr, REG(RDX).REG(RDX).REG(RDX));
// movq $0x10, %rsi
INS(MOV64ri32, REG(RSI).IMM(0x10));
// divq %rsi
INS(DIV64r, REG(RSI));
// leaq chars(%rip), %rdi
INS(LEA64r, REG(RDI).REG(RIP).IMM(1).NOREG.EXPR(Chars).NOREG);
// mov (%rdi, %rdx, 1), %cx
INS(MOV8rm, REG(CX).REG(RDI).IMM(1).REG(RDX).IMM(0).NOREG);
// mov %cx, (%r15)
INS(MOV8mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(CX));
// testq %rax, %rax
INS(TEST64rr, REG(RAX).REG(RAX));
// jnz loopbody1_1_2
INS(JNE_4, EXPR(LoopBody1_1_2));
SET_SUCC(LoopBody1_1_2);
BEGIN_BLOCK_FALLTHROUGH;
// movq %r12, %r15 // Loop end (2 iteration loop for From/To)
INS(MOV64rr, REG(R15).REG(R12));
// incq %rbx
INS(INC64r, REG(RBX).REG(RBX));
// cmpq $2, %rbx
INS(CMP64ri8, REG(RBX).IMM(2));
// jne loopbody1_1
INS(JNE_4, EXPR(LoopBody1_1));
SET_SUCC(LoopBody1_1);
BEGIN_BLOCK_FALLTHROUGH; // Copy " " to write buffer -- empty
// // spaces that will be overwritten with the
// // counter value in decimal, right to left
// movb $0x30, (%r15) // Write '0' and a space before counter val
// // representing zero mispredictions
INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x30));
// incq %r15
INS(INC64r, REG(R15).REG(R15));
// movb $0x20, (%r15)
INS(MOV8mi, REG(R15).IMM(1).NOREG.IMM(0).NOREG.IMM(0x20));
// incq %r15
INS(INC64r, REG(R15).REG(R15));
// movq spaces(%rip), %rdx
INS(MOV64rm, REG(RDX).REG(RIP).IMM(1).NOREG.EXPR(Spaces).NOREG);
// movq %rdx, (%r15)
INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX));
// addq $8, %r15
INS(ADD64ri8, REG(R15).REG(R15).IMM(8));
// movq %rdx, (%r15)
INS(MOV64mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(RDX));
// addq $8, %r15
INS(ADD64ri8, REG(R15).REG(R15).IMM(8));
// movq %r15, %r12
INS(MOV64rr, REG(R12).REG(R15));
// leaq count1(%rip), %rdx
INS(LEA64r, REG(RDX).REG(RIP).IMM(1).NOREG.EXPR(Locs).NOREG);
// movq (%rdx, %r14, 8), %rax // Load current instrumentation counter value
INS(MOV64rm, REG(RAX).REG(RDX).IMM(8).REG(R14).IMM(0).NOREG);
// testq %rax, %rax
INS(TEST64rr, REG(RAX).REG(RAX));
// je loop1end
INS(JE_4, EXPR(Loop1End));
SET_SUCC(Loop1End);
// loopbody1_2: // Loop to print counter value in decimal
BEGIN_BLOCK(LoopBody1_2);
// decq %r15
INS(DEC64r, REG(R15).REG(R15));
// xorq %rdx, %rdx
INS(XOR64rr, REG(RDX).REG(RDX).REG(RDX));
// movq $0xa, %rsi
INS(MOV64ri32, REG(RSI).IMM(0xa));
// divq %rsi
INS(DIV64r, REG(RSI));
// leaq chars(%rip), %rdi
INS(LEA64r, REG(RDI).REG(RIP).IMM(1).NOREG.EXPR(Chars).NOREG);
// mov (%rdi, %rdx, 1), %cx
INS(MOV8rm, REG(CX).REG(RDI).IMM(1).REG(RDX).IMM(0).NOREG);
// mov %cx, (%r15)
INS(MOV8mr, REG(R15).IMM(1).NOREG.IMM(0).NOREG.REG(CX));
// testq %rax, %rax
INS(TEST64rr, REG(RAX).REG(RAX));
// jnz loopbody1_2
INS(JNE_4, EXPR(LoopBody1_2));
SET_SUCC(LoopBody1_2);
BEGIN_BLOCK_FALLTHROUGH; // Flush write buffer to file
// movb $0xa, (%r12) // Put a '\n' at the end of write buffer
INS(MOV8mi, REG(R12).IMM(1).NOREG.IMM(0).NOREG.IMM(0xa));
// incq %r12
INS(INC64r, REG(R12).REG(R12));
// movq %r13, %rdi
INS(MOV64rr, REG(RDI).REG(R13));
// movq %rsp, %rsi
INS(MOV64rr, REG(RSI).REG(RSP));
// movq %r12, %rdx
INS(MOV64rr, REG(RDX).REG(R12));
// subq %rsp, %rdx
INS(SUB64rr, REG(RDX).REG(RDX).REG(RSP));
// movq $0x1, %rax
INS(MOV64ri32, REG(RAX).IMM(SYSCALL_WRITE));
// pushq %r11
INS(PUSH64r, REG(R11));
// syscall // write()
INS_NOARGS(SYSCALL);
// popq %r11
INS(POP64r, REG(R11));
// loop1end: // Main loop header
BEGIN_BLOCK(Loop1End);
// incq %r14
INS(INC64r, REG(R14).REG(R14));
// cmp $0xXXXXXX, %r14
INS(CMP64ri32, REG(R14).IMM(NumLocs));
// jnz loopbody1
INS(JNE_4, EXPR(LoopBody1));
SET_SUCC(LoopBody1);
BEGIN_BLOCK_FALLTHROUGH; // Finish by closing file and returning
// movq %r13, %rdi
INS(MOV64rr, REG(RDI).REG(R13));
// mov $0x3, %eax // close()
INS(MOV32ri, REG(EAX).IMM(SYSCALL_CLOSE));
// syscall
INS_NOARGS(SYSCALL);
// addq $0x1000, %rsp
INS(ADD64ri32, REG(RSP).REG(RSP).IMM(0x1000));
// popq %r15
INS(POP64r, REG(R15));
// popq %r14
INS(POP64r, REG(R14));
// popq %r13
INS(POP64r, REG(R13));
// popq %r12
INS(POP64r, REG(R12));
// popq %rbx
INS(POP64r, REG(RBX));
// ret
INS_NOARGS(RETQ);
#undef INS
#undef INS_NOARGS
#undef REG
#undef IMM
#undef EXPR
#undef NOREG
#undef BEGIN_BLOCK
#undef BEGIN_BLOCK_FALLTHROUGH
return Result;
}
};
}

View File

@ -18,6 +18,7 @@
#include "RewriteInstance.h"
#include "llvm/Object/Binary.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Signals.h"
@ -229,6 +230,16 @@ void boltMode(int argc, char **argv) {
}
}
std::string GetExecutablePath(const char *Argv0) {
SmallString<128> ExecutablePath(Argv0);
// Do a PATH lookup if Argv0 isn't a valid path.
if (!llvm::sys::fs::exists(ExecutablePath))
if (llvm::ErrorOr<std::string> P =
llvm::sys::findProgramByName(ExecutablePath))
ExecutablePath = *P;
return ExecutablePath.str();
}
int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal(argv[0]);
@ -236,6 +247,8 @@ int main(int argc, char **argv) {
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
std::string ToolPath = GetExecutablePath(argv[0]);
// Initialize targets and assembly printers/parsers.
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargetMCs();
@ -309,7 +322,7 @@ int main(int argc, char **argv) {
Binary &Binary = *BinaryOrErr.get().getBinary();
if (auto *e = dyn_cast<ELFObjectFileBase>(&Binary)) {
RewriteInstance RI(e, *DR.get(), *DA.get(), argc, argv);
RewriteInstance RI(e, *DR.get(), *DA.get(), argc, argv, ToolPath);
RI.run();
} else {
report_error(opts::InputFilename, object_error::invalid_file_type);
@ -342,8 +355,8 @@ int main(int argc, char **argv) {
if (auto *ELFObj1 = dyn_cast<ELFObjectFileBase>(&Binary1)) {
if (auto *ELFObj2 = dyn_cast<ELFObjectFileBase>(&Binary2)) {
RewriteInstance RI1(ELFObj1, *DR.get(), *DA.get(), argc, argv);
RewriteInstance RI2(ELFObj2, *DR2.get(), *DA.get(), argc, argv);
RewriteInstance RI1(ELFObj1, *DR.get(), *DA.get(), argc, argv, ToolPath);
RewriteInstance RI2(ELFObj2, *DR2.get(), *DA.get(), argc, argv, ToolPath);
outs() << "BOLT-DIFF: *** Analyzing binary 1: " << opts::InputFilename
<< "\n";
outs() << "BOLT-DIFF: *** Binary 1 fdata: " << opts::InputDataFilename