[DynInst_API:] Some corrections for the dyninst documentation.


Date: Mon, 25 Feb 2013 09:53:22 -0500
From: William Cohen <wcohen@xxxxxxxxxx>
Subject: [DynInst_API:] Some corrections for the dyninst documentation.
Hi All,

I have been playing around with dyninst available on x86_64 Fedora 18
to better understand how to use it.  When trying to build example.cc
in parseAPI/doc/, I got a number of errors:


$ gcc -c example.cc -I /usr/include/dyninst/
example.cc: In function ‘int main(int, char**)’:
example.cc:12:5: error: ‘hash_map’ was not declared in this scope
example.cc:12:21: error: expected primary-expression before ‘,’ token
example.cc:12:23: error: expected primary-expression before ‘bool’
example.cc:12:23: error: expected ‘;’ before ‘bool’
example.cc:27:44: error: invalid initialization of reference of type ‘Dyninst::ParseAPI::CodeObject::funclist& {aka std::set<Dyninst::ParseAPI::Function*, Dyninst::ParseAPI::Function::less>&}’ from expression of type ‘const funclist {aka const std::set<Dyninst::ParseAPI::Function*, Dyninst::ParseAPI::Function::less>}’
example.cc:42:16: error: ‘seen’ was not declared in this scope
example.cc:44:13: error: ‘seen’ was not declared in this scope
example.cc:46:63: error: conversion from ‘std::vector<Dyninst::ParseAPI::Edge*>::const_iterator {aka __gnu_cxx::__normal_iterator<Dyninst::ParseAPI::Edge* const*, std::vector<Dyninst::ParseAPI::Edge*> >}’ to non-scalar type ‘std::vector<Dyninst::ParseAPI::Edge*>::iterator {aka __gnu_cxx::__normal_iterator<Dyninst::ParseAPI::Edge**, std::vector<Dyninst::ParseAPI::Edge*> >}’ requested
example.cc:48:27: warning: deprecated conversion from string constant to ‘char*’ [-Wwrite-strings]
example.cc:50:21: warning: deprecated conversion from string constant to ‘char*’ [-Wwrite-strings]
example.cc:52:21: warning: deprecated conversion from string constant to ‘char*’ [-Wwrite-strings]


The hash_map isn't a C++ standard template, so I used map instead.
This is probably not as efficient but it works on Fedora 18.  There
were a number of places where const was needed to make the compiler
happy.  Attached is the resulting example.cc. It appears to compile
and run on Fedora 18.

It would be nice if the parseAPI/doc/Makefile attempted to build
example.cc to make sure that the example.cc for ParseAPI.pdf is still
compilable.

I noticed some similar issues for example.C and retee.C for
DyninstAPI.pdf and had to add some includes to allow them to
compile. I have attached the modified examples.

Also saw some typos in the DyninstAPI.pdf manual:

Appendix c common pitfalls:

If a mutator attaches to a mutatee, and immediately exists, ->
If a mutator attaches to a mutatee, and immediately exits,


page 9 "bpatch.openFile" should be bpatch.openBinary

-Will
#include <stdio.h>

#include "CodeObject.h"
#include "CFG.h"

using namespace std;
using namespace Dyninst;
using namespace ParseAPI;

int main(int argc, char * argv[])
{
    map<Address, bool> seen;
    vector<Function *> funcs;
    SymtabCodeSource *sts;
    CodeObject *co;

    // Create a new binary code object from the filename argument
    sts = new SymtabCodeSource( argv[1] );
    co = new CodeObject( sts );

    // Parse the binary
    co->parse();

    printf("digraph G {\n");

    // Print the control flow graph
    const CodeObject::funclist & all = co->funcs();
    CodeObject::funclist::iterator fit = all.begin();
    for( ; fit != all.end(); ++fit) {
        Function * f = *fit;

        if(f->retstatus() == NORETURN) 
            printf("\t\"%lx\" [shape=box,color=red]\n",f->addr()); 
        else 
            printf("\t\"%lx\" [shape=box]\n",f->addr()); 
    
        Function::blocklist::const_iterator bit = f->blocks().begin();
        for( ; bit != f->blocks().end(); ++bit) {
            Block * b = *bit;

            // Don't revisit blocks in shared code
            if(seen.find(b->start()) != seen.end())
                continue;
            seen[b->start()] = true;

            Block::edgelist::const_iterator it = b->targets().begin();
            for( ; it != b->targets().end(); ++it) {
               const char * s = "";
               if((*it)->type() == CALL)
                s = " [color=blue]";
               else if((*it)->type() == RET)
                s = " [color=green]";
               printf("\t\t\"%lx\" -> \"%lx\"%s\n",(*it)->src()->start(),
                (*it)->trg()->start(),s); 
            }
        }
    }

    printf("}\n");

    delete co;
    delete sts;
}
#include "BPatch.h"
#include "BPatch_addressSpace.h"
#include "BPatch_flowGraph.h"
#include "BPatch_point.h"
#include "BPatch_process.h"
#include "BPatch_binaryEdit.h"
#include "BPatch_function.h"

#include <string>

// Example 1: create an instance of class BPatch
// This file-scope object is what startInstrumenting() and
// finishInstrumenting() below call into (processCreate, processAttach,
// openBinary, waitForStatusChange).
BPatch bpatch;

// Example 2: attaching, creating, or opening a file for rewrite
// accessType_t tells startInstrumenting() which of the three to perform.
typedef enum {
  create,  // handled with bpatch.processCreate(name, argv)
  attach,  // handled with bpatch.processAttach(name, pid)
  open } accessType_t;  // open: handled with bpatch.openBinary(name)
// Obtains the address space to instrument, using the BPatch call that
// matches accessType:
//   create -> bpatch.processCreate(name, argv)
//   attach -> bpatch.processAttach(name, pid)
//   open   -> bpatch.openBinary(name)
// Whatever the chosen call returns is passed straight back to the caller;
// an accessType outside those three yields NULL.
BPatch_addressSpace *startInstrumenting(accessType_t accessType,
					const char *name,
					int pid, // For attach
					const char *argv[]) { // For create
  if (accessType == create) {
    return bpatch.processCreate(name, argv);
  }
  if (accessType == attach) {
    return bpatch.processAttach(name, pid);
  }
  if (accessType == open) {
    return bpatch.openBinary(name);
  }
  // Not one of the enumerators: nothing was opened.
  return NULL;
}

// Example 2: find the entry point for "InterestingProcedure"
//
// Looks the function up in app's image and returns the result of
// findPoint(BPatch_entry) on the first match. Returns NULL when the lookup
// produced no function at all.
std::vector<BPatch_point *> *findEntryPoint(BPatch_addressSpace *app) {
  std::vector<BPatch_function *> functions;

  BPatch_image *appImage = app->getImage();
  appImage->findFunction("InterestingProcedure", functions);

  // Indexing element 0 of an empty vector is undefined behavior, so bail
  // out when the name was not found.
  if (functions.empty()) {
    return NULL;
  }
  return functions[0]->findPoint(BPatch_entry);
}

// Example 3: create and insert an increment snippet
//
// Allocates an int-typed variable in app and inserts, at every point in
// *points, a snippet equivalent to "counter = counter + 1".
// Does nothing when there are no points to instrument or when the counter
// variable cannot be set up.
void createAndInsertSnippet(BPatch_addressSpace *app,
			    std::vector<BPatch_point *> *points) {
  // *points is dereferenced below; a NULL or empty list leaves nothing to do.
  if (points == NULL || points->empty()) {
    return;
  }

  BPatch_image *appImage = app->getImage();

  // NOTE(review): assumes findType() signals "no such type" with NULL -
  // confirm against the Dyninst reference.
  BPatch_type *intType = appImage->findType("int");
  if (intType == NULL) {
    return;
  }

  BPatch_variableExpr *intCounter = app->malloc(*intType);
  if (intCounter == NULL) {
    return;
  }

  BPatch_arithExpr addOne(BPatch_assign, *intCounter,
			  BPatch_arithExpr(BPatch_plus, *intCounter, BPatch_constExpr(1)));

  app->insertSnippet(addOne, *points);
}

// Example 4: finish things up (continue or write out)
//
// The concrete type behind app decides what "finishing" means:
//  - a BPatch_process is resumed and then waited on until it reports
//    termination;
//  - a BPatch_binaryEdit is written out under newName.
void finishInstrumenting(BPatch_addressSpace *app, const char *newName) {
  BPatch_process *liveProcess = dynamic_cast<BPatch_process *>(app);
  BPatch_binaryEdit *rewriter = dynamic_cast<BPatch_binaryEdit *>(app);

  if (liveProcess != NULL) {
    liveProcess->continueExecution();
    // Block on status changes until the process reports termination.
    for (;;) {
      if (liveProcess->isTerminated()) {
        break;
      }
      bpatch.waitForStatusChange();
    }
  }

  if (rewriter != NULL) {
    rewriter->writeFile(newName);
  }
}

// Example 5: binary analysis
//
// Counts the instructions of "InterestingProcedure" for which
// readsMemory() or writesMemory() is true, across all basic blocks of the
// function's control flow graph.
// Returns that count, or 0 when the function or its flow graph is not
// available.
int binaryAnalysis(BPatch_addressSpace *app) {
  BPatch_image *appImage = app->getImage();
  int insns_access_memory = 0;

  std::vector<BPatch_function *> funcs;
  appImage->findFunction("InterestingProcedure", funcs);
  // funcs[0] on an empty vector is undefined behavior.
  if (funcs.empty()) {
    return 0;
  }

  BPatch_flowGraph *fg = funcs[0]->getCFG();
  // NOTE(review): assumes getCFG() reports failure with NULL - confirm.
  if (fg == NULL) {
    return 0;
  }

  std::set<BPatch_basicBlock *> blocks;
  fg->getAllBasicBlocks(blocks);

  std::set<BPatch_basicBlock *>::iterator block_iter;
  for (block_iter = blocks.begin(); block_iter != blocks.end(); ++block_iter) { 
    BPatch_basicBlock *block = *block_iter;
    std::vector<Dyninst::InstructionAPI::Instruction::Ptr> insns;
    block->getInstructions(insns);
    
    std::vector<Dyninst::InstructionAPI::Instruction::Ptr>::iterator insn_iter;
    for (insn_iter = insns.begin(); insn_iter != insns.end(); ++insn_iter) {
      Dyninst::InstructionAPI::Instruction::Ptr insn = *insn_iter;
      // An instruction that both reads and writes is still counted once.
      if (insn->readsMemory() || insn->writesMemory()) {
	insns_access_memory++;
      }
    }
  }
  return insns_access_memory;
}

// Example 6: memory instrumentation
//
// Inserts, at every load and store point of "InterestingProcedure", a call
// to the mutatee's printf with the format "Access at: %p\n" and the
// effective-address expression as its argument.
// Does nothing when the target function, its access points, or printf
// cannot be found.
void instrumentMemory(BPatch_addressSpace *app) {
  BPatch_image *appImage = app->getImage();

  // We're interested in loads and stores
  BPatch_Set<BPatch_opCode> accessTypes;
  accessTypes.insert(BPatch_opLoad);
  accessTypes.insert(BPatch_opStore);

  // Get points for each load and store
  std::vector<BPatch_function *> funcs;
  appImage->findFunction("InterestingProcedure", funcs);
  if (funcs.empty()) {
    return;  // funcs[0] would be undefined behavior
  }
  std::vector<BPatch_point *> *points = funcs[0]->findPoint(accessTypes);
  if (points == NULL || points->empty()) {
    return;  // *points is dereferenced below
  }

  std::vector<BPatch_function *> printfFuncs;
  appImage->findFunction("printf", printfFuncs);
  if (printfFuncs.empty()) {
    return;
  }

  // Create a snippet that calls printf with each effective address.
  // The argument snippets are automatic objects rather than new'd ones, so
  // nothing is left unfreed when this function returns.
  // NOTE(review): assumes BPatch_funcCallExpr does not hold on to the raw
  // argument pointers after insertSnippet() - confirm.
  BPatch_constExpr fmt("Access at: %p\n");
  BPatch_effectiveAddressExpr eae;
  std::vector<BPatch_snippet *> printfArgs;
  printfArgs.push_back(&fmt);
  printfArgs.push_back(&eae);
  BPatch_funcCallExpr printfCall(*(printfFuncs[0]), printfArgs);

  app->insertSnippet(printfCall, *points);
}

// Runs the six documentation examples in sequence against a placeholder
// program name, pid and argument vector.
// Returns 0 when every step ran, 1 when the address space or the entry
// point could not be obtained.
int main() {
  const char *progName = "InterestingProgram"; // = ...
  int progPID = 42; // = ...
  const char *progArgv[] = {"InterestingProgram", "-h", NULL}; // = ...

  // Example 1: create/attach/open a binary
  BPatch_addressSpace *app = startInstrumenting(create, // or attach or open
						progName,
						progPID,
						progArgv);
  // Every later step dereferences app, so stop if nothing was opened.
  if (app == NULL) {
    return 1;
  }

  // Example 2: get entry point
  std::vector<BPatch_point *> *entryPoint = findEntryPoint(app);
  if (entryPoint == NULL) {
    return 1;
  }
  
  // Example 3: create and insert increment snippet
  createAndInsertSnippet(app, entryPoint);

  // Example 4: finish up instrumenting
  finishInstrumenting(app, progName);

  // Example 5: get a count of memory accesses
  int insns_access_memory = binaryAnalysis(app);
  (void) insns_access_memory; // the example only demonstrates the call

  // Example 6: instrument memory accesses
  instrumentMemory(app);

  return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include "BPatch.h"
#include "BPatch_point.h"
#include "BPatch_process.h"
#include "BPatch_function.h"
#include "BPatch_Vector.h"
#include "BPatch_thread.h"

/*
 * retee.C
 *
 * This program (mutator) provides an example of several facets of
 * Dyninst's behavior, and is a good basis for many Dyninst
 * mutators. We want to intercept all output from a target application
 * (the mutatee), duplicating output to a file as well as the
 * original destination (e.g., stdout).
 *
 * This mutator operates in several phases. In brief:
 * 1) Attach to the running process and get a handle (BPatch_process
 * object)
 * 2) Get a handle for the parsed image of the mutatee for function
 * lookup (BPatch_image object)
 * 3) Open a file for output
 * 3a) Look up the "open" function
 * 3b) Build a code snippet to call open with the file name.
 * 3c) Run that code snippet via a oneTimeCode, saving the returned
 * file descriptor
 * 4) Write the returned file descriptor into a memory variable for
 * mutatee-side use
 * 5) Build a snippet that copies output to the file
 * 5a) Locate the "write" library call
 * 5b) Access its parameters
 * 5c) Build a snippet calling write(fd, parameters)
 * 5d) Insert the snippet at write
 * 6) Add a hook to exit to ensure that we close the file (using
 * a callback at exit and another oneTimeCode)
 */

// Writes the two-line command-line synopsis for retee to stderr.
void usage() {
  fputs("Usage: retee <process pid> <filename>\n", stderr);
  fputs(" note: <filename> is relative to the application process.\n", stderr);
}

// We need to use a callback, and so the things that callback requires
// are made global - this includes the file descriptor snippet (see below)
// main() assigns this from writeFileDescIntoMutatee(); closeFile() passes
// it as the argument of the generated close() call.
BPatch_variableExpr *fdVar = NULL;
// Opens fileName inside the mutatee before any instrumentation is added.
// This is done with a oneTimeCode: a snippet executed once, right now,
// rather than whenever execution reaches some location.
//
// The generated call is:
//   open(fileName, O_WRONLY|O_CREAT, 0666)
//
// Returns the value that call produced, narrowed to int, or -1 when no
// "open" function could be found in the mutatee image.
int openFileForWrite(BPatch_process *app, BPatch_image *appImage, char *fileName) {
  // Locate open() in the mutatee.
  BPatch_Vector<BPatch_function *> openCandidates;
  appImage->findFunction("open", openCandidates);
  if (openCandidates.size() < 1) {
    fprintf(stderr, "ERROR: Unable to find function for open()\n");
    return -1;
  }

  // One constant expression per argument: path, flags, mode.
  BPatch_constExpr pathArg(fileName);
  BPatch_constExpr flagsArg(O_WRONLY|O_CREAT);
  BPatch_constExpr modeArg(0666);

  // Arguments are pushed in declaration order, first parameter first.
  BPatch_Vector<BPatch_snippet *> callArgs;
  callArgs.push_back(&pathArg);
  callArgs.push_back(&flagsArg);
  callArgs.push_back(&modeArg);

  // Call expression on the first open() found, with those arguments.
  BPatch_funcCallExpr invokeOpen(*openCandidates[0], callArgs);

  // oneTimeCode hands back the snippet's result as a void*. Go through
  // long first so the pointer-sized value is narrowed to int explicitly.
  void *rawResult = app->oneTimeCode( invokeOpen );
  long widened = (long) rawResult;
  return (int) widened;
}

// We have used a oneTimeCode to open the file descriptor. However,
// this returns the file descriptor to the mutator - the mutatee has
// no idea what the descriptor is. We need to allocate a variable in
// the mutatee to hold this value for future use and copy the
// (mutator-side) value into the mutatee variable.
// Note: there are alternatives to this technique. We could have
// allocated the variable before the oneTimeCode and augmented the
// snippet to do the assignment. We could also write the file
// descriptor as a constant into any inserted instrumentation.
//
// Returns the mutatee-side variable holding fileDescriptor, or NULL when
// the "int" type lookup, the allocation, or the write fails.
BPatch_variableExpr *writeFileDescIntoMutatee(BPatch_process *app,
					      BPatch_image *appImage,
					      int fileDescriptor) {
  // (1) Allocate a variable in the mutatee of size (and type) int.
  // The type pointer is dereferenced for malloc, so check it first.
  // NOTE(review): assumes findType() signals failure with NULL - confirm.
  BPatch_type *intType = appImage->findType("int");
  if (intType == NULL) return NULL;
  // Named mutateeFd rather than fdVar so it cannot be confused with the
  // file-scope fdVar that main() later assigns from our return value.
  BPatch_variableExpr *mutateeFd = app->malloc(*intType);
  if (mutateeFd == NULL) return NULL;
  // (2) Write the value into the variable
  // Like memcpy, writeValue takes a pointer
  // The third parameter is for functionality called "saveTheWorld",
  // which we don't worry about here (and so is false)
  bool ret = mutateeFd->writeValue((void *) &fileDescriptor, sizeof(int),
				   false);
  if (ret == false) return NULL;
  return mutateeFd;
}

// We now have an open file descriptor in the mutatee. We want to
// instrument write to intercept and copy the output. That happens
// here.
//
// Inserts, at the entry of the mutatee's write(), a call
// write(fdVar, <arg 1 of write>, <arg 2 of write>).
// Returns true when insertSnippet produced a handle, false when write()
// or its entry point could not be found or the insertion failed.
bool interceptAndCloneWrite(BPatch_process *app,
			    BPatch_image *appImage,
			    BPatch_variableExpr *fdVar) {
  // (1) Locate the write call
  BPatch_Vector<BPatch_function *> writeFuncs;
  appImage->findFunction("write",
			 writeFuncs);
  if(writeFuncs.size() == 0) {
    fprintf(stderr, "ERROR: Unable to find function for write()\n");
    return false;
  }
  // (2) Build the call to (our) write. Arguments are:
  // ours: fdVar (file descriptor)
  // parameter: buffer
  // parameter: buffer size
  // Declare a vector to hold these.
  BPatch_Vector<BPatch_snippet *> writeArgs;
  // Push on the file descriptor
  writeArgs.push_back(fdVar);
  // Well, we need the buffer... but that's a parameter to the
  // function we're implementing. That's not a problem - we can grab
  // it out with a BPatch_paramExpr.
  BPatch_paramExpr buffer(1); // Second (0, 1, 2) argument
  BPatch_paramExpr bufferSize(2);
  writeArgs.push_back(&buffer);
  writeArgs.push_back(&bufferSize);
  // And build the write call
  BPatch_funcCallExpr writeCall(*writeFuncs[0], writeArgs);
  // (3) Identify the BPatch_point for the entry of write. We're
  // instrumenting the function with itself; normally the findPoint
  // call would operate off a different function than the snippet.
  BPatch_Vector<BPatch_point *> *points;
  points = writeFuncs[0]->findPoint(BPatch_entry);
  // points is dereferenced below, so a NULL result must be rejected as
  // well as an empty list.
  if (points == NULL || points->size() == 0) {
    return false;
  }
  // (4) Insert the snippet at the start of write. The returned handle is
  // compared against NULL explicitly to produce the bool result.
  return app->insertSnippet(writeCall, *points) != NULL;
  // Note: we have just instrumented write() with a call to
  // write(). This would ordinarily be a _bad thing_, as there is
  // nothing to stop infinite recursion - write -> instrumentation
  // -> write -> instrumentation....
  // However, Dyninst uses a feature called a "tramp guard" to
  // prevent this, and it's on by default.
}

// Exit callback: main() registers it through bpatch.registerExitCallback.
// Its signature has to match
//
//   typedef void (*BPatchExitCallback) (BPatch_thread *, BPatch_exitType)
//
// The callback receives a thread; the owning process is reached through
// thread->getProcess().
//
// Runs close(fdVar) inside the mutatee via a oneTimeCode and then resumes
// the process. If no "close" function is found, an error is printed and
// the process is not resumed from here.
void closeFile(BPatch_thread *thread, BPatch_exitType) {
  fprintf(stderr, "Exit callback called for process...\n");

  BPatch_process *proc = thread->getProcess();
  BPatch_image *image = proc->getImage();

  // Look up close() in the mutatee.
  BPatch_Vector<BPatch_function *> closeCandidates;
  image->findFunction("close", closeCandidates);

  if (closeCandidates.size() > 0) {
    // The only argument is the file-scope fdVar; the callback signature
    // leaves no other way to pass it in.
    BPatch_Vector<BPatch_snippet *> callArgs;
    callArgs.push_back(fdVar);

    BPatch_funcCallExpr invokeClose(*closeCandidates[0], callArgs);
    proc->oneTimeCode( invokeClose );

    // Resume the process after the close.
    proc->continueExecution();
  } else {
    fprintf(stderr, "ERROR: Unable to find function for close()\n");
  }
}

// File-scope BPatch object; main() uses it for processAttach,
// registerExitCallback and waitForStatusChange.
BPatch bpatch;

// In main we perform the following operations.
// 1) Attach to the process and get BPatch_process and BPatch_image
// handles
// 2) Open a file descriptor
// 3) Instrument write
// 4) Continue the process and wait for it to terminate
//
// Expects exactly two arguments: a positive decimal pid and a filename.
// Returns 0 after the mutatee terminates and -1 when attaching fails;
// exits with status 1 on a usage error or when any setup step fails.
int main(int argc, char *argv[]) {
  if (argc != 3) {
    usage();
    exit(1);
  }
  // Parse the pid strictly: strtol lets us reject empty input, trailing
  // junk, non-positive values and values that do not fit in an int,
  // instead of attaching to whatever a lenient parse would produce.
  char *pidEnd = NULL;
  long parsedPid = strtol(argv[1], &pidEnd, 10);
  int pid = (int) parsedPid;
  if (pidEnd == argv[1] || *pidEnd != '\0' ||
      parsedPid <= 0 || (long) pid != parsedPid) {
    usage();
    exit(1);
  }
  // Attach to the program - we can attach with just a pid; the
  // program name is no longer necessary
  fprintf(stderr, "Attaching to process %d...\n", pid);
  BPatch_process *app = bpatch.processAttach(NULL, pid);
  if (!app) {
    fprintf(stderr, "ERROR: unable to attach to process %d\n", pid);
    return -1;
  }
  // Read the program's image and get an associated image object
  BPatch_image *appImage = app->getImage();
  fprintf(stderr, "Opening file %s for write...\n", argv[2]);
  int fileDescriptor = openFileForWrite(app, appImage, argv[2]);
  if (fileDescriptor == -1) {
    fprintf(stderr, "ERROR: opening file %s for write failed\n",
	    argv[2]);
    exit(1);
  }
  // Adjacent string literals are joined with nothing in between, so the
  // separating space has to be inside one of them.
  fprintf(stderr, "Writing returned file descriptor %d into "
	  "mutatee...\n", fileDescriptor);
  // This was defined globally as the exit callback needs it.
  fdVar = writeFileDescIntoMutatee(app, appImage, fileDescriptor);
  if (fdVar == NULL) {
    fprintf(stderr, "ERROR: failed to write mutatee-side variable\n");
    exit(1);
  }
  fprintf(stderr, "Instrumenting write...\n");
  bool ret = interceptAndCloneWrite(app, appImage, fdVar);
  if (!ret) {
    fprintf(stderr, "ERROR: failed to instrument mutatee\n");
    exit(1);
  }
  fprintf(stderr, "Adding exit callback...\n");
  bpatch.registerExitCallback(closeFile);
  // Continue the execution...
  fprintf(stderr, "Continuing execution and waiting for termination\n");
  app->continueExecution();
  while (!app->isTerminated())
    bpatch.waitForStatusChange();
  printf("Done.\n");
  return 0;
}
[← Prev in Thread] Current Thread [Next in Thread→]