AFL源码分析(一)

2022-12-01 15:52:25 浏览数 (2)

前言

一直想学fuzz,但是由于自己太懒,一直没开始,最近终于开始学了,一直认为学习一个东西最好的办法,就是阅读源码,只有理解了源码,才能一通百通,我将从afl-gcc开始,一步一步的分析AFL的源码。

源码下载:

代码语言:javascript复制
git clone https://github.com/google/AFL.git

afl-gcc

afl-gcc.c的main函数如下:

代码语言:javascript复制
/* Entry point of the afl-gcc / afl-clang compiler wrapper.

   Prints a banner (only when stderr is a terminal and AFL_QUIET is unset),
   shows usage text and exits with status 1 when no compiler arguments were
   given, then locates the 'as' wrapper, builds the rewritten command line in
   cc_params and replaces the current process with the real compiler. */

int main(int argc, char** argv) {

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-cc " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

  } else be_quiet = 1;

  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It serves as a drop-in replacement\n"
         "for gcc or clang, letting you recompile third-party code with the required\n"
         "runtime instrumentation. A common use pattern would be one of the following:\n\n"

         "  CC=%s/afl-gcc ./configure\n"
         "  CXX=%s/afl-g++ ./configure\n\n"

         "You can specify custom next-stage toolchain via AFL_CC, AFL_CXX, and AFL_AS.\n"
         "Setting AFL_HARDEN enables hardening optimizations in the compiled code.\n\n",
         BIN_PATH, BIN_PATH);

    exit(1);

  }

  /* Work out which directory to hand to the compiler via -B. */

  find_as(argv[0]);

  /* Translate our own argv into the argv of the real compiler. */

  edit_params(argc, argv);

  /* execvp() only returns on failure, so reaching FATAL() means the exec
     did not happen. */

  execvp(cc_params[0], (char**)cc_params);

  FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]);

  return 0;

}

我们需要关注的是find_as函数与edit_params函数,首先来看find_as函数,该函数用于寻找汇编器的位置:

代码语言:javascript复制
/* Locate the directory holding the 'as' wrapper and store it in as_path.

   Candidates are probed in this order:
     1. "$AFL_PATH/as"   (AFL_PATH taken from the environment),
     2. "<dir>/afl-as"   (<dir> being everything before the last '/' in argv0),
     3. AFL_PATH "/as"   (AFL_PATH being the string literal fixed at build time).

   The first candidate that passes access(..., X_OK) wins; if none does, the
   function aborts through FATAL(). */

static void find_as(u8* argv0) {

  u8* env_dir = getenv("AFL_PATH");
  u8* probe;
  u8* last_sep;

  /* Candidate 1: directory named by the AFL_PATH environment variable. */

  if (env_dir) {

    int usable;

    probe  = alloc_printf("%s/as", env_dir);
    usable = !access(probe, X_OK);
    ck_free(probe);

    if (usable) {
      as_path = env_dir;
      return;
    }

  }

  /* Candidate 2: the directory part of argv0, provided it has one. */

  last_sep = strrchr(argv0, '/');

  if (last_sep) {

    u8* base_dir;
    int usable;

    /* Temporarily terminate argv0 at the last slash so that only the
       directory is duplicated, then put the slash back. */

    *last_sep = 0;
    base_dir  = ck_strdup(argv0);
    *last_sep = '/';

    probe  = alloc_printf("%s/afl-as", base_dir);
    usable = !access(probe, X_OK);
    ck_free(probe);

    if (usable) {
      as_path = base_dir;
      return;
    }

    ck_free(base_dir);

  }

  /* Candidate 3: the location compiled into the binary. */

  if (access(AFL_PATH "/as", X_OK) == 0) {
    as_path = AFL_PATH;
    return;
  }

  FATAL("Unable to find AFL wrapper binary for 'as'. Please set AFL_PATH");

}

该函数首先获取AFL_PATH这个环境变量,如果存在,那么将会检查AFL_PATH/as这个文件是否可执行,如果可以,那么AFL_PATH的路径就会被赋值给as_path并返回;否则,将会找到argv0中最后一次出现/的位置,并把/前的路径作为dir,然后检查该路径下是否存在可执行的afl-as,如果存在,则把dir的路径赋值给as_path;如果仍未找到,还会再尝试编译时写死的AFL_PATH宏所指目录下的as;如果以上三种办法都没有找到,就会报错。

接下来我们来看edit_params函数,首先来看获取参数的部分:

代码语言:javascript复制
/* Copy argv to cc_params, making the necessary edits.

   This first part allocates cc_params with room for argc + 128 entries and
   decides which real compiler goes into cc_params[0]. The decision is based
   on the base name this wrapper was invoked under (afl-clang, afl-clang++,
   afl-g++, afl-gcj, anything else => gcc) and can be overridden through the
   AFL_CC / AFL_CXX / AFL_GCJ environment variables. */

static void edit_params(u32 argc, char** argv) {

  u8 fortify_set = 0, asan_set = 0;
  u8 *name;

#if defined(__FreeBSD__) && defined(__x86_64__)
  u8 m32_set = 0;
#endif

  cc_params = ck_alloc((argc + 128) * sizeof(u8*));

  /* Strip any leading directory so that only the program name is compared. */

  name = strrchr(argv[0], '/');
  if (!name) name = argv[0]; else name++;

  if (!strncmp(name, "afl-clang", 9)) {

    clang_mode = 1;

    setenv(CLANG_ENV_VAR, "1", 1);

    if (!strcmp(name, "afl-clang++")) {
      u8* alt_cxx = getenv("AFL_CXX");
      cc_params[0] = alt_cxx ? alt_cxx : (u8*)"clang++";
    } else {
      u8* alt_cc = getenv("AFL_CC");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"clang";
    }

  } else {

    /* With GCJ and Eclipse installed, you can actually compile Java! The
       instrumentation will work (amazingly). Alas, unhandled exceptions do
       not call abort(), so afl-fuzz would need to be modified to equate
       non-zero exit codes with crash conditions when working with Java
       binaries. Meh. */

#ifdef __APPLE__

    /* On Apple there is no built-in default: the compiler must come from
       the environment, otherwise we bail out below. */

    if (!strcmp(name, "afl-g++")) cc_params[0] = getenv("AFL_CXX");
    else if (!strcmp(name, "afl-gcj")) cc_params[0] = getenv("AFL_GCJ");
    else cc_params[0] = getenv("AFL_CC");

    if (!cc_params[0]) {

      SAYF("\n" cLRD "[-] " cRST
           "On Apple systems, 'gcc' is usually just a wrapper for clang. Please use the\n"
           "    'afl-clang' utility instead of 'afl-gcc'. If you really have GCC installed,\n"
           "    set AFL_CC or AFL_CXX to specify the correct path to that compiler.\n");

      FATAL("AFL_CC or AFL_CXX required on MacOS X");

    }

#else

    if (!strcmp(name, "afl-g++")) {
      u8* alt_cxx = getenv("AFL_CXX");
      cc_params[0] = alt_cxx ? alt_cxx : (u8*)"g++";
    } else if (!strcmp(name, "afl-gcj")) {
      u8* alt_cc = getenv("AFL_GCJ");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"gcj";
    } else {
      u8* alt_cc = getenv("AFL_CC");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"gcc";
    }

#endif /* __APPLE__ */

  }

从函数名就可以看出,该函数主要是设置必要的参数。函数首先通过strncmp/strcmp比较name,从而确定当前是以afl-g++、afl-gcj、afl-gcc、afl-clang还是afl-clang++的名义被调用;如果是afl-clang*,需要先将clang_mode设置为1,然后再来判断实际要调用的是clang++还是clang,并查找环境变量(AFL_CXX/AFL_CC/AFL_GCJ),来决定最终对cc_params[0]的赋值。

接下来我们看为gcc添加参数的部分:

代码语言:javascript复制
  /* Walk every argument after argv[0]. Options that would clash with the
     wrapper are dropped, options that influence the flags appended later are
     recorded, and everything that is not dropped is copied to cc_params. */

  while (--argc) {
    u8* cur = *(++argv);

    if (!strncmp(cur, "-B", 2)) {

      if (!be_quiet) WARNF("-B is already set, overriding");

      /* A bare "-B" carries its value in the next argument; skip that too. */

      if (!cur[2] && argc > 1) { argc--; argv++; }
      continue;

    }

    if (!strcmp(cur, "-integrated-as")) continue;

    if (!strcmp(cur, "-pipe")) continue;

#if defined(__FreeBSD__) && defined(__x86_64__)
    if (!strcmp(cur, "-m32")) m32_set = 1;
#endif

    if (!strcmp(cur, "-fsanitize=address") ||
        !strcmp(cur, "-fsanitize=memory")) asan_set = 1;

    if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1;

    cc_params[cc_par_cnt++] = cur;

  }

  /* Point the compiler at our own 'as' wrapper. */

  cc_params[cc_par_cnt++] = "-B";
  cc_params[cc_par_cnt++] = as_path;

  if (clang_mode)
    cc_params[cc_par_cnt++] = "-no-integrated-as";

  if (getenv("AFL_HARDEN")) {

    cc_params[cc_par_cnt++] = "-fstack-protector-all";

    /* Do not redefine _FORTIFY_SOURCE if the caller already mentioned it. */

    if (!fortify_set)
      cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2";

  }

  if (asan_set) {

    /* Pass this on to afl-as to adjust map density. */

    setenv("AFL_USE_ASAN", "1", 1);

  } else if (getenv("AFL_USE_ASAN")) {

    if (getenv("AFL_USE_MSAN"))
      FATAL("ASAN and MSAN are mutually exclusive");

    if (getenv("AFL_HARDEN"))
      FATAL("ASAN and AFL_HARDEN are mutually exclusive");

    cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
    cc_params[cc_par_cnt++] = "-fsanitize=address";

  } else if (getenv("AFL_USE_MSAN")) {

    if (getenv("AFL_USE_ASAN"))
      FATAL("ASAN and MSAN are mutually exclusive");

    if (getenv("AFL_HARDEN"))
      FATAL("MSAN and AFL_HARDEN are mutually exclusive");

    cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
    cc_params[cc_par_cnt++] = "-fsanitize=memory";


  }

  if (!getenv("AFL_DONT_OPTIMIZE")) {

#if defined(__FreeBSD__) && defined(__x86_64__)

    /* On 64-bit FreeBSD systems, clang -g -m32 is broken, but -m32 itself
       works OK. This has nothing to do with us, but let's avoid triggering
       that bug. */

    if (!clang_mode || !m32_set)
      cc_params[cc_par_cnt++] = "-g";

#else

    cc_params[cc_par_cnt++] = "-g";

#endif

    cc_params[cc_par_cnt++] = "-O3";
    cc_params[cc_par_cnt++] = "-funroll-loops";

    /* Two indicators that you're building for fuzzing; one of them is
       AFL-specific, the other is shared with libfuzzer. */

    cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1";
    cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1";

  }

  if (getenv("AFL_NO_BUILTIN")) {

    cc_params[cc_par_cnt++] = "-fno-builtin-strcmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strncmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-memcmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strstr";
    cc_params[cc_par_cnt++] = "-fno-builtin-strcasestr";

  }

  /* execvp() expects a NULL-terminated argument vector. */

  cc_params[cc_par_cnt] = NULL;

}

程序会跳过-B、-integrated-as、-pipe这几个参数,然后判断是否存在-fsanitize=address或-fsanitize=memory参数,如果存在,则将asan_set赋值为1;如果参数中包含FORTIFY_SOURCE,则将fortify_set赋值为1;未被跳过的参数都会依次追加到cc_params中(cc_params[cc_par_cnt++] = cur)。接下来程序自己添加了参数-B as_path;如果前面设置了clang_mode=1,还会添加参数-no-integrated-as。接下来检查是否存在AFL_HARDEN环境变量,存在的话,将会添加参数-fstack-protector-all,并且还会检查fortify_set是否为0,如果为0,将会添加参数-D_FORTIFY_SOURCE=2。接下来会检查asan_set是否为1:如果为1,只需设置环境变量AFL_USE_ASAN=1,把这一信息传递给afl-as;否则依次检查环境变量AFL_USE_ASAN和AFL_USE_MSAN,在确认它们彼此之间以及与AFL_HARDEN不冲突之后,添加-U_FORTIFY_SOURCE以及对应的-fsanitize=address或-fsanitize=memory。当不存在环境变量AFL_DONT_OPTIMIZE时,程序还会添加参数-g、-O3、-funroll-loops、-D__AFL_COMPILER=1、-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1;如果存在环境变量AFL_NO_BUILTIN,程序将会添加-fno-builtin-strcmp等一系列-fno-builtin-*参数。

最后程序会在数组末尾放入NULL(即\x00)作为参数列表的结束标记。

接下来,当find_as函数与edit_params函数执行完成以后,程序将会调用execvp函数,最终执行cc_params[0]所指定的编译器(例如gcc)。

afl-as

afl-as程序是对汇编器as的包装,其main函数实现如下:

代码语言:javascript复制
/* Entry point of the afl-as assembler wrapper.

   Prints a banner / usage text, seeds the PRNG, builds the argument vector
   for the real assembler (edit_params), validates AFL_INST_RATIO, guards
   against invoking itself recursively, writes the instrumented assembly
   file (add_instrumentation) and finally runs the real assembler in a child
   process, exiting with WEXITSTATUS() of that child's wait status. */

int main(int argc, char** argv) {

  s32 pid;
  u32 rand_seed;
  int status;
  u8* inst_ratio_str = getenv("AFL_INST_RATIO");

  struct timeval tv;
  struct timezone tz;

  clang_mode = !!getenv(CLANG_ENV_VAR);

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

  } else be_quiet = 1;

  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
         "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
         "don't want to run this program directly.\n\n"

         "Rarely, when dealing with extremely complex projects, it may be advisable to\n"
         "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
         "instrumenting every discovered branch.\n\n");

    exit(1);

  }

  /* Mix wall-clock time and pid into the seed used by srandom(). */

  gettimeofday(&tv, &tz);

  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();

  srandom(rand_seed);

  edit_params(argc, argv);

  if (inst_ratio_str) {

    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
      FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");

  }

  /* If the marker variable is already set, we were started by another
     afl-as instance instead of the real assembler. */

  if (getenv(AS_LOOP_ENV_VAR))
    FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");

  setenv(AS_LOOP_ENV_VAR, "1", 1);

  /* When compiling with ASAN, we don't have a particularly elegant way to skip
     ASAN-specific branches. But we can probabilistically compensate for
     that... */

  if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
    sanitizer = 1;
    inst_ratio /= 3;
  }

  if (!just_version) add_instrumentation();

  if (!(pid = fork())) {

    /* Child: become the real assembler. */

    execvp(as_params[0], (char**)as_params);
    FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);

  }

  if (pid < 0) PFATAL("fork() failed");

  if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");

  /* The instrumented file is removed unless the user asked to keep it. */

  if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);

  exit(WEXITSTATUS(status));

}

我们首先来关注edit_params函数,函数的源码如下:

代码语言:javascript复制
/* Build as_params, the argument vector for the real assembler.

   as_params[0] is the assembler to run (AFL_AS if set, otherwise "as"; on
   Apple in clang mode, a clang binary). Every argument except the last one
   is copied over, with --64 / --32 (and -arch on Apple) updating use_64bit.
   The last argument is taken as the input file; it is replaced by
   modified_file, the path add_instrumentation() writes to. pass_thru is set
   when the input file does not live under a known temporary directory. */

static void edit_params(int argc, char** argv) {

  u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
  u32 i;

#ifdef __APPLE__

  u8 use_clang_as = 0;

  /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
     with the code generated by newer versions of clang that are hand-built
     by the user. See the thread here: http://goo.gl/HBWDtn.

     To work around this, when using clang and running without AFL_AS
     specified, we will actually call 'clang -c' instead of 'as -q' to
     compile the assembly file.

     The tools aren't cmdline-compatible, but at least for now, we can
     seemingly get away with this by making only very minor tweaks. Thanks
     to Nico Weber for the idea. */

  if (clang_mode && !afl_as) {

    use_clang_as = 1;

    afl_as = getenv("AFL_CC");
    if (!afl_as) afl_as = getenv("AFL_CXX");
    if (!afl_as) afl_as = "clang";

  }

#endif /* __APPLE__ */

  /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
     is not set. We need to check these non-standard variables to properly
     handle the pass_thru logic later on. */

  if (!tmp_dir) tmp_dir = getenv("TEMP");
  if (!tmp_dir) tmp_dir = getenv("TMP");
  if (!tmp_dir) tmp_dir = "/tmp";

  as_params = ck_alloc((argc + 32) * sizeof(u8*));

  as_params[0] = afl_as ? afl_as : (u8*)"as";

  as_params[argc] = 0;

  /* Copy everything but the final argument, which is handled separately
     below as the input file. */

  for (i = 1; i < argc - 1; i++) {

    if (!strcmp(argv[i], "--64")) use_64bit = 1;
    else if (!strcmp(argv[i], "--32")) use_64bit = 0;

#ifdef __APPLE__

    /* The Apple case is a bit different... */

    if (!strcmp(argv[i], "-arch") && i + 1 < argc) {

      if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
      else if (!strcmp(argv[i + 1], "i386"))
        FATAL("Sorry, 32-bit Apple platforms are not supported.");

    }

    /* Strip options that set the preference for a particular upstream
       assembler in Xcode. */

    if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
      continue;

#endif /* __APPLE__ */

    as_params[as_par_cnt++] = argv[i];

  }

#ifdef __APPLE__

  /* When calling clang as the upstream assembler, append -c -x assembler
     and hope for the best. */

  if (use_clang_as) {

    as_params[as_par_cnt++] = "-c";
    as_params[as_par_cnt++] = "-x";
    as_params[as_par_cnt++] = "assembler";

  }

#endif /* __APPLE__ */

  input_file = argv[argc - 1];

  if (input_file[0] == '-') {

    /* "--version": nothing to instrument, just forward the flag as-is. */

    if (!strcmp(input_file + 1, "-version")) {
      just_version = 1;
      modified_file = input_file;
      goto wrap_things_up;
    }

    /* A lone "-" clears input_file (add_instrumentation() then reads stdin);
       any other option in the last position is rejected. */

    if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
      else input_file = NULL;

  } else {

    /* Check if this looks like a standard invocation as a part of an attempt
       to compile a program, rather than using gcc on an ad-hoc .s file in
       a format we may not understand. This works around an issue compiling
       NSS. */

    if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
        strncmp(input_file, "/var/tmp/", 9) &&
        strncmp(input_file, "/tmp/", 5)) pass_thru = 1;

  }

  modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
                               (u32)time(NULL));

wrap_things_up:

  as_params[as_par_cnt++] = modified_file;
  as_params[as_par_cnt]   = NULL;

}

首先获取环境变量TMPDIR,如果存在,则将tmp_dir设置为该环境变量的值;如果不存在,将会依次尝试TEMP、TMP环境变量;如果都不存在,则将tmp_dir设置为/tmp。然后判断afl_as(环境变量AFL_AS的值)是否存在,如果存在,则将as_params[0]设置为afl_as,否则设置为as。

然后遍历除最后一个以外的所有参数:遇到--64则将use_64bit置为1,遇到--32则将use_64bit置为0,并把参数依次保存到as_params[as_par_cnt++]中。

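接下来,将argv[argc-1]的值作为输入文件input_file。如果它以-开头:为--version时只设置just_version=1并直接跳到收尾部分;仅为-时将input_file置为NULL(之后从标准输入读取);其他情况直接报错。否则,依次将input_file的开头与tmp_dir、/var/tmp/、/tmp/进行比较,如果都不匹配(说明它不是编译器放在临时目录中的汇编文件),则将pass_thru设置为1,即不插桩、原样透传。最后将modified_file设置为tmp_dir目录下的.afl-<pid>-<time>.s文件。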

然后进行 as_params[as_par_cnt++] = modified_file; 以及 as_params[as_par_cnt] = NULL;,即用插桩后的文件代替原输入文件,并结束参数列表。

接下来我们来看另外一个函数add_instrumentation,该函数用于对文件的插桩,是重点函数,函数源码如下:

代码语言:javascript复制
static void add_instrumentation(void) {

  static u8 line[MAX_LINE];

  FILE* inf;
  FILE* outf;
  s32 outfd;
  u32 ins_lines = 0;

  u8  instr_ok = 0, skip_csect = 0, skip_next_label = 0,
      skip_intel = 0, skip_app = 0, instrument_next = 0;

#ifdef __APPLE__

  u8* colon_pos;

#endif /* __APPLE__ */

  if (input_file) {

    inf = fopen(input_file, "r");
    if (!inf) PFATAL("Unable to read '%s'", input_file);

  } else inf = stdin;

  outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);

  if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);

  outf = fdopen(outfd, "w");

  if (!outf) PFATAL("fdopen() failed");  

  while (fgets(line, MAX_LINE, inf)) {

    /* In some cases, we want to defer writing the instrumentation trampoline
       until after all the labels, macros, comments, etc. If we're in this
       mode, and if the line starts with a tab followed by a character, dump
       the trampoline now. */

    if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
        instrument_next && line[0] == 't' && isalpha(line[1])) {

      fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
              R(MAP_SIZE));

      instrument_next = 0;
      ins_lines  ;

    }

    /* Output the actual line, call it a day in pass-thru mode. */

    fputs(line, outf);

    if (pass_thru) continue;

    /* All right, this is where the actual fun begins. For one, we only want to
       instrument the .text section. So, let's keep track of that in processed
       files - and let's set instr_ok accordingly. */

    if (line[0] == 't' && line[1] == '.') {

      /* OpenBSD puts jump tables directly inline with the code, which is
         a bit annoying. They use a specific format of p2align directives
         around them, so we use that as a signal. */

      if (!clang_mode && instr_ok && !strncmp(line   2, "p2align ", 8) &&
          isdigit(line[10]) && line[11] == 'n') skip_next_label = 1;

      if (!strncmp(line   2, "textn", 5) ||
          !strncmp(line   2, "sectiont.text", 13) ||
          !strncmp(line   2, "sectiont__TEXT,__text", 21) ||
          !strncmp(line   2, "section __TEXT,__text", 21)) {
        instr_ok = 1;
        continue; 
      }

      if (!strncmp(line   2, "sectiont", 8) ||
          !strncmp(line   2, "section ", 8) ||
          !strncmp(line   2, "bssn", 4) ||
          !strncmp(line   2, "datan", 5)) {
        instr_ok = 0;
        continue;
      }

    }

    /* Detect off-flavor assembly (rare, happens in gdb). When this is
       encountered, we set skip_csect until the opposite directive is
       seen, and we do not instrument. */

    if (strstr(line, ".code")) {

      if (strstr(line, ".code32")) skip_csect = use_64bit;
      if (strstr(line, ".code64")) skip_csect = !use_64bit;

    }

    /* Detect syntax changes, as could happen with hand-written assembly.
       Skip Intel blocks, resume instrumentation when back to AT&T. */

    if (strstr(line, ".intel_syntax")) skip_intel = 1;
    if (strstr(line, ".att_syntax")) skip_intel = 0;

    /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */

    if (line[0] == '#' || line[1] == '#') {

      if (strstr(line, "#APP")) skip_app = 1;
      if (strstr(line, "#NO_APP")) skip_app = 0;

    }

    /* If we're in the right mood for instrumenting, check for function
       names or conditional labels. This is a bit messy, but in essence,
       we want to catch:

         ^main:      - function entry point (always instrumented)
         ^.L0:       - GCC branch label
         ^.LBB0_0:   - clang branch label (but only in clang mode)
         ^tjnz foo  - conditional branches

       ...but not:

         ^# BB#0:    - clang comments
         ^ # BB#0:   - ditto
         ^.Ltmp0:    - clang non-branch labels
         ^.LC0       - GCC non-branch labels
         ^.LBB0_0:   - ditto (when in GCC mode)
         ^tjmp foo  - non-conditional jumps

       Additionally, clang and GCC on MacOS X follow a different convention
       with no leading dots on labels, hence the weird maze of #ifdefs
       later on.

     */

    if (skip_intel || skip_app || skip_csect || !instr_ok ||
        line[0] == '#' || line[0] == ' ') continue;

    /* Conditional branch instruction (jnz, etc). We append the instrumentation
       right after the branch (to instrument the not-taken path) and at the
       branch destination label (handled later on). */

    if (line[0] == 't') {

      if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {

        fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
                R(MAP_SIZE));

        ins_lines  ;

      }

      continue;

    }

    /* Label of some sort. This may be a branch destination, but we need to
       tread carefully and account for several different formatting
       conventions. */

#ifdef __APPLE__

    /* Apple: L<whatever><digit>: */

    if ((colon_pos = strstr(line, ":"))) {

      if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {

#else

    /* Everybody else: .L<whatever>: */

    if (strstr(line, ":")) {

      if (line[0] == '.') {

#endif /* __APPLE__ */

        /* .L0: or LBB0_0: style jump destination */

#ifdef __APPLE__

        /* Apple: L<num> / LBB<num> */

        if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
            && R(100) < inst_ratio) {

#else

        /* Apple: .L<num> / .LBB<num> */

        if ((isdigit(line[2]) || (clang_mode && !strncmp(line   1, "LBB", 3)))
            && R(100) < inst_ratio) {

#endif /* __APPLE__ */

          /* An optimization is possible here by adding the code only if the
             label is mentioned in the code in contexts other than call / jmp.
             That said, this complicates the code by requiring two-pass
             processing (messy with stdin), and results in a speed gain
             typically under 10%, because compilers are generally pretty good
             about not generating spurious intra-function jumps.

             We use deferred output chiefly to avoid disrupting
             .Lfunc_begin0-style exception handling calculations (a problem on
             MacOS X). */

          if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;

        }

      } else {

        /* Function label (always instrumented, deferred mode). */

        instrument_next = 1;
    
      }

    }

  }

  if (ins_lines)
    fputs(use_64bit ? main_payload_64 : main_payload_32, outf);

  if (input_file) fclose(inf);
  fclose(outf);

  if (!be_quiet) {

    if (!ins_lines) WARNF("No instrumentation targets found%s.",
                          pass_thru ? " (pass-thru mode)" : "");
    else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
             ins_lines, use_64bit ? "64" : "32",
             getenv("AFL_HARDEN") ? "hardened" : 
             (sanitizer ? "ASAN/MSAN" : "non-hardened"),
             inst_ratio);
 
  }

}

首先判断input_file是否存在,如果存在,则打开它,否则从标准输入stdin读取;下一步以独占创建(O_EXCL | O_CREAT)的方式打开modified_file,然后逐行把输入文件的内容读入line数组中。

接下来,根据代码的注释可以知道,只需要对.text段进行插桩,所以接下来进行了一系列的判断:

  • instr_ok用来判断是否为text段,如果是则为1,否则为0。
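  • skip_csect用来标记当前代码的位数是否与目标不一致:在64位目标中遇到.code32,或在32位目标中遇到.code64时为1(此时不插桩),否则为0。
  • skip_intel用来判断当前是否处于Intel语法(.intel_syntax)的汇编块中,如果是则为1,回到AT&T语法(.att_syntax)后为0。
  • skip_app用来判断当前是否处于ad-hoc __asm__块中(即#APP与#NO_APP之间的手写内联汇编),如果是则为1,否则为0。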

接下来,程序会判断各个标志位的值,并判断该行的第一个字符是否为#或空格(是则跳过)。通过判断后,程序会判断是否是需要插桩的位置:^\tjnz foo - conditional branches,即以制表符开头、后面是j但不是jm(排除jmp)的条件跳转指令。满足该条件,且R(100)<inst_ratio,则会进行插桩,这里的R(100)是一个宏定义,用来对随机数取余,而inst_ratio则是插桩的密度。然后根据前面设置的use_64bit是否为1,来判断是写入trampoline_fmt_64还是trampoline_fmt_32。

接下来会判断line数组中是否有:(即该行是一个标签),再看第一个字符是否是.:以.开头的是编译器生成的局部标签,不以.开头的是函数标签。这里的插桩是延迟进行的:只是把instrument_next设置为1,真正的写入发生在之后第一条以制表符加字母开头的指令行之前。具体判断如下:

  • 形如.L0:(.L后面紧跟数字)则说明是GCC下的分支目标标签,在R(100)<inst_ratio时进行插桩。
  • 形如.LBB0_0:且clang_mode为1,则说明是clang下的分支目标标签,在R(100)<inst_ratio时进行插桩。
  • 其他以.开头的标签不插桩;不以.开头的标签则说明是一个函数,总是对其进行插桩。

最后,如果至少插入了一处桩代码(ins_lines不为0),则根据use_64bit向modified_file中写入main_payload_64或者main_payload_32,并关闭文件。

到这里,afl-as也就基本讲完了,主函数中会调用这两个函数,对程序进行插桩。

gcc

0 人点赞