AFL源码分析05：fuzz_one

前言

本篇作为AFL源码分析的最后一篇，完成了对fuzz执行过程中的核心函数fuzz_one及其辅助函数（均位于文件afl-fuzz.c中，上一篇分析的主要是该文件中的fuzz初始化部分）的分析。在完整跟完一遍AFL源码后，不禁感叹于作者在代码设计上的巧妙以及将遗传算法应用在fuzz领域的绝佳构思。相信读者在阅读完这部分源码分析后也会有着相同的感受。

关键变量

EXP_ST u32 exec_tmout = EXEC_TIMEOUT; /* Configurable exec timeout (ms)   */
static u32 hang_tmout = EXEC_TIMEOUT; /* Timeout used for hang det (ms)   */

static u32 stats_update_freq = 1;     /* Stats update frequency (execs)   */

EXP_ST u8  skip_deterministic,        /* Skip deterministic stages?       */
           force_deterministic,       /* Force deterministic stages?      */
           use_splicing,              /* Recombine input files?           */
           skip_requested,            /* Skip request, via SIGUSR1        */

EXP_ST u32 cur_skipped_paths,         /* Abandoned inputs in cur cycle    */ 
           current_entry,             /* Current queue entry ID           */
           queued_with_cov,           /* Paths with new coverage bytes    */
           queued_discovered,         /* Items discovered during this run */
           pending_favored,           /* Pending favored paths            */
           pending_not_fuzzed,        /* Queued but not done yet          */
    
EXP_ST u64 queue_cycle,               /* Queue round counter              */
           cycles_wo_finds,           /* Cycles without any new paths     */
           total_tmouts,              /* Total number of timeouts         */
           unique_hangs,              /* Hangs with unique signatures     */
           bytes_trim_in,             /* Bytes coming into the trimmer    */
           bytes_trim_out,            /* Bytes coming outa the trimmer    */
           trim_execs,                /* Execs done to trim input files   */
           bytes_trim_in,             /* Bytes coming into the trimmer    */
           bytes_trim_out,            /* Bytes coming outa the trimmer    */

static u32 subseq_tmouts;             /* Number of timeouts in a row      */
    
static u8 *stage_name = "init",       /* Name of the current fuzz stage   */
          *stage_short,               /* Short stage name                 */
          *syncing_party;             /* Currently syncing with...        */

static s32 stage_cur, stage_max;      /* Stage progression                */
static s32 splicing_with = -1;        /* Splicing with which test case?   */

核心函数

main(fuzz主循环部分)

u32 seek_to;
seek_to = find_start_position();

...
    
while (1) {

    u8 skipped_fuzz;

    /* 1.首先调用cull_queue精简队列，筛选掉不是favored的case
       2.如果queue_cur为空，说明所有queue都被执行完一轮，则：
          a.为新一轮fuzz做些准备：
            1).设置计数器queue_cycle加1，表示所有queue被完整执行了多少轮
            2).将用于表示当前queue下标的current_entry清零
            3).将用于统计此轮fuzz废弃掉case个数的计数器cur_skipped_paths清零
            4).令queue_cur指向fuzzing queue列表的第一个queue，准备开始新一轮fuzz
          b.如果seek_to不为空，说明此时为resuming_fuzz，则进入循环：
            1).把queue_cur定位到seek_to指向的位置
            2).同时current_entry也更新到queue_cur所在位置的下标
          c.调用show_stats()刷新展示界面
          d.如果不是终端模式（not_on_tty==1），则输出当前是第几轮fuzz
    */
    cull_queue();

    if (!queue_cur) {
      queue_cycle++;
      current_entry     = 0;
      cur_skipped_paths = 0;
      queue_cur         = queue;

      while (seek_to) {
        current_entry++;
        seek_to--;
        queue_cur = queue_cur->next;
      }

      show_stats();

      if (not_on_tty) {
        ACTF("Entering queue cycle %llu.", queue_cycle);
        fflush(stdout);
      }

        
      /* 如果完整的一轮fuzz没有发现新的路径，那么接下来尝试调整策略
         1.判断queued_paths与prev_queued是否相等：
            a.若相等，表明在刚刚结束的一轮fuzz中没有发现新的路径（如果发现新的路径，则会在
              save_if_interesting中会调用add_to_queue修改queued_paths的值），那么
              接下来判断是否设置了use_splicing（如果没有指定'-M'或指定了'-d'则会设置其值）：
               1).如果设置了，则将cycles_wo_finds加1
               2).如果没设置，则设置use_spicing为1，表示接下来要通过splicing进行队列重组
            b.若不等，说明有新路径发现，则设置cycle_wo_finds为0
         2.用queued_paths去设置prev_queued的值
         3.如果设置了sync_id（参数指定'-M'或'-S'），且queue_cycle为1，且设置了环境变量
           AFL_IMPORT_FIRST；则调用sync_fuzzers从其它sync文件下的fuzzer中读取interesting
           的case到自己的queue中
      */
      if (queued_paths == prev_queued) {
        if (use_splicing) 
            cycles_wo_finds++; 
        else 
            use_splicing = 1;
      } else
          cycles_wo_finds = 0;

      prev_queued = queued_paths;

      if (sync_id && queue_cycle == 1 && getenv("AFL_IMPORT_FIRST"))
        sync_fuzzers(use_argv);
    }
    
    
    /* 1.调用fuzz_one对queue_cur进行一次变异测试（fuzz_one并不一定真的执行当前queue_cur，它有
         一定的策略；如果不执行，就直接返回1，否则返回0），
         上面的变异完成后，AFL会对文件队列的下一个进行变异处理。当队列中的全部文件都变异测试后，就完
         成了一个”cycle”，这个就是AFL状态栏右上角的”cycles done”。而正如cycle的意思所说，整个队
         列又会从第一个文件开始，再次进行变异，不过与第一次变异不同的是，这一次就不需要再进行
         "deterministic fuzzing"了。如果用户不停止AFL，seed文件将会一遍遍的变异下去。
       2.如果没有设置stop_soon，但是设置了sync_id，且fuzz_one返回0：
          a.令sync_interval_cnt加1（main函数开始时初始化为0）
          b.如果sync_interval_cnt能够被SYNC_INTERVAL(在config.h中定义为5)整除，那么调用
            sync_fuzzers来同步其它fuzzer
       3.如果没有设置stop_soon，但是设置了exit_1，则将stop_soon设置为2，并break出fuzz主循环
       4.queue_cur指向fuzzing queue队列中的下一个queue，current_entry也随之自增1。准备测试
         下一个queue
    */
    skipped_fuzz = fuzz_one(use_argv);

    if (!stop_soon && sync_id && !skipped_fuzz) {     
      if (!(sync_interval_cnt++ % SYNC_INTERVAL))
        sync_fuzzers(use_argv);
    }

    if (!stop_soon && exit_1)
        stop_soon = 2;
    if (stop_soon) break;

    queue_cur = queue_cur->next;
    current_entry++;
}

sync_fuzzers

/* 这个函数的主要作用是进行queue同步，先读取有哪些fuzzer文件夹，然后读取其它fuzzer文件夹下的queue
   文件夹中的测试用例，并依次执行一遍。如果执行过程中，发现这些测试用例可以触发新路径，则将测试用例保存
   到自己的queue文件夹中，并将最后一个同步的测试用例的case_id写入到".synced/d_name"的文件中，以
   避免重复运行
*/

/* struct dirent {
      long d_ino;                 // 索引节点号
      off_t d_off;                // 在目录文件中的偏移
      unsigned short d_reclen;    // 文件名长
      unsigned char d_type;       // 文件类型
      char d_name[NAME_MAX+1];    // 文件名，最长255字节
   } 
*/
static void sync_fuzzers(char** argv) {

  DIR* sd;
  struct dirent* sd_ent;
  u32 sync_cnt = 0;

  /* 首先打开sync_dir文件夹 */
  sd = opendir(sync_dir);
  if (!sd) 
      PFATAL("Unable to open '%s'", sync_dir);

  stage_max = stage_cur = 0;
  cur_depth = 0;

  /* 循环读取sync_dir目录下的所有文件（重点是其它fuzzer创建的文件夹）
     1.跳过.开头的和当前fuzzer创建的文件（sync_dir != sd_ent->d_name）
     2.跳过任何不包含queue子目录的文件夹，通过尝试打开"sync_dir/d_name/queue"文件夹。若打开失败，
       说明不包含queue子目录
     3.打开out_dir/.synced/d_name文件，读取前4个字节到变量min_accept中，然后调用lseek
       调整文件内指针到开头，并设置next_min_accept的值为min_accept，这个值代表之前从这个
       (queue)文件夹里读取到的最后一个queue的id
     4.调整sync_cnt的值，显示现在同步到的阶段。并将stage_cur与stage_max清零
  */
  while ((sd_ent = readdir(sd))) {

    static u8 stage_tmp[128];
    DIR* qd;
    struct dirent* qd_ent;
    u8 *qd_path, *qd_synced_path;
    u32 min_accept = 0, next_min_accept;
    s32 id_fd;

    if (sd_ent->d_name[0] == '.' || !strcmp(sync_id, sd_ent->d_name))
        continue;

    qd_path = alloc_printf("%s/%s/queue", sync_dir, sd_ent->d_name);
    if (!(qd = opendir(qd_path))) {
      ck_free(qd_path);
      continue;
    }

    qd_synced_path = alloc_printf("%s/.synced/%s", out_dir, sd_ent->d_name);
    id_fd = open(qd_synced_path, O_RDWR | O_CREAT, 0600);     
    if (id_fd < 0) 
        PFATAL("Unable to create '%s'", qd_synced_path);
    if (read(id_fd, &min_accept, sizeof(u32)) > 0) 
      lseek(id_fd, 0, SEEK_SET);
    next_min_accept = min_accept;

    sprintf(stage_tmp, "sync %u", ++sync_cnt);
    stage_name = stage_tmp;
    stage_cur  = 0;
    stage_max  = 0;
      

    /* 接下来利用一个while循环检查此fuzzer对应的queue队列中的每个用例，解析ID并检查我们之前是否
       查看过它；如果没有，则执行这个测试用例：
       1.跳过.开头的文件和标识小于min_accept的文件（即已经sync过的文件）
       2.如果标识syncing_case >= next_min_accept，就设置next_min_accept的值为syncing_case+1
       3.开始同步case，先打开这个case文件，并获取文件状态   
    */
    while ((qd_ent = readdir(qd))) {

      u8* path;
      s32 fd;
      struct stat st;

      if (qd_ent->d_name[0] == '.' ||
          sscanf(qd_ent->d_name, CASE_PREFIX "%06u", &syncing_case) != 1 || 
          syncing_case < min_accept)
          continue;

      if (syncing_case >= next_min_accept)
        next_min_accept = syncing_case + 1;

      path = alloc_printf("%s/%s", qd_path, qd_ent->d_name);
      fd = open(path, O_RDONLY);
      if (fd < 0) {
         ck_free(path);
         continue;
      }
      if (fstat(fd, &st))
          PFATAL("fstat() failed");

        
      /* 略过大小为0以及大小超过MAX_FILE（默认为1M）的文件， 然后对这个case作进一步判断：
           a.调用mmap映射这个case文件到mem中
           b.调用write_to_testcase将这个case写入到out_file(在detect_file_args中，out_file
             被设置为out_dir/.cur_input)
           c.调用run_target执行目标程序，监控超时情况，返回状态信息
           d.判断一次stop_soon，如果设置了就返回
           e.设置syncing_party为这个case在目录中的文件名
           f.调用save_if_interesting来决定是否要导入这个文件到自己的queue里；如果发现了新的path
             就导入。如果导入了会返回1，就令计数器queued_imported加1。
           g.调用munmap解除这个case的映射
           h.如果到了状态更新的周期(stats_update_freq默认为1)，则调用show_stats刷新一次界面
      */
      if (st.st_size && st.st_size <= MAX_FILE) {
        u8  fault;
        u8* mem = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (mem == MAP_FAILED)
            PFATAL("Unable to mmap '%s'", path);

        
        write_to_testcase(mem, st.st_size);
          
        fault = run_target(argv, exec_tmout);

        if (stop_soon) 
            return;

        syncing_party = sd_ent->d_name;
        
        queued_imported += save_if_interesting(argv, mem, st.st_size, fault);
        syncing_party = 0;

        munmap(mem, st.st_size);

        if (!(stage_cur++ % stats_update_freq))
            show_stats();
      }
      ck_free(path);
      close(fd);
    }

    /* 将当前的next_min_accept写入到id_fd指向的文件，也就是out_dir/.synced/d_name对应的文件
       ，方便下次sync的时候，判断找到最后一个sync的case，以避免重复运行 */
    ck_write(id_fd, &next_min_accept, sizeof(u32), qd_synced_path);

    close(id_fd);
    closedir(qd);
    ck_free(qd_path);
    ck_free(qd_synced_path);
    
  }  
  closedir(sd);
}

save_if_interesting

/* 检查mem映射的这个case是否是interesting的，如果是，则将其保存到queue中，并返回1；否则返回0 */
static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) {

  u8  *fn = "";
  u8  hnb;
  s32 fd;
  u8  keeping = 0, res;

    
  /* 判断传入的参数fault是否为crash_mode，若是，则作进一步处理：
     1.调用has_new_bits判断这个case是否触发了新的路径或者命中了已有的路径；若都没有触发，则判断是否
       设置了crash_mode：
          a.如果设置了crash_mode，则令total_crashes加1，然后return 0
          b.如果没设置crash_mode，直接return 0
     2.拼接路径fn为"out_dir/queue/id:00000queued_paths, describe_op(hub)"
     3.调用add_to_queue将case添加到队列里
     4.如果hub值为2，说明发现了新的路径：
        a.将刚刚入队的q->has_new_cov的值设置为1
        b.计数器queued_with_cov的值加1
     5.计算trace_bits（当前run_target之后的tuple信息）的哈希值，保存到q->exec_cksum
  */  
  if (fault == crash_mode) {
    if (!(hnb = has_new_bits(virgin_bits))) {
      if (crash_mode) 
          total_crashes++;
      return 0;
    }    

#ifndef SIMPLE_FILES
    fn = alloc_printf("%s/queue/id:%06u,%s", out_dir, queued_paths, describe_op(hnb));
#else
    fn = alloc_printf("%s/queue/id_%06u", out_dir, queued_paths);
#endif /* ^!SIMPLE_FILES */
      
    add_to_queue(fn, len, 0);

    if (hnb == 2) {
      queue_top->has_new_cov = 1;
      queued_with_cov++;
    }

    queue_top->exec_cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);

      
    /* 6.调用calibrate_case评估这个case，
       7.若错误类型为FAULT_ERROR，则直接抛出异常"无法执行目标程序"
       8.否则，创建fn路径指定的文件，并将case写入文件中
       9.将keeping值设置为1（函数开始时被初始化为0）
    */
    res = calibrate_case(argv, queue_top, mem, queue_cycle - 1, 0);

    if (res == FAULT_ERROR)
      FATAL("Unable to execute target application");

    fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
    if (fd < 0) 
        PFATAL("Unable to create '%s'", fn);
    ck_write(fd, mem, len, fn);
    close(fd);

    keeping = 1;
  }

    
  /* 以上为fault类型为crash_mode时的处理方式。其它fault类型，则进入switch中进行处理 */  
  switch (fault) {
          
    /* 我们对导致超时的case不是那么有兴趣，但我们仍然有义务保留少量样本，我们将特定于超时的位图中新位
       的置位来作为唯一的信号；但是在dumb_mode下，保留所有case。具体操作如下：
       1.total_tmouts计数器加1
       2.如果unique_hangs >= KEEP_UNIQUE_HANG(能保存的最大数量，这个值在config.h中设置为500)
         ，就直接返回keeping(函数开始处初始化为0)
       3.如果不是dumb_mode：
          a.调用simlify_trace对trace_bits进行规整(一种简化跟踪的操作，以加快执行速度)
          b.调用has_new_bits判断是否有新的超时路径，如果没有，直接返回keeping
       4.令unique_tmouts计数器加1，如果不是dumb_mode，说明case发现了新的超时路径；对于dumb_mode
         ，则是保存发现的所有超时的case
    */
    case FAULT_TMOUT:          
      total_tmouts++;

      if (unique_hangs >= KEEP_UNIQUE_HANG) 
          return keeping;

      if (!dumb_mode) {

#ifdef WORD_SIZE_64
        simplify_trace((u64*)trace_bits);
#else
        simplify_trace((u32*)trace_bits);
#endif /* ^WORD_SIZE_64 */

        if (!has_new_bits(virgin_tmout)) 
            return keeping;
      }

      unique_tmouts++;

          
      /* 在保存case之前，我们通过使用一个更大的超时时间重新执行程序，以确保它是一个真正的挂起（除非
         默认超时已经很长），具体操作如下：
         1.如果hang_tmout大于exec_tmout，则先将case的内容写到out_file中，然后以hang_tmout
           为timeout，重新执行一次run_target
            a.如果没设置stop_soon，但是返回结果是FAULT_CRASH，则跳转到keep_as_crash；这是因
              为增加超时时间可能会发现崩溃，这么做是为了确保不丢弃这个case
            b.如果设置了stop_soon，或者返回结果不是FAULT_TMOUT，就直接返回keeping
         2.对于其它返回结果，则继续执行，首先拼接路径fn，用于将case写入
         3.令unique_hangs计数器加1
         4.更新last_hang_time为当前时间，然后从switch中break
      */
      if (exec_tmout < hang_tmout) {

        u8 new_fault;
        write_to_testcase(mem, len);
        new_fault = run_target(argv, hang_tmout);
          
        if (!stop_soon && new_fault == FAULT_CRASH) 
            goto keep_as_crash;
        if (stop_soon || new_fault != FAULT_TMOUT) 
            return keeping;
      }

#ifndef SIMPLE_FILES
      fn = alloc_printf("%s/hangs/id:%06llu,%s", out_dir, unique_hangs, describe_op(0));
#else
      fn = alloc_printf("%s/hangs/id_%06llu", out_dir, unique_hangs);
#endif /* ^!SIMPLE_FILES */

      unique_hangs++;

      last_hang_time = get_cur_time();
      break;

    
    /* CRASH的处理与超时的处理方式大体相似，除了略有不同的限制并且不需要重新运行测试用例。除此之外
       当在超时处理中增加超时时间重新执行程序导致FAULT_CRASH时，也会进入这里进行处理:
       1.total_crashes计数器加1
       2.如果unique_crashes >= KEEP_UNIQUE_CRASH(能保存的最大数量，这个值在config.h中设
         置为5000)，则直接返回keeping
       3.与超时中的处理类似，如果不是dumb_mode，则调用simplify_trace对trace_bits进行规整；
         接着调用has_new_bits判断这个case是否发现了新的crash路径，若没有就直接返回keeping
       4.如果unique_crashes的值为0，则调用write_crash_readme显示一些crash目录的信息
       5.拼接路径fn，用于后续将case写入
       6.unique_crashes计数器加1
       7.更新last_crash_time和last_crash_execs，然后从switch中break
    */
    case FAULT_CRASH:
keep_as_crash:

      total_crashes++;

      if (unique_crashes >= KEEP_UNIQUE_CRASH) 
          return keeping;

      if (!dumb_mode) {

#ifdef WORD_SIZE_64
        simplify_trace((u64*)trace_bits);
#else
        simplify_trace((u32*)trace_bits);
#endif /* ^WORD_SIZE_64 */

        if (!has_new_bits(virgin_crash)) 
            return keeping;
      }

      if (!unique_crashes) 
          write_crash_readme();

#ifndef SIMPLE_FILES
      fn = alloc_printf("%s/crashes/id:%06llu,sig:%02u,%s", out_dir,
                        unique_crashes, kill_signal, describe_op(0));
#else
      fn = alloc_printf("%s/crashes/id_%06llu_%02u", out_dir, unique_crashes,
                        kill_signal);
#endif /* ^!SIMPLE_FILES */

      unique_crashes++;

      last_crash_time = get_cur_time();
      last_crash_execs = total_execs;
      break;

    
    /* 如果fault类型是FAULT_ERROR，则直接抛出异常
       其它情况，直接返回keeping */
    case FAULT_ERROR: 
          FATAL("Unable to execute target application");

    default: 
          return keeping;
  }

  /* 如果执行到这，显然是想保存导致crash或者hang的case，这里创建并打开fn路径指定的文件，并将
     case的内容写入文件，最后返回keeping(这里keeping好像还是0，因此不会加入queue?)
  */
  fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
  if (fd < 0) 
      PFATAL("Unable to create '%s'", fn);
  ck_write(fd, mem, len, fn);
  close(fd);

  ck_free(fn);

  return keeping;
}

simplify_trace

/* 用0x80/0x01表示替换原本tuple中表示路径是否被命中的信息，进而简化跟踪；这么做是为了在每次crash
   或timeout被调用时加快执行速度。这里依旧只分析64位的情况
*/
static const u8 simplify_lookup[256] = { 
  [0]         = 1,
  [1 ... 255] = 128
};

#ifdef WORD_SIZE_64

static void simplify_trace(u64* mem) {

  /* 1.8个字节为1组，每轮循环依次从trace_bits取出
     2.如果mem不为空（即这一组8个字节中至少有一个字节有值），则利用simplify_lookup表对其进行规整。
       如果路径没有命中，就设置为0x1；如果路径命中了，就设置为0x80
     3.否则，将mem设置为0x0101010101010101，即所有8个字节代表的路径都没有命中
  */
  u32 i = MAP_SIZE >> 3;

  while (i--) {

    if (unlikely(*mem)) {
      u8* mem8 = (u8*)mem;

      mem8[0] = simplify_lookup[mem8[0]];
      mem8[1] = simplify_lookup[mem8[1]];
      mem8[2] = simplify_lookup[mem8[2]];
      mem8[3] = simplify_lookup[mem8[3]];
      mem8[4] = simplify_lookup[mem8[4]];
      mem8[5] = simplify_lookup[mem8[5]];
      mem8[6] = simplify_lookup[mem8[6]];
      mem8[7] = simplify_lookup[mem8[7]];
    } else 
        *mem = 0x0101010101010101ULL;
    mem++;
  }
}

trim_case

在fuzz_one中(5119行)被调用

/* 在进行确定性检查(deterministic checks)时，修剪所有新的测试用例以节省周期。微调器将使用
   1/1024~1/16之间的一个二次幂增量，使得执行周期变得简短而又温馨，至少它很简短。这里的参数
   in_buf，实际上就是calibrate_case中的参数use_mem，也就是q->len */
static u8 trim_case(char** argv, struct queue_entry* q, u8* in_buf) {

  static u8 tmp[64];
  static u8 clean_trace[MAP_SIZE];

  u8  needs_write = 0, fault = 0;
  u32 trim_exec = 0;
  u32 remove_len;
  u32 len_p2;

  /* 虽然当检测到可变行为时，修剪器的用处不大，但它仍然会在一定程度上起作用，所以我们不检查这个
     1.如果case的len小于5字节，就不需要修剪，直接返回
     2.令stage_name指向tmp数组首位；修剪字节计数器bytes_trim_in的值增加q->len个大小
     3.选择初始块len_p2的长度，其值是大于等于q->len的第一个2的幂次(如果len是666，len_p2就是1024)
     4.设置起始步长remove_len的值，从"len_p2/16"和"4"中选取最大的那个。这里用到的两个宏都可在
       config.h中找到
  */
  if (q->len < 5) 
      return 0;

  stage_name = tmp;
  bytes_trim_in += q->len;

  len_p2 = next_p2(q->len);

  remove_len = MAX(len_p2 / TRIM_START_STEPS, TRIM_MIN_BYTES);

    
  /* 进入循环，直至步长remove_len(每轮循环会除2)小于终止步长MAX(len_p2/1024, 4)时停止：
     1.remove_pos保存当前的remove_len
     2.格式化remove_len到tmp中，相当于令stage_name = "trim remove_len/remove_len"，需要注意一点
       前面已经将stage_name指向了tmp的首地址
     3.将stage_cur置零，stage_max设置为q->len/remove_len(如果remove_len是用TRIM_START_SETPS
       计算出来的，那么这个值为8~15之间的一个数)
  */
  while (remove_len >= MAX(len_p2 / TRIM_END_STEPS, TRIM_MIN_BYTES)) {
    u32 remove_pos = remove_len;

    sprintf(tmp, "trim %s/%s", DI(remove_len), DI(remove_len));

    stage_cur = 0;
    stage_max = q->len / remove_len;

    /* 进入内层循环，每次前进remove_len个步长，直至整个文件都遍历完(remove_pos >= q->len)为止
       1.设置trim_avail为一块较小的片段长度
       2.调用write_with_gap()，具体操作为，由in_buf中remove_pos处开始，向后跳过remove_len个字
         节，写入到.cur_input里
       3.调用run_target运行一次目标程序，trim_execs计数器加1，然后处理潜在的错误（可以理解为，将
         case删除了一小段。然后再执行下，看看对跟踪是否产生影响，若无影响，则保将删除后的case，尽管
         这么做对可变路径variable-path来说并不完美）
    */
    while (remove_pos < q->len) {

      u32 trim_avail = MIN(remove_len, q->len - remove_pos);
      u32 cksum;

      write_with_gap(in_buf, q->len, remove_pos, trim_avail);

      fault = run_target(argv, exec_tmout);
      trim_execs++;
      if (stop_soon || fault == FAULT_ERROR) 
          goto abort_trimming;

      /* 4.用当前的trace_bits计算出一个哈希值，保存到cksum中。（注意，原生的AFL在此处不会跟踪崩溃
           或挂起）
         5.判断cksum和q->exec_cksum的值是否相等，即将case删减一小段后是否对trace产生影响：
            a.如果相等，说明不产生影响，则：
               1).计算in_buf中从remove_pos开始，到结尾剩余的长度（删除trim_avail后），并将其赋给
                  局部变量move_tail
               2).重新设置q->len和len_p2，即从case删除trim_avail剩余的长度，这里的对len_p2的修
                  改可能会触发外层循环的终止条件
               3).调用memmove，安全的将case中trim_avail片段之后，move_tail大小的内容附加到原先从
                  trim_avail开始的位置
               4).如果未设置needs_write，则将其置1；然后用一个干净的clean_trace保存trace_bits中
                  的值，一旦完成修剪操作后，updata_bitmap_score就会根据trace_bits和偏好因子进一
                  步筛选queue，并对trace_bits进行压缩
            b.如果不相等，说明case删减会对trace产生影响，则这段不删减，remove_pos加上步长的值
         6.由于trim过程可能比较慢，所以如果达到stats_update_freq更新周期，就调用show_stats()刷新
           一次并显示界面（由于stats_update_freq的值为1，所以时不时就刷新一次）；同时，trim_exec
           的值自增1，表示执行了一次修剪。
         7.stage_cur的值加1，然后进入下一轮内层循环
         8.令循环步长remove_len除2，然后进入下一轮外层循环
      */
      cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);

      if (cksum == q->exec_cksum) {
        u32 move_tail = q->len - remove_pos - trim_avail;

        q->len -= trim_avail;
        len_p2  = next_p2(q->len);

        memmove(in_buf + remove_pos, in_buf + remove_pos + trim_avail, 
                move_tail);

        if (!needs_write) {
          needs_write = 1;
          memcpy(clean_trace, trace_bits, MAP_SIZE);
        }
      } else 
          remove_pos += remove_len;

      if (!(trim_exec++ % stats_update_freq)) 
          show_stats();
      stage_cur++;
        
    }
    remove_len >>= 1;
  }
    

  /* 如果对in_buf做了修改（说明trim后不影响，不然就不修改了），那也要对磁盘中保存的case版本进行更新：
     1.删除原来的q->fname，创建一个新的q->fname，将in_buf里的内容写入q->
     2.用clean_trace恢复trace_bits的值；调用update_bitmap_score根据偏好因子更新bitmap
  */
  if (needs_write) {
    s32 fd;

    unlink(q->fname); /* ignore errors */
    fd = open(q->fname, O_WRONLY | O_CREAT | O_EXCL, 0600);
    if (fd < 0)
        PFATAL("Unable to create '%s'", q->fname);
    ck_write(fd, in_buf, q->len, q->fname);
    close(fd);

    memcpy(trace_bits, clean_trace, MAP_SIZE);
    update_bitmap_score(q);
  }
    
   /* 对fault的一些处理，字节修剪计数器bytes_trim_out的值增加q->len个大小 */
abort_trimming:
    
  bytes_trim_out += q->len;
  return fault;
}

calculate_score

在fuzz_one中(5143行)被调用

/* 根据queue entry的执行速度、覆盖到的path数和路径深度来评估出一个得分，这个得分perf_score在后面havoc的    时候使用 */
static u32 calculate_score(struct queue_entry* q) {

  u32 avg_exec_us = total_cal_us / total_cal_cycles;
  u32 avg_bitmap_size = total_bitmap_size / total_bitmap_entries;
  u32 perf_score = 100;
    
  /* 基于case的执行速度对得分perf_score进行调整，将case执行速度与系数相乘（0.1~4）后再与平均值
    （total_cal_us/total_cal_cycles）进行对比。由于快速输入的fuzz成本更低，因而会给他们更多
     的air time
  */
  if (q->exec_us * 0.1 > avg_exec_us) perf_score = 10;
  else if (q->exec_us * 0.25 > avg_exec_us) perf_score = 25;
  else if (q->exec_us * 0.5 > avg_exec_us) perf_score = 50;
  else if (q->exec_us * 0.75 > avg_exec_us) perf_score = 75;
  else if (q->exec_us * 4 < avg_exec_us) perf_score = 300;
  else if (q->exec_us * 3 < avg_exec_us) perf_score = 200;
  else if (q->exec_us * 2 < avg_exec_us) perf_score = 150;

  /* 基于case的bitmap覆盖率对得分perf_score进行调整，原理是更好的覆盖率可以转化为更好的目标；
     具体操作与上面基于执行速度的类似，这里不展开
  */
  if (q->bitmap_size * 0.3 > avg_bitmap_size) perf_score *= 3;
  else if (q->bitmap_size * 0.5 > avg_bitmap_size) perf_score *= 2;
  else if (q->bitmap_size * 0.75 > avg_bitmap_size) perf_score *= 1.5;
  else if (q->bitmap_size * 3 < avg_bitmap_size) perf_score *= 0.25;
  else if (q->bitmap_size * 2 < avg_bitmap_size) perf_score *= 0.5;
  else if (q->bitmap_size * 1.5 < avg_bitmap_size) perf_score *= 0.75;

  /* 基于handicap调整得分perf_score，handicap表示相对于其它case，这个case迟了多久我们才能了解它
     的路径（这个值仅在calibrate_case中被设置，初始为0，后为queue_cycle-1），迟到者可以多跑一段时
     间，直到赶上其他人。
  */
  if (q->handicap >= 4) {
    perf_score *= 4;
    q->handicap -= 4;

  } else if (q->handicap) {
    perf_score *= 2;
    q->handicap--;
  }

  /* 基于输入深度对得分最出最终调整，这里假设在进行模糊测试时，更深入的测试用例更有可能发现传统模拟器无
     法发现的东西。这个cur_depth是一个全局变量，初始为0；每次add_to_queue时，会设置cur_depth+1：
     1.处理输入时：
       在read_testcases时会调用add_to_queue，此时所有的input_case的q->depth都会被设置为1
     2.fuzz_one时：
       在fuzz_one时，会先设置cur_depth为当前queue的q->depth，然后这个queue经过mutate之后调用
       save_if_interesting；如果是interesting_case，就会被add_to_queue，此时就建立起了queue
       之间的关联关系，所以由当前queue经过mutate后加入的新queue，深度都在当前queue的基础上再增加
  */
  switch (q->depth) {

    case 0 ... 3:   break;
    case 4 ... 7:   perf_score *= 2; break;
    case 8 ... 13:  perf_score *= 3; break;
    case 14 ... 25: perf_score *= 4; break;
    default:        perf_score *= 5;

  }

  /* 确保得分perf_score不会超过限制，然后返回得分 */
  if (perf_score > HAVOC_MAX_MULT * 100) 
      perf_score = HAVOC_MAX_MULT * 100;
  return perf_score;
}

common_fuzz_stuff

在fuzz_one中(5188行)多次被调用，首次出现在5188行

/* 编写修改后的testcase，运行程序，处理结果以及异常情况。如需退出，则返回1，这同样是fuzz_one中
   的一个辅助函数。这里的out_buf，同样可以理解为use_mem，参考前面的trim_case */
EXP_ST u8 common_fuzz_stuff(char** argv, u8* out_buf, u32 len) {

  u8 fault;

  /* 如果设置了post_handler（在setup_post中设置），就通过post_handler处理一下out_buf；处理
     后判断out_buf及len的值，若有一个为0，则直接返回0。sakura指出 "如果需要对变异完的queue，做
     一层wrapper再写入的时候，这里其实很有价值" */
  if (post_handler) {
    out_buf = post_handler(out_buf, &len);
    if (!out_buf || !len)
        return 0;
  }

  /* 1.调用write_to_testcase将当前case写入到out_file
     2.执行一次run_target
     3.判断fault类型：
        a.如果是FAULT_TMOUT。则令连续超时数计数器subseq_tmouts的值加1，如果大于TMOUT_LIMIT
         （默认250），则令当前cycle被抛弃inputs计数器cur_skipped_paths的值加1；然后直接返回1
        b.其它fault类型，则将subseq_tmouts置零
  */
  write_to_testcase(out_buf, len);
  fault = run_target(argv, exec_tmout);

  if (stop_soon)
      return 1;

  if (fault == FAULT_TMOUT) {
    if (subseq_tmouts++ > TMOUT_LIMIT) {
      cur_skipped_paths++;
      return 1;
    }
  } else 
      subseq_tmouts = 0;

  /* 如果用户通过SIGUSR1来请求抛弃当前的input，则会设置skip_requested值；若设置了该值，则：
     1.将skip_requested清零
     2.令当前cycle被抛弃inputs计数器cur_skipped_paths的值加1
     3.返回1
  */
  if (skip_requested) {
     skip_requested = 0;
     cur_skipped_paths++;
     return 1;
  }

  /* 接下来处理FAULT_ERROR：
     1.调用一次save_if_interesting，如果返回1，即该case被认为是"有趣的"，则queued_discovered加1
     2.如果进入了更新周期，则调用show_stats更新展示界面
  */
  queued_discovered += save_if_interesting(argv, out_buf, len, fault);
    
  if (!(stage_cur % stats_update_freq) || stage_cur + 1 == stage_max)
      show_stats();
  return 0;
}

choose_block_len

在fuzz_one中多次被调用，首次出现在6352行

/* 该函数用于帮助fuzz_one中的块操作选择随机块的长度，会返回一个大于0的值 */
static u32 choose_block_len(u32 limit) {

  /* 1.设置rlim的值为queue_cycle和3中更小的那一个
     2.如果run_over10m为0（这个值表示是否超时超过10min），则设置rlim的值为1
     3.接下来进入switch语句，对不同分支进行判断。这里的UR定义在afl-fuzz.c的371行，用于生成一个（范围
       在0~rlim-1）随机数：
          case 0: 设置min_value为1，max_value为32
          case 1: 设置min_value为32，max_value为128
          default: 有9/10的概率，设置min_value为128，max_value为1500
                   有1/10的概率，设置min_value为1500，max_value为32768
     4.如果min_value >= 传入的参数limit，则设置min_value的值为0
     5.min_value加上一个随机值作为返回值，因为返回值 >= 1
  */
  u32 min_value, max_value;
  u32 rlim = MIN(queue_cycle, 3);

  if (!run_over10m) 
      rlim = 1;

  switch (UR(rlim)) {

    case 0:  min_value = 1;
             max_value = HAVOC_BLK_SMALL;
             break;

    case 1:  min_value = HAVOC_BLK_SMALL;
             max_value = HAVOC_BLK_MEDIUM;
             break;

    default: 
             if (UR(10)) {
               min_value = HAVOC_BLK_MEDIUM;
               max_value = HAVOC_BLK_LARGE;

             } else {
               min_value = HAVOC_BLK_LARGE;
               max_value = HAVOC_BLK_XL;
             }
  }

  if (min_value >= limit) 
      min_value = 1;
  return min_value + UR(MIN(max_value, limit) - min_value + 1);
}

fuzz_one

AFL中最长的函数(4999~6690)，也是fuzz执行流程中最核心函数。这里按照阶段分布展开分析

初始化阶段

这部分主要是fuzz前最后的一些准备。包括了一些变量的初始化，映射case等，以及CALIBRATION阶段、TRIMMING阶段和PERFORMANCE SCORE阶段这三个比较简短的准备阶段

/* Take the current entry from the queue, fuzz it for a while. This
   function is a tad too long... returns 0 if fuzzed successfully, 1 if
   skipped or bailed out. */

/* 调用fuzz_one对queue_cur进行一次变异测试（fuzz_one并不一定真的执行当前queue_cur，
   它有一定的策略；如果不执行，就直接返回1，否则返回0） */
static u8 fuzz_one(char** argv) {

  s32 len, fd, temp_len, i, j;
  u8  *in_buf, *out_buf, *orig_in, *ex_tmp, *eff_map = 0;
  u64 havoc_queued,  orig_hit_cnt, new_hit_cnt;
  u32 splice_cycle = 0, perf_score = 100, orig_perf, prev_cksum, eff_cnt = 1;

  u8  ret_val = 1, doing_det = 0;

  u8  a_collect[MAX_AUTO_EXTRA];
  u32 a_len = 0;

#ifdef IGNORE_FINDS

  /* In IGNORE_FINDS mode, skip any entries that weren't in the
     initial data set. */

  if (queue_cur->depth > 1) 
      return 1;

#else

  /* 判断pending_favored的值：（这里用到的常量可以在config.h找到）
     1.如果为0，对于queue_cur被fuzz过或者不是favored的，有99%的概率不执行，直接返回1
     2.如果不为0，并且不是dumb_mode、不是favored的、queued_paths>10：
        a.如果queue_cycle大于1，且没有被fuzz过，那么有95%的概率不执行，直接返回1
        b.否则，有75%的概率不执行，直接返回1
  */
  if (pending_favored) {

    /* If we have any favored, non-fuzzed new arrivals in the queue,
       possibly skip to them at the expense of already-fuzzed or non-favored
       cases. */
    
    if ((queue_cur->was_fuzzed || !queue_cur->favored) &&
        UR(100) < SKIP_TO_NEW_PROB) 
        return 1;

  } else if (!dumb_mode && !queue_cur->favored && queued_paths > 10) {

    /* Otherwise, still possibly skip non-favored cases, albeit less often.
       The odds of skipping stuff are higher for already-fuzzed inputs and
       lower for never-fuzzed entries. */

    if (queue_cycle > 1 && !queue_cur->was_fuzzed) {
      if (UR(100) < SKIP_NFAV_NEW_PROB) 
          return 1;

    } else {
      if (UR(100) < SKIP_NFAV_OLD_PROB) 
          return 1;
    }
  }

#endif /* ^IGNORE_FINDS */

  /* 如果不是tty模式，输出提示信息并刷新stdout缓冲区 */
  if (not_on_tty) {
    ACTF("Fuzzing test case #%u (%u total, %llu uniq crashes found)...",
         current_entry, queued_paths, unique_crashes);
    fflush(stdout);
  }

  /* Map the test case into memory. */
    
  /* 这部分主要将case映射到内存的处理：
     1.设置len为queue_cur->len
     2.打开case对应的文件，并通过mmap映射到内存里，将地址赋值给in_buf和orig_in
     3.分配len大小的内存，并初始化为全0，然后将地址赋值给out_buf
     4.将连续超时计数器subseq_tmout清零
     5.设置cur_depth为queue_cur->depth
  */
  fd = open(queue_cur->fname, O_RDONLY);
  if (fd < 0) 
      PFATAL("Unable to open '%s'", queue_cur->fname);

  len = queue_cur->len;

  orig_in = in_buf = mmap(0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);

  if (orig_in == MAP_FAILED)
      PFATAL("Unable to mmap '%s'", queue_cur->fname);
  close(fd);

  /* We could mmap() out_buf as MAP_PRIVATE, but we end up clobbering every
     single byte anyway, so it wouldn't give us any performance or memory usage
     benefits. */

  out_buf = ck_alloc_nozero(len);
  subseq_tmouts = 0;
  cur_depth = queue_cur->depth;
    

  /*******************************************
   * CALIBRATION (only if failed earlier on) *
   *******************************************/

  /* 这里开始进入CALIBRATION阶段：
     1.假如当前项有校准错误，并且校准错误次数小于3次，那么就调用calibrate_case再次校准
     2.如果设置了stop_soon，或者res不等于crash_mode：
        a.计数器cur_skipped_paths加1
        b.进入abandon_entry作后续处理
  */
  if (queue_cur->cal_failed) {

    u8 res = FAULT_TMOUT;

    if (queue_cur->cal_failed < CAL_CHANCES) {

      /* Reset exec_cksum to tell calibrate_case to re-execute the testcase
         avoiding the usage of an invalid trace_bits.
         For more info: https://github.com/AFLplusplus/AFLplusplus/pull/425 */

      queue_cur->exec_cksum = 0;

      res = calibrate_case(argv, queue_cur, in_buf, queue_cycle - 1, 0);

      if (res == FAULT_ERROR)
        FATAL("Unable to execute target application");

    }

    if (stop_soon || res != crash_mode) {
      cur_skipped_paths++;
      goto abandon_entry;
    }

  }

    
  /************
   * TRIMMING *
   ************/

  /* 这里开始进入TRIMMING阶段：
     1.如果不处于dumb_mode，且当前项没有被裁剪：
        a.调用trim_case对queue_cur进行trim
        b.设置queue_cur->trim_done的值为1
        c.重新用queue_cur->len去设置len的值
     2.将in_buf拷贝len个字节到out_buf中（注意in_buf是trim_case的参数，得到了裁剪）
  */
  if (!dumb_mode && !queue_cur->trim_done) {

    u8 res = trim_case(argv, queue_cur, in_buf);

    if (res == FAULT_ERROR)
      FATAL("Unable to execute target application");

    if (stop_soon) {
      cur_skipped_paths++;
      goto abandon_entry;
    }

    /* Don't retry trimming, even if it failed. */

    queue_cur->trim_done = 1;

    if (len != queue_cur->len) 
        len = queue_cur->len;
  }

  memcpy(out_buf, in_buf, len);

    
  /*********************
   * PERFORMANCE SCORE *
   *********************/

  /* 这里开始进入PERFORMANCE SCORE阶段：
     1.对当前项调用calculate_score，算出得分并设置orig_perf和perf_score
     2.如果设置了skip_deterministic，或者当前项被fuzz过，或者passed_det为1（好像也是被fuzz过）
       ，那么跳转到havoc_stage去执行
     3.如果执行路径校验和，超过此主实例的范围，那么也跳转到havoc_stage去执行
     4.若没跳走，设置doing_det的值为1（位于fuzz_one中的一个局部变量）
  */
  orig_perf = perf_score = calculate_score(queue_cur);

  /* Skip right away if -d is given, if we have done deterministic fuzzing on
     this entry ourselves (was_fuzzed), or if it has gone through deterministic
     testing in earlier, resumed runs (passed_det). */

  if (skip_deterministic || queue_cur->was_fuzzed || queue_cur->passed_det)
    goto havoc_stage;

  /* Skip deterministic fuzzing if exec path checksum puts this out of scope
     for this master instance. */

  if (master_max && (queue_cur->exec_cksum % master_max) != master_id - 1)
    goto havoc_stage;

  doing_det = 1;

SIMPLE BITFLIP (+dictionary construction)阶段

在比特反转，根据目标大小的不同，分为了多个不同的阶段：

bitflip 1/1 && collect tokens -> 按位取反 && 搜集token
bitflip 2/1 相邻两位进行取反
bitflip 4/1 相邻四位进行取反
bitflip 8/8 && effector map -> 按字节取反 && 构建effector map
bitflip 16/8 连续两byte翻转
bitflip 32/8 连续四byte翻转

  /*********************************************
   * SIMPLE BITFLIP (+dictionary construction) *
   *********************************************/

/* 这里开始进入SIMPLE BITFILP阶段，先来品一下FLIP_BIT这个宏，它的最后一条指令很有味道：
   1.等式右边：
      a.(_bf) & 7)相当于对8取模，得到0~7中的一个数
      b.128的二进制为10000000
      c.128 >> ((_bf) & 7)也就是在00000000这8个bit位上中的某个位置1，具体哪一位由_bf决定
   2.等式左边：
      a.(_bf) >> 3，是对_bf除8，想一想，这里为什么是除8而不是模8？其实算一下就能明白，假设_bf的
        取值范围是0~7，那么模8后得到的值的范围也是0~7，而除8得到的值只有0。而1个字节又有8bit，假
        设被除数有两个字节/16bit，除8意味着，前8次计算，都指向第一个字节的位置，后8次计算，都指向
        第二个字节的位置。这样是不是就好理解了。然后再往下看
      b._arf[(_bf) >> 3]，_arf是u8*类型，而u8就是char类型，也就是1字节。因此这里(_bf) >> 3
        得到的就是在_arf数组里的下标。再往后看
      c._arf[(_bf) >> 3] ^= (128 >> ((_bf) & 7))，前面提到，(_bf) >> 3，会导致连续（假设
        在循环中依次递增_bf）8次指向某一个字节的位置，而等式右边刚好可以在8次内将单个字节上的8个比
        特依次置位。之后后再进行异或运算。
    3.因此可以得出结论，这个FLIP_BIT就是对参数_ar指向的数组每个字节的每个比特进行翻转；而参数_b必
      须是一个8的倍数的数。这部分仍未理解明白可以去参考hollk师傅的文章，链接贴在文末
*/
#define FLIP_BIT(_ar, _b) do { \
    u8* _arf = (u8*)(_ar); \
    u32 _bf = (_b); \
    _arf[(_bf) >> 3] ^= (128 >> ((_bf) & 7)); \
  } while (0)


  /* Single walking bit. */

  /* 理解了FLIP_BIT这个宏，下面来看bitflip的每个阶段，首先是bitflip 1/1：
     1.设置循环次数stage_max为len << 3，因为len得到的是大小是字节，乘8后就可以在循环中对每个字节
       的每个比特进行翻转
     2.设置stage_name为bitflip 1/1
     3.进入循环，通过FLIP_BIT依次对out_buf的每个位翻转
     4.然后执行一次common_fuzz_stuff
     5.然后再调用FLIP_BIT翻转回来
  */
  stage_short = "flip1";
  stage_max   = len << 3;
  stage_name  = "bitflip 1/1";

  stage_val_type = STAGE_VAL_NONE;
  orig_hit_cnt = queued_paths + unique_crashes;
  prev_cksum = queue_cur->exec_cksum;

  
  for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
      
    stage_cur_byte = stage_cur >> 3;
      
    FLIP_BIT(out_buf, stage_cur);

    if (common_fuzz_stuff(argv, out_buf, len)) 
        goto abandon_entry;

    FLIP_BIT(out_buf, stage_cur);

    /* While flipping the least significant bit in every byte, pull of an extra
       trick to detect possible syntax tokens. In essence, the idea is that if
       you have a binary blob like this:
       xxxxxxxxIHDRxxxxxxxx
       ...and changing the leading and trailing bytes causes variable or no
       changes in program flow, but touching any character in the "IHDR" string
       always produces the same, distinctive path, it's highly likely that
       "IHDR" is an atomically-checked magic value of special significance to
       the fuzzed format.
       We do this here, rather than as a separate stage, because it's a nice
       way to keep the operation approximately "free" (i.e., no extra execs).
       
       Empirically, performing the check when flipping the least significant bit
       is advantageous, compared to doing it at the time of more disruptive
       changes, where the program flow may be affected in more violent ways.
       The caveat is that we won't generate dictionaries in the -d mode or -S
       mode - but that's probably a fair trade-off.
       This won't work particularly well with paths that exhibit variable
       behavior, but fails gracefully, so we'll carry out the checks anyway.
    */
    
    /* 在进行bitflip 1/1变异时，对于每个字节的最低有效位（least significant bit）的翻转还进
       行了额外的处理：
       1.假设有一个字符串"xxxxxxxxIHDRxxxxxxxx"
       2.更改前导字节和尾随字节会导致程序流发生变化或没有变化，但是触摸“IHDR”字符串中的任何字符总
         是会产生相同的（即程序的执行路径相同），并且独特（与原始执行路径不同）的路径，那么就把这一
         段连续的bytes判断是一个token
       3.sakura师傅举了另一个例子："对于SQL的SELECT *，如果SELECT被破坏，则肯定和正确的路径不
         一致，而被破坏之后的路径却肯定是一样的，比如AELECT和SBLECT，显然都是无意义的，而只有不
         破坏token，才有可能出现和原始执行路径一样的结果，所以AFL在这里就是在猜解关键字token"
    */
    if (!dumb_mode && (stage_cur & 7) == 7) {

      u32 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
        
      if (stage_cur == stage_max - 1 && cksum == prev_cksum) {

        /* If at end of file and we are still collecting a string, grab the
           final character and force output. */
        /* 1.如果路径没变，当前token数量小于32(MAX_AUTO_EXTRA默认为32)，则尝试收集字符串，将
             当前字符作为token拼接到a_collect[]数组中
           2.如果已经达到文件末尾，但仍在收集字符串，则获取最后一个字符并强制输出
           3.如果当前token数量小于32且大于3（token默认的数量范围），调用maybe_add_auto将累计
             的a_collect[]数组中的内容添加到a_extras[]数组中
        */
        if (a_len < MAX_AUTO_EXTRA) 
            a_collect[a_len] = out_buf[stage_cur >> 3];
        a_len++;
          
        if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
          maybe_add_auto(a_collect, a_len);

      } else if (cksum != prev_cksum) {

        /* Otherwise, if the checksum has changed, see if we have something
           worthwhile queued up, and collect that if the answer is yes. */
        /* 1.如果路径变了，调用maybe_add_auto判断是否收集这个case到queue中
           2.同时将a_collect中的token添加到a_extras数组中 */
        if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
          maybe_add_auto(a_collect, a_len);

        a_len = 0;
        prev_cksum = cksum;

      }

      /* Continue collecting string, but only if the bit flip actually made
         any difference - we don't want no-op tokens. */
      /* 1.如果当前路径与原始路径不相同，这说明可能是因为token被破坏导致与原始执行路径不符
         2.所以需要保证位翻转后确实有所作为，才会收集这个字符串token，因为我们不想要无操作的token
      */
      if (cksum != queue_cur->exec_cksum) {
        if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3];        
        a_len++;
      }
    }
  }

  /* 1.stage_finds[STAGE_FLIP1]统计在这个阶段（这里是bitflip 1/1）新发现的路径和Crash的总和
     2.stage_cycles[STAGE_FLIP1]统计在这个阶段执行target的总次数
  */
  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_FLIP1]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_FLIP1] += stage_max;


  /* Two walking bits. */
  /* 1.bitflip 2/1阶段与bitflip 1/1原理相同，只不过是连续翻转相邻的两位bit
     2.结果分别保存到 stage_finds[STAGE_FLIP2] 和 stage_cycles[STAGE_FLIP2] 中
  */
  stage_name  = "bitflip 2/1";
  stage_short = "flip2";
  stage_max   = (len << 3) - 1;

  orig_hit_cnt = new_hit_cnt;

  for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {

    stage_cur_byte = stage_cur >> 3;

    FLIP_BIT(out_buf, stage_cur);
    FLIP_BIT(out_buf, stage_cur + 1);

    if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;

    FLIP_BIT(out_buf, stage_cur);
    FLIP_BIT(out_buf, stage_cur + 1);
  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_FLIP2]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_FLIP2] += stage_max;


  /* Four walking bits. */
  /* 同理，bitflip 4/1，连续翻转四位并记录 */
  stage_name  = "bitflip 4/1";
  stage_short = "flip4";
  stage_max   = (len << 3) - 3;

  orig_hit_cnt = new_hit_cnt;

  for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {

    stage_cur_byte = stage_cur >> 3;

    FLIP_BIT(out_buf, stage_cur);
    FLIP_BIT(out_buf, stage_cur + 1);
    FLIP_BIT(out_buf, stage_cur + 2);
    FLIP_BIT(out_buf, stage_cur + 3);

    if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;

    FLIP_BIT(out_buf, stage_cur);
    FLIP_BIT(out_buf, stage_cur + 1);
    FLIP_BIT(out_buf, stage_cur + 2);
    FLIP_BIT(out_buf, stage_cur + 3);

  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_FLIP4]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_FLIP4] += stage_max;


  /* 接下来将进入bitflip 8/8这个阶段，在这个阶段就不需要用前面的FLIP_BIT进行翻转了，而是
     直接将0xFF(11111111)与字节进行异或来实现。在这个过程中，还会生成一个非常重要的数组——
     effector map[]，下面对原理进行阐述：（这里引用sakura和hollk的分析结论）
     1.在对每个byte进行翻转时，如果其造成执行路径与原始路径不一致，就将该byte在effector
       map中标记为1，即有效的；否则标记为0，即无效的
     2.这么做的逻辑是：如果一个byte完全翻转，都无法带来执行路径的变化，那么这个byte很有可能
       属于"data"，而不是"metadata"（例如size, flag等），对整个fuzzing的意义不大。所以
       ，在随后的一些变异中，会参考effector map[]，通过stage_max--的方式跳过无效的byte，
       从而节省执行资源
     3.但某些情况并不会检测有效字符，例如在dumb_mode模式下或者指定fuzzer的情况下，此时所有
       字符都有可能进行变异
  */

  /* Effector map setup. These macros calculate:
     EFF_APOS      - position of a particular file offset in the map.
     EFF_ALEN      - length of a map with a particular number of bytes.
     EFF_SPAN_ALEN - map span for a sequence of bytes.
     Effector map 一些用于计算的宏的设置
     EFF_APOS      - 计算特定文件偏移量在map中的位置，EFF_MAP_SCALE2设置为3
     EFF_ALEN      - 具有特定字节数的map的长度
     EFF_SPAN_ALEN - 一个字节序列的map范围
   */

#define EFF_APOS(_p)          ((_p) >> EFF_MAP_SCALE2)
#define EFF_REM(_x)           ((_x) & ((1 << EFF_MAP_SCALE2) - 1))
#define EFF_ALEN(_l)          (EFF_APOS(_l) + !!EFF_REM(_l))
#define EFF_SPAN_ALEN(_p, _l) (EFF_APOS((_p) + (_l) - 1) - EFF_APOS(_p) + 1)

  /* Initialize effector map for the next step (see comments below). Always
     flag first and last byte as doing something. */
  /* 初始化effector map：
     1.初始化大小为case字节长度的空间，赋值给eff_map
     2.将eff_map第一个字节设置为1
     3.如果case的长度大于等于9字节：
        a.将eff_map最后一个字节设置为1
        b.令计数器eff_cnt加1（这是一个在fuzz_one开始处的一个局部变量）
  */
  eff_map    = ck_alloc(EFF_ALEN(len));
  eff_map[0] = 1;

  if (EFF_APOS(len - 1) != 0) {
    eff_map[EFF_APOS(len - 1)] = 1;
    eff_cnt++;
  }

  /* Walking byte. */
  /* 下面正式进入bitflip 8/8阶段，也就是单字节翻转阶段：
     1.设置stage_name为"bitflip 8/8"   
     2.设置stage_max为len，这里没有乘8，因为是对字节进行翻转
     3.out_buf[stage_cur] ^= 0xFF，这里直接对字节进行翻转
     4.调用common_fuzz_stuff对变异后数据进行测试，记录interesting，若返回1则跳转到
       abandon_entry去处理
  */
  stage_name  = "bitflip 8/8";
  stage_short = "flip8";
  stage_max   = len;

  orig_hit_cnt = new_hit_cnt;

  for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {

    stage_cur_byte = stage_cur;

    out_buf[stage_cur] ^= 0xFF;

    if (common_fuzz_stuff(argv, out_buf, len)) 
        goto abandon_entry;

    /* We also use this stage to pull off a simple trick: we identify
       bytes that seem to have no effect on the current execution path
       even when fully flipped - and we skip them during more expensive
       deterministic stages, such as arithmetics or known ints.
      
       我们还在这个阶段实现了一个简单的技巧：我们识别即使在完全翻转时似乎对当前执行路径
       没有影响的字节；这样在更昂贵的确定性fuzzing阶段可以跳过它们（例如算数或已知整数）
       1.如果eff_map[某个字节]对应的值为0
       2.如果不是dumb_mode且case的len大于等于128，则计算校验值cksum
       3.如果是dumb_mode，就不计算了，直接对原先的cksum取反
       4.最后判断cksum与原先的cksum是否相同，若不同说明发现了新的路径，则设置eff_map
         这个字节对应的值为1
       5.判断结束后，将先前取反的字节复位
    */
    if (!eff_map[EFF_APOS(stage_cur)]) {

      u32 cksum;

      /* If in dumb mode or if the file is very short, just flag everything
         without wasting time on checksums. */

      if (!dumb_mode && len >= EFF_MIN_LEN)
        cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
      else
        cksum = ~queue_cur->exec_cksum;

      if (cksum != queue_cur->exec_cksum) {
        eff_map[EFF_APOS(stage_cur)] = 1;
        eff_cnt++;
      }
    }
      
    out_buf[stage_cur] ^= 0xFF;
  }

  /* If the effector map is more than EFF_MAX_PERC dense, just flag the
     whole thing as worth fuzzing, since we wouldn't be saving much time
     anyway.
    
     如果effector map的密度超过EFF_MAX_PERC（设定为90），只需要将整件事标记为值得
     进行模糊测试（即将eff_map所有字节设置为1），因为无论如何我们都不会节省太多时间
  */
  if (eff_cnt != EFF_ALEN(len) &&
      eff_cnt * 100 / EFF_ALEN(len) > EFF_MAX_PERC) {
    memset(eff_map, 1, EFF_ALEN(len));
    blocks_eff_select += EFF_ALEN(len);

  } else {
    blocks_eff_select += eff_cnt;
  }

  blocks_eff_total += EFF_ALEN(len);

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_FLIP8]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_FLIP8] += stage_max;


  /* Two walking bytes. */
  /* 接下来进入bitflip 16/8阶段，即双字节翻转阶段，这部分原理与单字节翻转类似，但有以下
     几点不同：
     1.stage_name设置为"bitflip 16/8"
     2.stage_max设置为len - 1，而不是len
     3.以字为单位和0xffff进行异或运算，去翻转相邻的两个字节
     4.翻转之前会先检查eff_map里对应于这两个字节的标志是否为0，如果为0，则这两个字节是无
       效的数据，stage_max--，然后开始变异下一个字
     相同点：
     1.先翻转，再common_fuzz_stuff，最后复位
  */
  if (len < 2) goto skip_bitflip;

  stage_name  = "bitflip 16/8";
  stage_short = "flip16";
  stage_cur   = 0;
  stage_max   = len - 1;

  orig_hit_cnt = new_hit_cnt;

  for (i = 0; i < len - 1; i++) {

    /* Let's consult the effector map... */

    if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
      stage_max--;
      continue;
    }

    stage_cur_byte = i;

    *(u16*)(out_buf + i) ^= 0xFFFF;

    if (common_fuzz_stuff(argv, out_buf, len))
        goto abandon_entry;
    stage_cur++;

    *(u16*)(out_buf + i) ^= 0xFFFF;
  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_FLIP16]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_FLIP16] += stage_max;

  if (len < 4) 
      goto skip_bitflip;


  /* Four walking bytes. */
  /* 比特翻转的最后一个阶段，bitflip 32/8，即4字节翻转：
     1.stage_name设置为"bitflip 32/8"
     2.stage_max设置为len - 3
     3.以双字（dword）为单位，直接通过和0xffffffff异或运算去翻转相邻四个字节的位，然后
       执行一次，并记录
     4.在每次翻转之前会检查eff_map里对应于这四个字节的标志是否为0，如果是0，则这两个字节
       是无效的数据，stage_max--，然后开始变异下一组双字
     5.复位
  */
  stage_name  = "bitflip 32/8";
  stage_short = "flip32";
  stage_cur   = 0;
  stage_max   = len - 3;

  orig_hit_cnt = new_hit_cnt;

  for (i = 0; i < len - 3; i++) {

    /* Let's consult the effector map... */
    if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
        !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
      stage_max--;
      continue;
    }

    stage_cur_byte = i;

    *(u32*)(out_buf + i) ^= 0xFFFFFFFF;

    if (common_fuzz_stuff(argv, out_buf, len))
        goto abandon_entry;
    stage_cur++;

    *(u32*)(out_buf + i) ^= 0xFFFFFFFF;
  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_FLIP32]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_FLIP32] += stage_max;

skip_bitflip:

  if (no_arith) 
      goto skip_arith;

ARITHMETIC INC/DEC 阶段

  /**********************
   * ARITHMETIC INC/DEC *
   **********************/

  /* 这个阶段进行加减变异，和Bit_Flip阶段差不多，也是按照bit位数递增变异：
     1.这个阶段的加减变异是存在上限的在config.h中定义了宏ARITH_MAX，默认为35.所以会对
       目标进行+1~+35、-1~-35的运算；由整数存在大端序和小端序两种表现形式，所以这个阶段
       会对这两种情况分别进行变异
     2.本阶段中两种情况会跳过对应的byte的变异：
        a.选中字节对应在effector map中是无效的
        b.之前Bit_Flip已经生成过的变异：如果加/减某个数后，其效果与之前的某种bitflip相同
          ，那么这次变异肯定在上一个阶段已经执行过了，此次便不会再执行
  */


  /* 8-bit arithmetics. */
  /* 重点来看来看arith 8/8阶段，后面的可以类比理解：
     1.由于arith 8/8阶段是单字节的，所以不需要考虑大小端，但单字和双字则需要考虑
     2.先遍历case中的每个字节，用orig暂存当前字节
     3.判断这个字节在eff_map中是否有效，若无效，stage减去2倍的ARITH_MAX，进入下一轮
     4.否则进入内层循环
  */
  stage_name  = "arith 8/8";
  stage_short = "arith8";
  stage_cur   = 0;
  stage_max   = 2 * len * ARITH_MAX;

  stage_val_type = STAGE_VAL_LE;

  orig_hit_cnt = new_hit_cnt;

  for (i = 0; i < len; i++) {

    u8 orig = out_buf[i];

    /* Let's consult the effector map... */

    if (!eff_map[EFF_APOS(i)]) {
      stage_max -= 2 * ARITH_MAX;
      continue;
    }

    /* 1.依次扫描orig~orig+j*（1 <= j <= 35）
       2.然后orig与orig+j进行异或翻转，结果赋值给r
       3.判断这个r是否可以通过上一阶段的Bit_Filp得到（防止相同的冗余变异，以节省时间）
         这里去看一下could_be_bitflip()函数的实现就明白了，如果r的值为0xffffffff
         或者0xffff或者0xff中的一个，那么就和前面的Bit_Flip撞了，因此会返回1，属于
         冗余变异
          a.若可以得到，则stage_max--
          b.否则，将out_buf[i]+j，然后调用common_fuzz_stuff
       4.将orig与orig-j进行异或翻转，结果赋值给r
       5.再判断一次r是否可以通过上一阶段的Bit_Flip得到，后面同上
       6.最后用orig恢复out_buf[i]的值
    */
    stage_cur_byte = i;
    for (j = 1; j <= ARITH_MAX; j++) {

      u8 r = orig ^ (orig + j);

      /* Do arithmetic operations only if the result couldn't be a product
         of a bitflip. */

      if (!could_be_bitflip(r)) {

        stage_cur_val = j;
        out_buf[i] = orig + j;

        if (common_fuzz_stuff(argv, out_buf, len)) 
            goto abandon_entry;
        stage_cur++;

      } else 
          stage_max--;

      r =  orig ^ (orig - j);

      if (!could_be_bitflip(r)) {

        stage_cur_val = -j;
        out_buf[i] = orig - j;

        if (common_fuzz_stuff(argv, out_buf, len)) 
            goto abandon_entry;
        stage_cur++;

      } else stage_max--;

      out_buf[i] = orig;
    }
  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_ARITH8]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_ARITH8] += stage_max;


  /* 16-bit arithmetics, both endians. */
  /* arith 16/8阶段每次对2字节进行加减变异，原理与arith 8/8相同，但是会考虑大小端 */
  if (len < 2) 
      goto skip_arith;

  stage_name  = "arith 16/8";
  stage_short = "arith16";
  stage_cur   = 0;
  stage_max   = 4 * (len - 1) * ARITH_MAX;

  orig_hit_cnt = new_hit_cnt;

  for (i = 0; i < len - 1; i++) {

    u16 orig = *(u16*)(out_buf + i);

    /* Let's consult the effector map... */

    if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
      stage_max -= 4 * ARITH_MAX;
      continue;
    }

    stage_cur_byte = i;

    for (j = 1; j <= ARITH_MAX; j++) {

      u16 r1 = orig ^ (orig + j),
          r2 = orig ^ (orig - j),
          r3 = orig ^ SWAP16(SWAP16(orig) + j),
          r4 = orig ^ SWAP16(SWAP16(orig) - j);

      /* Try little endian addition and subtraction first. Do it only
         if the operation would affect more than one byte (hence the 
         & 0xff overflow checks) and if it couldn't be a product of
         a bitflip. */

      stage_val_type = STAGE_VAL_LE; 

      if ((orig & 0xff) + j > 0xff && !could_be_bitflip(r1)) {

        stage_cur_val = j;
        *(u16*)(out_buf + i) = orig + j;

        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;
 
      } else stage_max--;

      if ((orig & 0xff) < j && !could_be_bitflip(r2)) {

        stage_cur_val = -j;
        *(u16*)(out_buf + i) = orig - j;

        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else stage_max--;

      /* Big endian comes next. Same deal. */

      stage_val_type = STAGE_VAL_BE;


      if ((orig >> 8) + j > 0xff && !could_be_bitflip(r3)) {

        stage_cur_val = j;
        *(u16*)(out_buf + i) = SWAP16(SWAP16(orig) + j);

        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else stage_max--;

      if ((orig >> 8) < j && !could_be_bitflip(r4)) {

        stage_cur_val = -j;
        *(u16*)(out_buf + i) = SWAP16(SWAP16(orig) - j);

        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else stage_max--;

      *(u16*)(out_buf + i) = orig;

    }
  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_ARITH16]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_ARITH16] += stage_max;


  /* 32-bit arithmetics, both endians. */
  /* arith 32/8阶段每次对4字节进行加减变异，同样会考虑大小端 */
  if (len < 4) goto skip_arith;

  stage_name  = "arith 32/8";
  stage_short = "arith32";
  stage_cur   = 0;
  stage_max   = 4 * (len - 3) * ARITH_MAX;

  orig_hit_cnt = new_hit_cnt;

  for (i = 0; i < len - 3; i++) {

    u32 orig = *(u32*)(out_buf + i);

    /* Let's consult the effector map... */

    if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
        !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
      stage_max -= 4 * ARITH_MAX;
      continue;
    }

    stage_cur_byte = i;

    for (j = 1; j <= ARITH_MAX; j++) {

      u32 r1 = orig ^ (orig + j),
          r2 = orig ^ (orig - j),
          r3 = orig ^ SWAP32(SWAP32(orig) + j),
          r4 = orig ^ SWAP32(SWAP32(orig) - j);

      /* Little endian first. Same deal as with 16-bit: we only want to
         try if the operation would have effect on more than two bytes. */

      stage_val_type = STAGE_VAL_LE;

      if ((orig & 0xffff) + j > 0xffff && !could_be_bitflip(r1)) {

        stage_cur_val = j;
        *(u32*)(out_buf + i) = orig + j;

        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else stage_max--;

      if ((orig & 0xffff) < j && !could_be_bitflip(r2)) {

        stage_cur_val = -j;
        *(u32*)(out_buf + i) = orig - j;

        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else stage_max--;

      /* Big endian next. */

      stage_val_type = STAGE_VAL_BE;

      if ((SWAP32(orig) & 0xffff) + j > 0xffff && !could_be_bitflip(r3)) {

        stage_cur_val = j;
        *(u32*)(out_buf + i) = SWAP32(SWAP32(orig) + j);

        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else stage_max--;

      if ((SWAP32(orig) & 0xffff) < j && !could_be_bitflip(r4)) {

        stage_cur_val = -j;
        *(u32*)(out_buf + i) = SWAP32(SWAP32(orig) - j);

        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else 
          stage_max--;

      *(u32*)(out_buf + i) = orig;
    }
  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_ARITH32]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_ARITH32] += stage_max;

skip_arith:

INTERESTING VALUES阶段

  /**********************
   * INTERESTING VALUES *
   **********************/

  /* 进入INTERESTING VALUES阶段：
     1.这个阶段主要将out_buf中的字节替换成AFL预设的一些"interesting values"，这些数定义
       在config.h文件中
     2.hollk师傅认为这里主要是替换成一些可能导致整数溢出的数据，进行fuzz
     3.每次变异前，会调用could_be_bitflip和could_be_arith，判断是否在前两个阶段已经达到
       同样的变异，如果已经产生则跳过
     4.effector map仍然会用于判断目标字节的项是否有效，如果无效也会跳过
  */

  /* 1.interest 8/8阶段：
        a.阶段以一个字节为单位进行替换变异
        b.核心操作为"out_buf[i] = interesting_8[j]"，完成字节的替换操作
        c.然后依旧是进行common_fuzz_stuff+字节复位的操作
     2.interest 16/8阶段：
        a.以两个字节为单位进行替换变异
        b.需要去除异或、加减、单字节替换变异阶段的冗余
        c.考虑大小端序
     3.interest 32/8阶段：
        a.以四字节为单位进行替换变异，其余手法同interest 16/8
  */
  stage_name  = "interest 8/8";
  stage_short = "int8";
  stage_cur   = 0;
  stage_max   = len * sizeof(interesting_8);

  stage_val_type = STAGE_VAL_LE;

  orig_hit_cnt = new_hit_cnt;

  /* Setting 8-bit integers. */

  for (i = 0; i < len; i++) {

    u8 orig = out_buf[i];

    /* Let's consult the effector map... */

    if (!eff_map[EFF_APOS(i)]) {
      stage_max -= sizeof(interesting_8);
      continue;
    }

    stage_cur_byte = i;

    for (j = 0; j < sizeof(interesting_8); j++) {

      /* Skip if the value could be a product of bitflips or arithmetics. */

      if (could_be_bitflip(orig ^ (u8)interesting_8[j]) ||
          could_be_arith(orig, (u8)interesting_8[j], 1)) {
        stage_max--;
        continue;
      }

      stage_cur_val = interesting_8[j];
      out_buf[i] = interesting_8[j];

      if (common_fuzz_stuff(argv, out_buf, len))
          goto abandon_entry;

      out_buf[i] = orig;
      stage_cur++;

    }

  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_INTEREST8]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_INTEREST8] += stage_max;

  /* Setting 16-bit integers, both endians. */

  if (no_arith || len < 2)
      goto skip_interest;

  stage_name  = "interest 16/8";
  stage_short = "int16";
  stage_cur   = 0;
  stage_max   = 2 * (len - 1) * (sizeof(interesting_16) >> 1);

  orig_hit_cnt = new_hit_cnt;

  for (i = 0; i < len - 1; i++) {

    u16 orig = *(u16*)(out_buf + i);

    /* Let's consult the effector map... */

    if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
      stage_max -= sizeof(interesting_16);
      continue;
    }

    stage_cur_byte = i;

    for (j = 0; j < sizeof(interesting_16) / 2; j++) {

      stage_cur_val = interesting_16[j];

      /* Skip if this could be a product of a bitflip, arithmetics,
         or single-byte interesting value insertion. */

      if (!could_be_bitflip(orig ^ (u16)interesting_16[j]) &&
          !could_be_arith(orig, (u16)interesting_16[j], 2) &&
          !could_be_interest(orig, (u16)interesting_16[j], 2, 0)) {

        stage_val_type = STAGE_VAL_LE;

        *(u16*)(out_buf + i) = interesting_16[j];

        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else stage_max--;

      if ((u16)interesting_16[j] != SWAP16(interesting_16[j]) &&
          !could_be_bitflip(orig ^ SWAP16(interesting_16[j])) &&
          !could_be_arith(orig, SWAP16(interesting_16[j]), 2) &&
          !could_be_interest(orig, SWAP16(interesting_16[j]), 2, 1)) {

        stage_val_type = STAGE_VAL_BE;

        *(u16*)(out_buf + i) = SWAP16(interesting_16[j]);
        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else stage_max--;

    }

    *(u16*)(out_buf + i) = orig;

  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_INTEREST16]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_INTEREST16] += stage_max;

  if (len < 4) goto skip_interest;

  /* Setting 32-bit integers, both endians. */

  stage_name  = "interest 32/8";
  stage_short = "int32";
  stage_cur   = 0;
  stage_max   = 2 * (len - 3) * (sizeof(interesting_32) >> 2);

  orig_hit_cnt = new_hit_cnt;

  for (i = 0; i < len - 3; i++) {

    u32 orig = *(u32*)(out_buf + i);

    /* Let's consult the effector map... */

    if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
        !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
      stage_max -= sizeof(interesting_32) >> 1;
      continue;
    }

    stage_cur_byte = i;

    for (j = 0; j < sizeof(interesting_32) / 4; j++) {

      stage_cur_val = interesting_32[j];

      /* Skip if this could be a product of a bitflip, arithmetics,
         or word interesting value insertion. */

      if (!could_be_bitflip(orig ^ (u32)interesting_32[j]) &&
          !could_be_arith(orig, interesting_32[j], 4) &&
          !could_be_interest(orig, interesting_32[j], 4, 0)) {

        stage_val_type = STAGE_VAL_LE;

        *(u32*)(out_buf + i) = interesting_32[j];

        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else stage_max--;

      if ((u32)interesting_32[j] != SWAP32(interesting_32[j]) &&
          !could_be_bitflip(orig ^ SWAP32(interesting_32[j])) &&
          !could_be_arith(orig, SWAP32(interesting_32[j]), 4) &&
          !could_be_interest(orig, SWAP32(interesting_32[j]), 4, 1)) {

        stage_val_type = STAGE_VAL_BE;

        *(u32*)(out_buf + i) = SWAP32(interesting_32[j]);
        if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
        stage_cur++;

      } else stage_max--;

    }

    *(u32*)(out_buf + i) = orig;

  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_INTEREST32]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_INTEREST32] += stage_max;

skip_interest:

DICTIONARY STUFF阶段

  /********************
   * DICTIONARY STUFF *
   ********************/

  /* 从这个阶段开始，就接近deterministic fuzzing的尾声了，本阶段主要基于用户提供的
     extras token来进行一定的变异，变异规则为替换或者插入。这些extras token的生成
     及加载过程参考初始化部分的maybe_add_auto、load_extras、save_auto这三个函数
     
     用户提供的tokens，是在词典文件中设置并通过-x选项指定的，如果没有则跳过相应的子阶
     段；阶段开始会判断extras_cnt的值，如果为0，说明用户没有提供extra token，则直
     接跳过这个阶段的变异
  */
  if (!extras_cnt) 
      goto skip_user_extras;


  /* Overwrite with user-supplied extras. */
  /* DICTIONARY STUFF有3个阶段，第一个是user extras (over)，这部分主要是将用户
     提供的tokens依次与out_buf进行替换：
     1.stage_max设置为extras_cnt * len
     2.外层循环out_buf每个字节，内层循环token，并依次在当前字节开始处进行替换。然后
       调用common_fuzz_stuff
     3.以下几种情况会跳过extras替换
        a.extras_cnt > MAX_DET_EXTRAS（设定为200）
        b.没有空间插入有效载荷，即[out_but[i], len] < len(token)
        c.eff_map在整个token替换的跨度区域没有设置有效字节
     4.由于extras token是按照从小到大的顺序排好序的，所以这里采用了在外部循环确定的
       特定偏移量处恢复写入之间的缓冲区
  */
  stage_name  = "user extras (over)";
  stage_short = "ext_UO";
  stage_cur   = 0;
  stage_max   = extras_cnt * len;

  stage_val_type = STAGE_VAL_NONE;

  orig_hit_cnt = new_hit_cnt;

  for (i = 0; i < len; i++) {

    u32 last_len = 0;

    stage_cur_byte = i;

    /* Extras are sorted by size, from smallest to largest. This means
       that we don't have to worry about restoring the buffer in
       between writes at a particular offset determined by the outer
       loop. */

    for (j = 0; j < extras_cnt; j++) {

      /* Skip extras probabilistically if extras_cnt > MAX_DET_EXTRAS. Also
         skip them if there's no room to insert the payload, if the token
         is redundant, or if its entire span has no bytes set in the effector
         map. */

      if ((extras_cnt > MAX_DET_EXTRAS && UR(extras_cnt) >= MAX_DET_EXTRAS) ||
          extras[j].len > len - i ||
          !memcmp(extras[j].data, out_buf + i, extras[j].len) ||
          !memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, extras[j].len))) {

        stage_max--;
        continue;
      }

      last_len = extras[j].len;
      memcpy(out_buf + i, extras[j].data, last_len);

      if (common_fuzz_stuff(argv, out_buf, len)) 
          goto abandon_entry;

      stage_cur++;
    }

    /* Restore all the clobbered memory. */
    memcpy(out_buf + i, in_buf + i, last_len);
  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_EXTRAS_UO]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_EXTRAS_UO] += stage_max;


  /* Insertion of user-supplied extras. */
  /* user extras (insert)阶段主要是将用户提供的token插入到out_buf中的策略：
     1.这个阶段没有过滤掉eff_map字节无效等情况，也就是说不会跳过token插入的阶段
     2.这里初始化了一块缓冲区ex_tmp，而不是直接对out_buf进行，所以只有Insert token和
       copy tail的过程，没有restore的过程
     3.copy tail之后，进行common_fuzz_stuff     
  */
  stage_name  = "user extras (insert)";
  stage_short = "ext_UI";
  stage_cur   = 0;
  stage_max   = extras_cnt * (len + 1);

  orig_hit_cnt = new_hit_cnt;

  ex_tmp = ck_alloc(len + MAX_DICT_FILE);

  for (i = 0; i <= len; i++) {

    stage_cur_byte = i;

    for (j = 0; j < extras_cnt; j++) {

      if (len + extras[j].len > MAX_FILE) {
        stage_max--; 
        continue;
      }

      /* Insert token */
      memcpy(ex_tmp + i, extras[j].data, extras[j].len);

      /* Copy tail */
      memcpy(ex_tmp + i + extras[j].len, out_buf + i, len - i);

      if (common_fuzz_stuff(argv, ex_tmp, len + extras[j].len)) {
        ck_free(ex_tmp);
        goto abandon_entry;
      }

      stage_cur++;
    }

    /* Copy head */
    ex_tmp[i] = out_buf[i];
  }

  ck_free(ex_tmp);

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_EXTRAS_UI]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_EXTRAS_UI] += stage_max;

skip_user_extras:

  if (!a_extras_cnt)
      goto skip_extras;


  /* 最后是auto extras (over)阶段，该阶段和 user extras (over)类似，只不过是将
     extras[] 换成了 a_extras[]
  */
  stage_name  = "auto extras (over)";
  stage_short = "ext_AO";
  stage_cur   = 0;
  stage_max   = MIN(a_extras_cnt, USE_AUTO_EXTRAS) * len;

  stage_val_type = STAGE_VAL_NONE;

  orig_hit_cnt = new_hit_cnt;

  for (i = 0; i < len; i++) {

    u32 last_len = 0;

    stage_cur_byte = i;

    for (j = 0; j < MIN(a_extras_cnt, USE_AUTO_EXTRAS); j++) {

      /* See the comment in the earlier code; extras are sorted by size. */

      if (a_extras[j].len > len - i ||
          !memcmp(a_extras[j].data, out_buf + i, a_extras[j].len) ||
          !memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, a_extras[j].len))) {

        stage_max--;
        continue;
      }

      last_len = a_extras[j].len;
      memcpy(out_buf + i, a_extras[j].data, last_len);

      if (common_fuzz_stuff(argv, out_buf, len)) 
          goto abandon_entry;

      stage_cur++;
    }

    /* Restore all the clobbered memory. */
    memcpy(out_buf + i, in_buf + i, last_len);
  }

  new_hit_cnt = queued_paths + unique_crashes;

  stage_finds[STAGE_EXTRAS_AO]  += new_hit_cnt - orig_hit_cnt;
  stage_cycles[STAGE_EXTRAS_AO] += stage_max;


skip_extras:
  /* If we made this to here without jumping to havoc_stage or abandon_entry,
     we're properly done with deterministic steps and can mark it as such
     in the .state/ directory. */

  /* 如果在没有跳转到havoc_stage或者abandon_entry的情况下会来到这里，说明我们已经正确地完
     成了确定性fuzz步骤（deterministic steps），为此我们可以标记出来（例如在.state/目录中）
  */
  if (!queue_cur->passed_det) 
      mark_as_det_done(queue_cur);

RANDOM HAVOC（随机毁灭）阶段

  /****************
   * RANDOM HAVOC *
   ****************/

  /* 对于非dumb_mode的主fuzzer来说，完成了上述deterministic fuzzing后，便会进入此阶段；
     对于dumb mode或者从fuzzer来说，则是直接从这一阶段开始。
     RANDOM HAVOC（随机毁灭），这个阶段充满了随机性，它包含了多轮变异，每一轮都应用多种方式
     进行组合
  */ 

havoc_stage:

  stage_cur_byte = -1;

  /* The havoc stage mutation code is also invoked when splicing files; if the
     splice_cycle variable is set, generate different descriptions and such. */

  /* 1.判断是否设置了splice_cycle
        a.若设置了，标记此阶段为"havoc"
        b.若没设置，标记此阶段为"splice"
     2.设置stage_max的值（HAVOC_MIN设定为16）
     3.设置orig_hit_cnt、havoc_queued的值（这2个均为fuzz_one的局部变量）
  */
  if (!splice_cycle) {

    stage_name  = "havoc";
    stage_short = "havoc";
    stage_max   = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) *
                  perf_score / havoc_div / 100;
      
  } else {

    static u8 tmp[32];

    perf_score = orig_perf;

    sprintf(tmp, "splice %u", splice_cycle);
    stage_name  = tmp;
    stage_short = "splice";
    stage_max   = SPLICE_HAVOC * perf_score / havoc_div / 100;
  }

  if (stage_max < HAVOC_MIN) 
      stage_max = HAVOC_MIN;

  temp_len = len;

  orig_hit_cnt = queued_paths + unique_crashes;
  havoc_queued = queued_paths;


  /* We essentially just do several thousand runs (depending on perf_score)
     where we take the input file and make random stacked tweaks. */

  /* 1.外层循环stage_max轮stage（最小为16）
     2.内层循环use_stacking轮，即一次stage中变化的次数（这是个随机的值，范围在2~14之间）
     3.进入switch语句，分支条件根据extras的数量随机生成一个数（范围在0~16或0~14之间，其中case15和case16
       只有存在extras时才有可能触发）：
        case0: 
        case1:  随机选中interesting_8[]中的某个byte随机替换out_buf中的某个byte
        case2:  随机选中interesting_16[]中的某个word随机替换out_buf中的某个word（大小端序随机选择）
        case3:  随机选中interesting_32[]中的某个dword随机替换out_buf中的某个dword（大小端序随机选择）
        case4:  随机选中out_buf中的某个byte，减去一个随机的值（范围1~35）
        case5:  随机选中out_buf中的某个byte，加上一个随机的值（范围1~35）
        case6:  随机选中out_buf中的某个word，减去一个随机的值（大小端随机选择）
        case7:  随机选中out_buf中的某个word，加上一个随机的值（大小端随机选择）
        case8:  随机选中out_buf中的某个dword，减去一个随机的值（大小端随机选择）
        case9:  随机选中out_buf中的某个dword，加上一个随机的值（大小端随机选择）
        case10: 随机选中out_buf中的某个byte与范围在1~255中的一个随机值进行异或
        case11: 随机选中out_buf中的某个byte进行删除
        case12: 同case11，稍微增加一丢丢删除byte的概率，从而使得case更加精炼
        case13: 随机选中out_buf中的某个位置，插入随机长度的内容：
                a.75%的概率，插入一段out_buf中随机选择的一段内容
                b.25%的概率，插入一段相同的随机数字（50%概率是随机生成的，50%概率是从out_buf中随机选取的一个byte）
        case14: 随机选中out_buf中的某个位置，覆盖随机长度的内容：
                a.75%的概率，覆盖一段out_buf中随机选择的一段内容
                b.25%的概率，覆盖一段相同的随机数字（50%概率是随机生成的，50%概率是从out_buf中随机选取的一个byte）
        case15: 随机选中out_buf中的某个位置，覆盖成extra token（token来自用户提供的extras和自动生成的a_extras）
        case16: 随机选中out_buf中的某个位置，插入extra token（token来自用户提供的extras和自动生成的a_extras）
  */
  for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {

    u32 use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2));

    stage_cur_val = use_stacking;
 
    for (i = 0; i < use_stacking; i++) {

      switch (UR(15 + ((extras_cnt + a_extras_cnt) ? 2 : 0))) {

        case 0:

          /* Flip a single bit somewhere. Spooky! */

          FLIP_BIT(out_buf, UR(temp_len << 3));
          break;

        case 1: 

          /* Set byte to interesting value. */

          out_buf[UR(temp_len)] = interesting_8[UR(sizeof(interesting_8))];
          break;

        case 2:

          /* Set word to interesting value, randomly choosing endian. */

          if (temp_len < 2) break;

          if (UR(2)) {

            *(u16*)(out_buf + UR(temp_len - 1)) =
              interesting_16[UR(sizeof(interesting_16) >> 1)];

          } else {

            *(u16*)(out_buf + UR(temp_len - 1)) = SWAP16(
              interesting_16[UR(sizeof(interesting_16) >> 1)]);

          }

          break;

        case 3:

          /* Set dword to interesting value, randomly choosing endian. */

          if (temp_len < 4) break;

          if (UR(2)) {
  
            *(u32*)(out_buf + UR(temp_len - 3)) =
              interesting_32[UR(sizeof(interesting_32) >> 2)];

          } else {

            *(u32*)(out_buf + UR(temp_len - 3)) = SWAP32(
              interesting_32[UR(sizeof(interesting_32) >> 2)]);

          }

          break;

        case 4:

          /* Randomly subtract from byte. */

          out_buf[UR(temp_len)] -= 1 + UR(ARITH_MAX);
          break;

        case 5:

          /* Randomly add to byte. */

          out_buf[UR(temp_len)] += 1 + UR(ARITH_MAX);
          break;

        case 6:

          /* Randomly subtract from word, random endian. */

          if (temp_len < 2) break;

          if (UR(2)) {

            u32 pos = UR(temp_len - 1);

            *(u16*)(out_buf + pos) -= 1 + UR(ARITH_MAX);

          } else {

            u32 pos = UR(temp_len - 1);
            u16 num = 1 + UR(ARITH_MAX);

            *(u16*)(out_buf + pos) =
              SWAP16(SWAP16(*(u16*)(out_buf + pos)) - num);

          }

          break;

        case 7:

          /* Randomly add to word, random endian. */

          if (temp_len < 2) break;

          if (UR(2)) {

            u32 pos = UR(temp_len - 1);

            *(u16*)(out_buf + pos) += 1 + UR(ARITH_MAX);

          } else {

            u32 pos = UR(temp_len - 1);
            u16 num = 1 + UR(ARITH_MAX);

            *(u16*)(out_buf + pos) =
              SWAP16(SWAP16(*(u16*)(out_buf + pos)) + num);

          }

          break;

        case 8:

          /* Randomly subtract from dword, random endian. */

          if (temp_len < 4) break;

          if (UR(2)) {

            u32 pos = UR(temp_len - 3);

            *(u32*)(out_buf + pos) -= 1 + UR(ARITH_MAX);

          } else {

            u32 pos = UR(temp_len - 3);
            u32 num = 1 + UR(ARITH_MAX);

            *(u32*)(out_buf + pos) =
              SWAP32(SWAP32(*(u32*)(out_buf + pos)) - num);

          }

          break;

        case 9:

          /* Randomly add to dword, random endian. */

          if (temp_len < 4) break;

          if (UR(2)) {

            u32 pos = UR(temp_len - 3);

            *(u32*)(out_buf + pos) += 1 + UR(ARITH_MAX);

          } else {

            u32 pos = UR(temp_len - 3);
            u32 num = 1 + UR(ARITH_MAX);

            *(u32*)(out_buf + pos) =
              SWAP32(SWAP32(*(u32*)(out_buf + pos)) + num);

          }

          break;

        case 10:

          /* Just set a random byte to a random value. Because,
             why not. We use XOR with 1-255 to eliminate the
             possibility of a no-op. */

          out_buf[UR(temp_len)] ^= 1 + UR(255);
          break;

        case 11 ... 12: {

            /* Delete bytes. We're making this a bit more likely
               than insertion (the next option) in hopes of keeping
               files reasonably small. */

            u32 del_from, del_len;

            if (temp_len < 2) break;

            /* Don't delete too much. */

            del_len = choose_block_len(temp_len - 1);

            del_from = UR(temp_len - del_len + 1);

            memmove(out_buf + del_from, out_buf + del_from + del_len,
                    temp_len - del_from - del_len);

            temp_len -= del_len;

            break;

          }

        case 13:

          if (temp_len + HAVOC_BLK_XL < MAX_FILE) {

            /* Clone bytes (75%) or insert a block of constant bytes (25%). */

            u8  actually_clone = UR(4);
            u32 clone_from, clone_to, clone_len;
            u8* new_buf;

            if (actually_clone) {

              clone_len  = choose_block_len(temp_len);
              clone_from = UR(temp_len - clone_len + 1);

            } else {

              clone_len = choose_block_len(HAVOC_BLK_XL);
              clone_from = 0;

            }

            clone_to   = UR(temp_len);

            new_buf = ck_alloc_nozero(temp_len + clone_len);

            /* Head */

            memcpy(new_buf, out_buf, clone_to);

            /* Inserted part */

            if (actually_clone)
              memcpy(new_buf + clone_to, out_buf + clone_from, clone_len);
            else
              memset(new_buf + clone_to,
                     UR(2) ? UR(256) : out_buf[UR(temp_len)], clone_len);

            /* Tail */
            memcpy(new_buf + clone_to + clone_len, out_buf + clone_to,
                   temp_len - clone_to);

            ck_free(out_buf);
            out_buf = new_buf;
            temp_len += clone_len;

          }

          break;

        case 14: {

            /* Overwrite bytes with a randomly selected chunk (75%) or fixed
               bytes (25%). */

            u32 copy_from, copy_to, copy_len;

            if (temp_len < 2) break;

            copy_len  = choose_block_len(temp_len - 1);

            copy_from = UR(temp_len - copy_len + 1);
            copy_to   = UR(temp_len - copy_len + 1);

            if (UR(4)) {

              if (copy_from != copy_to)
                memmove(out_buf + copy_to, out_buf + copy_from, copy_len);

            } else memset(out_buf + copy_to,
                          UR(2) ? UR(256) : out_buf[UR(temp_len)], copy_len);

            break;

          }

        /* Values 15 and 16 can be selected only if there are any extras
           present in the dictionaries. */

        case 15: {

            /* Overwrite bytes with an extra. */

            if (!extras_cnt || (a_extras_cnt && UR(2))) {

              /* No user-specified extras or odds in our favor. Let's use an
                 auto-detected one. */

              u32 use_extra = UR(a_extras_cnt);
              u32 extra_len = a_extras[use_extra].len;
              u32 insert_at;

              if (extra_len > temp_len) break;

              insert_at = UR(temp_len - extra_len + 1);
              memcpy(out_buf + insert_at, a_extras[use_extra].data, extra_len);

            } else {

              /* No auto extras or odds in our favor. Use the dictionary. */

              u32 use_extra = UR(extras_cnt);
              u32 extra_len = extras[use_extra].len;
              u32 insert_at;

              if (extra_len > temp_len) break;

              insert_at = UR(temp_len - extra_len + 1);
              memcpy(out_buf + insert_at, extras[use_extra].data, extra_len);

            }

            break;

          }

        case 16: {

            u32 use_extra, extra_len, insert_at = UR(temp_len + 1);
            u8* new_buf;

            /* Insert an extra. Do the same dice-rolling stuff as for the
               previous case. */

            if (!extras_cnt || (a_extras_cnt && UR(2))) {

              use_extra = UR(a_extras_cnt);
              extra_len = a_extras[use_extra].len;

              if (temp_len + extra_len >= MAX_FILE) break;

              new_buf = ck_alloc_nozero(temp_len + extra_len);

              /* Head */
              memcpy(new_buf, out_buf, insert_at);

              /* Inserted part */
              memcpy(new_buf + insert_at, a_extras[use_extra].data, extra_len);

            } else {

              use_extra = UR(extras_cnt);
              extra_len = extras[use_extra].len;

              if (temp_len + extra_len >= MAX_FILE) break;

              new_buf = ck_alloc_nozero(temp_len + extra_len);

              /* Head */
              memcpy(new_buf, out_buf, insert_at);

              /* Inserted part */
              memcpy(new_buf + insert_at, extras[use_extra].data, extra_len);

            }

            /* Tail */
            memcpy(new_buf + insert_at + extra_len, out_buf + insert_at,
                   temp_len - insert_at);

            ck_free(out_buf);
            out_buf   = new_buf;
            temp_len += extra_len;

            break;
          }
      }
    }

     
    /* 1.在进行完循环叠加变换后，调用common_fuzz_stuff对变换后的结果进行fuzz
       2.恢复out_buf的内容
       3.如果fuzz后，queued_paths和havoc_queued不一样了，说明发现了新路径；在范围允许
         的情况下，更新stage_max、perf_score以及havoc_queued（注意这个是局部变量）
       4.在经历了如此多的deterministic变异和随机变异后，原本的case或许早已面目全非，在这
         个过程中充满了大量的随机性，这也是fuzzing过程中的不可控因素，也因此，建议在fuzz
         的时候开多个fuzzer同时进行，因为fuzz的结果往往并不相同。也因此，在服务器上跑fuzz
         往往需要的是更多的核心数
    */
    if (common_fuzz_stuff(argv, out_buf, temp_len))
      goto abandon_entry;

    if (temp_len < len) 
        out_buf = ck_realloc(out_buf, len);
    temp_len = len;
    memcpy(out_buf, in_buf, len);

    if (queued_paths != havoc_queued) {

      if (perf_score <= HAVOC_MAX_MULT * 100) {
        stage_max  *= 2;
        perf_score *= 2;
      }

      havoc_queued = queued_paths;
    }
  }

  new_hit_cnt = queued_paths + unique_crashes;

  if (!splice_cycle) {
    stage_finds[STAGE_HAVOC]  += new_hit_cnt - orig_hit_cnt;
    stage_cycles[STAGE_HAVOC] += stage_max;
  } else {
    stage_finds[STAGE_SPLICE]  += new_hit_cnt - orig_hit_cnt;
    stage_cycles[STAGE_SPLICE] += stage_max;
  }

SPLICING阶段

#ifndef IGNORE_FINDS
   /************
   * SPLICING *
   ************/

  /* This is a last-resort strategy triggered by a full round with no findings.
     It takes the current input file, randomly selects another input, and
     splices them together at some offset, then relies on the havoc
     code to mutate that blob. */

  /* 如果在经历过RANDOM HAVOC阶段后没有什么效果，那么就会进入到SPLICING阶段，这个阶段会尝试
     将当前的case与一个随机选中的case以某个偏移量拼接在一起，然后再次进入RANDOM HAVOC阶段 */

retry_splicing:

  if (use_splicing && splice_cycle++ < SPLICE_CYCLES &&
      queued_paths > 1 && queue_cur->len > 1) {

    struct queue_entry* target;
    u32 tid, split_at;
    u8* new_buf;
    s32 f_diff, l_diff;

    /* First of all, if we've modified in_buf for havoc, let's clean that
       up... */

    if (in_buf != orig_in) {
      ck_free(in_buf);
      in_buf = orig_in;
      len = queue_cur->len;
    }

    /* Pick a random queue entry and seek to it. Don't splice with yourself. */

    do { 
        tid = UR(queued_paths); 
    } while (tid == current_entry);

    splicing_with = tid;
    target = queue;

    while (tid >= 100) { 
        target = target->next_100; tid -= 100;
    }
    while (tid--) 
        target = target->next;

    /* Make sure that the target has a reasonable length. */

    while (target && (target->len < 2 || target == queue_cur)) {
      target = target->next;
      splicing_with++;
    }

    if (!target) 
        goto retry_splicing;

    /* Read the testcase into a new buffer. */

    fd = open(target->fname, O_RDONLY);

    if (fd < 0) 
        PFATAL("Unable to open '%s'", target->fname);

    new_buf = ck_alloc_nozero(target->len);

    ck_read(fd, new_buf, target->len, target->fname);

    close(fd);

    /* Find a suitable splicing location, somewhere between the first and
       the last differing byte. Bail out if the difference is just a single
       byte or so. */

    locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff);

    if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) {
      ck_free(new_buf);
      goto retry_splicing;
    }

    /* Split somewhere between the first and last differing byte. */

    split_at = f_diff + UR(l_diff - f_diff);

    /* Do the thing. */

    len = target->len;
    memcpy(new_buf, in_buf, split_at);
    in_buf = new_buf;

    ck_free(out_buf);
    out_buf = ck_alloc_nozero(len);
    memcpy(out_buf, in_buf, len);

    goto havoc_stage;

  }

#endif /* !IGNORE_FINDS */

  /* 如果有所发现，那么就会来到此处：
     1.设置ret_val为0
     2.设置splicing_with为-1，即不与其它case进行拼接
     3.对于当前项case信息进行更新：
        a.如果queue_cur通过了评估，且queue_cur->was_fuzzed值为0，则将该值置1，然后
          令计数器pending_not_fuzzed减1
        b.如果queue_cur还是favored的，那么pending_favored计数器减1
     4.最后做一些清理回收的操作，然后返回ret_val
     5.至此，fuzz_one结束
  */
  ret_val = 0;

abandon_entry:

  splicing_with = -1;

  /* Update pending_not_fuzzed count if we made it through the calibration
     cycle and have not seen this entry before. */

  if (!stop_soon && !queue_cur->cal_failed && !queue_cur->was_fuzzed) {
    queue_cur->was_fuzzed = 1;
    pending_not_fuzzed--;
    if (queue_cur->favored) 
        pending_favored--;
  }

  munmap(orig_in, queue_cur->len);

  if (in_buf != orig_in) 
      ck_free(in_buf);
  ck_free(out_buf);
  ck_free(eff_map);

  return ret_val;

#undef FLIP_BIT

}

交互运作

文章的最后，附上一张AFL执行时的交互工作图：