前言 阅读AFL源码是深入理解Fuzz的第一步,也是为日后对AFL进行魔改或打造自己的Fuzz工具打下基础,本篇从插桩编译开始,一步步了解AFL进行Fuzz的完整流程。
afl-gcc.c源码分析 概述
AFL(普通)插桩部分源码主要有3个:afl-gcc.c、afl-as.h、afl-as.c
本质上afl-gcc是对gcc/clang的一个封装(wrapper),通过对程序的不同分支进行插桩,从而记录程序的执行路径,检测样本的覆盖率等程序运行情况的反馈信息
为了阅读方便,本篇及之后的分析均保留源码本身,并将分析以注释的方式标记在源码附近,同时根据情况修剪源码中原本的注释
/* Key variables: file-scope state shared by main(), find_as() and
   edit_params(). */

/* Directory passed to the compiler after "-B"; chosen by find_as(). */
static u8 *as_path;

/* Argument vector assembled by edit_params() and handed to execvp(). */
static u8 **cc_params;

/* Index of the next free slot in cc_params. Starts at 1 because slot 0 is
   filled separately with the name of the real compiler. */
static u32 cc_par_cnt = 1;

/* Set by main() when stderr is not a terminal or AFL_QUIET is set. */
static u8 be_quiet;

/* Set by edit_params() when the wrapper was invoked as afl-clang*. */
static u8 clang_mode;
main 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 int main (int argc, char ** argv) { if (isatty(2 ) && !getenv("AFL_QUIET" )) { SAYF(cCYA "afl-cc " cBRI VERSION cRST " by \n" ); } else be_quiet = 1 ; if (argc < 2 ) { SAYF("\n" "This is a helper application for afl-fuzz. It serves as a drop-in replacement\n" "for gcc or clang, letting you recompile third-party code with the required\n" "runtime instrumentation. A common use pattern would be one of the following:\n\n" " CC=%s/afl-gcc ./configure\n" " CXX=%s/afl-g++ ./configure\n\n" "You can specify custom next-stage toolchain via AFL_CC, AFL_CXX, and AFL_AS.\n" "Setting AFL_HARDEN enables hardening optimizations in the compiled code.\n\n" , BIN_PATH, BIN_PATH); exit (1 ); } find_as(argv[0 ]); edit_params(argc, argv); execvp(cc_params[0 ], (char **)cc_params); FATAL("Oops, failed to execute '%s' - check your PATH" , cc_params[0 ]); return 0 ; }
删去主函数中不值得关注的部分,我们可以做一个简化,如下所示:
1 2 3 4 5 6 7 int main (int argc, char ** argv) { find_as(argv[0 ]); edit_params(argc, argv); execvp(cc_params[0 ], (char **)cc_params); return 0 ; }
find_as 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 static void find_as (u8* argv0) { u8 *afl_path = getenv("AFL_PATH" ); u8 *slash, *tmp; if (afl_path) { tmp = alloc_printf("%s/as" , afl_path); if (!access(tmp, X_OK)) { as_path = afl_path; ck_free(tmp); return ; } ck_free(tmp); } slash = strrchr (argv0, '/' ); if (slash) { u8 *dir; *slash = 0 ; dir = ck_strdup(argv0); *slash = '/' ; tmp = alloc_printf("%s/afl-as" , dir); if (!access(tmp, X_OK)) { as_path = dir; ck_free(tmp); return ; } ck_free(tmp); ck_free(dir); } if (!access(AFL_PATH "/as" , X_OK)) { as_path = AFL_PATH; return ; } FATAL("Unable to find AFL wrapper binary for 'as'. Please set AFL_PATH" ); }
edit_params 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 static void edit_params (u32 argc, char ** argv) { u8 fortify_set = 0 , asan_set = 0 ; u8 *name; #if defined(__FreeBSD__) && defined(__x86_64__) u8 m32_set = 0 ; #endif cc_params = ck_alloc((argc + 128 ) * sizeof (u8*)); name = strrchr (argv[0 ], '/' ); if (!name) name = argv[0 ]; else name++; if (!strncmp (name, "afl-clang" , 9 )) { clang_mode = 1 ; setenv(CLANG_ENV_VAR, "1" , 1 ); if (!strcmp (name, "afl-clang++" )) { u8* alt_cxx = getenv("AFL_CXX" ); cc_params[0 ] = alt_cxx ? alt_cxx : (u8*)"clang++" ; } else { u8* alt_cc = getenv("AFL_CC" ); cc_params[0 ] = alt_cc ? alt_cc : (u8*)"clang" ; } } else { #ifdef __APPLE__ if (!strcmp (name, "afl-g++" )) cc_params[0 ] = getenv("AFL_CXX" ); else if (!strcmp (name, "afl-gcj" )) cc_params[0 ] = getenv("AFL_GCJ" ); else cc_params[0 ] = getenv("AFL_CC" ); if (!cc_params[0 ]) { SAYF("\n" cLRD "[-] " cRST "On Apple systems, 'gcc' is usually just a wrapper for clang. Please use the\n" " 'afl-clang' utility instead of 'afl-gcc'. 
If you really have GCC installed,\n" " set AFL_CC or AFL_CXX to specify the correct path to that compiler.\n" ); FATAL("AFL_CC or AFL_CXX required on MacOS X" ); } #else if (!strcmp (name, "afl-g++" )) { u8* alt_cxx = getenv("AFL_CXX" ); cc_params[0 ] = alt_cxx ? alt_cxx : (u8*)"g++" ; } else if (!strcmp (name, "afl-gcj" )) { u8* alt_cc = getenv("AFL_GCJ" ); cc_params[0 ] = alt_cc ? alt_cc : (u8*)"gcj" ; } else { u8* alt_cc = getenv("AFL_CC" ); cc_params[0 ] = alt_cc ? alt_cc : (u8*)"gcc" ; } #endif } while (--argc) { u8* cur = *(++argv); if (!strncmp (cur, "-B" , 2 )) { if (!be_quiet) WARNF("-B is already set, overriding" ); if (!cur[2 ] && argc > 1 ) { argc--; argv++; } continue ; } if (!strcmp (cur, "-integrated-as" )) continue ; if (!strcmp (cur, "-pipe" )) continue ; #if defined(__FreeBSD__) && defined(__x86_64__) if (!strcmp (cur, "-m32" )) m32_set = 1 ; #endif if (!strcmp (cur, "-fsanitize=address" ) || !strcmp (cur, "-fsanitize=memory" )) asan_set = 1 ; if (strstr (cur, "FORTIFY_SOURCE" )) fortify_set = 1 ; cc_params[cc_par_cnt++] = cur; } cc_params[cc_par_cnt++] = "-B" ; cc_params[cc_par_cnt++] = as_path; if (clang_mode) cc_params[cc_par_cnt++] = "-no-integrated-as" ; if (getenv("AFL_HARDEN" )) { cc_params[cc_par_cnt++] = "-fstack-protector-all" ; if (!fortify_set) cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2" ; } if (asan_set) { setenv("AFL_USE_ASAN" , "1" , 1 ); } else if (getenv("AFL_USE_ASAN" )) { if (getenv("AFL_USE_MSAN" )) FATAL("ASAN and MSAN are mutually exclusive" ); if (getenv("AFL_HARDEN" )) FATAL("ASAN and AFL_HARDEN are mutually exclusive" ); cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE" ; cc_params[cc_par_cnt++] = "-fsanitize=address" ; } else if (getenv("AFL_USE_MSAN" )) { if (getenv("AFL_USE_ASAN" )) FATAL("ASAN and MSAN are mutually exclusive" ); if (getenv("AFL_HARDEN" )) FATAL("MSAN and AFL_HARDEN are mutually exclusive" ); cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE" ; cc_params[cc_par_cnt++] = "-fsanitize=memory" ; } if 
(!getenv("AFL_DONT_OPTIMIZE" )) { #if defined(__FreeBSD__) && defined(__x86_64__) if (!clang_mode || !m32_set) cc_params[cc_par_cnt++] = "-g" ; #else cc_params[cc_par_cnt++] = "-g" ; #endif cc_params[cc_par_cnt++] = "-O3" ; cc_params[cc_par_cnt++] = "-funroll-loops" ; cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1" ; cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1" ; } if (getenv("AFL_NO_BUILTIN" )) { cc_params[cc_par_cnt++] = "-fno-builtin-strcmp" ; cc_params[cc_par_cnt++] = "-fno-builtin-strncmp" ; cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp" ; cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp" ; cc_params[cc_par_cnt++] = "-fno-builtin-memcmp" ; cc_params[cc_par_cnt++] = "-fno-builtin-strstr" ; cc_params[cc_par_cnt++] = "-fno-builtin-strcasestr" ; } cc_params[cc_par_cnt] = NULL ; }
execvp 执行编译命令,生成目标文件。这个函数看手册就行了
(普通)插桩流程
插桩的过程如上图所示,看上去就是在普通程序编译的过程中,将gcc替换成了afl-gcc。为了更好的理解这个过程,我们做如下操作:
打开afl-gcc.c,在edit_params之后添加如下代码,打印cc_params来查看实际执行的命令
1 2 3 for (int i = 0 ; i < sizeof (cc_params); i++) { printf ("\targ%d: %s\n" , i, cc_params[i]); }
然后执行
1 2 $ make $ sudo make install
查看打印的参数
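可以看到,afl-gcc.c帮我们添加了-B, /usr/local/lib/afl, -g等参数(结合上文edit_params的源码,在未设置AFL_DONT_OPTIMIZE时,-g之后还会追加-O3、-funroll-loops、-D__AFL_COMPILER=1和-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1)。这是因为在Linux机器上使用gcc进行编译的时候,默认会使用GNU as作为汇编器,因此这里使用“-B”参数让gcc优先到指定目录下查找汇编器,从而改用afl-as。之后afl-as读取并分析输入的.s文件,然后添加instrumentation trampoline 和 main payload,之后再调用GNU as,本质上afl-as也是一个对GNU as的wrapper。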
在下一篇,我们会继续分析与(普通)插桩相关的afl-as.c,去研究afl-as到底做了什么。
参考资料
hollk:AFL源码分析之afl-gcc.c详细注释
skr:sakuraのAFL源码全注释
Seebug:AFL 二三事——源码分析
AFL内部实现细节小记
AFL:afl-gcc.c
ScUpax0s:AFL源码阅读笔记之gcc与fuzz部分
HICOOKIE:AFL-Learning
简书:AFL源码分析