]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | //===-- msandr.cc ---------------------------------------------------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file is a part of MemorySanitizer. | |
11 | // | |
12 | // DynamoRio client for MemorySanitizer. | |
13 | // | |
14 | // MemorySanitizer requires that all program code is instrumented. Any memory | |
15 | // store that can turn an uninitialized value into an initialized value must be | |
16 | // observed by the tool, otherwise we risk reporting a false UMR. | |
17 | // | |
18 | // This also includes any libraries that the program depends on. | |
19 | // | |
20 | // In the case when rebuilding all program dependencies with MemorySanitizer is | |
21 | // problematic, an experimental MSanDR tool (the code you are currently looking | |
22 | // at) can be used. It is a DynamoRio-based tool that uses dynamic | |
23 | // instrumentation to | |
24 | // * Unpoison all memory stores. | |
25 | // * Unpoison TLS slots used by MemorySanitizer to pass function arguments and | |
26 | // return value shadow on anything that looks like a function call or a return | |
27 | // from a function. | |
28 | // | |
29 | // This tool does not detect the use of uninitialized values in uninstrumented | |
30 | // libraries. It merely gets rid of false positives by marking all data that | |
31 | // passes through uninstrumented code as fully initialized. | |
32 | //===----------------------------------------------------------------------===// | |
33 | ||
34 | #include <dr_api.h> | |
35 | #include <drutil.h> | |
36 | #include <drmgr.h> | |
37 | #include <drsyscall.h> | |
38 | ||
39 | #include <sys/mman.h> | |
40 | #include <sys/syscall.h> /* for SYS_mmap */ | |
41 | ||
42 | #include <string.h> | |
43 | ||
44 | // XXX: it seems setting macro in CMakeLists.txt does not work, | |
45 | // so manually set it here now. | |
46 | ||
47 | // Building msandr client for running in DynamoRIO hybrid mode, | |
48 | // which allows some module running natively. | |
49 | // TODO: turn it on by default when hybrid is stable enough | |
50 | // #define MSANDR_NATIVE_EXEC | |
51 | ||
52 | #ifndef MSANDR_NATIVE_EXEC | |
53 | #include <algorithm> | |
54 | #include <set> | |
55 | #include <string> | |
56 | #include <vector> | |
57 | #endif | |
58 | ||
// True iff every bit that is set in `mask` is also set in `var`.
#define TESTALL(mask, var) (((var) & (mask)) == (mask))
// True iff at least one bit that is set in `mask` is also set in `var`.
#define TESTANY(mask, var) (((var) & (mask)) != 0)
61 | ||
// Evaluates `condition`; when it is false, prints the stringified expression
// together with `file`:`line` through dr_printf() and then calls dr_abort().
// Wrapped in do/while(0) so that an invocation behaves like one statement.
// TODO: stacktrace
#define CHECK_IMPL(condition, file, line)                                  \
  do {                                                                     \
    if (!(condition)) {                                                    \
      dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line); \
      dr_abort();                                                          \
    }                                                                      \
  } while (0)

// Checks `condition` at the call site (there is no NDEBUG-style guard, so the
// check is always compiled in).
#define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__)
71 | ||
72 | #define VERBOSITY 0 | |
73 | ||
74 | // Building msandr client for standalone test that does not need to | |
75 | // run with msan build executables. Disable by default. | |
76 | // #define MSANDR_STANDALONE_TEST | |
77 | ||
78 | #define NUM_TLS_RETVAL 1 | |
79 | #define NUM_TLS_PARAM 6 | |
80 | ||
81 | #ifdef MSANDR_STANDALONE_TEST | |
82 | // For testing purposes, we map app to shadow memory at [0x100000, 0x200000). | |
83 | // Normally, the app starts at 0x400000: | |
84 | // 00400000-004e0000 r-xp 00000000 fc:00 524343 /bin/bash | |
85 | // so there should be no problem. | |
86 | # define SHADOW_MEMORY_BASE ((void *)0x100000) | |
87 | # define SHADOW_MEMORY_SIZE (0x100000) | |
88 | # define SHADOW_MEMORY_MASK (SHADOW_MEMORY_SIZE - 4 /* to avoid overflow */) | |
89 | #else | |
90 | // shadow memory range [0x200000000000, 0x400000000000) | |
91 | // assuming no app memory below 0x200000000000 | |
92 | # define SHADOW_MEMORY_MASK 0x3fffffffffffULL | |
93 | #endif /* MSANDR_STANDALONE_TEST */ | |
94 | ||
// Pointer-to-function type taken by __msan_set_indirect_call_wrapper().
typedef void *(*WrapperFn)(void *);

// Functions with C linkage that are declared here but not defined in this
// part of the file.
extern "C" {
void __msan_set_indirect_call_wrapper(WrapperFn wrapper);
void __msan_dr_is_initialized();
}
98 | ||
99 | namespace { | |
100 | ||
// Offsets used by the instrumentation routines to address the MSan parameter
// and return-value TLS slots. They are not assigned anywhere in this part of
// the file.
int msan_param_tls_offset;
int msan_retval_tls_offset;
103 | ||
104 | #ifndef MSANDR_NATIVE_EXEC | |
105 | class ModuleData { | |
106 | public: | |
107 | ModuleData(); | |
108 | ModuleData(const module_data_t *info); | |
109 | // Yes, we want default copy, assign, and dtor semantics. | |
110 | ||
111 | public: | |
112 | app_pc start_; | |
113 | app_pc end_; | |
114 | // Full path to the module. | |
115 | std::string path_; | |
116 | module_handle_t handle_; | |
117 | bool should_instrument_; | |
118 | bool executed_; | |
119 | }; | |
120 | ||
121 | // A vector of loaded modules sorted by module bounds. We lookup the current PC | |
122 | // in here from the bb event. This is better than an rb tree because the lookup | |
123 | // is faster and the bb event occurs far more than the module load event. | |
124 | std::vector<ModuleData> g_module_list; | |
125 | ||
126 | ModuleData::ModuleData() | |
127 | : start_(NULL), end_(NULL), path_(""), handle_(NULL), | |
128 | should_instrument_(false), executed_(false) { | |
129 | } | |
130 | ||
131 | ModuleData::ModuleData(const module_data_t *info) | |
132 | : start_(info->start), end_(info->end), path_(info->full_path), | |
133 | handle_(info->handle), | |
134 | // We'll check the black/white lists later and adjust this. | |
135 | should_instrument_(true), executed_(false) { | |
136 | } | |
137 | #endif /* !MSANDR_NATIVE_EXEC */ | |
138 | ||
// Function pointers that InitializeMSanCallbacks() fills in by looking the
// same-named symbols up through LookupCallback().
int (*__msan_get_retval_tls_offset)();
int (*__msan_get_param_tls_offset)();
void (*__msan_unpoison)(void *base, size_t size);
bool (*__msan_is_in_loader)();
143 | ||
144 | #ifdef MSANDR_STANDALONE_TEST | |
145 | uint mock_msan_retval_tls_offset; | |
146 | uint mock_msan_param_tls_offset; | |
147 | static int mock_msan_get_retval_tls_offset() { | |
148 | return (int)mock_msan_retval_tls_offset; | |
149 | } | |
150 | ||
151 | static int mock_msan_get_param_tls_offset() { | |
152 | return (int)mock_msan_param_tls_offset; | |
153 | } | |
154 | ||
// Standalone-test stand-in for __msan_unpoison: intentionally a no-op, so the
// memory at `base` is never touched.
static void mock_msan_unpoison(void *base, size_t size) {
  (void)base;
  (void)size;
}
158 | ||
// Standalone-test stand-in for __msan_is_in_loader: always answers "no".
static bool mock_msan_is_in_loader() {
  const bool in_loader = false;
  return in_loader;
}
162 | #endif /* MSANDR_STANDALONE_TEST */ | |
163 | ||
164 | static generic_func_t LookupCallback(module_data_t *app, const char *name) { | |
165 | #ifdef MSANDR_STANDALONE_TEST | |
166 | if (strcmp("__msan_get_retval_tls_offset", name) == 0) { | |
167 | return (generic_func_t)mock_msan_get_retval_tls_offset; | |
168 | } else if (strcmp("__msan_get_param_tls_offset", name) == 0) { | |
169 | return (generic_func_t)mock_msan_get_param_tls_offset; | |
170 | } else if (strcmp("__msan_unpoison", name) == 0) { | |
171 | return (generic_func_t)mock_msan_unpoison; | |
172 | } else if (strcmp("__msan_is_in_loader", name) == 0) { | |
173 | return (generic_func_t)mock_msan_is_in_loader; | |
174 | } | |
175 | CHECK(false); | |
176 | return NULL; | |
177 | #else /* !MSANDR_STANDALONE_TEST */ | |
178 | generic_func_t callback = dr_get_proc_address(app->handle, name); | |
179 | if (callback == NULL) { | |
180 | dr_printf("Couldn't find `%s` in %s\n", name, app->full_path); | |
181 | CHECK(callback); | |
182 | } | |
183 | return callback; | |
184 | #endif /* !MSANDR_STANDALONE_TEST */ | |
185 | } | |
186 | ||
187 | void InitializeMSanCallbacks() { | |
188 | module_data_t *app = dr_lookup_module_by_name(dr_get_application_name()); | |
189 | if (!app) { | |
190 | dr_printf("%s - oops, dr_lookup_module_by_name failed!\n", | |
191 | dr_get_application_name()); | |
192 | CHECK(app); | |
193 | } | |
194 | ||
195 | __msan_get_retval_tls_offset = (int (*)()) | |
196 | LookupCallback(app, "__msan_get_retval_tls_offset"); | |
197 | __msan_get_param_tls_offset = (int (*)()) | |
198 | LookupCallback(app, "__msan_get_param_tls_offset"); | |
199 | __msan_unpoison = (void(*)(void *, size_t)) | |
200 | LookupCallback(app, "__msan_unpoison"); | |
201 | __msan_is_in_loader = (bool (*)()) | |
202 | LookupCallback(app, "__msan_is_in_loader"); | |
203 | ||
204 | dr_free_module_data(app); | |
205 | } | |
206 | ||
207 | // FIXME: Handle absolute addresses and PC-relative addresses. | |
208 | // FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have | |
209 | // a zero base anyway. | |
210 | bool OperandIsInteresting(opnd_t opnd) { | |
211 | return (opnd_is_base_disp(opnd) && opnd_get_segment(opnd) != DR_SEG_FS && | |
212 | opnd_get_segment(opnd) != DR_SEG_GS); | |
213 | } | |
214 | ||
215 | bool WantToInstrument(instr_t *instr) { | |
216 | // TODO: skip push instructions? | |
217 | switch (instr_get_opcode(instr)) { | |
218 | // FIXME: support the instructions excluded below: | |
219 | case OP_rep_cmps: | |
220 | // f3 a6 rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx | |
221 | return false; | |
222 | } | |
223 | ||
224 | // Labels appear due to drutil_expand_rep_string() | |
225 | if (instr_is_label(instr)) | |
226 | return false; | |
227 | ||
228 | CHECK(instr_ok_to_mangle(instr) == true); | |
229 | ||
230 | if (instr_writes_memory(instr)) { | |
231 | for (int d = 0; d < instr_num_dsts(instr); d++) { | |
232 | opnd_t op = instr_get_dst(instr, d); | |
233 | if (OperandIsInteresting(op)) | |
234 | return true; | |
235 | } | |
236 | } | |
237 | ||
238 | return false; | |
239 | } | |
240 | ||
// Shorthands for inserting a meta instruction into `bb` before `at`. Both
// expect variables named `bb` (and, for PRE, `drcontext` inside `what`) to be
// in scope. Note that each expansion already ends with a semicolon.
//   PRE:  `what` is the suffix of an INSTR_CREATE_* constructor call.
//   PREF: `what` is an already-built instr_t *.
#define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what);
#define PREF(at, what) instrlist_meta_preinsert(bb, at, what);
243 | ||
244 | void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op, | |
245 | bool is_write) { | |
246 | bool need_to_restore_eflags = false; | |
247 | uint flags = instr_get_arith_flags(instr); | |
248 | // TODO: do something smarter with flags and spills in general? | |
249 | // For example, spill them only once for a sequence of instrumented | |
250 | // instructions that don't change/read flags. | |
251 | ||
252 | if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) { | |
253 | if (VERBOSITY > 1) | |
254 | dr_printf("Spilling eflags...\n"); | |
255 | need_to_restore_eflags = true; | |
256 | // TODO: Maybe sometimes don't need to 'seto'. | |
257 | // TODO: Maybe sometimes don't want to spill XAX here? | |
258 | // TODO: No need to spill XAX here if XAX is not used in the BB. | |
259 | dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); | |
260 | dr_save_arith_flags_to_xax(drcontext, bb, instr); | |
261 | dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3); | |
262 | dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); | |
263 | } | |
264 | ||
265 | #if 0 | |
266 | dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n", | |
267 | opnd_is_memory_reference(op), opnd_is_base_disp(op), | |
268 | opnd_is_base_disp(op) ? opnd_get_index(op) : -1, | |
269 | opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op), | |
270 | opnd_is_base_disp(op) ? opnd_get_disp(op) : -1); | |
271 | #endif | |
272 | ||
273 | reg_id_t R1; | |
274 | bool address_in_R1 = false; | |
275 | if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL && | |
276 | opnd_get_disp(op) == 0) { | |
277 | // If this is a simple access with no offset or index, we can just use the | |
278 | // base for R1. | |
279 | address_in_R1 = true; | |
280 | R1 = opnd_get_base(op); | |
281 | } else { | |
282 | // Otherwise, we need to compute the addr into R1. | |
283 | // TODO: reuse some spare register? e.g. r15 on x64 | |
284 | // TODO: might be used as a non-mem-ref register? | |
285 | R1 = DR_REG_XAX; | |
286 | } | |
287 | CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong. | |
288 | ||
289 | // Pick R2 from R8 to R15. | |
290 | // It's OK if the instr uses R2 elsewhere, since we'll restore it before instr. | |
291 | reg_id_t R2; | |
292 | for (R2 = DR_REG_R8; R2 <= DR_REG_R15; R2++) { | |
293 | if (!opnd_uses_reg(op, R2)) | |
294 | break; | |
295 | } | |
296 | CHECK((R2 <= DR_REG_R15) && R1 != R2); | |
297 | ||
298 | // Save the current values of R1 and R2. | |
299 | dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1); | |
300 | // TODO: Something smarter than spilling a "fixed" register R2? | |
301 | dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2); | |
302 | ||
303 | if (!address_in_R1) | |
304 | CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2)); | |
305 | PRE(instr, mov_imm(drcontext, opnd_create_reg(R2), | |
306 | OPND_CREATE_INT64(SHADOW_MEMORY_MASK))); | |
307 | PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2))); | |
308 | #ifdef MSANDR_STANDALONE_TEST | |
309 | PRE(instr, add(drcontext, opnd_create_reg(R1), | |
310 | OPND_CREATE_INT32(SHADOW_MEMORY_BASE))); | |
311 | #endif | |
312 | // There is no mov_st of a 64-bit immediate, so... | |
313 | opnd_size_t op_size = opnd_get_size(op); | |
314 | CHECK(op_size != OPSZ_NA); | |
315 | uint access_size = opnd_size_in_bytes(op_size); | |
316 | if (access_size <= 4 || op_size == OPSZ_PTR /* x64 support sign extension */) { | |
317 | instr_t *label = INSTR_CREATE_label(drcontext); | |
318 | opnd_t immed; | |
319 | if (op_size == OPSZ_PTR || op_size == OPSZ_4) | |
320 | immed = OPND_CREATE_INT32(0); | |
321 | else | |
322 | immed = opnd_create_immed_int((ptr_int_t) 0, op_size); | |
323 | // we check if target is 0 before write to reduce unnecessary memory stores. | |
324 | PRE(instr, cmp(drcontext, | |
325 | opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size), | |
326 | immed)); | |
327 | PRE(instr, jcc(drcontext, OP_je, opnd_create_instr(label))); | |
328 | PRE(instr, mov_st(drcontext, | |
329 | opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size), | |
330 | immed)); | |
331 | PREF(instr, label); | |
332 | } else { | |
333 | // FIXME: tail? | |
334 | for (uint ofs = 0; ofs < access_size; ofs += 4) { | |
335 | instr_t *label = INSTR_CREATE_label(drcontext); | |
336 | opnd_t immed = OPND_CREATE_INT32(0); | |
337 | PRE(instr, cmp(drcontext, OPND_CREATE_MEM32(R1, ofs), immed)); | |
338 | PRE(instr, jcc(drcontext, OP_je, opnd_create_instr(label))); | |
339 | PRE(instr, mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), immed)); | |
340 | PREF(instr, label) | |
341 | } | |
342 | } | |
343 | ||
344 | // Restore the registers and flags. | |
345 | dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1); | |
346 | dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2); | |
347 | ||
348 | // TODO: move aflags save/restore to per instr instead of per opnd | |
349 | if (need_to_restore_eflags) { | |
350 | if (VERBOSITY > 1) | |
351 | dr_printf("Restoring eflags\n"); | |
352 | // TODO: Check if it's reverse to the dr_restore_reg above and optimize. | |
353 | dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); | |
354 | dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3); | |
355 | dr_restore_arith_flags_from_xax(drcontext, bb, instr); | |
356 | dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); | |
357 | } | |
358 | ||
359 | // The original instruction is left untouched. The above instrumentation is just | |
360 | // a prefix. | |
361 | } | |
362 | ||
363 | void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) { | |
364 | #ifdef MSANDR_STANDALONE_TEST | |
365 | PRE(instr, | |
366 | mov_st(drcontext, | |
367 | opnd_create_far_base_disp(DR_SEG_GS /* DR's TLS */, | |
368 | DR_REG_NULL, DR_REG_NULL, | |
369 | 0, msan_retval_tls_offset, | |
370 | OPSZ_PTR), | |
371 | OPND_CREATE_INT32(0))); | |
372 | #else /* !MSANDR_STANDALONE_TEST */ | |
373 | # ifdef MSANDR_NATIVE_EXEC | |
374 | /* For optimized native exec, -mangle_app_seg and -private_loader are turned off, | |
375 | * so we can reference msan_retval_tls_offset directly. | |
376 | */ | |
377 | PRE(instr, | |
378 | mov_st(drcontext, | |
379 | opnd_create_far_base_disp(DR_SEG_FS, DR_REG_NULL, DR_REG_NULL, 0, | |
380 | msan_retval_tls_offset, OPSZ_PTR), | |
381 | OPND_CREATE_INT32(0))); | |
382 | # else /* !MSANDR_NATIVE_EXEC */ | |
383 | /* XXX: the code below only works if -mangle_app_seg and -private_loader, | |
384 | * which is turned off for optimized native exec | |
385 | */ | |
386 | dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); | |
387 | ||
388 | // Clobbers nothing except xax. | |
389 | bool res = | |
390 | dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX); | |
391 | CHECK(res); | |
392 | ||
393 | // TODO: unpoison more bytes? | |
394 | PRE(instr, | |
395 | mov_st(drcontext, OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset), | |
396 | OPND_CREATE_INT32(0))); | |
397 | ||
398 | dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); | |
399 | # endif /* !MSANDR_NATIVE_EXEC */ | |
400 | // The original instruction is left untouched. The above instrumentation is just | |
401 | // a prefix. | |
402 | #endif /* !MSANDR_STANDALONE_TEST */ | |
403 | } | |
404 | ||
405 | void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb, | |
406 | instr_t *instr) { | |
407 | #ifdef MSANDR_STANDALONE_TEST | |
408 | for (int i = 0; i < NUM_TLS_PARAM; ++i) { | |
409 | PRE(instr, | |
410 | mov_st(drcontext, | |
411 | opnd_create_far_base_disp(DR_SEG_GS /* DR's TLS */, | |
412 | DR_REG_NULL, DR_REG_NULL, | |
413 | 0, | |
414 | msan_param_tls_offset + | |
415 | i * sizeof(void *), | |
416 | OPSZ_PTR), | |
417 | OPND_CREATE_INT32(0))); | |
418 | } | |
419 | #else /* !MSANDR_STANDALONE_TEST */ | |
420 | # ifdef MSANDR_NATIVE_EXEC | |
421 | for (int i = 0; i < NUM_TLS_PARAM; ++i) { | |
422 | PRE(instr, | |
423 | mov_st(drcontext, | |
424 | opnd_create_far_base_disp(DR_SEG_FS, DR_REG_NULL, DR_REG_NULL, 0, | |
425 | msan_param_tls_offset + i*sizeof(void*), | |
426 | OPSZ_PTR), | |
427 | OPND_CREATE_INT32(0))); | |
428 | } | |
429 | # else /* !MSANDR_NATIVE_EXEC */ | |
430 | /* XXX: the code below only works if -mangle_app_seg and -private_loader, | |
431 | * which is turned off for optimized native exec | |
432 | */ | |
433 | dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); | |
434 | ||
435 | // Clobbers nothing except xax. | |
436 | bool res = | |
437 | dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX); | |
438 | CHECK(res); | |
439 | ||
440 | // TODO: unpoison more bytes? | |
441 | for (int i = 0; i < NUM_TLS_PARAM; ++i) { | |
442 | PRE(instr, | |
443 | mov_st(drcontext, OPND_CREATE_MEMPTR(DR_REG_XAX, msan_param_tls_offset + | |
444 | i * sizeof(void *)), | |
445 | OPND_CREATE_INT32(0))); | |
446 | } | |
447 | ||
448 | dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); | |
449 | # endif /* !MSANDR_NATIVE_EXEC */ | |
450 | // The original instruction is left untouched. The above instrumentation is just | |
451 | // a prefix. | |
452 | #endif /* !MSANDR_STANDALONE_TEST */ | |
453 | } | |
454 | ||
455 | #ifndef MSANDR_NATIVE_EXEC | |
456 | // For use with binary search. Modules shouldn't overlap, so we shouldn't have | |
457 | // to look at end_. If that can happen, we won't support such an application. | |
458 | bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) { | |
459 | return left.start_ < right.start_; | |
460 | } | |
461 | ||
462 | // Look up the module containing PC. Should be relatively fast, as its called | |
463 | // for each bb instrumentation. | |
464 | ModuleData *LookupModuleByPC(app_pc pc) { | |
465 | ModuleData fake_mod_data; | |
466 | fake_mod_data.start_ = pc; | |
467 | std::vector<ModuleData>::iterator it = | |
468 | lower_bound(g_module_list.begin(), g_module_list.end(), fake_mod_data, | |
469 | ModuleDataCompareStart); | |
470 | // if (it == g_module_list.end()) | |
471 | // return NULL; | |
472 | if (it == g_module_list.end() || pc < it->start_) | |
473 | --it; | |
474 | CHECK(it->start_ <= pc); | |
475 | if (pc >= it->end_) { | |
476 | // We're past the end of this module. We shouldn't be in the next module, | |
477 | // or lower_bound lied to us. | |
478 | ++it; | |
479 | CHECK(it == g_module_list.end() || pc < it->start_); | |
480 | return NULL; | |
481 | } | |
482 | ||
483 | // OK, we found the module. | |
484 | return &*it; | |
485 | } | |
486 | ||
// Policy for code that belongs to no known module: always instrument it.
bool ShouldInstrumentNonModuleCode() {
  return true;
}
488 | ||
489 | bool ShouldInstrumentModule(ModuleData *mod_data) { | |
490 | // TODO(rnk): Flags for blacklist would get wired in here. | |
491 | generic_func_t p = | |
492 | dr_get_proc_address(mod_data->handle_, "__msan_track_origins"); | |
493 | return !p; | |
494 | } | |
495 | ||
496 | bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) { | |
497 | ModuleData *mod_data = LookupModuleByPC(pc); | |
498 | if (pmod_data) | |
499 | *pmod_data = mod_data; | |
500 | if (mod_data != NULL) { | |
501 | // This module is on a blacklist. | |
502 | if (!mod_data->should_instrument_) { | |
503 | return false; | |
504 | } | |
505 | } else if (!ShouldInstrumentNonModuleCode()) { | |
506 | return false; | |
507 | } | |
508 | return true; | |
509 | } | |
510 | #endif /* !MSANDR_NATIVE_EXEC */ | |
511 | ||
512 | // TODO(rnk): Make sure we instrument after __msan_init. | |
513 | dr_emit_flags_t | |
514 | event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb, | |
515 | bool for_trace, bool translating) { | |
516 | #ifndef MSANDR_NATIVE_EXEC | |
517 | app_pc pc = dr_fragment_app_pc(tag); | |
518 | if (ShouldInstrumentPc(pc, NULL)) | |
519 | CHECK(drutil_expand_rep_string(drcontext, bb)); | |
520 | #else /* MSANDR_NATIVE_EXEC */ | |
521 | CHECK(drutil_expand_rep_string(drcontext, bb)); | |
522 | #endif /* MSANDR_NATIVE_EXEC */ | |
523 | return DR_EMIT_PERSISTABLE; | |
524 | } | |
525 | ||
526 | dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb, | |
527 | bool for_trace, bool translating) { | |
528 | app_pc pc = dr_fragment_app_pc(tag); | |
529 | #ifndef MSANDR_NATIVE_EXEC | |
530 | ModuleData *mod_data; | |
531 | ||
532 | if (!ShouldInstrumentPc(pc, &mod_data)) | |
533 | return DR_EMIT_PERSISTABLE; | |
534 | ||
535 | if (VERBOSITY > 1) | |
536 | dr_printf("============================================================\n"); | |
537 | if (VERBOSITY > 0) { | |
538 | std::string mod_path = (mod_data ? mod_data->path_ : "<no module, JITed?>"); | |
539 | if (mod_data && !mod_data->executed_) { | |
540 | mod_data->executed_ = true; // Nevermind this race. | |
541 | dr_printf("Executing from new module: %s\n", mod_path.c_str()); | |
542 | } | |
543 | dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc, | |
544 | mod_path.c_str(), translating ? "true" : "false"); | |
545 | if (mod_data) { | |
546 | // Match standard sanitizer trace format for free symbols. | |
547 | // #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) | |
548 | dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(), | |
549 | pc - mod_data->start_); | |
550 | } | |
551 | } | |
552 | #endif /* !MSANDR_NATIVE_EXEC */ | |
553 | ||
554 | if (VERBOSITY > 1) { | |
555 | instrlist_disassemble(drcontext, pc, bb, STDOUT); | |
556 | instr_t *instr; | |
557 | for (instr = instrlist_first(bb); instr; instr = instr_get_next(instr)) { | |
558 | dr_printf("opcode: %d\n", instr_get_opcode(instr)); | |
559 | } | |
560 | } | |
561 | ||
562 | for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) { | |
563 | int opcode = instr_get_opcode(i); | |
564 | if (opcode == OP_ret || opcode == OP_ret_far) { | |
565 | InstrumentReturn(drcontext, bb, i); | |
566 | continue; | |
567 | } | |
568 | ||
569 | // These instructions hopefully cover all cases where control is transferred | |
570 | // to a function in a different module (we only care about calls into | |
571 | // compiler-instrumented modules). | |
572 | // * call_ind is used for normal indirect calls. | |
573 | // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT | |
574 | // stub includes a jump to an address from GOT). | |
575 | if (opcode == OP_call_ind || opcode == OP_call_far_ind || | |
576 | opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) { | |
577 | InstrumentIndirectBranch(drcontext, bb, i); | |
578 | continue; | |
579 | } | |
580 | ||
581 | if (!WantToInstrument(i)) | |
582 | continue; | |
583 | ||
584 | if (VERBOSITY > 1) { | |
585 | app_pc orig_pc = dr_fragment_app_pc(tag); | |
586 | uint flags = instr_get_arith_flags(i); | |
587 | dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n", | |
588 | instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags); | |
589 | } | |
590 | ||
591 | if (instr_writes_memory(i)) { | |
592 | // Instrument memory writes | |
593 | // bool instrumented_anything = false; | |
594 | for (int d = 0; d < instr_num_dsts(i); d++) { | |
595 | opnd_t op = instr_get_dst(i, d); | |
596 | if (!OperandIsInteresting(op)) | |
597 | continue; | |
598 | ||
599 | // CHECK(!instrumented_anything); | |
600 | // instrumented_anything = true; | |
601 | InstrumentMops(drcontext, bb, i, op, true); | |
602 | break; // only instrumenting the first dst | |
603 | } | |
604 | } | |
605 | } | |
606 | ||
607 | // TODO: optimize away redundant restore-spill pairs? | |
608 | ||
609 | if (VERBOSITY > 1) { | |
610 | pc = dr_fragment_app_pc(tag); | |
611 | dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc); | |
612 | instrlist_disassemble(drcontext, pc, bb, STDOUT); | |
613 | } | |
614 | return DR_EMIT_PERSISTABLE; | |
615 | } | |
616 | ||
617 | #ifndef MSANDR_NATIVE_EXEC | |
618 | void event_module_load(void *drcontext, const module_data_t *info, | |
619 | bool loaded) { | |
620 | // Insert the module into the list while maintaining the ordering. | |
621 | ModuleData mod_data(info); | |
622 | std::vector<ModuleData>::iterator it = | |
623 | upper_bound(g_module_list.begin(), g_module_list.end(), mod_data, | |
624 | ModuleDataCompareStart); | |
625 | it = g_module_list.insert(it, mod_data); | |
626 | // Check if we should instrument this module. | |
627 | it->should_instrument_ = ShouldInstrumentModule(&*it); | |
628 | dr_module_set_should_instrument(info->handle, it->should_instrument_); | |
629 | ||
630 | if (VERBOSITY > 0) | |
631 | dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n", | |
632 | info->full_path, info->start, info->end, | |
633 | it->should_instrument_ ? "on" : "off"); | |
634 | } | |
635 | ||
636 | void event_module_unload(void *drcontext, const module_data_t *info) { | |
637 | if (VERBOSITY > 0) | |
638 | dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path, | |
639 | info->start, info->end); | |
640 | ||
641 | // Remove the module from the list. | |
642 | ModuleData mod_data(info); | |
643 | std::vector<ModuleData>::iterator it = | |
644 | lower_bound(g_module_list.begin(), g_module_list.end(), mod_data, | |
645 | ModuleDataCompareStart); | |
646 | // It's a bug if we didn't actually find the module. | |
647 | CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ && | |
648 | it->end_ == mod_data.end_ && it->path_ == mod_data.path_); | |
649 | g_module_list.erase(it); | |
650 | } | |
651 | #endif /* !MSANDR_NATIVE_EXEC */ | |
652 | ||
653 | void event_exit() { | |
654 | // Clean up so DR doesn't tell us we're leaking memory. | |
655 | drsys_exit(); | |
656 | drutil_exit(); | |
657 | drmgr_exit(); | |
658 | ||
659 | #ifdef MSANDR_STANDALONE_TEST | |
660 | /* free tls */ | |
661 | bool res; | |
662 | res = dr_raw_tls_cfree(msan_retval_tls_offset, NUM_TLS_RETVAL); | |
663 | CHECK(res); | |
664 | res = dr_raw_tls_cfree(msan_param_tls_offset, NUM_TLS_PARAM); | |
665 | CHECK(res); | |
666 | /* we do not bother to free the shadow memory */ | |
667 | #endif /* !MSANDR_STANDALONE_TEST */ | |
668 | if (VERBOSITY > 0) | |
669 | dr_printf("==DRMSAN== DONE\n"); | |
670 | } | |
671 | ||
// Syscall filter: returns true for every syscall number, i.e. everything is
// intercepted. Neither argument is inspected.
// FIXME: only intercept syscalls with memory effects.
bool event_filter_syscall(void *drcontext, int sysnum) {
  (void)drcontext;
  (void)sysnum;
  const bool intercept = true; /* intercept everything */
  return intercept;
}
676 | ||
677 | bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) { | |
678 | CHECK(arg->valid); | |
679 | ||
680 | if (arg->pre) | |
681 | return true; | |
682 | if (!TESTANY(DRSYS_PARAM_OUT, arg->mode)) | |
683 | return true; | |
684 | ||
685 | size_t sz = arg->size; | |
686 | ||
687 | if (sz > 0xFFFFFFFF) { | |
688 | drmf_status_t res; | |
689 | drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; | |
690 | const char *name; | |
691 | res = drsys_syscall_name(syscall, &name); | |
692 | CHECK(res == DRMF_SUCCESS); | |
693 | ||
694 | dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!" | |
695 | " Clipping to %llu.\n", | |
696 | name, arg->ordinal, (unsigned long long) sz, | |
697 | (unsigned long long)(sz & 0xFFFFFFFF)); | |
698 | } | |
699 | ||
700 | if (VERBOSITY > 0) { | |
701 | drmf_status_t res; | |
702 | drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; | |
703 | const char *name; | |
704 | res = drsys_syscall_name(syscall, &name); | |
705 | CHECK(res == DRMF_SUCCESS); | |
706 | dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n", | |
707 | name, arg->ordinal, arg->start_addr, | |
708 | (char *)arg->start_addr + sz); | |
709 | } | |
710 | ||
711 | // We don't switch to the app context because __msan_unpoison() doesn't need | |
712 | // TLS segments. | |
713 | __msan_unpoison(arg->start_addr, sz); | |
714 | ||
715 | return true; /* keep going */ | |
716 | } | |
717 | ||
718 | bool event_pre_syscall(void *drcontext, int sysnum) { | |
719 | drsys_syscall_t *syscall; | |
720 | drsys_sysnum_t sysnum_full; | |
721 | bool known; | |
722 | drsys_param_type_t ret_type; | |
723 | drmf_status_t res; | |
724 | const char *name; | |
725 | ||
726 | res = drsys_cur_syscall(drcontext, &syscall); | |
727 | CHECK(res == DRMF_SUCCESS); | |
728 | ||
729 | res = drsys_syscall_number(syscall, &sysnum_full); | |
730 | CHECK(res == DRMF_SUCCESS); | |
731 | CHECK(sysnum == sysnum_full.number); | |
732 | ||
733 | res = drsys_syscall_is_known(syscall, &known); | |
734 | CHECK(res == DRMF_SUCCESS); | |
735 | ||
736 | res = drsys_syscall_name(syscall, &name); | |
737 | CHECK(res == DRMF_SUCCESS); | |
738 | ||
739 | res = drsys_syscall_return_type(syscall, &ret_type); | |
740 | CHECK(res == DRMF_SUCCESS); | |
741 | CHECK(ret_type != DRSYS_TYPE_INVALID); | |
742 | CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN); | |
743 | ||
744 | res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL); | |
745 | CHECK(res == DRMF_SUCCESS); | |
746 | ||
747 | return true; | |
748 | } | |
749 | ||
750 | static bool IsInLoader(void *drcontext) { | |
751 | // TODO: This segment swap is inefficient. DR should just let us query the | |
752 | // app segment base, which it has. Alternatively, if we disable | |
753 | // -mangle_app_seg, then we won't need the swap. | |
754 | bool need_swap = !dr_using_app_state(drcontext); | |
755 | if (need_swap) | |
756 | dr_switch_to_app_state(drcontext); | |
757 | bool is_in_loader = __msan_is_in_loader(); | |
758 | if (need_swap) | |
759 | dr_switch_to_dr_state(drcontext); | |
760 | return is_in_loader; | |
761 | } | |
762 | ||
763 | void event_post_syscall(void *drcontext, int sysnum) { | |
764 | drsys_syscall_t *syscall; | |
765 | drsys_sysnum_t sysnum_full; | |
766 | bool success = false; | |
767 | drmf_status_t res; | |
768 | ||
769 | res = drsys_cur_syscall(drcontext, &syscall); | |
770 | CHECK(res == DRMF_SUCCESS); | |
771 | ||
772 | res = drsys_syscall_number(syscall, &sysnum_full); | |
773 | CHECK(res == DRMF_SUCCESS); | |
774 | CHECK(sysnum == sysnum_full.number); | |
775 | ||
776 | res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext), | |
777 | &success); | |
778 | CHECK(res == DRMF_SUCCESS); | |
779 | ||
780 | if (success) { | |
781 | res = | |
782 | drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall); | |
783 | CHECK(res == DRMF_SUCCESS); | |
784 | } | |
785 | ||
786 | // Our normal mmap interceptor can't intercept calls from the loader itself. | |
787 | // This means we don't clear the shadow for calls to dlopen. For now, we | |
788 | // solve this by intercepting mmap from ld.so here, but ideally we'd have a | |
789 | // solution that doesn't rely on msandr. | |
790 | // | |
791 | // Be careful not to intercept maps done by the msan rtl. Otherwise we end up | |
792 | // unpoisoning vast regions of memory and OOMing. | |
793 | // TODO: __msan_unpoison() could "flush" large regions of memory like tsan | |
794 | // does instead of doing a large memset. However, we need the memory to be | |
795 | // zeroed, where as tsan does not, so plain madvise is not enough. | |
796 | if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) { | |
797 | if (IsInLoader(drcontext)) { | |
798 | app_pc base = (app_pc)dr_syscall_get_result(drcontext); | |
799 | ptr_uint_t size; | |
800 | drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size); | |
801 | CHECK(res == DRMF_SUCCESS); | |
802 | if (VERBOSITY > 0) | |
803 | dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size); | |
804 | // We don't switch to the app context because __msan_unpoison() doesn't | |
805 | // need TLS segments. | |
806 | __msan_unpoison(base, size); | |
807 | } | |
808 | } | |
809 | } | |
810 | ||
811 | } // namespace | |
812 | ||
813 | DR_EXPORT void dr_init(client_id_t id) { | |
814 | drmf_status_t res; | |
815 | ||
816 | drmgr_init(); | |
817 | drutil_init(); | |
818 | ||
819 | #ifndef MSANDR_NATIVE_EXEC | |
820 | // We should use drconfig to ignore these applications. | |
821 | std::string app_name = dr_get_application_name(); | |
822 | // This blacklist will still run these apps through DR's code cache. On the | |
823 | // other hand, we are able to follow children of these apps. | |
824 | // FIXME: Once DR has detach, we could just detach here. Alternatively, | |
825 | // if DR had a fork or exec hook to let us decide there, that would be nice. | |
826 | // FIXME: make the blacklist cmd-adjustable. | |
827 | if (app_name == "python" || app_name == "python2.7" || app_name == "bash" || | |
828 | app_name == "sh" || app_name == "true" || app_name == "exit" || | |
829 | app_name == "yes" || app_name == "echo") | |
830 | return; | |
831 | #endif /* !MSANDR_NATIVE_EXEC */ | |
832 | ||
833 | drsys_options_t ops; | |
834 | memset(&ops, 0, sizeof(ops)); | |
835 | ops.struct_size = sizeof(ops); | |
836 | ops.analyze_unknown_syscalls = false; | |
837 | ||
838 | res = drsys_init(id, &ops); | |
839 | CHECK(res == DRMF_SUCCESS); | |
840 | ||
841 | dr_register_filter_syscall_event(event_filter_syscall); | |
842 | drmgr_register_pre_syscall_event(event_pre_syscall); | |
843 | drmgr_register_post_syscall_event(event_post_syscall); | |
844 | res = drsys_filter_all_syscalls(); | |
845 | CHECK(res == DRMF_SUCCESS); | |
846 | ||
847 | #ifdef MSANDR_STANDALONE_TEST | |
848 | reg_id_t reg_seg; | |
849 | /* alloc tls */ | |
850 | if (!dr_raw_tls_calloc(®_seg, &mock_msan_retval_tls_offset, NUM_TLS_RETVAL, 0)) | |
851 | CHECK(false); | |
852 | CHECK(reg_seg == DR_SEG_GS /* x64 only! */); | |
853 | if (!dr_raw_tls_calloc(®_seg, &mock_msan_param_tls_offset, NUM_TLS_PARAM, 0)) | |
854 | CHECK(false); | |
855 | CHECK(reg_seg == DR_SEG_GS /* x64 only! */); | |
856 | /* alloc shadow memory */ | |
857 | if (mmap(SHADOW_MEMORY_BASE, SHADOW_MEMORY_SIZE, PROT_READ|PROT_WRITE, | |
858 | MAP_PRIVATE | MAP_ANON, -1, 0) != SHADOW_MEMORY_BASE) { | |
859 | CHECK(false); | |
860 | } | |
861 | #endif /* MSANDR_STANDALONE_TEST */ | |
862 | InitializeMSanCallbacks(); | |
863 | ||
864 | // FIXME: the shadow is initialized earlier when DR calls one of our wrapper | |
865 | // functions. This may change one day. | |
866 | // TODO: make this more robust. | |
867 | ||
868 | void *drcontext = dr_get_current_drcontext(); | |
869 | ||
870 | dr_switch_to_app_state(drcontext); | |
871 | msan_retval_tls_offset = __msan_get_retval_tls_offset(); | |
872 | msan_param_tls_offset = __msan_get_param_tls_offset(); | |
873 | dr_switch_to_dr_state(drcontext); | |
874 | if (VERBOSITY > 0) { | |
875 | dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset); | |
876 | dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset); | |
877 | } | |
878 | ||
879 | // Standard DR events. | |
880 | dr_register_exit_event(event_exit); | |
881 | ||
882 | drmgr_priority_t priority = { | |
883 | sizeof(priority), /* size of struct */ | |
884 | "msandr", /* name of our operation */ | |
885 | NULL, /* optional name of operation we should precede */ | |
886 | NULL, /* optional name of operation we should follow */ | |
887 | 0 | |
888 | }; /* numeric priority */ | |
889 | ||
890 | drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority); | |
891 | drmgr_register_bb_instru2instru_event(event_basic_block, &priority); | |
892 | #ifndef MSANDR_NATIVE_EXEC | |
893 | drmgr_register_module_load_event(event_module_load); | |
894 | drmgr_register_module_unload_event(event_module_unload); | |
895 | #endif /* MSANDR_NATIVE_EXEC */ | |
896 | __msan_dr_is_initialized(); | |
897 | __msan_set_indirect_call_wrapper(dr_app_handle_mbr_target); | |
898 | if (VERBOSITY > 0) | |
899 | dr_printf("==MSANDR== Starting!\n"); | |
900 | } |