From 504594112ffa7c1e6a848d4759953d4235c62318 Mon Sep 17 00:00:00 2001 From: Philippe Renon Date: Tue, 14 Jun 2016 22:05:01 +0200 Subject: [PATCH] LP-335 add support for address sanitizer (asan) --- ground/gcs/gcs.pri | 15 + make/scripts/asan_symbolize.py | 490 +++++++++++++++++++++++++++++++++ 2 files changed, 505 insertions(+) create mode 100755 make/scripts/asan_symbolize.py diff --git a/ground/gcs/gcs.pri b/ground/gcs/gcs.pri index 146a3c960..cb25cd54f 100644 --- a/ground/gcs/gcs.pri +++ b/ground/gcs/gcs.pri @@ -239,6 +239,21 @@ win32 { # Explicit setting of C++11 CONFIG += c++11 +address_sanitizer { + # enable asan by adding "address_sanitizer" to your root config file + # see https://github.com/google/sanitizers + # see https://blog.qt.io/blog/2013/04/17/using-gccs-4-8-0-address-sanitizer-with-qt/ + # + # to use, simply compile and run, asan will crash with a report if an error is found. + # if you don't see symbols, try this: ./build/librepilot-gcs_debug/bin/librepilot-gcs 2>&1 | ./make/scripts/asan_symbolize.py + # + # Note: asan will apply only to GCS and not to third party libraries (Qt, osg, ...). + + QMAKE_CXXFLAGS += -fsanitize=address -g -fno-omit-frame-pointer + QMAKE_CFLAGS += -fsanitize=address -g -fno-omit-frame-pointer + QMAKE_LFLAGS += -fsanitize=address -g +} + # Stricter warnings turned on for OS X. macx { CONFIG += warn_on diff --git a/make/scripts/asan_symbolize.py b/make/scripts/asan_symbolize.py new file mode 100755 index 000000000..8e6fb61f7 --- /dev/null +++ b/make/scripts/asan_symbolize.py @@ -0,0 +1,490 @@ +#!/usr/bin/env python +#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# +import argparse +import bisect +import getopt +import os +import re +import subprocess +import sys + +symbolizers = {} +DEBUG = False +demangle = False +binutils_prefix = None +sysroot_path = None +binary_name_filter = None +fix_filename_patterns = None +logfile = sys.stdin +allow_system_symbolizer = True + +# FIXME: merge the code that calls fix_filename(). +def fix_filename(file_name): + if fix_filename_patterns: + for path_to_cut in fix_filename_patterns: + file_name = re.sub('.*' + path_to_cut, '', file_name) + file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) + file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) + return file_name + +def sysroot_path_filter(binary_name): + return sysroot_path + binary_name + +def guess_arch(addr): + # Guess which arch we're running. 10 = len('0x') + 8 hex digits. + if len(addr) > 10: + return 'x86_64' + else: + return 'i386' + +class Symbolizer(object): + def __init__(self): + pass + + def symbolize(self, addr, binary, offset): + """Symbolize the given address (pair of binary and offset). + + Overriden in subclasses. + Args: + addr: virtual address of an instruction. + binary: path to executable/shared object containing this instruction. + offset: instruction offset in the @binary. + Returns: + list of strings (one string for each inlined frame) describing + the code locations for this instruction (that is, function name, file + name, line and column numbers). + """ + return None + + +class LLVMSymbolizer(Symbolizer): + def __init__(self, symbolizer_path, default_arch, system, dsym_hints=[]): + super(LLVMSymbolizer, self).__init__() + self.symbolizer_path = symbolizer_path + self.default_arch = default_arch + self.system = system + self.dsym_hints = dsym_hints + self.pipe = self.open_llvm_symbolizer() + + def open_llvm_symbolizer(self): + cmd = [self.symbolizer_path, + '--use-symbol-table=true', + '--demangle=%s' % demangle, + '--functions=linkage', + '--inlining=true', + '--default-arch=%s' % self.default_arch] + if self.system == 'Darwin': + for hint in self.dsym_hints: + cmd.append('--dsym-hint=%s' % hint) + if DEBUG: + print ' '.join(cmd) + try: + result = subprocess.Popen(cmd, stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + except OSError: + result = None + return result + + def symbolize(self, addr, binary, offset): + """Overrides Symbolizer.symbolize.""" + if not self.pipe: + return None + result = [] + try: + symbolizer_input = '"%s" %s' % (binary, offset) + if DEBUG: + print symbolizer_input + print >> self.pipe.stdin, symbolizer_input + while True: + function_name = self.pipe.stdout.readline().rstrip() + if not function_name: + break + file_name = self.pipe.stdout.readline().rstrip() + file_name = fix_filename(file_name) + if (not function_name.startswith('??') or + not file_name.startswith('??')): + # Append only non-trivial frames. + result.append('%s in %s %s' % (addr, function_name, + file_name)) + except Exception: + result = [] + if not result: + result = None + return result + + +def LLVMSymbolizerFactory(system, default_arch, dsym_hints=[]): + symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH') + if not symbolizer_path: + symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH') + if not symbolizer_path: + # Assume llvm-symbolizer is in PATH. + symbolizer_path = 'llvm-symbolizer' + return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints) + + +class Addr2LineSymbolizer(Symbolizer): + def __init__(self, binary): + super(Addr2LineSymbolizer, self).__init__() + self.binary = binary + self.pipe = self.open_addr2line() + self.output_terminator = -1 + + def open_addr2line(self): + addr2line_tool = 'addr2line' + if binutils_prefix: + addr2line_tool = binutils_prefix + addr2line_tool + cmd = [addr2line_tool, '-fi'] + if demangle: + cmd += ['--demangle'] + cmd += ['-e', self.binary] + if DEBUG: + print ' '.join(cmd) + return subprocess.Popen(cmd, + stdin=subprocess.PIPE, stdout=subprocess.PIPE) + + def symbolize(self, addr, binary, offset): + """Overrides Symbolizer.symbolize.""" + if self.binary != binary: + return None + lines = [] + try: + print >> self.pipe.stdin, offset + print >> self.pipe.stdin, self.output_terminator + is_first_frame = True + while True: + function_name = self.pipe.stdout.readline().rstrip() + file_name = self.pipe.stdout.readline().rstrip() + if is_first_frame: + is_first_frame = False + elif function_name in ['', '??']: + assert file_name == function_name + break + lines.append((function_name, file_name)); + except Exception: + lines.append(('??', '??:0')) + return ['%s in %s %s' % (addr, function, fix_filename(file)) for (function, file) in lines] + +class UnbufferedLineConverter(object): + """ + Wrap a child process that responds to each line of input with one line of + output. Uses pty to trick the child into providing unbuffered output. + """ + def __init__(self, args, close_stderr=False): + # Local imports so that the script can start on Windows. + import pty + import termios + pid, fd = pty.fork() + if pid == 0: + # We're the child. Transfer control to command. + if close_stderr: + dev_null = os.open('/dev/null', 0) + os.dup2(dev_null, 2) + os.execvp(args[0], args) + else: + # Disable echoing. + attr = termios.tcgetattr(fd) + attr[3] = attr[3] & ~termios.ECHO + termios.tcsetattr(fd, termios.TCSANOW, attr) + # Set up a file()-like interface to the child process + self.r = os.fdopen(fd, "r", 1) + self.w = os.fdopen(os.dup(fd), "w", 1) + + def convert(self, line): + self.w.write(line + "\n") + return self.readline() + + def readline(self): + return self.r.readline().rstrip() + + +class DarwinSymbolizer(Symbolizer): + def __init__(self, addr, binary): + super(DarwinSymbolizer, self).__init__() + self.binary = binary + self.arch = guess_arch(addr) + self.open_atos() + + def open_atos(self): + if DEBUG: + print 'atos -o %s -arch %s' % (self.binary, self.arch) + cmdline = ['atos', '-o', self.binary, '-arch', self.arch] + self.atos = UnbufferedLineConverter(cmdline, close_stderr=True) + + def symbolize(self, addr, binary, offset): + """Overrides Symbolizer.symbolize.""" + if self.binary != binary: + return None + atos_line = self.atos.convert('0x%x' % int(offset, 16)) + while "got symbolicator for" in atos_line: + atos_line = self.atos.readline() + # A well-formed atos response looks like this: + # foo(type1, type2) (in object.name) (filename.cc:80) + match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line) + if DEBUG: + print 'atos_line: ', atos_line + if match: + function_name = match.group(1) + function_name = re.sub('\(.*?\)', '', function_name) + file_name = fix_filename(match.group(3)) + return ['%s in %s %s' % (addr, function_name, file_name)] + else: + return ['%s in %s' % (addr, atos_line)] + + +# Chain several symbolizers so that if one symbolizer fails, we fall back +# to the next symbolizer in chain. +class ChainSymbolizer(Symbolizer): + def __init__(self, symbolizer_list): + super(ChainSymbolizer, self).__init__() + self.symbolizer_list = symbolizer_list + + def symbolize(self, addr, binary, offset): + """Overrides Symbolizer.symbolize.""" + for symbolizer in self.symbolizer_list: + if symbolizer: + result = symbolizer.symbolize(addr, binary, offset) + if result: + return result + return None + + def append_symbolizer(self, symbolizer): + self.symbolizer_list.append(symbolizer) + + +def BreakpadSymbolizerFactory(binary): + suffix = os.getenv('BREAKPAD_SUFFIX') + if suffix: + filename = binary + suffix + if os.access(filename, os.F_OK): + return BreakpadSymbolizer(filename) + return None + + +def SystemSymbolizerFactory(system, addr, binary): + if system == 'Darwin': + return DarwinSymbolizer(addr, binary) + elif system == 'Linux' or system == 'FreeBSD': + return Addr2LineSymbolizer(binary) + + +class BreakpadSymbolizer(Symbolizer): + def __init__(self, filename): + super(BreakpadSymbolizer, self).__init__() + self.filename = filename + lines = file(filename).readlines() + self.files = [] + self.symbols = {} + self.address_list = [] + self.addresses = {} + # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t + fragments = lines[0].rstrip().split() + self.arch = fragments[2] + self.debug_id = fragments[3] + self.binary = ' '.join(fragments[4:]) + self.parse_lines(lines[1:]) + + def parse_lines(self, lines): + cur_function_addr = '' + for line in lines: + fragments = line.split() + if fragments[0] == 'FILE': + assert int(fragments[1]) == len(self.files) + self.files.append(' '.join(fragments[2:])) + elif fragments[0] == 'PUBLIC': + self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:]) + elif fragments[0] in ['CFI', 'STACK']: + pass + elif fragments[0] == 'FUNC': + cur_function_addr = int(fragments[1], 16) + if not cur_function_addr in self.symbols.keys(): + self.symbols[cur_function_addr] = ' '.join(fragments[4:]) + else: + # Line starting with an address. + addr = int(fragments[0], 16) + self.address_list.append(addr) + # Tuple of symbol address, size, line, file number. + self.addresses[addr] = (cur_function_addr, + int(fragments[1], 16), + int(fragments[2]), + int(fragments[3])) + self.address_list.sort() + + def get_sym_file_line(self, addr): + key = None + if addr in self.addresses.keys(): + key = addr + else: + index = bisect.bisect_left(self.address_list, addr) + if index == 0: + return None + else: + key = self.address_list[index - 1] + sym_id, size, line_no, file_no = self.addresses[key] + symbol = self.symbols[sym_id] + filename = self.files[file_no] + if addr < key + size: + return symbol, filename, line_no + else: + return None + + def symbolize(self, addr, binary, offset): + if self.binary != binary: + return None + res = self.get_sym_file_line(int(offset, 16)) + if res: + function_name, file_name, line_no = res + result = ['%s in %s %s:%d' % ( + addr, function_name, file_name, line_no)] + print result + return result + else: + return None + + +class SymbolizationLoop(object): + def __init__(self, binary_name_filter=None, dsym_hint_producer=None): + if sys.platform == 'win32': + # ASan on Windows uses dbghelp.dll to symbolize in-process, which works + # even in sandboxed processes. Nothing needs to be done here. + self.process_line = self.process_line_echo + else: + # Used by clients who may want to supply a different binary name. + # E.g. in Chrome several binaries may share a single .dSYM. + self.binary_name_filter = binary_name_filter + self.dsym_hint_producer = dsym_hint_producer + self.system = os.uname()[0] + if self.system not in ['Linux', 'Darwin', 'FreeBSD']: + raise Exception('Unknown system') + self.llvm_symbolizers = {} + self.last_llvm_symbolizer = None + self.dsym_hints = set([]) + self.frame_no = 0 + self.process_line = self.process_line_posix + + def symbolize_address(self, addr, binary, offset): + # On non-Darwin (i.e. on platforms without .dSYM debug info) always use + # a single symbolizer binary. + # On Darwin, if the dsym hint producer is present: + # 1. check whether we've seen this binary already; if so, + # use |llvm_symbolizers[binary]|, which has already loaded the debug + # info for this binary (might not be the case for + # |last_llvm_symbolizer|); + # 2. otherwise check if we've seen all the hints for this binary already; + # if so, reuse |last_llvm_symbolizer| which has the full set of hints; + # 3. otherwise create a new symbolizer and pass all currently known + # .dSYM hints to it. + if not binary in self.llvm_symbolizers: + use_new_symbolizer = True + if self.system == 'Darwin' and self.dsym_hint_producer: + dsym_hints_for_binary = set(self.dsym_hint_producer(binary)) + use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints) + self.dsym_hints |= dsym_hints_for_binary + if self.last_llvm_symbolizer and not use_new_symbolizer: + self.llvm_symbolizers[binary] = self.last_llvm_symbolizer + else: + self.last_llvm_symbolizer = LLVMSymbolizerFactory( + self.system, guess_arch(addr), self.dsym_hints) + self.llvm_symbolizers[binary] = self.last_llvm_symbolizer + # Use the chain of symbolizers: + # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos + # (fall back to next symbolizer if the previous one fails). + if not binary in symbolizers: + symbolizers[binary] = ChainSymbolizer( + [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]]) + result = symbolizers[binary].symbolize(addr, binary, offset) + if result is None: + if not allow_system_symbolizer: + raise Exception('Failed to launch or use llvm-symbolizer.') + # Initialize system symbolizer only if other symbolizers failed. + symbolizers[binary].append_symbolizer( + SystemSymbolizerFactory(self.system, addr, binary)) + result = symbolizers[binary].symbolize(addr, binary, offset) + # The system symbolizer must produce some result. + assert result + return result + + def get_symbolized_lines(self, symbolized_lines): + if not symbolized_lines: + return [self.current_line] + else: + result = [] + for symbolized_frame in symbolized_lines: + result.append(' #%s %s' % (str(self.frame_no), symbolized_frame.rstrip())) + self.frame_no += 1 + return result + + def process_logfile(self): + self.frame_no = 0 + for line in logfile: + processed = self.process_line(line) + print '\n'.join(processed) + + def process_line_echo(self, line): + return [line.rstrip()] + + def process_line_posix(self, line): + self.current_line = line.rstrip() + #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) + stack_trace_line_format = ( + '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') + match = re.match(stack_trace_line_format, line) + if not match: + return [self.current_line] + if DEBUG: + print line + _, frameno_str, addr, binary, offset = match.groups() + if frameno_str == '0': + # Assume that frame #0 is the first frame of new stack trace. + self.frame_no = 0 + original_binary = binary + if self.binary_name_filter: + binary = self.binary_name_filter(binary) + symbolized_line = self.symbolize_address(addr, binary, offset) + if not symbolized_line: + if original_binary != binary: + symbolized_line = self.symbolize_address(addr, binary, offset) + return self.get_symbolized_lines(symbolized_line) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description='ASan symbolization script', + epilog='Example of use:\n' + 'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" ' + '-s "$HOME/SymbolFiles" < asan.log') + parser.add_argument('path_to_cut', nargs='*', + help='pattern to be cut from the result file path ') + parser.add_argument('-d','--demangle', action='store_true', + help='demangle function names') + parser.add_argument('-s', metavar='SYSROOT', + help='set path to sysroot for sanitized binaries') + parser.add_argument('-c', metavar='CROSS_COMPILE', + help='set prefix for binutils') + parser.add_argument('-l','--logfile', default=sys.stdin, + type=argparse.FileType('r'), + help='set log file name to parse, default is stdin') + args = parser.parse_args() + if args.path_to_cut: + fix_filename_patterns = args.path_to_cut + if args.demangle: + demangle = True + if args.s: + binary_name_filter = sysroot_path_filter + sysroot_path = args.s + if args.c: + binutils_prefix = args.c + if args.logfile: + logfile = args.logfile + else: + logfile = sys.stdin + loop = SymbolizationLoop(binary_name_filter) + loop.process_logfile()