Competition: automatically remove bad bugs

When injecting all bugs fails, do a binary search to find bad bugs and inject all bugs except those.
panda-re · Sep 5, 2019 · 0cfacd5 · 0cfacd5
1 parent 806e022
commit 0cfacd5
Show file tree

Hide file tree

Showing 5 changed files with 82 additions and 28 deletions.
diff --git a/scripts/competition.py b/scripts/competition.py
@@ -21,20 +21,16 @@
 from math import sqrt
 from os.path import basename, dirname, join, abspath, exists
 
+from utils import bad_bin_search
 from vars import parse_vars
 from lava import LavaDatabase, Bug, Build, DuaBytes, Run, \
     run_cmd, run_cmd_notimeout, mutfile, inject_bugs, LavaPaths, \
     validate_bugs, run_modified_program, unfuzzed_input_for_bug, \
     fuzzed_input_for_bug, get_trigger_line, AttackPoint, Bug, \
     get_allowed_bugtype_num, limit_atp_reuse
 
-# from pycparser.diversifier.diversify import diversify
-#from process_compile_commands import get_c_files
-
 version="2.0.0"
 
-RETRY_COUNT = 0
-
 # Build both scripts - in a seperate fn for testing
 def run_builds(scripts):
     for script in scripts:
@@ -212,6 +208,31 @@ def parse(item):
 
     return [b.id for b in bugs_and_non_bugs]
 
+def inject_bug_list(bug_list, db, lp, project, args, host_json, dataflow, lavatoolseed):
+    '''
+    Given a list of bugs, try to inject them all and ensure the program still works
+
+    Raises AssertionError if inject_bugs yields no build or validate_bugs returns None
+
+    Returns the list of validated bugs produced by validate_bugs
+    '''
+    # add bugs to the source code and check that we can still compile
+    (build, input_files, bug_solutions) = inject_bugs(bug_list, db, lp, host_json, \
+                                      project, args, False, dataflow=dataflow, competition=True,
+                                      validated=False, lavatoolseed=lavatoolseed)
+
+    if build is None: # injection failed; explicit raise (not assert) so the check survives python -O
+        raise AssertionError("inject_bugs produced no build")
+
+    # Test if the injected bugs cause appropriate crashes and that our competition infrastructure parses the crashes correctly
+    # validate_bugs returns None if testing the original input fails
+    real_bug_list = validate_bugs(bug_list, db, lp, project, input_files, build, \
+                                      args, False, competition=True, bug_solutions=bug_solutions)
+
+    if real_bug_list is None: # same exception type as before so callers catching AssertionError still work
+        raise AssertionError("validate_bugs returned None (original program broken)")
+    return real_bug_list
+
 def main():
     parser = argparse.ArgumentParser(prog="competition.py", description='Inject and test LAVA bugs.')
     parser.add_argument('host_json', help = 'Host JSON file')
@@ -229,7 +250,7 @@ def main():
             #help = ('Diversify source code. Default false.'))
     parser.add_argument('-c', '--chaff', action="store_true", default=False, # TODO chaf and unvalided bugs aren't always the same thing
             help = ('Leave unvalidated bugs in the binary'))
-    parser.add_argument('-t', '--bugtypes', action="store", default="rel_write",
+    parser.add_argument('-t', '--bugtypes', action="store", default="ptr_add,rel_write",
                         help = ('bug types to inject'))
     parser.add_argument('--version', action="version", version="%(prog)s {}".format(version))
 
@@ -293,22 +314,39 @@ def main():
     ## With our bug list in hand, we inject all these bugs and count how many we can trigger
     ###############
 
-    real_bug_list = []
-    # add bugs to the source code and check that we can still compile
-    (build, input_files, bug_solutions) = inject_bugs(bug_list, db, lp, args.host_json, \
-                                      project, args, False, dataflow=dataflow, competition=True,
-                                      validated=False, lavatoolseed=lavatoolseed)
-    assert build is not None # build is None when injection fails. Could block here to allow for manual patches
+    # Do a binary search over bug list to identify bugs that break the build
 
-    # Test if the injected bugs cause approperiate crashes and that our competition infrastructure parses the crashes correctly
-    real_bug_list = validate_bugs(bug_list, db, lp, project, input_files, build, \
-                                      args, False, competition=True, bug_solutions=bug_solutions)
+    # Closure on everything but bug_list
+    def inject_closure(new_bug_list):
+        return inject_bug_list(new_bug_list, db, lp, project, args, args.host_json, dataflow, lavatoolseed)
+
+    orig_bugc = len(bug_list)
+
+    if orig_bugc > 1:  # We can't do a binary search over one item, just continue and detect if it's bad in re-validate step
+        bug_list = bad_bin_search(bug_list, inject_closure)
+        assert(bug_list is not None)
+        new_bugc = len(bug_list)
+        if new_bugc < orig_bugc:
+            print("Removed {} bad bugs from bug list".format(orig_bugc - new_bugc))
+
+        if len(bug_list) < int(args.minYield) or len(bug_list) == 0:
+            print("\n\nXXX Yield too low after injection -- Require at least {} bugs for"
+                    " competition, only have {}".format(args.minYield, len(real_bug_list)))
+            raise RuntimeError("Failure")
 
-    if len(real_bug_list) < int(args.minYield):
-        print("\n\nXXX Yield too low after injection -- Require at least {} bugs for"
+    # Re-validate one more time with all bugs to make sure they all work together
+    try:
+        real_bug_list = inject_closure(bug_list)
+    except AssertionError:
+        print("Attempted to inject {} bugs that validated seperately but encountered errors".format(len(bug_list)))
+        raise
+
+    if len(real_bug_list) < int(args.minYield) or len(real_bug_list) == 0:
+        print("\n\nXXX Yield too low after final injection -- Require at least {} bugs for"
                 " competition, only have {}".format(args.minYield, len(real_bug_list)))
         raise RuntimeError("Failure")
 
+
     print "\n\n Yield acceptable: {}".format(len(real_bug_list))
 
     # TODO- the rebuild process may invalidate a previously validated bug because the trigger will change
@@ -576,7 +614,7 @@ def main():
     os.chmod(trigger_all_crashes, (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IROTH | stat.S_IXOTH))
     # Build a version to ship in src
     run_builds([log_build_sh, public_build_sh])
-    print("Injected {} bugs".format(len(real_bug_list)))
+    print("Injected {} bugs into corpus at {}".format(len(real_bug_list), corpdir))
 
     print("Counting how many crashes competition infrastructure identifies...")
     run_cmd(trigger_all_crashes, cwd=corpdir) # Prints about segfaults

diff --git a/scripts/inject.py b/scripts/inject.py
@@ -190,6 +190,8 @@ def main():
         # determine which of those bugs actually cause a seg fault
         real_bug_list = validate_bugs(bug_list, db, lp, project, input_files, build,
                                       args, update_db)
+        if not real_bug_list:
+            raise RuntimeError("Target program no longer works for original input")
 
 
         def count_bug_types(id_list):

diff --git a/scripts/lava.py b/scripts/lava.py
@@ -1250,8 +1250,6 @@ def validate_bug(db, lp, project, bug, bug_index, build, args, update_db,
     return validated
 
 
-
-@interactive_exceptions
 def test_orig(db, lp, project, update_db, input_files, build, expectedExitCode):
     timeout = project.get('timeout', 5)
     unfuzzed_outputs = {}
@@ -1283,14 +1281,18 @@ def test_orig(db, lp, project, update_db, input_files, build, expectedExitCode):
                                success=True, validated=False))
     return unfuzzed_outputs
 
-# validate this set of bugs
+# validate this set of bugs. Returns None if we've broken the program, otherwise a list of valid bugs
 def validate_bugs(bug_list, db, lp, project, input_files, build,
                   args, update_db, competition=False, bug_solutions=None):
 
     print("Validate bugs: {}".format(bug_list))
 
     print("Test for exit codes 0 or {}".format(args.exitCode))
-    unfuzzed_outputs = test_orig(db, lp, project, update_db, input_files, build, args.exitCode)
+    try:
+        unfuzzed_outputs = test_orig(db, lp, project, update_db, input_files, build, args.exitCode)
+    except AssertionError:
+        return None
+
     print("ORIG INPUT STILL WORKS\n")
 
     # second, try each of the fuzzed inputs and validate

diff --git a/scripts/utils.py b/scripts/utils.py
@@ -1,13 +1,16 @@
-def bad_bin_search(args, fun):
+def bad_bin_search(args, fun, depth=0):
     '''
     Given a list of items and a function that takes a list,
     do a binary search to remove all items that cause fun to fail
 
     Assumes args starts with > 1 element
 
+    XXX: Runs out of memory if you have lots of failures and ~1000 or more bugs to test
+
     Returns a list of OK args
     '''
     if len(args) <= 1: # Already failed on this arg, don't retry it
+        print("Identified bad bug: {}".format(args[0]))
         return []
 
     mid = len(args)/2
@@ -16,17 +19,22 @@ def bad_bin_search(args, fun):
 
     if len(left):
         try: # If left still fails, reduce farther
-            fun(left)
+            res = fun(left)
+            if not len(res): raise RuntimeError("Recurse")
         except (AssertionError, RuntimeError):
-            left = bad_bin_search(left, fun)
+            left = bad_bin_search(left, fun, depth+1)
 
     if len(right):
         try: # If right still fails, reduce farther
-            fun(right)
+            #right = fun(right)
+            res = fun(right)
+            if not len(res): raise RuntimeError("Recurse")
         except (AssertionError, RuntimeError):
-            right = bad_bin_search(right, fun)
+            right = bad_bin_search(right, fun, depth+1)
 
-    return left + right
+    #return left + right
+    both =  left + right
+    return both
 
 if __name__ == "__main__":
     def test_fn(l):

diff --git a/scripts/validate.sh b/scripts/validate.sh
@@ -115,6 +115,10 @@ else
     exit 1
 fi
 
+### TODO: validate that CFLAGS can be updated with `CFLAGS=-DLAVA_LOGGING make`
+
+### TODO: validate that injfixupscript argument doesn't break build
+
 
 # INSTALL
 progress "validate" 0  "Installing..."