From e99a22e02b35cb1848dd87c16589fa32787e6018 Mon Sep 17 00:00:00 2001 From: Zhang Yuntong Date: Thu, 21 Dec 2023 17:18:59 +0800 Subject: [PATCH] Fix Docker build, generate patches for plausible invariants (#1) * Doc: move old instructions to another place * Generate patch for plausible but overfitting invariants as well * Dockerfile: fix z3 install error * Try to bump up python version * Fix Dockerfile * Fix minor error * Print more info and update README --- Dockerfile | 23 +++-- README.md | 35 ++++---- doc/ISSTA22.md | 47 +++++++++++ requirements.txt | 4 + src/backend.py | 16 ++-- src/ce_refiner.py | 9 +- src/main.py | 208 +++++++++++++++++++++++++++------------------- src/patch_gen.py | 67 ++++++++------- src/values.py | 29 ++++--- 9 files changed, 269 insertions(+), 169 deletions(-) create mode 100644 doc/ISSTA22.md create mode 100644 requirements.txt diff --git a/Dockerfile b/Dockerfile index b4260f0..e1c22b0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,7 @@ FROM ubuntu:18.04 -RUN apt clean -RUN apt update +RUN apt clean && apt update RUN DEBIAN_FRONTEND=noninteractive apt install -y build-essential curl wget software-properties-common llvm -# add this for installing latest version of python3.8 -RUN add-apt-repository ppa:deadsnakes/ppa -RUN apt update # install elfutils RUN DEBIAN_FRONTEND=noninteractive apt install -y unzip pkg-config zlib1g zlib1g-dev autoconf libtool cmake @@ -25,17 +21,26 @@ RUN DEBIAN_FRONTEND=noninteractive apt install -y git vim python3-pip gdb \ RUN DEBIAN_FRONTEND=noninteractive apt install -y clang-10 -# install python3.8 and the libraries we need +# install a newer version of cmake, since it is required by z3 +RUN DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends wget +RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null +RUN DEBIAN_FRONTEND=noninteractive apt purge --yes --auto-remove cmake && \ + 
apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" && \ + apt update && \ + apt-get install --yes --no-install-recommends cmake + +# install python3.8, for driver scripts of the project RUN DEBIAN_FRONTEND=noninteractive apt install -y python3.8 -RUN python3.8 -m pip install toml pyparsing z3-solver libclang -RUN python3 -m pip install toml pyparsing # build the project COPY . /home/yuntong/vulnfix/ WORKDIR /home/yuntong/vulnfix/ RUN git submodule init RUN git submodule update -# build is slow within docker build, so just build inside container +RUN python3.8 -m pip install -r requirements.txt +# required for building cvc5 (default python3 is 3.6) +RUN python3 -m pip install toml pyparsing +# NOTE: this might be slow RUN ./build.sh ENV PATH="/home/yuntong/vulnfix/bin:${PATH}" diff --git a/README.md b/README.md index d00f77c..b47e72b 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,11 @@ the vulnerable states, which can be used to generate a patch later on. ## Getting started +_New changes have been added to VulnFix since the ISSTA22 publication. To get the version during +ISSTA22 period and steps for using that version, please refer to [ISSTA22.md](doc/ISSTA22.md)._ + +> TODO: Add getting started instruction for the new tool version. + Firstly, certain OS configurations are required to be set for VulnFix and its dependencies (e.g. AFL). To set these, run: @@ -30,35 +35,27 @@ echo 0 | sudo tee /proc/sys/kernel/randomize_va_space The VulnFix tool and its dependencies are available in docker container. (Please refer to [doc/INSTALL.md](doc/INSTALL.md) for instructions on building it from source.) 
+ To start: ```bash -docker pull yuntongzhang/vulnfix:issta22 -docker run -it --memory=30g --name vulnfix-issta22 yuntongzhang/vulnfix:issta22 +docker pull yuntongzhang/vulnfix:latest-manual +docker run -it --memory=30g --name vulnfix yuntongzhang/vulnfix:latest-manual ``` -Once inside the container, navigate to the VulnFix directory and invoke it on CVE-2012-5134: +Once inside the container, invoke it on one example (e.g. CVE-2012-5134) with: ```bash +# clone and build the target project +cd /home/yuntong/vulnfix/data/libxml2/cve_2012_5134 +./setup.sh +# run vulnfix to repair cd /home/yuntong/vulnfix -python3.8 src/main.py data/libxml2/cve_2012_5134/config +vulnfix data/libxml2/cve_2012_5134/config ``` -AFL should be started after a shorting period of time of parsing the config file and setting up the -runtime directory. The snapshot fuzzing stage will follow. The total time taken for this command -is roughly 12-15 minutes, and the final few lines printed on screen should be something like this: - -``` -2022-05-24 05:40:33 --- Final patch invariants - #(1) : ['len >= 1'] --- - -2022-05-24 05:40:33 Generating patch from the patch invariant `len >= 1` ... -2022-05-24 05:40:41 Patch generation successful! Please find the patch at: /home/yuntong/vulnfix/data/libxml2/cve_2012_5134/runtime/vulnfix.patch. -``` - -This indicates a successful run of VulnFix, with a single patch invariant `len >= 1` produced in the -end. A patch file is also generated based on this invariant, at the location: -`/home/yuntong/vulnfix/data/libxml2/cve_2012_5134/runtime/vulnfix.patch`. - +After VulnFix finishes, the results (generated invariants and patches) can be found in +`/home/yuntong/vulnfix/data/libxml2/cve_2012_5134/runtime/result/`. 
## Documentation diff --git a/doc/ISSTA22.md b/doc/ISSTA22.md new file mode 100644 index 0000000..3c5bcfb --- /dev/null +++ b/doc/ISSTA22.md @@ -0,0 +1,47 @@ +# Getting started steps (for the version during ISSTA 22) + +_This is the instruction for running VulnFix on one example. The full steps for ISSTA22 artifact +evaluation are at [doc/AE.md](doc/AE.md)._ + +Firstly, certain OS configurations are required to be set for VulnFix and its dependencies (e.g. AFL). +To set these, run: + +```bash +echo core | sudo tee /proc/sys/kernel/core_pattern +cd /sys/devices/system/cpu +echo performance | sudo tee cpu*/cpufreq/scaling_governor + +echo 0 | sudo tee /proc/sys/kernel/randomize_va_space +``` + +The VulnFix tool and its dependencies are available in docker container. (Please refer to +[doc/INSTALL.md](doc/INSTALL.md) for instructions on building it from source.) + +To start: + +```bash +docker pull yuntongzhang/vulnfix:issta22 +docker run -it --memory=30g --name vulnfix-issta22 yuntongzhang/vulnfix:issta22 +``` + +Once inside the container, navigate to the VulnFix directory and invoke it on CVE-2012-5134: + +```bash +cd /home/yuntong/vulnfix +python3.8 src/main.py data/libxml2/cve_2012_5134/config +``` + +AFL should be started after a short period of time of parsing the config file and setting up the +runtime directory. The snapshot fuzzing stage will follow. The total time taken for this command +is roughly 12-15 minutes, and the final few lines printed on screen should be something like this: + +``` +2022-05-24 05:40:33 --- Final patch invariants - #(1) : ['len >= 1'] --- + +2022-05-24 05:40:33 Generating patch from the patch invariant `len >= 1` ... +2022-05-24 05:40:41 Patch generation successful! Please find the patch at: /home/yuntong/vulnfix/data/libxml2/cve_2012_5134/runtime/vulnfix.patch. +``` + +This indicates a successful run of VulnFix, with a single patch invariant `len >= 1` produced in the +end. 
A patch file is also generated based on this invariant, at the location: +`/home/yuntong/vulnfix/data/libxml2/cve_2012_5134/runtime/vulnfix.patch`. diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..562e3fe --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +libclang==14.0.1 +pyparsing==3.0.8 +toml==0.10.2 +z3-solver==4.8.17.0 diff --git a/src/backend.py b/src/backend.py index fc2abf4..8fc832e 100644 --- a/src/backend.py +++ b/src/backend.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod import subprocess import re -from sys import stdout +from typing import List from logger import logger from utils import * @@ -32,7 +32,7 @@ class DaikonBackend(BackendBase): def __init__(self): super().__init__() - def run(self): + def run(self) -> List[str]: """ :returns: A list of invariants, a list of variables appeared in invariants. If there is no output, returns two empty lists. @@ -66,7 +66,7 @@ def run(self): return invariants - def __filter_daikon_invariants(self, invs): + def __filter_daikon_invariants(self, invs: List[str]) -> List[str]: """ Some daikon invariants are complicated to turn off from Daikon configs. We filter them out here. @@ -82,7 +82,7 @@ def __filter_daikon_invariants(self, invs): return filtered_invs - def __sanitize_daikon_invariants(self, invs): + def __sanitize_daikon_invariants(self, invs: List[str]) -> List[str]: """ Daikon output is formatted in java. Here we sanitize them to format that can be handled by z3 in python, and also can be use to generat patch in C. @@ -122,7 +122,7 @@ def __sanitize_daikon_invariants(self, invs): return sanitized_invs - def __remove_duplicated_invariants(self, invs): + def __remove_duplicated_invariants(self, invs: List[str]) -> List[str]: """ Daikon can produce semantically equivalent invariants. This method detects the duplicates and only keeps one of them. 
@@ -184,7 +184,7 @@ def generate_input_from_snapshots(self): f.write(fail_res) - def __convert_vars_into_decls(self, vars): + def __convert_vars_into_decls(self, vars: List[str]) -> str: res = "\n\nppt ..fix_location():::ENTER\n" res += "\n\nppt ..fix_location():::EXIT\n" res += " ppt-type point\n" @@ -263,7 +263,7 @@ def __init__(self): pass - def run(self): + def run(self) -> List[str]: logger.info('Running cvc5 for inference. This make take a while ...') cmd = [values.full_cvc5, "--sygus-arg-relevant", "--sygus-eval-opt", "--sygus-grammar-norm", "--sygus-min-grammar", @@ -289,7 +289,7 @@ def run(self): return [inv] - def __sanitize_cvc5_invariant(self, invariant): + def __sanitize_cvc5_invariant(self, invariant: str) -> str: inv_tokens = invariant.strip().split() # change = to == inv_tokens = [ '==' if t == '=' else t for t in inv_tokens ] diff --git a/src/ce_refiner.py b/src/ce_refiner.py index 2669833..5aa7488 100644 --- a/src/ce_refiner.py +++ b/src/ce_refiner.py @@ -3,17 +3,20 @@ from ce_single_var import * from subroutines import * from snapshot import * +from backend import BackendBase +from typing import List EARLY_TERM_THRESHOLD = 5 class CeRefiner(object): - def __init__(self, exprs, inputs_pass, inputs_fail, backend): + def __init__(self, exprs: List[str], inputs_pass, inputs_fail, backend): """ :param exprs: list of candidate expressions (constraints) :param inputs_pass: list of passing test inputs :param inputs_fail: list of failing test inputs """ self.round = 0 + # always keep the current candidate invs from the current round self.candidate_exprs = exprs self.consecutive_same_count = 0 # all the inputs given @@ -22,7 +25,7 @@ def __init__(self, exprs, inputs_pass, inputs_fail, backend): # record which inputs have not been used self.untouched_inputs_pass = set(inputs_pass) self.untouched_inputs_fail = set(inputs_fail) - self.backend = backend + self.backend: BackendBase = backend self.__refresh_driver_tests() def __refresh_driver_tests(self): @@ 
-91,7 +94,7 @@ def one_step_refinement(self, max_iter=6): f' The most recent patch invariants are: {[e for e in self.candidate_exprs]}.\n') return candidate_exprs - # update refiner attributes + # update refiner attributes (set our own internal state) self.round += 1 if candidate_exprs == self.candidate_exprs: self.consecutive_same_count += 1 diff --git a/src/main.py b/src/main.py index 922513d..dddba1d 100644 --- a/src/main.py +++ b/src/main.py @@ -4,6 +4,7 @@ import random import argparse import configparser +from os.path import join as pjoin import values import snapshot_pool @@ -36,39 +37,38 @@ def parse_config_and_setup_runtime(config_file): values.dir_runtime = values.dir_runtime.replace("runtime", "afl-runtime") if not os.path.isdir(values.dir_runtime): os.mkdir(values.dir_runtime) - values.dir_afl_raw_input = os.path.join(values.dir_runtime, "afl-in") - values.dir_afl_raw_output = os.path.join(values.dir_runtime, "afl-out") - values.dir_afl_raw_input_normal = os.path.join(values.dir_runtime, "afl-in-normal") - values.dir_afl_raw_output_normal = os.path.join(values.dir_runtime, "afl-out-normal") - values.dir_afl_pass = os.path.join(values.dir_runtime, "afl-pass") - values.dir_afl_fail = os.path.join(values.dir_runtime, "afl-fail") - values.dir_seed_pass = os.path.join(values.dir_runtime, "seed-pass") - values.dir_seed_fail = os.path.join(values.dir_runtime, "seed-fail") - values.file_exploit = os.path.join(values.dir_runtime, "exploit") - values.file_snapshot_orig = os.path.join(values.dir_runtime, "snapshot.out") - values.file_snapshot_hash = os.path.join(values.dir_runtime, "snapshot.out.forhash") - values.file_snapshot_processed = os.path.join(values.dir_runtime, "snapshot.out.processed") - values.file_solver_in = os.path.join(values.dir_runtime, "input.sl") - values.file_pass_ss_pool = os.path.join(values.dir_runtime, "pass-ss-pool") - values.file_fail_ss_pool = os.path.join(values.dir_runtime, "fail-ss-pool") - values.file_logging = 
os.path.join(values.dir_runtime, "vulnfix.log") - values.file_final_patch = os.path.join(values.dir_runtime, "vulnfix.patch") - values.file_result = os.path.join(values.dir_runtime, "vulnfix.result") + values.dir_result = pjoin(values.dir_runtime, "result") + values.dir_afl_raw_input = pjoin(values.dir_runtime, "afl-in") + values.dir_afl_raw_output = pjoin(values.dir_runtime, "afl-out") + values.dir_afl_raw_input_normal = pjoin(values.dir_runtime, "afl-in-normal") + values.dir_afl_raw_output_normal = pjoin(values.dir_runtime, "afl-out-normal") + values.dir_afl_pass = pjoin(values.dir_runtime, "afl-pass") + values.dir_afl_fail = pjoin(values.dir_runtime, "afl-fail") + values.dir_seed_pass = pjoin(values.dir_runtime, "seed-pass") + values.dir_seed_fail = pjoin(values.dir_runtime, "seed-fail") + values.file_exploit = pjoin(values.dir_runtime, "exploit") + values.file_snapshot_orig = pjoin(values.dir_runtime, "snapshot.out") + values.file_snapshot_hash = pjoin(values.dir_runtime, "snapshot.out.forhash") + values.file_snapshot_processed = pjoin(values.dir_runtime, "snapshot.out.processed") + values.file_solver_in = pjoin(values.dir_runtime, "input.sl") + values.file_pass_ss_pool = pjoin(values.dir_runtime, "pass-ss-pool") + values.file_fail_ss_pool = pjoin(values.dir_runtime, "fail-ss-pool") + values.file_logging = pjoin(values.dir_runtime, "vulnfix.log") # runtime-dir => daikon runtime files - values.file_daikon_feasibility_traces = os.path.join(values.dir_runtime, "feasibility.dtrace") - values.file_daikon_pass_traces = os.path.join(values.dir_runtime, "pass.dtrace") - values.file_daikon_fail_traces = os.path.join(values.dir_runtime, "fail.dtrace") - values.file_daikon_decl = os.path.join(values.dir_runtime, "daikon.decls") - values.file_daikon_pass_inv = os.path.join(values.dir_runtime, "pass.inv") + values.file_daikon_feasibility_traces = pjoin(values.dir_runtime, "feasibility.dtrace") + values.file_daikon_pass_traces = pjoin(values.dir_runtime, "pass.dtrace") + 
values.file_daikon_fail_traces = pjoin(values.dir_runtime, "fail.dtrace") + values.file_daikon_decl = pjoin(values.dir_runtime, "daikon.decls") + values.file_daikon_pass_inv = pjoin(values.dir_runtime, "pass.inv") # binary values.binary_full_path = config_dict['binary'] bin_name = os.path.split(values.binary_full_path)[1] shutil.copy2(values.binary_full_path, values.dir_runtime) - values.bin_orig = os.path.join(values.dir_runtime, bin_name) - values.bin_afl = os.path.join(values.dir_runtime, bin_name + ".afl") - values.bin_snapshot = os.path.join(values.dir_runtime, bin_name + ".snapshot") - values.bin_mutate = os.path.join(values.dir_runtime, bin_name + ".mutate") - values.bin_crash = os.path.join(values.dir_runtime, bin_name + ".crash") + values.bin_orig = pjoin(values.dir_runtime, bin_name) + values.bin_afl = pjoin(values.dir_runtime, bin_name + ".afl") + values.bin_snapshot = pjoin(values.dir_runtime, bin_name + ".snapshot") + values.bin_mutate = pjoin(values.dir_runtime, bin_name + ".mutate") + values.bin_crash = pjoin(values.dir_runtime, bin_name + ".crash") # exploit shutil.copyfile(config_dict['exploit'], values.file_exploit) # others @@ -77,8 +77,8 @@ def parse_config_and_setup_runtime(config_file): values.crash_loc = config_dict['crash-location'] values.dir_source = config_dict['source-dir'] values.fix_file_rel_path = config_dict['fix-file-path'] - values.fix_file_path = os.path.join(values.dir_source, values.fix_file_rel_path) - values.backup_file_path = os.path.join(os.path.dirname(values.fix_file_path), "fix-file-backup.c") + values.fix_file_path = pjoin(values.dir_source, values.fix_file_rel_path) + values.backup_file_path = pjoin(os.path.dirname(values.fix_file_path), "fix-file-backup.c") values.fix_line = config_dict.getint('fix-line') values.build_cmd = config_dict['build-cmd'] # OPTIONAL @@ -121,20 +121,20 @@ def filter_store_initial_tests_and_snapshots(bound_time=True): os.mkdir(values.dir_afl_pass) if not os.path.isdir(values.dir_afl_fail): 
os.mkdir(values.dir_afl_fail) - raw_fails_dir = os.path.join(values.dir_afl_raw_output, "crashes") - raw_passes_dir = os.path.join(values.dir_afl_raw_output, "normals") - raw_fails = [os.path.join(raw_fails_dir, t) for t in os.listdir(raw_fails_dir)] - raw_passes = [os.path.join(raw_passes_dir, t) for t in os.listdir(raw_passes_dir)] + raw_fails_dir = pjoin(values.dir_afl_raw_output, "crashes") + raw_passes_dir = pjoin(values.dir_afl_raw_output, "normals") + raw_fails = [pjoin(raw_fails_dir, t) for t in os.listdir(raw_fails_dir)] + raw_passes = [pjoin(raw_passes_dir, t) for t in os.listdir(raw_passes_dir)] if values.files_normal_in: # consider outputs from normal run as well - raw_fails_dir_normal = os.path.join(values.dir_afl_raw_output_normal, "crashes") - raw_passes_dir_normal = os.path.join(values.dir_afl_raw_output_normal, "normals") - raw_fails.extend([os.path.join(raw_fails_dir_normal, t) for t in os.listdir(raw_fails_dir_normal)]) - raw_passes.extend([os.path.join(raw_passes_dir_normal, t) for t in os.listdir(raw_passes_dir_normal)]) + raw_fails_dir_normal = pjoin(values.dir_afl_raw_output_normal, "crashes") + raw_passes_dir_normal = pjoin(values.dir_afl_raw_output_normal, "normals") + raw_fails.extend([pjoin(raw_fails_dir_normal, t) for t in os.listdir(raw_fails_dir_normal)]) + raw_passes.extend([pjoin(raw_passes_dir_normal, t) for t in os.listdir(raw_passes_dir_normal)]) # preparation random.shuffle(raw_fails) random.shuffle(raw_passes) # also add the original exploit file to AFL dir and process it - shutil.copyfile(values.file_exploit, os.path.join(values.dir_afl_fail, "exploit")) + shutil.copyfile(values.file_exploit, pjoin(values.dir_afl_fail, "exploit")) raw_fails = [values.file_exploit] + raw_fails patch_for_snapshot() @@ -196,7 +196,7 @@ def process_raw_inputs(inputs, classification, seen_hashes, bound_time=True): logger.debug(f"Found a usable {classification} input: {t}.") num_saved += 1 file_name = classification + "-" + str(num_saved) - 
shutil.copyfile(t, os.path.join(destination_folder, file_name)) + shutil.copyfile(t, pjoin(destination_folder, file_name)) return num_saved, pass_snapshots, fail_snapshots, unique_hashes @@ -255,35 +255,43 @@ def filter_test_do_not_reach_crash_loc(pass_tests, fail_tests): return pass_remains, fail_remains -def save_run_result(patch_invs, write_patch=False): - with open(values.file_result, "w") as f: - if len(patch_invs) == 1: - f.write("SUCCESS (Exactly one patch invariant in the end) (Its correctness is not checked yet)\n") +def save_invariant_result(patch_invs): + if not os.path.isdir(values.dir_result): + os.mkdir(values.dir_result) + result_file = pjoin(values.dir_result, "invariants.txt") + with open(result_file, "w") as f: + if not patch_invs: + f.write("FAIL: no patch invariants can be generated.\n") else: - f.write("FAIL (More than one or no patch invariants in the end)\n") - f.write("\nPatch Invariants:\n") - f.write(str(len(patch_invs)) + "\n") - f.write(f'{[i for i in patch_invs]}') - if write_patch: - f.write("\nGenerated Patch:\n") - with open(values.file_final_patch, "r") as patch_f: - content = patch_f.read() - f.write(content) + f.write("SUCCESS: Some patch invariants are generated. (Their correctness is not checked yet.)\n") + f.write("\nPatch Invariants:\n") + f.write(str(len(patch_invs)) + "\n") + f.write(f'{[i for i in patch_invs]}') + f.write('\n') + +def save_one_patch(old_patch_file: str, patch_name: str): + """ + Save one patch to the final results directory. + """ + if not os.path.isdir(values.dir_result): + os.mkdir(values.dir_result) + new_path = pjoin(values.dir_result, patch_name) + shutil.copyfile(old_patch_file, new_path) def run_concfuzz_and_inference(backend): """ Entry for invoking ConcFuzz and then running backend for patch invariant inference. 
""" - conc_dir = os.path.join(values.dir_runtime, "testcases") + conc_dir = pjoin(values.dir_runtime, "testcases") conc_seeds = [values.file_exploit] + values.files_normal_in concfuzz.start(conc_seeds, values.fix_loc, conc_dir) - pass_dir = os.path.join(conc_dir, "pass") - fail_dir = os.path.join(conc_dir, "fail") + pass_dir = pjoin(conc_dir, "pass") + fail_dir = pjoin(conc_dir, "fail") conc_pass = os.listdir(pass_dir) conc_fail = os.listdir(fail_dir) - conc_pass = sorted([ os.path.join(pass_dir, t) for t in conc_pass]) - conc_fail = sorted([ os.path.join(fail_dir, t) for t in conc_fail]) + conc_pass = sorted([ pjoin(pass_dir, t) for t in conc_pass]) + conc_fail = sorted([ pjoin(fail_dir, t) for t in conc_fail]) # logger.info(f'After 30 minutes of ConcFuzz, there are {len(conc_pass)} passing tests' # f' and {len(conc_fail)} failing tests.') logger.info('Running generated inputs to collect snapshots ...') @@ -295,7 +303,7 @@ def run_concfuzz_and_inference(backend): logger.info(f'Patch invariants from ConcFuzz - ' f'#({len(candidate_exprs)}) : {[e for e in candidate_exprs]}.\n') # fini_logger() - save_run_result(candidate_exprs) + save_invariant_result(candidate_exprs) def run_aflfuzz_and_inference(backend): @@ -315,7 +323,7 @@ def run_aflfuzz_and_inference(backend): logger.info(f'Patch invariants from aflfuzz - ' f'#({len(candidate_exprs)}) : {[e for e in candidate_exprs]}.\n') # fini_logger() - save_run_result(candidate_exprs) + save_invariant_result(candidate_exprs) def main(): @@ -373,6 +381,7 @@ def main(): logger.info('Finished parsing config file.') + ########### For ablation study ########### if values.concfuzz: run_concfuzz_and_inference(backend) return @@ -380,6 +389,7 @@ def main(): if values.aflfuzz: run_aflfuzz_and_inference(backend) return + ########### END ablation study ########### # Here is the main starting point of VulnFix logger.info('Starting VulnFix now!') @@ -404,20 +414,26 @@ def main(): 
filter_store_initial_tests_and_snapshots(bound_time=False) afl_pass = os.listdir(values.dir_afl_pass) afl_fail = os.listdir(values.dir_afl_fail) - values.all_pass_inputs = sorted([ os.path.join(values.dir_afl_pass, t) for t in afl_pass ]) - values.all_fail_inputs = sorted([ os.path.join(values.dir_afl_fail, t) for t in afl_fail ]) + values.all_pass_inputs = sorted([ pjoin(values.dir_afl_pass, t) for t in afl_pass ]) + values.all_fail_inputs = sorted([ pjoin(values.dir_afl_fail, t) for t in afl_fail ]) # STEP (3): generate initial candidate invariant with backend backend.generate_input_from_snapshots() - candidate_exprs = backend.run() + initial_exprs = backend.run() logger.info(f'--- Initial patch invariants - ' - f'#({len(candidate_exprs)}) : {[e for e in candidate_exprs]} ---\n') + f'#({len(initial_exprs)}) : {[e for e in initial_exprs]} ---\n') # check whether there are some initial patch invariants - if candidate_exprs: + final_patch_invs = initial_exprs + if initial_exprs: + # keeps track of the latest non-empty invariants + # This is for generating plausible patches, even if all invs are invalidated + # in later rounds of snapshot fuzzing. + latest_non_empty_invs = initial_exprs + # we can do more if initial invariants are not empty # STEP (4): refine candidate expr by mutating at fix location logger.info('Starting snapshot fuzzing stage ...') - refiner = CeRefiner(candidate_exprs, values.all_pass_inputs, values.all_fail_inputs, backend) + refiner = CeRefiner(initial_exprs, values.all_pass_inputs, values.all_fail_inputs, backend) while True: if time.time() > time_end: # time budget exhausted logger.info('Total timeout reached.') @@ -425,40 +441,58 @@ if values.early_term and refiner.reach_early_termination_criteria(): logger.info('Repeatedly getting the same invariant. 
Stopping snapshot fuzzing now.') break - curr_patch_invs = refiner.one_step_refinement(3) - if not curr_patch_invs: # backend fails to produce any result + refined_list = refiner.one_step_refinement(3) + if not refined_list: + # backend fails to produce any result this round break - final_patch_invs = refiner.candidate_exprs # final result after snapshot fuzzing - else: - # initial patch invariants are empty, just return those without doing snapshot fuzzing - final_patch_invs = candidate_exprs + # backend produced non-empty list of invs => record this + latest_non_empty_invs = refined_list + + # final result after snapshot fuzzing + final_patch_invs = latest_non_empty_invs logger.info(f'--- Final patch invariants - ' f'#({len(final_patch_invs)}) : {[e for e in final_patch_invs]} ---\n') - is_patched = False if len(final_patch_invs) == 0: logger.info('Could not infer a patch invariant with the current invariant templates/grammar.') elif len(final_patch_invs) != 1: logger.info('More than one final patch invariant.') - else: # got only 1 patch invariant - if values.backend_choice == 'daikon': - # only do patch generation for daikon backend to demonstrate the idea - patch_inv = final_patch_invs[0] - logger.info(f'Generating patch from the patch invariant `{patch_inv}` ...') - try: - generator = PatchGenerator(patch_inv) - is_patched = generator.gen() - except Exception as e: - logger.info(f'Patch generation unsuccessful due to exception {e}.') - is_patched = False + else: # only 1 patch invariant + logger.info('Exactly one final patch invariant.') + + # save invariants before generating patches + save_invariant_result(final_patch_invs) + + num_patches = 0 + logger.info(f"Attempting to generate patches from {len(final_patch_invs)} patch invariant(s) ...") + for inv in final_patch_invs: + if values.backend_choice != 'daikon': + logger.warning(f'Patch generation is only supported for daikon backend. 
Skipping.') + break + logger.info(f'Generating patch from the patch invariant `{inv}` ...') + try: + generator = PatchGenerator(inv) + patch_file = generator.gen() + except Exception as e: + logger.warning(f'{inv}: Patch generation unsuccessful due to exception {e}.') + continue + # let's see whether a patch file has been generated for this inv + if patch_file is None: + continue + # patch file generated successfully + num_patches += 1 + new_patch_f_name = f"{num_patches}.patch" + save_one_patch(patch_file, new_patch_f_name) # fini_logger() - if is_patched: - save_run_result(final_patch_invs, write_patch=True) + if num_patches == 0: + logger.info('No patches generated.') else: - save_run_result(final_patch_invs) + logger.info(f'Generated {num_patches} patches.') + + logger.info(f"VulnFix finished. Please find results at {values.dir_result}.") if __name__ == "__main__": diff --git a/src/patch_gen.py b/src/patch_gen.py index 16f7edc..ee9e5e9 100644 --- a/src/patch_gen.py +++ b/src/patch_gen.py @@ -2,6 +2,7 @@ import re import shutil import clang.cindex as cc +from typing import Optional import values from subroutines import * @@ -18,6 +19,7 @@ def __init__(self, inv): """ self.inv = inv self.fix_line = values.fix_line + # a temporary location on disk to store the diff patch file self.patch_file_path = values.fix_file_path + ".patch" self.need_include_ghost = False self.sed_include_cmd = "" @@ -109,7 +111,7 @@ def __is_keywords_in_line(self, line, keywords): return False - def gen(self): + def gen(self) -> Optional[str]: """ Entry point for generating a patch, given a patch invariant. @@ -122,41 +124,43 @@ def gen(self): new condition. Since there is no simple way of deciding them, the two choices are tried one by one until a patch that passes validation is produced. + + :returns: Path to a patch file if succeed; None if fail. 
""" - patched = False lines = list() with open(values.backup_file_path, "r") as f: lines = f.readlines() # check what the fix line actually is fix_line_index = self.fix_line - 1 ### 0-indexing! fix_line_content = lines[fix_line_index] + patch_file = None if self.is_if_cond_line(fix_line_content) or self.is_while_cond_line(fix_line_content): - patched = self.gen_patch_for_if_while(lines[fix_line_index:]) + patch_file = self.gen_patch_for_if_while(lines[fix_line_index:]) elif self.is_for_cond_line(fix_line_content): - patched = self.gen_patch_for_for(lines[fix_line_index:]) + patch_file = self.gen_patch_for_for(lines[fix_line_index:]) - if not patched: + if patch_file is None: # Now, either patch location is not condition, # or, patch location is condition, but failed to integrate inv into cond # generate walkaround fix with patch invariant - patched = self.gen_patch_for_non_cond() + patch_file = self.gen_patch_for_non_cond() - if patched: - logger.info(f"Patch generation successful! " - f"Please find the patch at: {values.file_final_patch}.") - else: + if patch_file is None: + # too bad, we tried all options but still fail logger.info("Patch generation unsuccessful. " - "However, a single patch invariant has been generated. " + "However, some patch invariants has been generated. " "Please manually generate a patch based on the patch invariant.") + else: + logger.info(f"Patch generation successful for this inv!") - return patched + return patch_file - def gen_patch_for_for(self, trailing_lines): + def gen_patch_for_for(self, trailing_lines) -> Optional[str]: """ Integrate the patch invariant into an existing for condition. :param trailing_lines: list of lines starting from the patch line. - :returns: True if succeeds, False if fails. + :returns: Path to a patch file if succeed; None if fail. 
""" logger.debug("Trying to integrate patch invariant into for-condition ...") # determines the open/close ; position of the for-statement @@ -185,11 +189,12 @@ def gen_patch_for_for(self, trailing_lines): return self.enumerate_patch_options(cmd_registry) - def enumerate_patch_options(self, cmd_options): + def enumerate_patch_options(self, cmd_options) -> Optional[str]: """ Enumerate and try out the patch options, until a successful patch is seen. :param cmd_options: list of cmd lists, where each list represents one patch option. - :returns: True if any one option succeeds; False if all fail. + :returns: Path to a patch file, if any option succeeds; + None if all fail. """ for cmd_list in cmd_options: restore_orig_patch_file() @@ -199,13 +204,13 @@ def enumerate_patch_options(self, cmd_options): os.system(self.sed_include_cmd) is_valid_patch = self.rebuild_and_validate_patch() if is_valid_patch: - self.gen_final_patch_file() + patch_f_path = self.gen_final_patch_file() logger.debug("Successfully integrated patch into existing condition!") - return True + return patch_f_path logger.debug(f"Failed to integrate patch invariant into existing condition. " "Generating walkaround patch ...") - return False + return None def __find_semicolon_positions(self, trailing_lines): @@ -224,11 +229,11 @@ def __find_semicolon_positions(self, trailing_lines): return [-1, -1, -1, -1] - def gen_patch_for_if_while(self, trailing_lines): + def gen_patch_for_if_while(self, trailing_lines) -> Optional[str]: """ Integrate the patch invariant into an existing if/while condition expr. :param trailing_lines: list of lines starting from the patch line. - :returns: True if succeeds, False if fails. + :returns: Path to a patch file if succeed; None if fail. 
""" logger.debug("Trying to integrate patch invariant into if/while-condition ...") # determines the open/close parenthesis position of the condition @@ -370,12 +375,14 @@ def __find_closing_bracket_position(self, trailing_lines): return (-1, -1) - def gen_patch_for_non_cond(self): + def gen_patch_for_non_cond(self) -> Optional[str]: """ Used if the fix line does not contain a condition. In this case, following patch will be inserted before the fix line: + if (!(patch invariant)) exit(1); - :returns: True if succeeds, False if fails. + + :returns: Path to a patch file, if any option succeeds; + None if all fail. """ logger.debug("Trying to generate walkaround patch ...") restore_orig_patch_file() @@ -387,21 +394,25 @@ def gen_patch_for_non_cond(self): os.system(self.sed_include_cmd) is_valid_patch = self.rebuild_and_validate_patch() if is_valid_patch: - self.gen_final_patch_file() + patch_f_path = self.gen_final_patch_file() logger.debug("Successfully generated walkaround patch!") else: + patch_f_path = None logger.debug(f"Failed to generate walkaround patch from patch invariant `{self.inv}`.") - return is_valid_patch + return patch_f_path - def gen_final_patch_file(self): + def gen_final_patch_file(self) -> str: """ - Produce final .patch file, and moves it to runtime dir. + Produce final .patch file. + :returns: path to the generated patch file. + Clients should copy it if they want to preserve the content. 
""" diff_cmd = ("diff -u " + values.backup_file_path + " " + values.fix_file_path + " > " + self.patch_file_path) os.system(diff_cmd) - shutil.copy2(self.patch_file_path, values.file_final_patch) + # shutil.copy2(self.patch_file_path, values.file_final_patch) + return self.patch_file_path def rebuild_and_validate_patch(self): diff --git a/src/values.py b/src/values.py index 721d0af..c4c3f2c 100644 --- a/src/values.py +++ b/src/values.py @@ -1,5 +1,5 @@ import os -from pathlib import Path +from os.path import join as pjoin ########### # Path definitions @@ -7,17 +7,18 @@ # dir_root = str(Path(__file__).parent.parent.resolve()) dir_root = "/home/yuntong/vulnfix" -dir_runtime = os.path.join(dir_root, "runtime") # set at runtime -dir_lib = os.path.join(dir_root, "lib") -dir_thirdparty = os.path.join(dir_root, "thirdparty") -dir_eusolver = os.path.join(dir_thirdparty, "eusolver") -dir_eusolver_src = os.path.join(dir_eusolver, "src") -dir_cvc5 = os.path.join(dir_thirdparty, "cvc5") -full_cvc5 = os.path.join(dir_cvc5, "build", "bin", "cvc5") -dir_daikon = os.path.join(dir_thirdparty, "daikon") -full_daikon = os.path.join(dir_daikon, "daikon.jar") -dir_afl = os.path.join(dir_thirdparty, "AFL") -dir_temp = os.path.join(dir_root, "run-temp") # a temp dir to store runtime-generated junk files +dir_runtime = pjoin(dir_root, "runtime") # set at runtime +dir_result = pjoin(dir_runtime, "result") +dir_lib = pjoin(dir_root, "lib") +dir_thirdparty = pjoin(dir_root, "thirdparty") +dir_eusolver = pjoin(dir_thirdparty, "eusolver") +dir_eusolver_src = pjoin(dir_eusolver, "src") +dir_cvc5 = pjoin(dir_thirdparty, "cvc5") +full_cvc5 = pjoin(dir_cvc5, "build", "bin", "cvc5") +dir_daikon = pjoin(dir_thirdparty, "daikon") +full_daikon = pjoin(dir_daikon, "daikon.jar") +dir_afl = pjoin(dir_thirdparty, "AFL") +dir_temp = pjoin(dir_root, "run-temp") # a temp dir to store runtime-generated junk files dir_afl_raw_input = "" dir_afl_raw_output = "" @@ -45,11 +46,9 @@ file_pass_ss_pool = "" 
file_fail_ss_pool = "" file_logging = "" -file_final_patch = "" -file_result = "" # daikon-related files -file_daikon_config = os.path.join(dir_root, "daikon-config") +file_daikon_config = pjoin(dir_root, "daikon-config") file_daikon_feasibility_traces = "" file_daikon_pass_traces = "" file_daikon_fail_traces = ""