Skip to content

Commit

Permalink
Add GreenBench implementation (google#1912)
Browse files Browse the repository at this point in the history
This PR ports the implementation of
https://github.com/Rigorous-Software-Engineering/greenbench into
fuzzbench. Essentially, we introduce a new experiment option that allows
users to opt in to the micro-benchmark fuzzing described in the GreenBench
paper.
  • Loading branch information
jiradeto authored Nov 30, 2023
1 parent ba22647 commit c734a74
Show file tree
Hide file tree
Showing 11 changed files with 160 additions and 6 deletions.
16 changes: 16 additions & 0 deletions common/experiment_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,17 @@ def get_custom_seed_corpora_filestore_path():
'custom_seed_corpora')


def get_oss_fuzz_corpora_unarchived_path():
    """Returns the path, under the experiment filestore, of the directory
    that holds the unarchived OSS-Fuzz corpora."""
    experiment_filestore = get_experiment_filestore_path()
    return posixpath.join(experiment_filestore, 'oss_fuzz_unarchived')


def get_random_corpora_filestore_path():
    """Returns the path, under the experiment filestore, that contains the
    seed corpora for the target fuzzing experiment."""
    experiment_filestore = get_experiment_filestore_path()
    return posixpath.join(experiment_filestore, 'random_corpora')


def get_dispatcher_instance_name(experiment: str) -> str:
    """Builds the dispatcher instance name for |experiment|, which is the
    experiment name prefixed with 'd-'."""
    prefix = 'd-'
    return f'{prefix}{experiment}'
Expand Down Expand Up @@ -138,6 +149,11 @@ def is_local_experiment():
return bool(environment.get('LOCAL_EXPERIMENT'))


def is_micro_experiment():
    """Returns True if running a micro experiment, i.e. when the
    MICRO_EXPERIMENT value read through |environment| is truthy."""
    micro_experiment = environment.get('MICRO_EXPERIMENT')
    return bool(micro_experiment)


def get_trial_dir(fuzzer, benchmark, trial_id):
"""Returns the unique directory for |fuzzer|, |benchmark|, and
|trial_id|."""
Expand Down
101 changes: 101 additions & 0 deletions common/random_corpus_fuzzing_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for micro-experiment run."""

import random
import os
import tempfile
import multiprocessing
import zipfile
from typing import List

from common import experiment_utils
from common import filesystem
from common import logs

# Number of seed files randomly sampled for each trial group.
MAX_SOURCE_CORPUS_FILES = 1
# Archive members larger than this many bytes (1 MiB) are not unpacked.
CORPUS_ELEMENT_BYTES_LIMIT = 1 * 1024 * 1024


def initialize_random_corpus_fuzzing(benchmarks: List[str], num_trials: int):
    """Prepares the random seed corpora of a micro experiment.

    Runs prepare_benchmark_random_corpus(benchmark, num_trials) for every
    benchmark in |benchmarks| on a multiprocessing pool, then logs completion.
    """
    starmap_arguments = [(benchmark, num_trials) for benchmark in benchmarks]
    with multiprocessing.Pool() as pool:
        pool.starmap(prepare_benchmark_random_corpus, starmap_arguments)
    logs.info('Done preparing corpus for micro experiment')


# pylint: disable=too-many-locals
def prepare_benchmark_random_corpus(benchmark: str, num_trials: int):
    """Prepares the per-trial-group random seed corpora for |benchmark|.

    Unpacks `<benchmark>.zip` found under
    experiment_utils.get_oss_fuzz_corpora_filestore_path(), skipping directory
    entries and members larger than CORPUS_ELEMENT_BYTES_LIMIT. Then, for each
    trial group in [0, num_trials), copies up to MAX_SOURCE_CORPUS_FILES
    randomly sampled files into
    `<random corpora path>/<benchmark>/trial-group-<N>`.

    If the archive yields fewer usable files than MAX_SOURCE_CORPUS_FILES, all
    available files are used (possibly none) and a warning is logged, instead
    of failing with the ValueError that random.sample() raises for an
    oversized sample.

    Returns:
        An empty list (kept so existing callers see the same return value).
    """
    # Temporary location to park corpus files before they get picked randomly.
    benchmark_unarchived_corpora = os.path.join(
        experiment_utils.get_oss_fuzz_corpora_unarchived_path(), benchmark)
    filesystem.create_directory(benchmark_unarchived_corpora)

    # Unzip the OSS-Fuzz corpus.
    corpus_archive_filename = f'{benchmark}.zip'
    oss_fuzz_corpus_archive_path = os.path.join(
        experiment_utils.get_oss_fuzz_corpora_filestore_path(),
        corpus_archive_filename)
    with zipfile.ZipFile(oss_fuzz_corpus_archive_path) as zip_file:
        idx = 0
        for seed_corpus_file in zip_file.infolist():
            if seed_corpus_file.filename.endswith('/'):
                # Ignore directories.
                continue
            if seed_corpus_file.file_size > CORPUS_ELEMENT_BYTES_LIMIT:
                # Do not unpack large files.
                continue
            # ZipFile.extract() treats its second argument as the directory
            # to extract into, so every member ends up inside its own
            # zero-padded, numbered subdirectory. The os.walk() below collects
            # the extracted files recursively.
            output_dir = os.path.join(benchmark_unarchived_corpora,
                                      f'{idx:016d}')
            zip_file.extract(seed_corpus_file, output_dir)
            idx += 1

    # Path used to store and feed the seed corpus to the benchmark runner.
    # Each trial group will have the same seed input(s).
    benchmark_random_corpora = os.path.join(
        experiment_utils.get_random_corpora_filestore_path(), benchmark)
    filesystem.create_directory(benchmark_random_corpora)

    # Sorted so the population order does not depend on directory listing
    # order.
    all_corpus_files = sorted(
        os.path.join(root, filename)
        for root, _, files in os.walk(benchmark_unarchived_corpora)
        for filename in files)

    # random.sample() raises ValueError when asked for more elements than the
    # population holds, so never request more files than are available.
    sample_size = min(MAX_SOURCE_CORPUS_FILES, len(all_corpus_files))
    if sample_size < MAX_SOURCE_CORPUS_FILES:
        logs.warning(
            f'Benchmark {benchmark} has only {len(all_corpus_files)} usable '
            f'seed file(s), fewer than the {MAX_SOURCE_CORPUS_FILES} '
            'requested per trial group.')

    with tempfile.TemporaryDirectory() as tmp_dir:
        src_dir = os.path.join(tmp_dir, 'source')
        # All trials in the same group will start with the same set of
        # randomly selected seed files.
        for trial_group_num in range(num_trials):
            custom_corpus_trial_dir = os.path.join(
                benchmark_random_corpora, f'trial-group-{trial_group_num}')
            filesystem.recreate_directory(src_dir)

            for file in random.sample(all_corpus_files, sample_size):
                filesystem.copy(file, src_dir)

            # Copy only the src directory.
            filesystem.copytree(src_dir, custom_corpus_trial_dir)

    return []
1 change: 1 addition & 0 deletions database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class Trial(Base):
# Columns used for preemptible experiments.
preemptible = Column(Boolean, default=False, nullable=False)
preempted = Column(Boolean, default=False, nullable=False)
trial_group_num = Column(Integer, nullable=True)

# Every trial has snapshots which is basically the saved state of that trial
# at a given time. The snapshots field here and the trial field on Snapshot,
Expand Down
11 changes: 9 additions & 2 deletions experiment/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import time
from typing import List

from common import random_corpus_fuzzing_utils
from common import experiment_path as exp_path
from common import experiment_utils
from common import logs
Expand Down Expand Up @@ -89,7 +90,7 @@ def _initialize_trials_in_db(trials: List[models.Trial]):
db_utils.bulk_save(trials)


class Experiment:
class Experiment: # pylint: disable=too-many-instance-attributes
"""Class representing an experiment."""

def __init__(self, experiment_config_filepath: str):
Expand All @@ -101,6 +102,7 @@ def __init__(self, experiment_config_filepath: str):
self.experiment_name = self.config['experiment']
self.git_hash = self.config['git_hash']
self.preemptible = self.config.get('preemptible_runners')
self.micro_experiment = self.config.get('micro_experiment')


def build_images_for_trials(fuzzers: List[str], benchmarks: List[str],
Expand All @@ -123,7 +125,8 @@ def build_images_for_trials(fuzzers: List[str], benchmarks: List[str],
models.Trial(fuzzer=fuzzer,
experiment=experiment_name,
benchmark=benchmark,
preemptible=preemptible) for _ in range(num_trials)
preemptible=preemptible,
trial_group_num=trial) for trial in range(num_trials)
]
trials.extend(fuzzer_benchmark_trials)
return trials
Expand All @@ -150,6 +153,10 @@ def dispatcher_main():
experiment.preemptible)
_initialize_trials_in_db(trials)

if experiment.micro_experiment:
random_corpus_fuzzing_utils.initialize_random_corpus_fuzzing(
experiment.benchmarks, experiment.num_trials)

create_work_subdirs(['experiment-folders', 'measurement-folders'])

# Start measurer and scheduler in separate threads/processes.
Expand Down
2 changes: 2 additions & 0 deletions experiment/resources/runner-startup-script-template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ docker run \
-e BENCHMARK={{benchmark}} \
-e EXPERIMENT={{experiment}} \
-e TRIAL_ID={{trial_id}} \
-e TRIAL_GROUP_NUM={{trial_group_num}} \
-e MICRO_EXPERIMENT={{micro_experiment}} \
-e MAX_TOTAL_TIME={{max_total_time}} \
-e SNAPSHOT_PERIOD={{snapshot_period}} \
-e NO_SEEDS={{no_seeds}} \
Expand Down
3 changes: 3 additions & 0 deletions experiment/run_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def _set_default_config_values(config: Dict[str, Union[int, str, bool]],
config['snapshot_period'] = config.get(
'snapshot_period', experiment_utils.DEFAULT_SNAPSHOT_SECONDS)
config['private'] = config.get('private', False)
config['micro_experiment'] = config.get('micro_experiment', False)


def _validate_config_parameters(
Expand Down Expand Up @@ -187,6 +188,8 @@ def read_and_validate_experiment_config(config_filename: str) -> Dict:
Requirement(False, int, False, ''),
'runner_memory':
Requirement(False, str, False, ''),
'micro_experiment':
Requirement(False, bool, False, ''),
}

all_params_valid = _validate_config_parameters(config, config_requirements)
Expand Down
16 changes: 15 additions & 1 deletion experiment/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,18 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path):
return seed_corpus_path if os.path.exists(seed_corpus_path) else None


def _unpack_random_corpus(corpus_directory):
    """Replaces |corpus_directory| with the random corpus stored for this
    runner's BENCHMARK and TRIAL_GROUP_NUM (trial group 0 when unset)."""
    shutil.rmtree(corpus_directory)

    benchmark_name = environment.get('BENCHMARK')
    group_number = environment.get('TRIAL_GROUP_NUM', 0)
    corpora_root = experiment_utils.get_random_corpora_filestore_path()
    source_corpus_dir = posixpath.join(corpora_root, benchmark_name,
                                       f'trial-group-{int(group_number)}')
    filestore_utils.cp(source_corpus_dir, corpus_directory, recursive=True)


def _copy_custom_seed_corpus(corpus_directory):
"""Copy custom seed corpus provided by user"""
shutil.rmtree(corpus_directory)
Expand Down Expand Up @@ -257,7 +269,9 @@ def set_up_corpus_directories(self):
FUZZ_TARGET_DIR, fuzz_target_name)
input_corpus = environment.get('SEED_CORPUS_DIR')
os.makedirs(input_corpus, exist_ok=True)
if not environment.get('CUSTOM_SEED_CORPUS_DIR'):
if environment.get('MICRO_EXPERIMENT'):
_unpack_random_corpus(input_corpus)
elif not environment.get('CUSTOM_SEED_CORPUS_DIR'):
_unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
else:
_copy_custom_seed_corpus(input_corpus)
Expand Down
12 changes: 9 additions & 3 deletions experiment/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ def start_trials(trials, experiment_config: dict, pool, core_allocation=None):
return started_trials


class TrialProxy:
class TrialProxy: # pylint: disable=too-many-instance-attributes
"""A proxy object for a model.Trial. TrialProxy's allow these fields to be
set and retreived without making any database calls."""

Expand All @@ -701,6 +701,7 @@ def __init__(self, trial):
self.time_ended = trial.time_ended
self.preemptible = trial.preemptible
self.cpuset = None
self.trial_group_num = trial.trial_group_num


def _initialize_logs(experiment):
Expand Down Expand Up @@ -729,7 +730,7 @@ def _start_trial(trial: TrialProxy, experiment_config: dict, cpuset=None):
logger.info('Start trial %d.', trial.id)
started = create_trial_instance(trial.fuzzer, trial.benchmark, trial.id,
experiment_config, trial.preemptible,
cpuset)
cpuset, trial.trial_group_num)
if started:
trial.time_started = datetime_now()
trial.cpuset = cpuset
Expand All @@ -743,6 +744,7 @@ def render_startup_script_template( # pylint: disable=too-many-arguments
fuzzer: str,
benchmark: str,
trial_id: int,
trial_group_num: int,
experiment_config: dict,
cpuset=None):
"""Render the startup script using the template and the parameters
Expand All @@ -760,6 +762,8 @@ def render_startup_script_template( # pylint: disable=too-many-arguments
'experiment': experiment,
'fuzzer': fuzzer,
'trial_id': trial_id,
'trial_group_num': trial_group_num,
'micro_experiment': experiment_config['micro_experiment'],
'max_total_time': experiment_config['max_total_time'],
'snapshot_period': experiment_config['snapshot_period'],
'experiment_filestore': experiment_config['experiment_filestore'],
Expand Down Expand Up @@ -790,13 +794,15 @@ def create_trial_instance( # pylint: disable=too-many-arguments
trial_id: int,
experiment_config: dict,
preemptible: bool,
cpuset=None) -> bool:
cpuset=None,
trial_group_num: int = 0) -> bool:
"""Create or start a trial instance for a specific
trial_id,fuzzer,benchmark."""
instance_name = experiment_utils.get_trial_instance_name(
experiment_config['experiment'], trial_id)
startup_script = render_startup_script_template(instance_name, fuzzer,
benchmark, trial_id,
trial_group_num,
experiment_config, cpuset)
startup_script_path = f'/tmp/{instance_name}-start-docker.sh'
with open(startup_script_path, 'w', encoding='utf-8') as file_handle:
Expand Down
1 change: 1 addition & 0 deletions experiment/test_data/experiment-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ measurers_cpus: null
runner_num_cpu_cores: 1
runner_machine_type: 'n1-standard-1'
private: false
micro_experiment: false
1 change: 1 addition & 0 deletions experiment/test_data/local-experiment-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ report_filestore: /tmp/web-reports
local_experiment: true
benchmarks: "benchmark-1,benchmark-2"
git_hash: "git-hash"
micro_experiment: false
2 changes: 2 additions & 0 deletions experiment/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ def test_create_trial_instance(benchmark, expected_image, expected_target,
-e BENCHMARK={benchmark} \\
-e EXPERIMENT=test-experiment \\
-e TRIAL_ID=9 \\
-e TRIAL_GROUP_NUM=0 \\
-e MICRO_EXPERIMENT=False \\
-e MAX_TOTAL_TIME=86400 \\
-e SNAPSHOT_PERIOD=900 \\
-e NO_SEEDS=False \\
Expand Down

0 comments on commit c734a74

Please sign in to comment.