-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* adding dependency pandas * new sklearn * Good sklearn * run sklearn on CI --------- Co-authored-by: Zhuoxuan Zhang <[email protected]>
- Loading branch information
1 parent
e66798a
commit 97d9bd7
Showing
26 changed files
with
200 additions
and
84 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash
# Set up the sklearn benchmark: locate the repo top, create the report
# resource/output directories, and install the Python dependencies.

# Resolve the repository top level (or the superproject's, when run from a submodule),
# unless the caller already exported PASH_SPEC_TOP.
export PASH_SPEC_TOP=${PASH_SPEC_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)}

benchmark_dir="sklearn"

# Run from this script's own directory regardless of the caller's cwd.
# The inner substitution is quoted so paths containing spaces survive word splitting.
cd "$(realpath "$(dirname "$0")")"
mkdir -p "$PASH_SPEC_TOP/report/resources/sklearn"
mkdir -p "$PASH_SPEC_TOP/report/output/sklearn"

# Currently just dumped the entire dataset, but ideally we actually download it

pip install -r requirements.txt
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#!/usr/bin/env python3
"""Run the sklearn benchmark under plain sh and/or hs (pash-spec).

Records wall-clock time for each run, renames the trained model so the
two runs do not clobber each other, and (when both run) writes a diff
report of their return codes and stdout to an ``error`` file.
"""

import argparse
from pathlib import Path
import os
import time
from subprocess import run, PIPE

# NOTE(review): the original defined this parser twice back to back; once is enough.
parser = argparse.ArgumentParser(description="Run benchmark")
parser.add_argument('--window', default=5, type=int, help='window size to run hs with')
parser.add_argument('--target', choices=['hs-only', 'sh-only', 'both'],
                    help='to run with sh or hs')
parser.add_argument('--log', choices=['enable', 'disable'], default="enable",
                    help='whether to enable logging for hs')

env = os.environ.copy()
SCRIPT_NAME = "run.sh"


def do_sh_run(test_base: Path, output_base: Path, env: dict):
    """Run the benchmark script with /bin/sh.

    Writes the elapsed time to ``output_base/sh_time`` and renames the
    produced model to ``sh_trained_model.obj`` so a later hs run cannot
    overwrite it. Returns ``(returncode, stdout_bytes)``.
    """
    before = time.time()
    print(f'Running {test_base / SCRIPT_NAME}')
    result = run(['/bin/sh', test_base / SCRIPT_NAME], stdout=PIPE, env=env)
    duration = time.time() - before
    with open(output_base / "sh_time", 'w') as f:
        f.write(f'{duration}\n')
    out_dir = Path(env["OUTPUT_DIR"])
    os.rename(out_dir / "trained_model.obj", out_dir / "sh_trained_model.obj")
    return result.returncode, result.stdout


def do_hs_run(test_base: Path, output_base: Path, hs_base: Path, window: int, env: dict, log: bool):
    """Run the benchmark script under hs (pash-spec) with the given window.

    stderr goes to ``output_base/hs_log``; elapsed time to
    ``output_base/hs_time``. Returns ``(returncode, stdout_bytes)``.
    """
    cmd = [hs_base / 'pash-spec.sh', '--window', str(window)]
    if log:
        cmd.extend(['-d', '2'])
    cmd.append(test_base / SCRIPT_NAME)
    before = time.time()
    print(f'Running {cmd}')
    # Named log_file so it does not shadow the `log` flag parameter.
    with open(output_base / 'hs_log', 'w') as log_file:
        result = run(cmd, stdout=PIPE, stderr=log_file, env=env)
    duration = time.time() - before
    with open(output_base / "hs_time", 'w') as f:
        f.write(f'{duration}\n')
    out_dir = Path(env["OUTPUT_DIR"])
    os.rename(out_dir / "trained_model.obj", out_dir / "hs_trained_model.obj")
    return result.returncode, result.stdout


if __name__ == '__main__':
    args = parser.parse_args()
    test_base = Path(__file__).parent.resolve()
    hs_base = test_base.parent.parent.parent

    #######################
    # SPECIFY ENV VARS HERE

    # Environment values are plain strings: subprocess env dicts are
    # passed to the child process, which expects str, not Path.
    env['TMP'] = str(hs_base / 'report' / 'resources' / 'sklearn')
    env['RESULT'] = str(hs_base / 'report' / 'output' / 'sklearn')
    env['OUTPUT_DIR'] = str(hs_base / 'report' / 'output' / 'sklearn')

    #######################

    bench_base = test_base.parent
    local_name = os.sep.join(test_base.parts[-1:])
    print(local_name)
    output_base = hs_base / "report" / "output" / 'sklearn' / local_name
    run_hs = False
    run_sh = False
    if args.target in ["hs-only", "both"]:
        run_hs = True
    if args.target in ["sh-only", "both"]:
        run_sh = True
    if not run_hs and not run_sh:
        # The original did `raise("...")`, which raises a TypeError
        # (strings are not exceptions); raise a real exception instead.
        raise ValueError("Not running anything, add --target argument")
    output_base.mkdir(parents=True, exist_ok=True)

    if run_sh:
        output_sh = do_sh_run(test_base, output_base, env)
    if run_hs:
        output_hs = do_hs_run(test_base, output_base, hs_base, args.window, env, args.log == 'enable')
    if run_sh and run_hs:
        with open(output_base / 'error', 'w') as errf:
            print(output_sh[:100])
            if output_sh == output_hs:
                errf.write('')
            else:
                errf.write('error\n')
                errf.write(f'return code {output_sh[0]} vs {output_hs[0]}\n')
                errf.write(f'==== output sh ====\n')
                errf.write(output_sh[1].decode('UTF-8'))
                errf.write(f'==== output hs ====\n')
                errf.write(output_hs[1].decode('UTF-8'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,19 @@ | ||
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pickle
import os

"""Fetch the covtype dataset, split it train/test, and pickle the four splits into $TMP."""

# download_if_missing=False: the setup step is expected to have placed the
# dataset under inputs/ already; fail fast here if it has not.
raw_data = datasets.fetch_covtype(data_home="inputs", download_if_missing=False)

data = train_test_split(raw_data.data,
                        raw_data.target,
                        test_size=0.2,
                        random_state=0)
filenames = ['X_train', 'X_test', 'y_train', 'y_test']
# Default to ./tmp when TMP is unset so os.path.join does not crash on None.
tmp = os.environ.get('TMP', './tmp')
for datum, name in zip(data, filenames):
    filepath = os.path.join(tmp, f'{name}.obj')
    with open(filepath, 'w+b') as file:
        pickle.dump(datum, file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,14 @@ | ||
from sklearn.linear_model import _logistic
import sys
import pickle
import os

"""Load a pickled feature matrix (path in argv[1]), compute the max squared
row norm used by sklearn's logistic solver, and pickle the result into $TMP."""

with open(sys.argv[1], 'rb') as file:
    X = pickle.load(file)

max_squared_sum = _logistic.row_norms(X, squared=True).max()

# Default to ./tmp when TMP is unset so os.path.join does not crash on None.
tmp = os.environ.get('TMP', './tmp')
filepath = os.path.join(tmp, 'max_squared_sum.obj')
with open(filepath, 'w+b') as file:
    pickle.dump(max_squared_sum, file)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.