From aca51f0dc89f3144609e7f81c368bd541d17577e Mon Sep 17 00:00:00 2001
From: SIMRAN MAKHIJA <simran.makhija2@gmail.com>
Date: Thu, 2 May 2024 01:32:46 -0400
Subject: [PATCH] copy bench

---
 benchmark_copy/bench.py           | 161 ++++++++++++++++++++++++++++++
 src/handlers/namespace_handler.rs |   4 +-
 2 files changed, 162 insertions(+), 3 deletions(-)
 create mode 100644 benchmark_copy/bench.py

diff --git a/benchmark_copy/bench.py b/benchmark_copy/bench.py
new file mode 100644
index 0000000..108e233
--- /dev/null
+++ b/benchmark_copy/bench.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+# This script is used to benchmark the catalog server.
+# It will start the catalog server, seed the catalog with some namespaces and tables, and use vegeta to stress test the server.
+# vegeta: https://github.com/tsenart/vegeta
+# Install on mac: brew install vegeta
+
+import subprocess as sp
+import time
+import signal
+import sys
+import requests
+import argparse
+import string
+import random
+
+
+def get_random_str(length=8):
+    letters = string.ascii_lowercase
+    return ''.join(random.choice(letters) for _ in range(length))
+
+
+def run(cmd, note, bg=False, out=None):
+    print(f"{note.ljust(48)}...", end=" ", flush=True)
+    try:
+        res = None
+        if out:
+            with open(out, "a") as f:
+                if bg:
+                    res = sp.Popen(cmd, shell=True, stdout=f, stderr=f)
+                else:
+                    sp.run(cmd, shell=True, check=True,
+                           stdout=f, stderr=f)
+        else:
+            if bg:
+                res = sp.Popen(cmd,   stdout=sp.DEVNULL, stderr=sp.DEVNULL)
+            else:
+                sp.run(cmd, shell=True, check=True,
+                       stdout=sp.DEVNULL, stderr=sp.DEVNULL)
+        print("DONE!")
+        return res
+    except sp.CalledProcessError as e:
+        print("FAIL!")
+        print("Error:", e)
+
+
+TEST_ROOT_DIR = "test"
+DEFAULT_BINARY_NAME = "catalog2"
+DEFAULT_DB_ROOT_DIR = f"{TEST_ROOT_DIR}/db"
+DEFAULT_BASE_URL = "http://127.0.0.1:8000/v1/"
+DEFAULT_NAMESPACE_NUM = 1
+DEFAULT_TABLE_NUM = 1
+DEFAULT_RATE = 8
+
+parser = argparse.ArgumentParser(description="Benchmark.")
+parser.add_argument("-b", "--binary_name", type=str,
+                    default=DEFAULT_BINARY_NAME, help="Name of the catalog binary.")
+parser.add_argument("-d", "--db_root", type=str,
+                    default=DEFAULT_DB_ROOT_DIR, help="Root directory for the database.")
+parser.add_argument("-u", "--base_url", type=str,
+                    default=DEFAULT_BASE_URL, help="Base URL for catalog server.")
+parser.add_argument("-n", "--namespace_num", type=int,
+                    default=DEFAULT_NAMESPACE_NUM, help="The number of namespace to seed in catalog.")
+parser.add_argument("-t", "--table_num", type=int,
+                    default=DEFAULT_TABLE_NUM, help="The number of table to seed in catalog.")
+parser.add_argument("-r", "--rate", type=int,
+                    default=DEFAULT_RATE, help="Request rate.")
+parser.add_argument("-p", "--plot", action="store_true",
+                    default=False, help="Generate a plot of this benchmark.")
+args = parser.parse_args()
+
+
+CATALOG_LOG = f"{TEST_ROOT_DIR}/catalog.log"
+
+# build catalog in release mode
+run(f"rm -rf {TEST_ROOT_DIR} && mkdir {TEST_ROOT_DIR}",
+    note="initializing test dir")
+run(f"cargo build --release && cp target/release/{args.binary_name} {TEST_ROOT_DIR}/{args.binary_name}",
+    note="building catalog in release mode")
+catalog_server = run(f"{TEST_ROOT_DIR}/{args.binary_name} --db-root {args.db_root}",
+                     note="starting catalog server", bg=True, out=CATALOG_LOG)
+print("Waiting for catalog server to start...")
+time.sleep(1)
+
+# seeding the catalog, uniformly distribute tables to namespaces
+print(f"Seeding namespaces and tables...")
+NAMESPACE_ENDPOINT = "namespaces"
+TABLE_ENDPOINT = "tables"
+namespaces = []
+table_per_namespace = args.table_num // args.namespace_num
+for i in range(args.namespace_num):
+    namespace = get_random_str(32)
+    tables = []
+    for j in range(table_per_namespace):
+        tables.append(get_random_str(32))
+    namespaces.append({'name': namespace, 'tables': tables})
+    # create namespace
+    response = requests.post(f"{args.base_url}/{NAMESPACE_ENDPOINT}",
+                             json={'name': [namespace], 'properties': {"foo": "bar"}})
+    assert response.status_code == 200, f"Failed to create namespace {namespace}"
+
+    # crate tables
+    for table in tables:
+        response = requests.post(
+            f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace}/{TABLE_ENDPOINT}",
+            json={'name': table}
+        )
+        assert response.status_code == 201, f"Failed to create table in {namespace}"
+
+print(f"Seeded {len(namespaces)} namespaces and {len(namespaces) * table_per_namespace} tables.")
+
+# test begins
+# 1. single endpoint stress test
+namespace = namespaces[0]
+table = namespace['tables'][0]
+targets = {
+    "get_table": f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace['name']}/{TABLE_ENDPOINT}/{table}",
+    "list_table": f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace['name']}/{TABLE_ENDPOINT}",
+    "get_namespace": f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace['name']}",
+    "list_namespace": f"{args.base_url}/{NAMESPACE_ENDPOINT}"
+}
+
+for name, target in targets.items():
+    STATISTIC_FILE = f"{TEST_ROOT_DIR}/results_{name}.bin"
+    attack = f"echo 'GET {target}' | vegeta attack -rate={args.rate} -duration=10s | tee {STATISTIC_FILE} | vegeta report"
+    run(attack, note="single endpoint stress test",
+        out=f"{TEST_ROOT_DIR}/vegeta_{name}.log")
+    if args.plot:
+        PLOT_FILE = f"{TEST_ROOT_DIR}/plot_{name}.html"
+        run(f"cat {STATISTIC_FILE} | vegeta plot > {PLOT_FILE}",
+            note="generating plot")
+# ... more?
+# 2. random endpoint stress test
+# Define the file path
+PATH_TARGET_FILE = f"{TEST_ROOT_DIR}/requests_get_table.txt"
+
+# Write the URLs to the file
+with open(PATH_TARGET_FILE, "w") as file:
+    for i in range(len(namespaces)):
+        random_namespace = random.choice(namespaces)
+        random_table = random.choice(random_namespace['tables'])
+
+        # Generate request URL
+        target = f"{args.base_url}/{NAMESPACE_ENDPOINT}/{random_namespace['name']}/{TABLE_ENDPOINT}/{random_table}"
+        request_url = f"GET {target}"
+
+        file.write(request_url + "\n")
+
+print("URLs have been written to", PATH_TARGET_FILE)
+
+
+STATISTIC_FILE = f"{TEST_ROOT_DIR}/results_random.bin"
+attack = f"vegeta attack -targets={PATH_TARGET_FILE} -rate={args.rate} -duration=60s | tee {STATISTIC_FILE} | vegeta report"
+run(attack, note="random endpoints stress test",
+    out=f"{TEST_ROOT_DIR}/vegeta_random.log")
+if args.plot:
+    PLOT_FILE = f"{TEST_ROOT_DIR}/plot_random.html"
+    run(f"cat {STATISTIC_FILE} | vegeta plot > {PLOT_FILE}",
+        note="generating plot")
+
+# clean up
+catalog_server.send_signal(signal.SIGINT)
diff --git a/src/handlers/namespace_handler.rs b/src/handlers/namespace_handler.rs
index f182cbf..aca213f 100644
--- a/src/handlers/namespace_handler.rs
+++ b/src/handlers/namespace_handler.rs
@@ -11,9 +11,7 @@ use std::sync::Arc;
 
 /*
     TODO:
-    if a namespace or table already exists, you might want to return a StatusCode::CONFLICT
-    instead of StatusCode::INTERNAL_SERVER_ERROR. Similarly, if a namespace or table is not found,
-    you might want to return a StatusCode::NOT_FOUND.
+    Parent Namespace
 */
 pub async fn list_namespaces(
     State(repo): State<Arc<NamespaceRepository>>,