diff --git a/.gitignore b/.gitignore index 9917264..dc6ece7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /data/ /dcs-data/ *.log +**/www/env diff --git a/Makefile b/Makefile index 5e114d4..6242363 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ docs: # Creates and collects all data files required to use Vidyut. create_all_data: - @./scripts/create_all_data.sh + @./scripts/create_all_data.sh $(MAKE) create_sandhi_rules: RUST_LOG=info cargo run --release --bin create_sandhi_rules -- \ diff --git a/README.md b/README.md index c88282f..2189fae 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ tests: ```shell $ git clone https://github.com/ambuda-org/vidyut.git $ cd vidyut -$ make test +$ make -j`nproc` test ``` Your first build will likely take a few minutes, but future builds will @@ -87,7 +87,7 @@ be much faster. Next, we recommend creating and collecting our rich linguistic data: ```shell -$ make create_all_data +$ make -j`nproc` create_all_data ``` This command will take several minutes, but most users will not need to re-run @@ -157,8 +157,8 @@ Documentation ------------- To view documentation for all crates (including private modules and structs), -run `make docs`. This command will generate Rust's standard documentation and -open it in your default web browser. +run ``make -j`nproc` docs``. This command will generate Rust's standard +documentation and open it in your default web browser. Contributing diff --git a/scripts/create_all_data.sh b/scripts/create_all_data.sh index e047b20..684d782 100755 --- a/scripts/create_all_data.sh +++ b/scripts/create_all_data.sh @@ -1,54 +1,76 @@ -#!/usr/bin/env sh +#!/usr/bin/env bash # Create all of the linguistic data necessary for general usage. -# Clean up temporary files, if they exist. -rm -Rf data-git 2&> /dev/null -rm -Rf dcs-data 2&> /dev/null # Exit if any step in this install script fails. set -e +# Clean up temporary files, if they exist. 
+rm -rf data-git +rm -rf dcs-data + # Create necessary directories. -mkdir -p "data/build/${1}" +mkdir -p data/build # $1 now carries the make command, so it must not be used as a path component. -echo "=========================" -echo "| DCS corpus data |" -echo "=========================" -echo -if [ -e "data/raw/dcs" ]; then + +echo -e " +========================= +| DCS corpus data | +========================= +" + +if [[ -e "data/raw/dcs" ]]; then echo "Training data already exists -- skipping fetch." else echo "Training data does not exist -- fetching." + mkdir -p "data/raw/dcs" git clone --depth 1 https://github.com/OliverHellwig/sanskrit.git dcs-data + mv dcs-data/dcs/data/conllu data/raw/dcs/conllu - rm -Rf dcs-data + rm -rf dcs-data fi -echo -echo "=========================" -echo "| Linguistic data fetch |" -echo "=========================" -echo -if [ -e "data/raw/lex" ]; then + + +echo -e " +========================= +| Linguistic data fetch | +========================= +" + +if [[ -e "data/raw/lex" ]]; then echo "Lexical data already exists -- skipping fetch." else echo "Lexical data does not exist -- fetching." + mkdir -p "data/raw/lex" git clone --depth=1 https://github.com/sanskrit/data.git data-git + python3 data-git/bin/make_data.py --make_prefixed_verbals mv data-git/all-data/* data/raw/lex + rm -rf data-git fi -echo -echo "=========================" -echo "| Vidyut build |" -echo "=========================" -echo -make create_kosha -make test_kosha -make create_sandhi_rules -make train_cheda -make eval_cheda -echo -echo "Complete." + + +echo -e " +========================= +| Vidyut build | +========================= +" + +if [[ "$1" == "" ]]; then + make_cmd="make -j$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)" # nproc is GNU-only; fall back to sysctl (macOS/BSD), then 1. +else + make_cmd=$1 +fi + +$make_cmd create_kosha +$make_cmd test_kosha +$make_cmd create_sandhi_rules +$make_cmd train_cheda +$make_cmd eval_cheda + + +echo -e "\nComplete." 
diff --git a/vidyut-cheda/scripts/fetch_training_data.py b/vidyut-cheda/scripts/fetch_training_data.py index 210cd71..8f8afac 100755 --- a/vidyut-cheda/scripts/fetch_training_data.py +++ b/vidyut-cheda/scripts/fetch_training_data.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 -"""Fetches training data from GitHub. +""" +Fetches training data from GitHub. We could do this in a shell script, but I find this more readable. """ @@ -9,12 +10,13 @@ from pathlib import Path -training_data = Path("dcs-data") -if not training_data.exists(): - print(f"Training data folder '{training_data}' does not exist -- fetching.") - subprocess.check_call( - f"git clone --depth 1 https://github.com/OliverHellwig/sanskrit.git {training_data}", - shell=True, - ) +train_folder = Path("dcs-data") + +if train_folder.exists(): + print(f"Training data folder '{train_folder}' exists -- skipping fetch.") else: - print(f"Training data folder '{training_data}' exists -- skipping fetch.") + print(f"Training data folder '{train_folder}' does not exist -- fetching.") + + repo_link = "https://github.com/OliverHellwig/sanskrit.git" + subprocess.check_call( + ["git", "clone", "--depth", "1", repo_link, str(train_folder)]) diff --git a/vidyut-prakriya/scripts/run-debugger.sh b/vidyut-prakriya/scripts/run-debugger.sh index 4a1d49e..63ba151 100755 --- a/vidyut-prakriya/scripts/run-debugger.sh +++ b/vidyut-prakriya/scripts/run-debugger.sh @@ -1,21 +1,27 @@ -#!/usr/bin/env sh -if [[ ! $(command -v wasm-pack) ]] -then +#!/usr/bin/env bash + +set -e # Exit on failure. + +if ! command -v wasm-pack > /dev/null 2>&1; then echo "Our debugger requires wasm-pack. Please install wasm-pack:" echo "https://rustwasm.github.io/wasm-pack/installer/" - echo exit 1 fi # `cargo` uses the debug build by default, but `wasm-pack` uses the release # build by default instead. Creating this release build is slow, so instead # explicitly use the debug build by passing the `--debug` flag. 
+ wasm-pack build --target web --debug -mkdir -p www/static/wasm && cp pkg/* www/static/wasm -mkdir -p www/static/data && cp data/* www/static/data -cd www \ - && python3 -m venv env \ - && . env/bin/activate \ - && pip3 install -r requirements.txt \ - && python app.py +mkdir -p www/static/wasm +cp pkg/* www/static/wasm + +mkdir -p www/static/data +cp data/* www/static/data + +cd www +python3 -m venv env +. env/bin/activate +pip3 install -r requirements.txt +python app.py