Skip to content

Commit

Permalink
Make process improvements
Browse files Browse the repository at this point in the history
- Move to bash as that was implicitly expected (ref. #vidyut on Discord)
- Some refactoring.
- Sub-make is correctly called when using make create_all_data.
- Use -j`nproc` in make.
- Ignore venv in git.
  • Loading branch information
gouenji-shuuya committed Mar 12, 2023
1 parent 38f4dd7 commit f38da63
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 53 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
/data/
/dcs-data/
*.log
**/www/env
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ docs:

# Creates and collects all data files required to use Vidyut.
create_all_data:
@./scripts/create_all_data.sh
@./scripts/create_all_data.sh $(MAKE)

create_sandhi_rules:
RUST_LOG=info cargo run --release --bin create_sandhi_rules -- \
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ tests:
```shell
$ git clone https://github.com/ambuda-org/vidyut.git
$ cd vidyut
$ make test
$ make -j`nproc` test
```

Your first build will likely take a few minutes, but future builds will
Expand All @@ -87,7 +87,7 @@ be much faster.
Next, we recommend creating and collecting our rich linguistic data:

```shell
$ make create_all_data
$ make -j`nproc` create_all_data
```

This command will take several minutes, but most users will not need to re-run
Expand Down Expand Up @@ -157,8 +157,8 @@ Documentation
-------------

To view documentation for all crates (including private modules and structs),
run `make docs`. This command will generate Rust's standard documentation and
open it in your default web browser.
run ``make -j`nproc` docs``. This command will generate Rust's standard
documentation and open it in your default web browser.


Contributing
Expand Down
78 changes: 50 additions & 28 deletions scripts/create_all_data.sh
Original file line number Diff line number Diff line change
@@ -1,54 +1,76 @@
#!/usr/bin/env sh
#!/usr/bin/env bash

# Create all of the linguistic data necessary for general usage.

# Clean up temporary files, if they exist.
rm -Rf data-git 2&> /dev/null
rm -Rf dcs-data 2&> /dev/null

# Exit if any step in this install script fails.
set -e

# Clean up temporary files, if they exist.
rm -rf data-git
rm -rf dcs-data

# Create necessary directories.
# NOTE: "${1}" must not be appended to this path. The first argument is the
# make command handed over by the Makefile (`create_all_data.sh $(MAKE)`), so
# using it as a path component would create a stray "data/build/make"
# directory.
mkdir -p "data/build"

echo "========================="
echo "| DCS corpus data |"
echo "========================="
echo
if [ -e "data/raw/dcs" ]; then

echo -e "
=========================
| DCS corpus data |
=========================
"

if [[ -e "data/raw/dcs" ]]; then
echo "Training data already exists -- skipping fetch."
else
echo "Training data does not exist -- fetching."

mkdir -p "data/raw/dcs"
git clone --depth 1 https://github.com/OliverHellwig/sanskrit.git dcs-data

mv dcs-data/dcs/data/conllu data/raw/dcs/conllu
rm -Rf dcs-data
rm -rf dcs-data
fi
echo
echo "========================="
echo "| Linguistic data fetch |"
echo "========================="
echo
if [ -e "data/raw/lex" ]; then


echo -e "
=========================
| Linguistic data fetch |
=========================
"

# Fetch and generate the lexical data unless data/raw/lex is already present.
if [[ -e "data/raw/lex" ]]; then
    echo "Lexical data already exists -- skipping fetch."
else
    echo "Lexical data does not exist -- fetching."

    git clone --depth=1 https://github.com/sanskrit/data.git data-git
    python3 data-git/bin/make_data.py --make_prefixed_verbals

    # Create the destination only after the clone and the generation step
    # have succeeded. This script runs under `set -e`, so creating the
    # directory up front would leave an empty data/raw/lex behind on failure,
    # and the existence check above would then skip the fetch on every
    # subsequent run.
    mkdir -p "data/raw/lex"
    mv data-git/all-data/* data/raw/lex

    rm -rf data-git
fi
echo
echo "========================="
echo "| Vidyut build |"
echo "========================="
echo
make create_kosha
make test_kosha
make create_sandhi_rules
make train_cheda
make eval_cheda
echo
echo "Complete."


echo -e "
=========================
| Vidyut build |
=========================
"

# Choose the make command for the build steps below: use the command passed
# as the first argument (the Makefile passes its own $(MAKE)), otherwise fall
# back to a parallel `make`.
if [[ -n "${1:-}" ]]; then
    make_cmd=$1
else
    # `nproc` is a GNU coreutils tool and is missing on some systems. Because
    # this script runs under `set -e`, a failing command substitution would
    # abort the whole script, so fall back to getconf and finally to one job.
    njobs=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)
    make_cmd="make -j${njobs}"
fi

# $make_cmd is intentionally left unquoted so that "make -jN" is split into
# the command and its flag. Targets run in order; `set -e` stops at the first
# failure.
for target in create_kosha test_kosha create_sandhi_rules train_cheda eval_cheda; do
    $make_cmd "$target"
done


echo -e "\nComplete."
20 changes: 11 additions & 9 deletions vidyut-cheda/scripts/fetch_training_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python3

"""Fetches training data from GitHub.
"""
Fetches training data from GitHub.
We could do this in a shell script, but I find this more readable.
"""
Expand All @@ -9,12 +10,13 @@
from pathlib import Path


training_data = Path("dcs-data")
if not training_data.exists():
print(f"Training data folder '{training_data}' does not exist -- fetching.")
subprocess.check_call(
f"git clone --depth 1 https://github.com/OliverHellwig/sanskrit.git {training_data}",
shell=True,
)
train_folder = Path("dcs-data")

if train_folder.exists():
print(f"Training data folder '{train_folder}' exists -- skipping fetch.")
else:
print(f"Training data folder '{training_data}' exists -- skipping fetch.")
print(f"Training data folder '{train_folder}' does not exist -- fetching.")

repo_link = "https://github.com/OliverHellwig/sanskrit.git"
subprocess.check_call(f"git clone --depth 1 {repo_link} {train_folder}",
shell=True)
28 changes: 17 additions & 11 deletions vidyut-prakriya/scripts/run-debugger.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
#!/usr/bin/env sh
if [[ ! $(command -v wasm-pack) ]]
then
#!/usr/bin/env bash

set -e # Exit on failure.

# Bail out early with install instructions when wasm-pack is not on PATH.
if ! command -v wasm-pack > /dev/null; then
    printf '%s\n' \
        "Our debugger requires wasm-pack. Please install wasm-pack:" \
        "https://rustwasm.github.io/wasm-pack/installer/" \
        ""
    exit 1
fi

# `cargo` uses the debug build by default, but `wasm-pack` uses the release
# build by default instead. Creating this release build is slow, so instead
# explicitly use the debug build by passing the `--debug` flag.

wasm-pack build --target web --debug
mkdir -p www/static/wasm && cp pkg/* www/static/wasm
mkdir -p www/static/data && cp data/* www/static/data
cd www \
&& python3 -m venv env \
&& . env/bin/activate \
&& pip3 install -r requirements.txt \
&& python app.py

# Stage the contents of pkg/ (presumably the wasm-pack build output -- confirm)
# under the web app's static directory.
mkdir -p www/static/wasm
cp pkg/* www/static/wasm

# Stage the contents of data/ under the web app's static directory.
mkdir -p www/static/data
cp data/* www/static/data

# From inside www/: create a Python virtual environment in www/env, activate
# it, install the requirements, and start the app. With `set -e` enabled at
# the top of this script, any failing step stops the script here.
cd www
python3 -m venv env
. env/bin/activate
pip3 install -r requirements.txt
python app.py

0 comments on commit f38da63

Please sign in to comment.