Skip to content

Commit

Permalink
[prakriya] Add various subanta and samasa rules
Browse files Browse the repository at this point in the history
New features:
- Rearrange and tweak subanta rules to provide much stronger support for
  subantas.
- Add experimental samasa rules that support the four major samasa
  types.
- Create `Pada` API to model a pada as a list of `Term`s. This is so
  that we can support more complex types of sandhi.

Expansions:
- Expand support for `derive_vakyas` to also accept padas.
- Expand `TermView` API in anticipation of more heavy use later on.
- Expand and refine support for optional rules, including a new
  `optionally` wrapper and a rename of `run_optional_at` to
  `optional_run_at`.
- Expand `Pratipadika` API to also hold a list of `Term`s. This is so
  that we can support more complex pratipadikas and subantas.

Code quality:
- Create `ganapatha.rs` and move most ganas into that module.
- Create `subanta.rs` and move most anga-subanta rules into that module.
- Add extensive comments on various public types.

Tests:
- Expand rule coverage from 1681 rules to 1900 rules.

Next steps:
- Add basic support for accent rules.
- Expand rule coverage to 2000 rules.
- Reduce number of ignored tests (currently 179).
  • Loading branch information
akprasad committed Nov 18, 2023
1 parent 30d26ae commit e54d782
Show file tree
Hide file tree
Showing 119 changed files with 10,937 additions and 4,379 deletions.
10 changes: 5 additions & 5 deletions vidyut-prakriya/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ test_tinantas:
--hash "f8934f99631e811c333c41ddd4925229d2faab0dd875bc549bb38350319706db"
../target/release/test_tinantas \
--test-cases test-files/tinantas-nic-kartari.csv \
--hash "5e173e6665872f205f8c6ca2addc166e221f0d435b800dee4360116d0a8af69b"
--hash "2e3d0f56c4e6d375b7064df034a7ee04a7cc91f10838ceee32cbeb37ad2870c5"
../target/release/test_tinantas \
--test-cases test-files/tinantas-san-kartari.csv \
--hash "a4bce1c19d54ea2246429dbd345acaca24c380ac3b8c888ddb442e9d862af679"
--hash "0dfec6333abf094ed8199694e2e55436991f837cacf223de3fd1223b576712e3"
../target/release/test_tinantas \
--test-cases test-files/tinantas-yan-kartari.csv \
--hash "b8c4b3cf7a7e5572af8a1290118f629fbf6812925fe6a41bf9ffadb47a658446"
--hash "08c5b0f9b6b2fa857018653583b63571eac3074804025d90c12e0c30a0db0616"
../target/release/test_tinantas \
--test-cases test-files/tinantas-basic-karmani.csv \
--hash "da0e4771bec284661bfd0f537734d44eb6e019e41a387e80dfaa80cf7dc27b03"
Expand All @@ -71,10 +71,10 @@ test_krdantas:
cargo build --release
../target/release/test_krdantas \
--test-cases test-files/krdantas-ktvA.csv \
--hash "13cd3088b99eeea4d30d91e58f28ad07e9082e6f92f122ff64f4c367dd4ff36c"
--hash "f4e31b5df19f578133834be6abaaa802eee3d836bdb5487cfcef3a936eeb2204"
../target/release/test_krdantas \
--test-cases test-files/krdantas-kta.csv \
--hash "650a4bc2e761f201389595785a5e6ab8a3023f657c58699aa269c01d28c75aae"
--hash "ddf06443f6ce62147fce21baed15afcb4720d1d9e69698157653cab3ace9e3c0"

test_subantas:
cargo run --bin test_subantas -- \
Expand Down
4 changes: 2 additions & 2 deletions vidyut-prakriya/scripts/check_rule_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def print_legend():
for path in glob.glob("**/*.rs", root_dir=tests, recursive=True):
with open(tests / path) as f:
for line in f:
if m := re.search(r"(\d+_\d+_\d+)", line):
tested_rules.add(m.group(1).replace('_', '.'))
for match in re.findall(r"(\d+_\d+_\d+)", line):
tested_rules.add(match.replace('_', '.'))

print_legend()
num_ok = 0
Expand Down
105 changes: 58 additions & 47 deletions vidyut-prakriya/src/ac_sandhi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
//! =========
//! (6.1.66 - 6.1.101)

use crate::char_view::{get_at, xy, CharPrakriya};
use crate::core::char_view::{get_at, xy, CharPrakriya};
use crate::core::iterators::xy_rule;
use crate::core::operators as op;
use crate::core::Prakriya;
use crate::core::Tag as T;
use crate::it_samjna;
use crate::iterators::xy_rule;
use crate::operators as op;
use crate::prakriya::Prakriya;
use crate::sounds as al;
use crate::sounds::{s, Set};
use crate::tag::Tag as T;
use lazy_static::lazy_static;

lazy_static! {
Expand All @@ -22,7 +22,6 @@ lazy_static! {
static ref EC: Set = s("ec");
static ref VAL: Set = s("val");
static ref HAL: Set = s("hal");
static ref YAN: Set = s("yaR");
}

pub fn try_lopo_vyor_vali(p: &mut Prakriya) {
Expand Down Expand Up @@ -84,53 +83,57 @@ pub fn apply_general_ac_sandhi(p: &mut Prakriya) {
},
);

cp.for_chars(xy(|x, y| EC.contains(x) && AC.contains(y)), |p, text, i| {
let x = text.as_bytes()[i] as char;
let sub = match x {
'e' => "ay",
'E' => "Ay",
'o' => "av",
'O' => "Av",
_ => panic!("Unexpected sub"),
};
p.run("6.1.78", |p| p.set_char_at(i, sub));
true
});

// HACK: lets the caller skip vowel sandhi between an upasarga and a dhatu so that forms such
// as prARinat are derived correctly.
//
// Returns true only when the term that `get_at` resolves for index `i` is an upasarga AND the
// last term of the prakriya is a dhatu. Panics with "present" if either lookup fails.
fn is_upasarga_sanadi_dhatu(p: &Prakriya, i: usize) -> bool {
    // Guard first: when the term at `i` is not an upasarga, the last term is never inspected.
    if !get_at(p, i).expect("present").is_upasarga() {
        return false;
    }
    p.terms().last().expect("present").is_dhatu()
}

cp.for_chars(
xy(|x, y| AK.contains(x) && AK.contains(y) && al::savarna(x).contains(y)),
|p, text, i| {
cp.for_chars(xy(|x, y| AC.contains(x) && AC.contains(y)), |p, text, i| {
p.dump();
let x = text.as_bytes()[i] as char;
let y = text.as_bytes()[i + 1] as char;

let t_x = get_at(p, i).expect("ok");

if t_x.has_tag(T::Pragrhya) {
// agnI iti, ...
p.step("6.1.125");
false
} else if AK.contains(x) && AK.contains(y) && al::savarna(x).contains(y) {
if is_upasarga_sanadi_dhatu(p, i) {
return false;
}

let x = text.as_bytes()[i] as char;
p.run("6.1.101", |p| {
p.set_char_at(i, &al::to_dirgha(x).expect("should be ac").to_string());
p.set_char_at(i + 1, "");
});
true
},
);

cp.for_chars(xy(|x, y| IK.contains(x) && AC.contains(y)), |p, text, i| {
let x = text.as_bytes()[i] as char;
let res = match x {
'i' | 'I' => "y",
'u' | 'U' => "v",
'f' | 'F' => "r",
'x' | 'X' => "l",
_ => panic!("Unexpected res"),
};
p.run("6.1.77", |p| p.set_char_at(i, res));
true
} else if EC.contains(x) && AC.contains(y) {
let sub = match x {
'e' => "ay",
'E' => "Ay",
'o' => "av",
'O' => "Av",
_ => panic!("Unexpected sub"),
};
p.run("6.1.78", |p| p.set_char_at(i, sub));
true
} else if IK.contains(x) && AC.contains(y) {
let res = match x {
'i' | 'I' => "y",
'u' | 'U' => "v",
'f' | 'F' => "r",
'x' | 'X' => "l",
_ => panic!("Unexpected res"),
};
p.run("6.1.77", |p| p.set_char_at(i, res));
true
} else {
false
}
});

// upa + fcCati -> upArcCati
Expand All @@ -146,10 +149,9 @@ pub fn apply_general_ac_sandhi(p: &mut Prakriya) {
// upa + eti -> upEti
cp.for_terms(
|x, y| {
x.is_upasarga()
&& x.has_antya(&*A)
&& y.has_u_in(&["i\\R", "eDa~\\"])
&& y.has_adi(&*EN)
let eti_edhati = y.has_adi(&*EN) && y.has_u_in(&["i\\R", "eDa~\\"]);
let is_uth = y.has_adi('U') && y.has_tag(T::FlagUth);
!x.is_agama() && x.has_antya(&*A) && (eti_edhati || is_uth)
},
|p, _i, j| {
let y = p.get(j).expect("ok");
Expand All @@ -159,11 +161,14 @@ pub fn apply_general_ac_sandhi(p: &mut Prakriya) {
},
);

// HACK for KOnAti
// HACK for KOnAti, DOta, and a few others
cp.for_terms(
|x, _| x.has_text("KaU"),
|x, _| x.has_suffix_in(&["aU", "AU"]),
|p, i, _| {
p.run_at("6.1.89", i, |t| t.set_text("KO"));
p.run_at("6.1.89", i, |t| {
t.set_antya("");
t.set_antya("O")
});
},
);

Expand Down Expand Up @@ -223,7 +228,7 @@ pub fn try_sup_sandhi_before_angasya(p: &mut Prakriya) -> Option<()> {

/// Helper function for `try_sup_sandhi_after_angasya` to avoid too much nesting.
fn try_sup_sandhi_after_angasya_for_term(p: &mut Prakriya, i_sup: usize) -> Option<()> {
let i_anga = i_sup - 1;
let i_anga = p.find_prev_where(i_sup, |t| !t.is_empty())?;
let anga = p.get(i_anga)?;
let sup = p.get(i_sup)?;

Expand Down Expand Up @@ -253,7 +258,7 @@ fn try_sup_sandhi_after_angasya_for_term(p: &mut Prakriya, i_sup: usize) -> Opti
p.set(i_anga, op::antya("ur"));
p.set(i_sup, op::adi(""));
});
} else if anga.has_text("saKi") || anga.has_text("pati") {
} else if anga.has_text_in(&["saKi", "pati"]) {
// saKyuH, patyuH
p.run_at("6.1.112", i_sup, op::text("us"));
}
Expand Down Expand Up @@ -329,6 +334,12 @@ fn apply_ac_sandhi_at_term_boundary(p: &mut Prakriya, i: usize) -> Option<()> {
);
}

// TODO: not sure where else to put this.
let t = p.get(i)?;
if p.is_pada(i) && t.has_text("div") {
p.run_at("6.1.131", i, |t| t.set_antya("u"));
}

Some(())
}

Expand All @@ -344,7 +355,7 @@ fn try_sut_kat_purva(p: &mut Prakriya) -> Option<()> {
let prev = p.get(i_prev)?;

let optional_add_sut_agama = |rule, p: &mut Prakriya, i_dhatu: usize| {
if p.run_optional(rule, |p| op::insert_agama_before(p, i_dhatu, "su~w")) {
if p.optional_run(rule, |p| op::insert_agama_before(p, i_dhatu, "su~w")) {
it_samjna::run(p, i_dhatu).expect("ok");
}
};
Expand Down
Loading

0 comments on commit e54d782

Please sign in to comment.