From 635374bbaa6c4bb199497db042a4f4c6ddb1f437 Mon Sep 17 00:00:00 2001 From: Nourman Hajar Date: Sat, 5 Sep 2020 11:46:27 +0700 Subject: [PATCH] Fix charlist path resolving problem. Added README. Bumped version --- README.md | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ index.js | 24 +++++++++++++---- 2 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..e0b1cc7 --- /dev/null +++ b/README.md @@ -0,0 +1,78 @@ +# carakan.js + +[![npm](https://img.shields.io/npm/v/carakanjs?color=green)](https://www.npmjs.com/package/carakanjs) +[![size](https://img.shields.io/github/repo-size/masnormen/carakanjs?color=green)](https://github.com/masnormen/carakanjs) + +**carakan.js** is a small library for converting Latin script into Javanese script, also known as Aksara Jawa/Carakan. + + +### Why this library? + +Yes, I know there are already many Javanese script transliteration libraries out there, but they are **not accurate**, at least for some words whose syllable structure is complicated by the nature of the Javanese language, such as "ngglembyar" and "nggrambyang". + +This inaccuracy is caused by the complexity of Javanese script writing rules. Therefore, I wanted to create a library that provides more accurate transliteration from Latin into Javanese script and vice versa, with both linguistic complexity and ease of use in mind. + +## Usage + +**Normal Usage** +```js +import { toCarakan } from "carakanjs"; + +// Example input with default options + +const example = toCarakan("aku ambyar", { + diacritics: false, + swaraMurda: true +}); // ꦲꦏꦸꦲꦩ꧀ꦧꦾꦂ +``` + +**Writing pepet and taling (/ə/ and /e/ sounds)** +```js +const esDawet1 = toCarakan("es dawxt"); // ꦲꦺꦱ꧀ꦢꦮꦼꦠ꧀ +const gembeng1 = toCarakan("gxmbeng"); // ꦒꦼꦩ꧀ꦧꦺꦁ + +// "Diacritics" mode, false by default. You can use a grave accent or an accented 'e'. 
+ +const esDawet2 = toCarakan("e\`s dawet", {diacritics: true}); // ꦲꦺꦱ꧀ꦢꦮꦼꦠ꧀ +const gembeng2 = toCarakan("gembe\`ng", {diacritics: true}); // ꦒꦼꦩ꧀ꦧꦺꦁ + +// Another example. Useful when copy-pasting from Wikipedia Basa Jawa + +const esDawet3 = toCarakan("és dawet", {diacritics: true}); // ꦲꦺꦱ꧀ꦢꦮꦼꦠ꧀ +const gembeng3 = toCarakan("gembéng", {diacritics: true}); // ꦒꦼꦩ꧀ꦧꦺꦁ +``` + +**Writing aksara Murda and Swara** + +Remember that aksara Murda only contains na, ka, ta, sa, pa, nya, ga, and ba. +```js +// true by default + +const example1 = toCarakan("GuSTiAllah"); // ꦓꦸꦯ꧀ꦡꦶꦄꦭ꧀ꦭꦃ +const example2 = toCarakan("Banjar"); // ꦨꦤ꧀ꦗꦂ +const example3 = toCarakan("Banjar", {swaraMurda: false}); // ꦧꦤ꧀ꦗꦂ +``` + +**Writing punctuation (pada)** + +|Name |Typed |Aksara Jawa| +|------------------|-------------------|-----------| +|Pada lingsa * |, |꧈ | +|Pada lungsi * |. |꧉ | +|Pada pangkat |: |꧇ | +|Pada adeg |" or ' or ( or ) |꧊ | +|Pada adeg-adeg |\| |꧋ | +|Pada piseleh |< |꧌ ...... | +|Pada piseleh walik|\> |...... ꧍ | +|Rerengan kiwa |{ |꧁ ... | +|Rerengan tengen |} |... ꧂ | + +*) Pada lingsa (comma) will not render if a pangkon is next to it, and pada lungsi (period) will become pada lingsa if a pangkon is next to it. + +This behavior is expected and actually adheres to the rules of Javanese writing. + +## References + +- https://id.wikipedia.org/wiki/Bahasa_Jawa#Fonotaktik +- https://en.wikipedia.org/wiki/Javanese_language#Consonants +- https://en.wikipedia.org/wiki/Javanese_script diff --git a/index.js b/index.js index 0c1aefe..ea5e35b 100644 --- a/index.js +++ b/index.js @@ -312,15 +312,29 @@ const doTrans = (current, residue, input, isLast = true) => { return output(); }; -export const toCarakan = (value, useDiacritics = false, useSwaraMurda = true) => { +exports.toCarakan = (value, options = {}) => { if (typeof value !== "string") throw new TypeError("Expected a string"); - let input = useDiacritics - ? 
value.replace(/E(?!`)/g, "X").replace(/E`/g, "E").replace(/e(?!`)/g, "x").replace(/e`/g, "e") + options = { + diacritics: false, + swaraMurda: true, + ...options + }; + + console.log(options); + + let input = options.diacritics + ? value + .replace(/E(?!`)/g, "X") + .replace(/e(?!`)/g, "x") + .replace(/E`/g, "E") + .replace(/e`/g, "e") + .replace(/È/g, "E") + .replace(/è/g, "e") : value; - if (!useSwaraMurda) input = input.toLowerCase(); + if (!options.swaraMurda) input = input.toLowerCase(); let result = ""; const syllableCheck = /([\d]+|[:()'"|<>{}?!])|(dh|ny|th|ng|kh|dz|sy|gh|NY|[hncrkdtswlpjymgbzfvNKTSPGB])?(?![ ](?![aiueoxAIUEOXÉÈéè]))(dh|ny|th|ng|kh|dz|sy|gh|NY|[hncrkdtswlpjymgbzfvNKTSPGB]?)?([aiueoxAIUEOXÉÈéè])(ng|[rh])?(?![aiueoxAIUEOXÉÈéè])|(dh|ny|th|kh|dz|sy|gh|NY|[nckdtswlpjymgbzfvNKTSPGB])?([.,])(?:[ ])?/g; @@ -358,7 +372,7 @@ export const toCarakan = (value, useDiacritics = false, useSwaraMurda = true) => if (current.length > 0) // Transliterates the regex-filtered strings - result += doTrans(current, residue, input, isLast, useDiacritics); + result += doTrans(current, residue, input, isLast); } return result;