From cebd6b65dd7127690f8d5e2d8a295fafb0b5c8c8 Mon Sep 17 00:00:00 2001 From: Fityan Date: Mon, 20 Nov 2023 13:42:26 +0700 Subject: [PATCH] feat: add new charsets, `latin-ext-a` and `latin-ext-b` (v0.4.0) --- README.md | 4 +++- charsets/latin-ext-a.json | 40 ++++++++++++++++++++++++++++++++ charsets/latin-ext-b.json | 48 +++++++++++++++++++++++++++++++++++++++ package-lock.json | 4 ++-- package.json | 2 +- src/core.ts | 4 ++++ test/charsets.spec.ts | 9 ++++---- test/index.spec.ts | 2 ++ 8 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 charsets/latin-ext-a.json create mode 100644 charsets/latin-ext-b.json diff --git a/README.md b/README.md index e1684e0..bd73a1b 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ The character set that will be used for obfuscation. Put the **name of the** [** The valid custom character set must be an object that contains key-value pairs where: - The **key** is the character to be replaced. It must be a **single alphabet character** (`a-z`, `A-Z`). -- The **value** is an array of characters that will be used to replace the key. It must be an array of **any single characters** other than [control characters](https://unicodeplus.com/category/Cc). +- The **value** is an array of characters that will be used to replace the key. It must be an array of **any single characters** other than [control characters](https://unicodeplus.com/category/Cc) and characters in the [Private Use Area block](https://unicodeplus.com/block/E000). See the example below. @@ -148,6 +148,8 @@ Below is the built-in character sets available. 
See the details of each characte | --- | --- | --- | | `latin` | [Basic Latin](https://unicodeplus.com/block/0000) | \u0000 - \u007f | | `latin-1` | [Latin-1 Supplement](https://unicodeplus.com/block/0080) | \u0080 - \u00ff | +| `latin-ext-a` | [Latin Extended-A](https://unicodeplus.com/block/0100) | \u0100 - \u017f | +| `latin-ext-b` | [Latin Extended-B](https://unicodeplus.com/block/0180) | \u0180 - \u024f | ## Support This Project diff --git a/charsets/latin-ext-a.json b/charsets/latin-ext-a.json new file mode 100644 index 0000000..5abf1b7 --- /dev/null +++ b/charsets/latin-ext-a.json @@ -0,0 +1,40 @@ +{ + "A": ["\u0100", "\u0102", "\u0104"], + "a": ["\u0101", "\u0103", "\u0105"], + "C": ["\u0106", "\u0108", "\u010a", "\u010c"], + "c": ["\u0107", "\u0109", "\u010b", "\u010d"], + "D": ["\u010e", "\u0110"], + "d": ["\u010f", "\u0111"], + "E": ["\u0112", "\u0114", "\u0116", "\u0118", "\u011a"], + "e": ["\u0113", "\u0115", "\u0117", "\u0119", "\u011b"], + "G": ["\u011c", "\u011e", "\u0120", "\u0122"], + "g": ["\u011d", "\u011f", "\u0121", "\u0123"], + "H": ["\u0124", "\u0126"], + "h": ["\u0125", "\u0127"], + "I": ["\u0128", "\u012a", "\u012c", "\u012e", "\u0130"], + "i": ["\u0129", "\u012b", "\u012d", "\u012f", "\u0131"], + "J": ["\u0134"], + "j": ["\u0135"], + "K": ["\u0136"], + "k": ["\u0137", "\u0138"], + "L": ["\u0139", "\u013b", "\u013d", "\u013f", "\u0141"], + "l": ["\u013a", "\u013c", "\u013e", "\u0140", "\u0142"], + "N": ["\u0143", "\u0145", "\u0147", "\u014a"], + "n": ["\u0144", "\u0146", "\u0148", "\u0149", "\u014b"], + "O": ["\u014c", "\u014e", "\u0150"], + "o": ["\u014d", "\u014f", "\u0151"], + "R": ["\u0154", "\u0156", "\u0158"], + "r": ["\u0155", "\u0157", "\u0159"], + "S": ["\u015a", "\u015c", "\u015e", "\u0160"], + "s": ["\u015b", "\u015d", "\u015f", "\u0161"], + "T": ["\u0162", "\u0164", "\u0166"], + "t": ["\u0163", "\u0165", "\u0167"], + "U": ["\u0168", "\u016a", "\u016c", "\u016e", "\u0170", "\u0172"], + "u": ["\u0169", "\u016b", "\u016d", 
"\u016f", "\u0171", "\u0173"], + "W": ["\u0174"], + "w": ["\u0175"], + "Y": ["\u0176", "\u0178"], + "y": ["\u0177"], + "Z": ["\u0179", "\u017b", "\u017d"], + "z": ["\u017a", "\u017c", "\u017e"] +} diff --git a/charsets/latin-ext-b.json b/charsets/latin-ext-b.json new file mode 100644 index 0000000..e3d35e3 --- /dev/null +++ b/charsets/latin-ext-b.json @@ -0,0 +1,48 @@ +{ + "A": ["\u01cd", "\u01de", "\u01e0", "\u01fa", "\u0200", "\u0202", "\u0226", "\u023a", "\u0245"], + "a": ["\u01ce", "\u01df", "\u01e1", "\u01fb", "\u0201", "\u0203", "\u0227"], + "B": ["\u0181", "\u0243"], + "b": ["\u0180", "\u0182", "\u0183", "\u0184", "\u0185"], + "C": ["\u0186", "\u0187", "\u023b"], + "c": ["\u0188", "\u023c"], + "D": ["\u0189", "\u018a"], + "d": ["\u018b", "\u018c", "\u0221"], + "E": ["\u018e", "\u0190", "\u01a9", "\u01b7", "\u01b8", "\u0204", "\u0206", "\u0228", "\u0246"], + "e": ["\u01b9", "\u01dd", "\u0205", "\u0207", "\u0229", "\u0247"], + "F": ["\u0191"], + "f": ["\u0192", "\u01ad"], + "G": ["\u0193", "\u01e4", "\u01e6", "\u01f4"], + "g": ["\u01e5", "\u01e7", "\u01f5"], + "H": ["\u01f6", "\u021e"], + "h": ["\u021f"], + "I": ["\u0197", "\u01cf", "\u0208", "\u020a"], + "i": ["\u019a", "\u01d0", "\u0209", "\u020b"], + "J": ["\u0248"], + "j": ["\u01f0", "\u0237", "\u0249"], + "K": ["\u01e8"], + "k": ["\u01e9"], + "l": ["\u0196", "\u01aa", "\u0234"], + "N": ["\u019d", "\u01f8"], + "n": ["\u019b", "\u019e", "\u01f9", "\u0220", "\u0235"], + "O": ["\u019f", "\u01a0", "\u01b1", "\u01d1", "\u01fe", "\u020c", "\u020e", "\u022a", "\u022c", "\u022e", "\u0230"], + "o": ["\u01a1", "\u01d2", "\u01ff", "\u020d", "\u020f", "\u022b", "\u022d", "\u022f", "\u0231"], + "P": ["\u01a4"], + "p": ["\u01a5"], + "Q": ["\u01ea", "\u01ec", "\u024a"], + "q": ["\u01eb", "\u01ed", "\u024b"], + "R": ["\u01a6", "\u0210", "\u0212", "\u024c"], + "r": ["\u0211", "\u0213", "\u024d"], + "S": ["\u01a7", "\u0218"], + "s": ["\u01a8", "\u0219", "\u023f"], + "T": ["\u01ac", "\u01ae", "\u021a", "\u023e"], + "t": 
["\u01ab", "\u021b", "\u0236"], + "U": ["\u01af", "\u01d3", "\u01d5", "\u01d7", "\u01d9", "\u01db", "\u0214", "\u0216", "\u0244"], + "u": ["\u01b0", "\u01d4", "\u01d6", "\u01d8", "\u01da", "\u01dc", "\u0215", "\u0217"], + "V": ["\u01b2"], + "W": ["\u019c"], + "w": ["\u01b3"], + "Y": ["\u0232", "\u024e"], + "y": ["\u0194", "\u01b4", "\u0233", "\u024f"], + "Z": ["\u01b5", "\u0224"], + "z": ["\u01b6", "\u0225", "\u0240"] +} diff --git a/package-lock.json b/package-lock.json index a1a9baf..ae2f875 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "wisely", - "version": "0.3.2", + "version": "0.4.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "wisely", - "version": "0.3.2", + "version": "0.4.0", "license": "MIT", "devDependencies": { "@typescript-eslint/eslint-plugin": "^6.10.0", diff --git a/package.json b/package.json index 8bb6f3f..f4f66d6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "wisely", - "version": "0.3.2", + "version": "0.4.0", "description": "Obfuscating text or phrases with random uncommon characters to avoid banning.", "type": "module", "files": [ diff --git a/src/core.ts b/src/core.ts index e040edb..5d04050 100644 --- a/src/core.ts +++ b/src/core.ts @@ -6,6 +6,8 @@ export type CharSet = Record; export const CharSets = { LATIN: 'latin', LATIN_1: 'latin-1', + LATIN_EXT_A: 'latin-ext-a', + LATIN_EXT_B: 'latin-ext-b', } as const; export class ValidationError extends Error { @@ -31,6 +33,8 @@ export function isCharSetValid(charSet: object): boolean { && char.length === 1 // eslint-disable-next-line no-control-regex && /[^\u0000-\u001f\u007f-\u009f]/.test(char) + // no private use characters + && /[^\ue000-\uf8ff]$/.test(char) )) )); } diff --git a/test/charsets.spec.ts b/test/charsets.spec.ts index 2abe1ce..2012e8d 100644 --- a/test/charsets.spec.ts +++ b/test/charsets.spec.ts @@ -1,12 +1,11 @@ import fs from 'node:fs'; import path from 'node:path'; import { expect, test } 
from 'vitest'; -import { CharSet, isCharSetValid } from '~/index.js'; +import { CharSet, CharSets, isCharSetValid } from '~/index.js'; -test.each([ - { name: 'latin' }, - { name: 'latin-1' }, -])('validate charSet: $name', ({ name }) => { +const charSetsNames = Object.values(CharSets); + +test.each(charSetsNames.map((name) => ({ name })))('validate charSet: $name', ({ name }) => { const strJson = fs.readFileSync( path.resolve(__dirname, `../charsets/${name}.json`), { encoding: 'utf8' }, diff --git a/test/index.spec.ts b/test/index.spec.ts index c76f0d5..427a0ea 100644 --- a/test/index.spec.ts +++ b/test/index.spec.ts @@ -84,6 +84,8 @@ describe('mergeCharSets', () => { expect(() => mergeCharSets({ a: ['\u0000', '\u0001', '\u001f', '\u007f', '\u0080', '\u009f'], })).toThrow(); + // Must not contain private use characters + expect(() => mergeCharSets({ a: ['\ue000', '\ue001', '\uf8ff'] })).toThrow(); }); });