From 7f957059204714d9b0ce64d7b107b2496dc0cc12 Mon Sep 17 00:00:00 2001 From: Dave Nicolson Date: Sat, 17 Jun 2023 01:52:53 +0200 Subject: [PATCH] feat: Add ASCII encoding support (#78) --- .github/workflows/test-build.js | 1 + .github/workflows/test-build.ts | 1 + src/encoding/ascii.test.ts | 9 +++++++++ src/encoding/ascii.ts | 21 +++++++++++++++++++++ src/index.test.ts | 1 + src/index.ts | 2 ++ src/test/data/encodings/ascii | 1 + 7 files changed, 36 insertions(+) create mode 100644 src/encoding/ascii.test.ts create mode 100644 src/encoding/ascii.ts create mode 100644 src/test/data/encodings/ascii diff --git a/.github/workflows/test-build.js b/.github/workflows/test-build.js index 4dd0e53..febbf05 100644 --- a/.github/workflows/test-build.js +++ b/.github/workflows/test-build.js @@ -8,6 +8,7 @@ assert(typeof chardet.detectFile, 'function'); assert(typeof chardet.detectFileSync, 'function'); assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [ + { confidence: 100, name: 'ASCII', lang: undefined }, { confidence: 98, name: 'ISO-8859-1', lang: 'en' }, { confidence: 98, name: 'ISO-8859-2', lang: 'hu' }, { confidence: 10, name: 'UTF-8', lang: undefined }, diff --git a/.github/workflows/test-build.ts b/.github/workflows/test-build.ts index 79f925c..0339a43 100644 --- a/.github/workflows/test-build.ts +++ b/.github/workflows/test-build.ts @@ -9,6 +9,7 @@ const main = async () => { assert(typeof chardet.detectFileSync, 'function'); assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [ + { confidence: 100, name: 'ASCII', lang: undefined }, { confidence: 98, name: 'ISO-8859-1', lang: 'en' }, { confidence: 98, name: 'ISO-8859-2', lang: 'hu' }, { confidence: 10, name: 'UTF-8', lang: undefined }, diff --git a/src/encoding/ascii.test.ts b/src/encoding/ascii.test.ts new file mode 100644 index 0000000..f03d11b --- /dev/null +++ b/src/encoding/ascii.test.ts @@ -0,0 +1,9 @@ +import * as chardet from '..'; + +describe('ASCII', () => { + it('should return ASCII', () => { + expect( + chardet.detectFileSync(__dirname + '/../test/data/encodings/ascii') + ).toBe('ASCII'); + }); +}); diff --git a/src/encoding/ascii.ts b/src/encoding/ascii.ts new file mode 100644 index 0000000..4d23dc2 --- /dev/null +++ b/src/encoding/ascii.ts @@ -0,0 +1,21 @@ +import { Context, Recogniser } from '.'; +import match, { Match } from '../match'; + +export default class Ascii implements Recogniser { + name() { + return 'ASCII'; + } + + match(det: Context): Match | null { + const input = det.rawInput; + + for (let i = 0; i < det.rawLen; i++) { + const b = input[i]; + if (b < 32 || b > 126) { + return match(det, this, 0); + } + } + + return match(det, this, 100); + } +} diff --git a/src/index.test.ts b/src/index.test.ts index 56b4c62..40766dc 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -15,6 +15,7 @@ describe('chardet', () => { { 'confidence': 6, 'name': 'windows-1250', 'lang': 'pl' }, { 'confidence': 4, 'name': 'windows-1254', 'lang': 'tr' }, { 'confidence': 2, 'name': 'windows-1251', 'lang': 'ru' }, + { 'confidence': 0, 'name': 'ASCII', 'lang': undefined }, ]; it('has both named and default exports', () => { diff --git a/src/index.ts b/src/index.ts index 4f2ac45..28c7da5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,6 +3,7 @@ import { Recogniser, Context } from './encoding'; import loadFs from './fs/node'; +import Ascii from './encoding/ascii'; import Utf8 from './encoding/utf8'; import * as unicode from './encoding/unicode'; import * as mbcs from './encoding/mbcs'; @@ -40,6 +41,7 @@ const recognisers: Recogniser[] = [ new sbcs.windows_1251(), new sbcs.windows_1256(), new sbcs.KOI8_R(), + new Ascii(), ]; export type AnalyseResult = Match[]; diff --git a/src/test/data/encodings/ascii b/src/test/data/encodings/ascii new file mode 100644 index 0000000..d86e6b0 --- /dev/null +++ b/src/test/data/encodings/ascii @@ -0,0 +1 @@ + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ \ No newline at end of file