Skip to content

Commit

Permalink
feat: Add ASCII encoding support (#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
dnicolson authored Jun 16, 2023
1 parent 29cb821 commit 7f95705
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test-build.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ assert(typeof chardet.detectFile, 'function');
assert(typeof chardet.detectFileSync, 'function');

assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [
{ confidence: 100, name: 'ASCII', lang: undefined },
{ confidence: 98, name: 'ISO-8859-1', lang: 'en' },
{ confidence: 98, name: 'ISO-8859-2', lang: 'hu' },
{ confidence: 10, name: 'UTF-8', lang: undefined },
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test-build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const main = async () => {
assert(typeof chardet.detectFileSync, 'function');

assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [
{ confidence: 100, name: 'ASCII', lang: undefined },
{ confidence: 98, name: 'ISO-8859-1', lang: 'en' },
{ confidence: 98, name: 'ISO-8859-2', lang: 'hu' },
{ confidence: 10, name: 'UTF-8', lang: undefined },
Expand Down
9 changes: 9 additions & 0 deletions src/encoding/ascii.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import * as chardet from '..';

// Checks that the ASCII fixture file is detected as 'ASCII'.
describe('ASCII', () => {
  it('should return ASCII', () => {
    // Arrange: path of the fixture, relative to this test file.
    const fixturePath = __dirname + '/../test/data/encodings/ascii';

    // Act
    const detected = chardet.detectFileSync(fixturePath);

    // Assert
    expect(detected).toBe('ASCII');
  });
});
21 changes: 21 additions & 0 deletions src/encoding/ascii.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { Context, Recogniser } from '.';
import match, { Match } from '../match';

/**
 * Recogniser for plain 7-bit ASCII text.
 *
 * A byte is accepted when it is a printable character (0x20–0x7E) or one of
 * the ordinary text whitespace controls TAB (0x09), LF (0x0A) or CR (0x0D).
 * Any other control byte, DEL (0x7F), or byte above 0x7F disqualifies the
 * input.
 */
export default class Ascii implements Recogniser {
  /** Encoding name reported by this recogniser. */
  name() {
    return 'ASCII';
  }

  /**
   * Scans the first `det.rawLen` entries of `det.rawInput`.
   *
   * @param det Detection context supplying the raw input and its length.
   * @returns The result of the `match` helper called with confidence 100
   *   when every scanned byte is acceptable, otherwise with confidence 0.
   */
  match(det: Context): Match | null {
    const input = det.rawInput;

    for (let i = 0; i < det.rawLen; i++) {
      const b = input[i];
      // TAB, LF and CR are part of normal text; without this exemption any
      // multi-line or tab-indented ASCII input would be scored 0.
      const isTextWhitespace = b === 0x09 || b === 0x0a || b === 0x0d;
      if ((b < 0x20 && !isTextWhitespace) || b > 0x7e) {
        return match(det, this, 0);
      }
    }

    return match(det, this, 100);
  }
}
1 change: 1 addition & 0 deletions src/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ describe('chardet', () => {
{ 'confidence': 6, 'name': 'windows-1250', 'lang': 'pl' },
{ 'confidence': 4, 'name': 'windows-1254', 'lang': 'tr' },
{ 'confidence': 2, 'name': 'windows-1251', 'lang': 'ru' },
{ 'confidence': 0, 'name': 'ASCII', 'lang': undefined },
];

it('has both named and default exports', () => {
Expand Down
2 changes: 2 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { Recogniser, Context } from './encoding';

import loadFs from './fs/node';

import Ascii from './encoding/ascii';
import Utf8 from './encoding/utf8';
import * as unicode from './encoding/unicode';
import * as mbcs from './encoding/mbcs';
Expand Down Expand Up @@ -40,6 +41,7 @@ const recognisers: Recogniser[] = [
new sbcs.windows_1251(),
new sbcs.windows_1256(),
new sbcs.KOI8_R(),
new Ascii(),
];

export type AnalyseResult = Match[];
Expand Down
1 change: 1 addition & 0 deletions src/test/data/encodings/ascii
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~

0 comments on commit 7f95705

Please sign in to comment.