Skip to content

Commit

Permalink
Merge pull request #37 from runk/major-dev-deps
Browse files Browse the repository at this point in the history
Major dev deps
  • Loading branch information
Dmitry Shirokov authored Sep 23, 2020
2 parents bbb4fed + edcfec1 commit b176134
Show file tree
Hide file tree
Showing 17 changed files with 6,081 additions and 3,725 deletions.
2 changes: 2 additions & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lib
jest.config.js
10 changes: 10 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"root": true,
"parser": "@typescript-eslint/parser",
"plugins": ["@typescript-eslint"],
"extends": ["eslint:recommended", "plugin:@typescript-eslint/recommended"],
"rules": {
"@typescript-eslint/no-unused-vars": ["warn", { "varsIgnorePattern": "_" }],
"@typescript-eslint/no-inferrable-types": ["off"]
}
}
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
language: node_js
node_js:
- "8"
- "10"
- "12"
- "14"

jobs:
include:
Expand Down
9,591 changes: 5,982 additions & 3,609 deletions package-lock.json

Large diffs are not rendered by default.

20 changes: 11 additions & 9 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
},
"scripts": {
"build": "rm -rf lib/* && tsc",
"lint": "tslint -p tsconfig.json -c tslint.json",
"lint": "eslint . --ext .js,.jsx,.ts,.tsx",
"lint:types": "tsc --noEmit",
"format": "prettier --write ./src/**/*.ts",
"format:check": "prettier --list-different ./src/**/*.ts",
Expand All @@ -33,14 +33,16 @@
"test": "test"
},
"devDependencies": {
"@types/jest": "^25.1.4",
"@types/node": "^13.9.5",
"jest": "^25.2.4",
"prettier": "^2.0.2",
"semantic-release": "^15.14.0",
"ts-jest": "^25.2.1",
"tslint": "^6.1.0",
"typescript": "^3.8.3"
"@types/jest": "^26.0.14",
"@types/node": "^14.11.2",
"@typescript-eslint/eslint-plugin": "^4.2.0",
"@typescript-eslint/parser": "^4.2.0",
"eslint": "^7.9.0",
"jest": "^26.4.2",
"prettier": "^2.1.2",
"semantic-release": "^17.1.2",
"ts-jest": "^26.4.0",
"typescript": "^4.0.3"
},
"keywords": [
"encoding",
Expand Down
2 changes: 1 addition & 1 deletion src/encoding/iso2022.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import * as chardet from '..';

describe('ISO-2022', () => {
var base = __dirname + '/../test/data/encodings';
const base = __dirname + '/../test/data/encodings';

it('should return ISO-2022-JP', () => {
expect(chardet.detectFileSync(base + '/iso2022jp')).toBe('ISO-2022-JP');
Expand Down
23 changes: 11 additions & 12 deletions src/encoding/iso2022.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Context, Recogniser } from '.';

var match = require('../match').default;
import match, { Match } from '../match';

/**
* This is a superclass for the individual detectors for
Expand All @@ -15,7 +14,7 @@ class ISO_2022 implements Recogniser {
return 'ISO_2022';
}

match(det: Context) {
match(det: Context): Match | null {
/**
* Matching function shared among the 2022 detectors JP, CN and KR
* Counts up the number of legal and unrecognized escape sequences in
Expand All @@ -29,16 +28,16 @@ class ISO_2022 implements Recogniser {
* @return match quality, in the range of 0-100.
*/

var i, j;
var escN;
var hits = 0;
var misses = 0;
var shifts = 0;
var quality;
let i, j;
let escN;
let hits = 0;
let misses = 0;
let shifts = 0;
let quality;

// TODO: refactor me
var text = det.fInputBytes;
var textLen = det.fInputLen;
const text = det.fInputBytes;
const textLen = det.fInputLen;

scanInput: for (i = 0; i < textLen; i++) {
if (text[i] == 0x1b) {
Expand All @@ -47,7 +46,7 @@ class ISO_2022 implements Recogniser {
escN < this.escapeSequences.length;
escN++
) {
var seq = this.escapeSequences[escN];
const seq = this.escapeSequences[escN];

if (textLen - i < seq.length) continue checkEscapes;

Expand Down
2 changes: 1 addition & 1 deletion src/encoding/mbcs.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import * as chardet from '..';

describe('Multibyte Character Sets', () => {
var base = __dirname + '/../test/data/encodings';
const base = __dirname + '/../test/data/encodings';

it('should return Shift_JIS', () => {
expect(chardet.detectFileSync(base + '/shiftjis')).toBe('Shift_JIS');
Expand Down
46 changes: 21 additions & 25 deletions src/encoding/mbcs.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Context, Recogniser } from '.';
var match = require('../match').default;
import match, { Match } from '../match';

/**
* Binary search implementation (recursive)
Expand All @@ -18,7 +18,7 @@ function binarySearch(arr: number[], searchValue: number) {
There is a bug in the above line;
Joshua Bloch suggests the following replacement:
*/
var mid = Math.floor((left + right) >>> 1);
const mid = Math.floor((left + right) >>> 1);
if (searchValue > arr[mid]) return find(arr, searchValue, mid + 1, right);

if (searchValue < arr[mid]) return find(arr, searchValue, left, mid - 1);
Expand Down Expand Up @@ -68,7 +68,7 @@ class IteratedChar {
this.done = true;
return -1;
}
var byteValue = det.fRawInput[this.nextIndex++] & 0x00ff;
const byteValue = det.fRawInput[this.nextIndex++] & 0x00ff;
return byteValue;
}
}
Expand Down Expand Up @@ -97,23 +97,23 @@ class mbcs implements Recogniser {
* bits 0-7: the match confidence, ranging from 0-100
* bits 8-15: The match reason, an enum-like value.
*/
match(det: Context) {
var singleByteCharCount = 0, //TODO Do we really need this?
match(det: Context): Match | null {
let singleByteCharCount = 0, //TODO Do we really need this?
doubleByteCharCount = 0,
commonCharCount = 0,
badCharCount = 0,
totalCharCount = 0,
confidence = 0;

var iter = new IteratedChar();
const iter = new IteratedChar();

detectBlock: {
for (iter.reset(); this.nextChar(iter, det); ) {
totalCharCount++;
if (iter.error) {
badCharCount++;
} else {
var cv = iter.charValue & 0xffffffff;
const cv = iter.charValue & 0xffffffff;

if (cv <= 0xff) {
singleByteCharCount++;
Expand Down Expand Up @@ -159,20 +159,17 @@ class mbcs implements Recogniser {
}

if (this.commonChars == null) {
// We have no statistics on frequently occuring characters.
// We have no statistics on frequently occurring characters.
// Assess confidence purely on having a reasonable number of
// multi-byte characters (the more the better).
confidence = 30 + doubleByteCharCount - 20 * badCharCount;
if (confidence > 100) {
confidence = 100;
}
} else {
//
// Frequency of occurrence statistics exist.
//
// @ts-ignore
var maxVal = Math.log(parseFloat(doubleByteCharCount) / 4);
var scaleFactor = 90.0 / maxVal;
const maxVal = Math.log(doubleByteCharCount / 4);
const scaleFactor = 90.0 / maxVal;
confidence = Math.floor(
Math.log(commonCharCount + 1) * scaleFactor + 10
);
Expand Down Expand Up @@ -278,14 +275,13 @@ export class sjis extends mbcs {
iter.index = iter.nextIndex;
iter.error = false;

var firstByte;
firstByte = iter.charValue = iter.nextByte(det);
const firstByte = (iter.charValue = iter.nextByte(det));
if (firstByte < 0) return false;

if (firstByte <= 0x7f || (firstByte > 0xa0 && firstByte <= 0xdf))
return true;

var secondByte = iter.nextByte(det);
const secondByte = iter.nextByte(det);
if (secondByte < 0) return false;

iter.charValue = (firstByte << 8) | secondByte;
Expand Down Expand Up @@ -418,14 +414,14 @@ export class big5 extends mbcs {
iter.index = iter.nextIndex;
iter.error = false;

var firstByte = (iter.charValue = iter.nextByte(det));
const firstByte = (iter.charValue = iter.nextByte(det));

if (firstByte < 0) return false;

// single byte character.
if (firstByte <= 0x7f || firstByte == 0xff) return true;

var secondByte = iter.nextByte(det);
const secondByte = iter.nextByte(det);

if (secondByte < 0) return false;

Expand All @@ -450,9 +446,9 @@ export class big5 extends mbcs {
function eucNextChar(iter: IteratedChar, det: Context) {
iter.index = iter.nextIndex;
iter.error = false;
var firstByte = 0;
var secondByte = 0;
var thirdByte = 0;
let firstByte = 0;
let secondByte = 0;
let thirdByte = 0;
//int fourthByte = 0;
buildChar: {
firstByte = iter.charValue = iter.nextByte(det);
Expand Down Expand Up @@ -763,10 +759,10 @@ export class gb_18030 extends mbcs {
nextChar(iter: IteratedChar, det: Context) {
iter.index = iter.nextIndex;
iter.error = false;
var firstByte = 0;
var secondByte = 0;
var thirdByte = 0;
var fourthByte = 0;
let firstByte = 0;
let secondByte = 0;
let thirdByte = 0;
let fourthByte = 0;
buildChar: {
firstByte = iter.charValue = iter.nextByte(det);
if (firstByte < 0) {
Expand Down
2 changes: 1 addition & 1 deletion src/encoding/sbcs.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import * as chardet from '..';

describe('Singlebyte Character Sets', () => {
var base = __dirname + '/../test/data/encodings';
const base = __dirname + '/../test/data/encodings';

it('should return ISO-8859-1 (English)', () => {
expect(chardet.detectFileSync(base + '/iso88591_en')).toBe('ISO-8859-1');
Expand Down
36 changes: 17 additions & 19 deletions src/encoding/sbcs.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import { Context, Recogniser } from '../encoding/index';

var match = require('../match').default;
import match, { Match } from '../match';

/**
* This class recognizes single-byte encodings. Because the encoding scheme is so
* simple, language statistics are used to do the matching.
*/

var N_GRAM_MASK = 0xffffff;
const N_GRAM_MASK = 0xffffff;

class NGramParser {
byteIndex: number = 0;
Expand All @@ -31,7 +30,7 @@ class NGramParser {
* Binary search for value in table, which must have exactly 64 entries.
*/
search(table: number[], value: number) {
var index = 0;
let index = 0;

if (table[index + 32] <= value) index += 32;
if (table[index + 16] <= value) index += 16;
Expand Down Expand Up @@ -65,12 +64,12 @@ class NGramParser {
}

parse(det: Context, spaceCh: number) {
var b,
let b,
ignoreSpace = false;
this.spaceChar = spaceCh;

while ((b = this.nextByte(det)) >= 0) {
var mb = this.byteMap[b];
const mb = this.byteMap[b];

// TODO: 0x20 might not be a space in all character sets...
if (mb != 0) {
Expand All @@ -85,7 +84,7 @@ class NGramParser {
// TODO: Is this OK? The buffer could have ended in the middle of a word...
this.addByte(this.spaceChar);

var rawPercent = this.hitCount / this.ngramCount;
const rawPercent = this.hitCount / this.ngramCount;

// TODO - This is a bit of a hack to take care of a case
// where we were getting a confidence of 135...
Expand Down Expand Up @@ -119,35 +118,34 @@ class sbcs implements Recogniser {
return [];
}

// @ts-ignore
name(input: Context): string {
return 'sbcs';
}

match(det: Context) {
var ngrams = this.ngrams();
match(det: Context): Match | null {
const ngrams = this.ngrams();

if (isFlatNgrams(ngrams)) {
var parser = new NGramParser(ngrams, this.byteMap());
var confidence = parser.parse(det, this.spaceChar);
const parser = new NGramParser(ngrams, this.byteMap());
const confidence = parser.parse(det, this.spaceChar);
return confidence <= 0 ? null : match(det, this, confidence);
}

var bestConfidenceSoFar = -1;
var lang = null;
let bestConfidenceSoFar = -1;
let lang;

for (var i = ngrams.length - 1; i >= 0; i--) {
var ngl = ngrams[i];
for (let i = ngrams.length - 1; i >= 0; i--) {
const ngl = ngrams[i];

var parser = new NGramParser(ngl.fNGrams, this.byteMap());
var confidence = parser.parse(det, this.spaceChar);
const parser = new NGramParser(ngl.fNGrams, this.byteMap());
const confidence = parser.parse(det, this.spaceChar);
if (confidence > bestConfidenceSoFar) {
bestConfidenceSoFar = confidence;
lang = ngl.fLang;
}
}

var name = this.name(det);
const name = this.name(det);
return bestConfidenceSoFar <= 0
? null
: match(det, this, bestConfidenceSoFar, name, lang);
Expand Down
2 changes: 1 addition & 1 deletion src/encoding/unicode.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import * as chardet from '..';

describe('Unicode', () => {
var base = __dirname + '/../test/data/encodings';
const base = __dirname + '/../test/data/encodings';

it('should return UTF-16LE', () => {
expect(chardet.detectFileSync(base + '/utf16le')).toBe('UTF-16LE');
Expand Down
Loading

0 comments on commit b176134

Please sign in to comment.