Skip to content

Commit

Permalink
refactor: replace bfj with stream-json (#8)
Browse files Browse the repository at this point in the history
* test: use bun test and add ndjson tests

* refactor: replace `bfj` with `stream-json`

* fix: remove the `test` script to not confuse bun

* chore: fix linting issues
  • Loading branch information
byCedric authored Mar 17, 2024
1 parent fe7b452 commit c8f7cdc
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 94 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ jobs:
- name: 📋 Typecheck core
run: bun run typecheck

- name: 🧪 Test core
run: bun test

- name: 👷 Build core
run: bun run build

Expand Down
Binary file modified bun.lockb
Binary file not shown.
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
"dependencies": {
"@expo/server": "^0.3.1",
"arg": "^5.0.2",
"bfj": "^8.0.0",
"compression": "^1.7.4",
"connect": "^3.7.0",
"express": "^4.18.2",
Expand All @@ -46,12 +45,15 @@
"getenv": "^1.0.0",
"morgan": "^1.10.0",
"open": "^8.4.2",
"serve-static": "^1.15.0"
"serve-static": "^1.15.0",
"stream-json": "^1.8.0"
},
"devDependencies": {
"@types/bun": "^1.0.8",
"@types/chai": "^4",
"@types/express": "^4.17.21",
"@types/node": "^20.11.26",
"@types/stream-json": "^1.7.7",
"chai": "^4.3.10",
"eslint": "^8.57.0",
"eslint-config-universe": "^12.0.0",
Expand Down
14 changes: 7 additions & 7 deletions src/data/StatsFileSource.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import assert from 'assert';

import type { PartialStatsEntry, StatsEntry, StatsSource } from './types';
import { appendNDJsonToFile, mapNDJson, parseNDJsonAtLine } from '../utils/ndjson';
import { appendJsonLine, forEachJsonLines, parseJsonLine } from '../utils/ndjson';

export class StatsFileSource implements StatsSource {
constructor(public readonly statsPath: string) {
Expand All @@ -27,14 +27,14 @@ export async function listStatsEntries(statsPath: string) {
const bundlePattern = /^\["([^"]+)","([^"]+)","([^"]+)/;
const entries: PartialStatsEntry[] = [];

await mapNDJson(statsPath, (index, line) => {
await forEachJsonLines(statsPath, (contents, line) => {
// Skip the stats metadata line
if (index === 1) return;
if (line === 1) return;

const [_, platform, projectRoot, entryPoint] = line.match(bundlePattern) ?? [];
const [_, platform, projectRoot, entryPoint] = contents.match(bundlePattern) ?? [];
if (platform && projectRoot && entryPoint) {
entries.push({
id: String(index),
id: String(line),
platform: platform as any,
projectRoot,
entryPoint,
Expand All @@ -49,7 +49,7 @@ export async function listStatsEntries(statsPath: string) {
* Get the stats entry by id or line number, and parse the data.
*/
export async function readStatsEntry(statsPath: string, id: number): Promise<StatsEntry> {
const statsEntry = await parseNDJsonAtLine<any[]>(statsPath, id);
const statsEntry = await parseJsonLine<any[]>(statsPath, id);
return {
id: String(id),
platform: statsEntry[0],
Expand Down Expand Up @@ -80,5 +80,5 @@ export function writeStatsEntry(statsPath: string, stats: StatsEntry) {
stats.serializeOptions,
];

return (writeStatsQueue = writeStatsQueue.then(() => appendNDJsonToFile(statsPath, entry)));
return (writeStatsQueue = writeStatsQueue.then(() => appendJsonLine(statsPath, entry)));
}
4 changes: 4 additions & 0 deletions src/utils/__tests__/fixtures/ndjson.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"name": "Gilbert", "wins": [["straight", "7♣"], ["one pair", "10♥"]]}
{"name": "Alexa", "wins": [["two pair", "4♠"], ["two pair", "9♠"]]}
{"name": "May", "wins": []}
{"name": "Deloise", "wins": [["three of a kind", "5♣"]]}
51 changes: 51 additions & 0 deletions src/utils/__tests__/ndjson.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { describe, expect, it, mock } from 'bun:test';
import path from 'path';

import { forEachJsonLines, parseJsonLine } from '../ndjson';

function fixture(...filePath: string[]) {
return path.join(__dirname, 'fixtures', ...filePath);
}

describe('forEachJsonLines', () => {
it('iterates each line of file', async () => {
const lines: string[] = [];
await forEachJsonLines(fixture('ndjson.json'), (content) => {
lines.push(content);
});

expect(lines).toEqual([
expect.stringContaining('Gilbert'),
expect.stringContaining('Alexa'),
expect.stringContaining('May'),
expect.stringContaining('Deloise'),
]);
});

it('iterates each line with line numbers starting from 1', async () => {
const onReadLine = mock();
await forEachJsonLines(fixture('ndjson.json'), onReadLine);

// Callback is invoked with (content, line, reader) => ...
expect(onReadLine).not.toHaveBeenCalledWith(expect.any(String), 0, expect.any(Object));
expect(onReadLine).toHaveBeenCalledWith(expect.any(String), 1, expect.any(Object));
expect(onReadLine).toHaveBeenCalledWith(expect.any(String), 2, expect.any(Object));
expect(onReadLine).toHaveBeenCalledWith(expect.any(String), 3, expect.any(Object));
expect(onReadLine).toHaveBeenCalledWith(expect.any(String), 4, expect.any(Object));
});
});

describe('parseJsonLine', () => {
it('parses a single line from file', async () => {
expect(await parseJsonLine(fixture('ndjson.json'), 1)).toMatchObject({ name: 'Gilbert' });
expect(await parseJsonLine(fixture('ndjson.json'), 2)).toMatchObject({ name: 'Alexa' });
expect(await parseJsonLine(fixture('ndjson.json'), 3)).toMatchObject({ name: 'May' });
expect(await parseJsonLine(fixture('ndjson.json'), 4)).toMatchObject({ name: 'Deloise' });
});

it('throws if single line is not found', async () => {
await expect(parseJsonLine(fixture('ndjson.json'), 99999)).rejects.toThrow(
'Line 99999 not found in file'
);
});
});
79 changes: 41 additions & 38 deletions src/utils/__tests__/search.test.ts
Original file line number Diff line number Diff line change
@@ -1,87 +1,90 @@
import { expect } from 'chai';
import { describe, it } from 'node:test';
import { describe, expect, it } from 'bun:test';

import { type StatsModule } from '../../data/types';
import { fuzzyFilterModules } from '../search';

const modules = [
{ path: '/user/expo/node_modules/lodash/lodash.js' },
{ path: '/user/expo/node_modules/expo/package.json' },
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
] as StatsModule[];
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
];

describe('fuzzyGlobSearch', () => {
function asModule(module: Pick<StatsModule, 'path'>) {
return module as StatsModule;
}

describe.skip('fuzzyGlobSearch', () => {
describe('include', () => {
it('filters by exact file name', () => {
expect(fuzzyFilterModules(modules, { include: 'index.ts' })).to.deep.equal([
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
expect(fuzzyFilterModules(modules, { include: 'index.ts' })).toEqual([
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
]);
});

it('filters by exact directory name', () => {
expect(fuzzyFilterModules(modules, { include: 'node_modules' })).to.deep.equal([
{ path: '/user/expo/node_modules/lodash/lodash.js' },
{ path: '/user/expo/node_modules/expo/package.json' },
expect(fuzzyFilterModules(modules, { include: 'node_modules' })).toEqual([
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
]);
});

it('filters by multiple exact file or directory names', () => {
expect(fuzzyFilterModules(modules, { include: 'index.ts, lodash' })).to.deep.equal([
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
{ path: '/user/expo/node_modules/lodash/lodash.js' },
expect(fuzzyFilterModules(modules, { include: 'index.ts, lodash' })).toEqual([
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
]);
});

it('filters using star pattern on directory', () => {
expect(fuzzyFilterModules(modules, { include: 'src/*' })).to.deep.equal([
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
expect(fuzzyFilterModules(modules, { include: 'src/*' })).toEqual([
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
]);
});

it('filters using star pattern on nested directory', () => {
expect(fuzzyFilterModules(modules, { include: 'expo/src/**' })).to.deep.equal([
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
expect(fuzzyFilterModules(modules, { include: 'expo/src/**' })).toEqual([
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
]);
});
});

describe('exclude', () => {
it('filters by exact file name', () => {
expect(fuzzyFilterModules(modules, { exclude: 'index.ts' })).to.deep.equal([
{ path: '/user/expo/node_modules/lodash/lodash.js' },
{ path: '/user/expo/node_modules/expo/package.json' },
expect(fuzzyFilterModules(modules, { exclude: 'index.ts' })).toEqual([
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
]);
});

it('filters by exact directory name', () => {
expect(fuzzyFilterModules(modules, { exclude: 'node_modules' })).to.deep.equal([
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
expect(fuzzyFilterModules(modules, { exclude: 'node_modules' })).toEqual([
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
]);
});

it('filters by multiple exact file or directory names', () => {
expect(fuzzyFilterModules(modules, { exclude: 'index.ts, lodash' })).to.deep.equal([
{ path: '/user/expo/node_modules/expo/package.json' },
expect(fuzzyFilterModules(modules, { exclude: 'index.ts, lodash' })).toEqual([
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
]);
});

it('filters using star pattern on directory', () => {
expect(fuzzyFilterModules(modules, { exclude: 'src/*' })).to.deep.equal([
{ path: '/user/expo/node_modules/lodash/lodash.js' },
{ path: '/user/expo/node_modules/expo/package.json' },
expect(fuzzyFilterModules(modules, { exclude: 'src/*' })).toEqual([
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
]);
});

it('filters using star pattern on nested directory', () => {
expect(fuzzyFilterModules(modules, { exclude: 'expo/src/**' })).to.deep.equal([
{ path: '/user/expo/node_modules/lodash/lodash.js' },
{ path: '/user/expo/node_modules/expo/package.json' },
expect(fuzzyFilterModules(modules, { exclude: 'expo/src/**' })).toEqual([
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
]);
});
});
Expand Down
76 changes: 31 additions & 45 deletions src/utils/ndjson.ts
Original file line number Diff line number Diff line change
@@ -1,79 +1,65 @@
import events from 'events';
import fs from 'fs';
import readline from 'readline';
import stream from 'stream';
import { disassembler } from 'stream-json/Disassembler';
import { stringer } from 'stream-json/Stringer';

/**
* Efficiently map through all lines within the Newline-Delimited JSON (ndjson) file, using streams.
* This won't parse the actual JSON but returns the partial string instead.
* Note, line numbers starts at `1`.
* Iterate through lines of a ndjson/jsonl file using streams.
* This won't parse the actual JSON but invokes the callback for each line.
*
* @note Line numbers starts at `1`
*/
export async function mapNDJson(
export async function forEachJsonLines(
filePath: string,
callback: (line: number, contents: string) => any
callback: (lineContent: string, lineNumber: number, reader: readline.Interface) => any
) {
const stream = fs.createReadStream(filePath);
const reader = readline.createInterface({ input: stream });
const input = fs.createReadStream(filePath);
const reader = readline.createInterface({ input });
let lineNumber = 1;

reader.on('error', (error) => {
throw error;
});

reader.on('line', (contents) => {
callback(lineNumber++, contents);
callback(contents, lineNumber++, reader);
});

await events.once(reader, 'close');
stream.close();
}

/**
* Efficiently parse a single line from a Newline-Delimited JSON (ndjson) file, using streams.
* Note, line numbers starts at `1`.
* Parse a single line of a jsonl/ndjson file using streams.
* Once the line is found, iteration is stopped and the parsed JSON is returned.
*
* @note Line numbers starts at `1`
*/
export async function parseNDJsonAtLine<T = any>(filePath: string, line: number): Promise<T> {
const stream = fs.createReadStream(filePath);
const reader = readline.createInterface({ input: stream });
export async function parseJsonLine<T = any>(filePath: string, lineNumber: number): Promise<T> {
let lineContent = '';

let lineContents;
let lineNumber = 1;

reader.on('error', (error) => {
throw error;
});

reader.on('line', (contents) => {
if (lineNumber++ === line) {
lineContents = contents;
await forEachJsonLines(filePath, (content, line, reader) => {
if (line === lineNumber) {
lineContent = content;
reader.close();
}
});

await events.once(reader, 'close');
stream.close();

if (!lineContents) {
throw new Error(`Line ${line} not found in file: ${filePath}`);
if (!lineContent) {
throw new Error(`Line ${lineNumber} not found in file: ${filePath}`);
}

return JSON.parse(lineContents);
return JSON.parse(lineContent);
}

/** Efficiently append a new line to a Newline-Delimited JSON (ndjson) file, using streams. */
export async function appendNDJsonToFile(filePath: string, data: unknown): Promise<void> {
// Note(cedric): keep this dependency inlined to avoid loading it in the WebUI
const bfj = require('bfj');
await bfj.write(filePath, data, {
// Force stream to append to file
flags: 'a',
// Ignore all complex data types, which shouldn't exist in the data
buffers: 'ignore',
circular: 'ignore',
iterables: 'ignore',
promises: 'ignore',
// Only enable maps, as the graph dependencies are stored as a map
maps: 'object',
});
/** Append a single line of json data to a jsonl/ndjson file using streams. */
export async function appendJsonLine(filePath: string, data: unknown): Promise<void> {
const input = stream.Readable.from([data] as any, { objectMode: true });
const output = fs.createWriteStream(filePath, { flags: 'a' });

input.pipe(disassembler()).pipe(stringer()).pipe(output);

await events.once(output, 'finish');
await fs.promises.appendFile(filePath, '\n', 'utf-8');
}
4 changes: 2 additions & 2 deletions src/utils/stats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import path from 'path';
import { name, version } from '../../package.json';
import { env } from '../utils/env';
import { AtlasValidationError } from '../utils/errors';
import { parseNDJsonAtLine } from '../utils/ndjson';
import { parseJsonLine } from '../utils/ndjson';

export type StatsMetadata = { name: string; version: string };

Expand All @@ -28,7 +28,7 @@ export async function validateStatsFile(statsFile: string, metadata = getStatsMe
return;
}

const data = await parseNDJsonAtLine(statsFile, 1);
const data = await parseJsonLine(statsFile, 1);

if (data.name !== metadata.name || data.version !== metadata.version) {
throw new AtlasValidationError('STATS_FILE_INCOMPATIBLE', statsFile, data.version);
Expand Down

0 comments on commit c8f7cdc

Please sign in to comment.