Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: replace bfj with stream-json #8

Merged
merged 4 commits into from
Mar 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ jobs:
- name: 📋 Typecheck core
run: bun run typecheck

- name: 🧪 Test core
run: bun test

- name: 👷 Build core
run: bun run build

Expand Down
Binary file modified bun.lockb
Binary file not shown.
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
"dependencies": {
"@expo/server": "^0.3.1",
"arg": "^5.0.2",
"bfj": "^8.0.0",
"compression": "^1.7.4",
"connect": "^3.7.0",
"express": "^4.18.2",
Expand All @@ -46,12 +45,15 @@
"getenv": "^1.0.0",
"morgan": "^1.10.0",
"open": "^8.4.2",
"serve-static": "^1.15.0"
"serve-static": "^1.15.0",
"stream-json": "^1.8.0"
},
"devDependencies": {
"@types/bun": "^1.0.8",
"@types/chai": "^4",
"@types/express": "^4.17.21",
"@types/node": "^20.11.26",
"@types/stream-json": "^1.7.7",
"chai": "^4.3.10",
"eslint": "^8.57.0",
"eslint-config-universe": "^12.0.0",
Expand Down
14 changes: 7 additions & 7 deletions src/data/StatsFileSource.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import assert from 'assert';

import type { PartialStatsEntry, StatsEntry, StatsSource } from './types';
import { appendNDJsonToFile, mapNDJson, parseNDJsonAtLine } from '../utils/ndjson';
import { appendJsonLine, forEachJsonLines, parseJsonLine } from '../utils/ndjson';

export class StatsFileSource implements StatsSource {
constructor(public readonly statsPath: string) {
Expand All @@ -27,14 +27,14 @@ export async function listStatsEntries(statsPath: string) {
const bundlePattern = /^\["([^"]+)","([^"]+)","([^"]+)/;
const entries: PartialStatsEntry[] = [];

await mapNDJson(statsPath, (index, line) => {
await forEachJsonLines(statsPath, (contents, line) => {
// Skip the stats metadata line
if (index === 1) return;
if (line === 1) return;

const [_, platform, projectRoot, entryPoint] = line.match(bundlePattern) ?? [];
const [_, platform, projectRoot, entryPoint] = contents.match(bundlePattern) ?? [];
if (platform && projectRoot && entryPoint) {
entries.push({
id: String(index),
id: String(line),
platform: platform as any,
projectRoot,
entryPoint,
Expand All @@ -49,7 +49,7 @@ export async function listStatsEntries(statsPath: string) {
* Get the stats entry by id or line number, and parse the data.
*/
export async function readStatsEntry(statsPath: string, id: number): Promise<StatsEntry> {
const statsEntry = await parseNDJsonAtLine<any[]>(statsPath, id);
const statsEntry = await parseJsonLine<any[]>(statsPath, id);
return {
id: String(id),
platform: statsEntry[0],
Expand Down Expand Up @@ -80,5 +80,5 @@ export function writeStatsEntry(statsPath: string, stats: StatsEntry) {
stats.serializeOptions,
];

return (writeStatsQueue = writeStatsQueue.then(() => appendNDJsonToFile(statsPath, entry)));
return (writeStatsQueue = writeStatsQueue.then(() => appendJsonLine(statsPath, entry)));
}
4 changes: 4 additions & 0 deletions src/utils/__tests__/fixtures/ndjson.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"name": "Gilbert", "wins": [["straight", "7♣"], ["one pair", "10♥"]]}
{"name": "Alexa", "wins": [["two pair", "4♠"], ["two pair", "9♠"]]}
{"name": "May", "wins": []}
{"name": "Deloise", "wins": [["three of a kind", "5♣"]]}
51 changes: 51 additions & 0 deletions src/utils/__tests__/ndjson.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { describe, expect, it, mock } from 'bun:test';
import path from 'path';

import { forEachJsonLines, parseJsonLine } from '../ndjson';

function fixture(...filePath: string[]) {
return path.join(__dirname, 'fixtures', ...filePath);
}

describe('forEachJsonLines', () => {
it('iterates each line of file', async () => {
const lines: string[] = [];
await forEachJsonLines(fixture('ndjson.json'), (content) => {
lines.push(content);
});

expect(lines).toEqual([
expect.stringContaining('Gilbert'),
expect.stringContaining('Alexa'),
expect.stringContaining('May'),
expect.stringContaining('Deloise'),
]);
});

it('iterates each line with line numbers starting from 1', async () => {
const onReadLine = mock();
await forEachJsonLines(fixture('ndjson.json'), onReadLine);

// Callback is invoked with (content, line, reader) => ...
expect(onReadLine).not.toHaveBeenCalledWith(expect.any(String), 0, expect.any(Object));
expect(onReadLine).toHaveBeenCalledWith(expect.any(String), 1, expect.any(Object));
expect(onReadLine).toHaveBeenCalledWith(expect.any(String), 2, expect.any(Object));
expect(onReadLine).toHaveBeenCalledWith(expect.any(String), 3, expect.any(Object));
expect(onReadLine).toHaveBeenCalledWith(expect.any(String), 4, expect.any(Object));
});
});

describe('parseJsonLine', () => {
it('parses a single line from file', async () => {
expect(await parseJsonLine(fixture('ndjson.json'), 1)).toMatchObject({ name: 'Gilbert' });
expect(await parseJsonLine(fixture('ndjson.json'), 2)).toMatchObject({ name: 'Alexa' });
expect(await parseJsonLine(fixture('ndjson.json'), 3)).toMatchObject({ name: 'May' });
expect(await parseJsonLine(fixture('ndjson.json'), 4)).toMatchObject({ name: 'Deloise' });
});

it('throws if single line is not found', async () => {
await expect(parseJsonLine(fixture('ndjson.json'), 99999)).rejects.toThrow(
'Line 99999 not found in file'
);
});
});
79 changes: 41 additions & 38 deletions src/utils/__tests__/search.test.ts
Original file line number Diff line number Diff line change
@@ -1,87 +1,90 @@
import { expect } from 'chai';
import { describe, it } from 'node:test';
import { describe, expect, it } from 'bun:test';

import { type StatsModule } from '../../data/types';
import { fuzzyFilterModules } from '../search';

const modules = [
{ path: '/user/expo/node_modules/lodash/lodash.js' },
{ path: '/user/expo/node_modules/expo/package.json' },
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
] as StatsModule[];
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
];

describe('fuzzyGlobSearch', () => {
function asModule(module: Pick<StatsModule, 'path'>) {
return module as StatsModule;
}

describe.skip('fuzzyGlobSearch', () => {
describe('include', () => {
it('filters by exact file name', () => {
expect(fuzzyFilterModules(modules, { include: 'index.ts' })).to.deep.equal([
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
expect(fuzzyFilterModules(modules, { include: 'index.ts' })).toEqual([
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
]);
});

it('filters by exact directory name', () => {
expect(fuzzyFilterModules(modules, { include: 'node_modules' })).to.deep.equal([
{ path: '/user/expo/node_modules/lodash/lodash.js' },
{ path: '/user/expo/node_modules/expo/package.json' },
expect(fuzzyFilterModules(modules, { include: 'node_modules' })).toEqual([
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
]);
});

it('filters by multiple exact file or directory names', () => {
expect(fuzzyFilterModules(modules, { include: 'index.ts, lodash' })).to.deep.equal([
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
{ path: '/user/expo/node_modules/lodash/lodash.js' },
expect(fuzzyFilterModules(modules, { include: 'index.ts, lodash' })).toEqual([
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
]);
});

it('filters using star pattern on directory', () => {
expect(fuzzyFilterModules(modules, { include: 'src/*' })).to.deep.equal([
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
expect(fuzzyFilterModules(modules, { include: 'src/*' })).toEqual([
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
]);
});

it('filters using star pattern on nested directory', () => {
expect(fuzzyFilterModules(modules, { include: 'expo/src/**' })).to.deep.equal([
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
expect(fuzzyFilterModules(modules, { include: 'expo/src/**' })).toEqual([
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
]);
});
});

describe('exclude', () => {
it('filters by exact file name', () => {
expect(fuzzyFilterModules(modules, { exclude: 'index.ts' })).to.deep.equal([
{ path: '/user/expo/node_modules/lodash/lodash.js' },
{ path: '/user/expo/node_modules/expo/package.json' },
expect(fuzzyFilterModules(modules, { exclude: 'index.ts' })).toEqual([
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
]);
});

it('filters by exact directory name', () => {
expect(fuzzyFilterModules(modules, { exclude: 'node_modules' })).to.deep.equal([
{ path: '/user/expo/src/index.ts' },
{ path: '/user/expo/src/app/index.ts' },
expect(fuzzyFilterModules(modules, { exclude: 'node_modules' })).toEqual([
asModule({ path: '/user/expo/src/index.ts' }),
asModule({ path: '/user/expo/src/app/index.ts' }),
]);
});

it('filters by multiple exact file or directory names', () => {
expect(fuzzyFilterModules(modules, { exclude: 'index.ts, lodash' })).to.deep.equal([
{ path: '/user/expo/node_modules/expo/package.json' },
expect(fuzzyFilterModules(modules, { exclude: 'index.ts, lodash' })).toEqual([
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
]);
});

it('filters using star pattern on directory', () => {
expect(fuzzyFilterModules(modules, { exclude: 'src/*' })).to.deep.equal([
{ path: '/user/expo/node_modules/lodash/lodash.js' },
{ path: '/user/expo/node_modules/expo/package.json' },
expect(fuzzyFilterModules(modules, { exclude: 'src/*' })).toEqual([
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
]);
});

it('filters using star pattern on nested directory', () => {
expect(fuzzyFilterModules(modules, { exclude: 'expo/src/**' })).to.deep.equal([
{ path: '/user/expo/node_modules/lodash/lodash.js' },
{ path: '/user/expo/node_modules/expo/package.json' },
expect(fuzzyFilterModules(modules, { exclude: 'expo/src/**' })).toEqual([
asModule({ path: '/user/expo/node_modules/lodash/lodash.js' }),
asModule({ path: '/user/expo/node_modules/expo/package.json' }),
]);
});
});
Expand Down
76 changes: 31 additions & 45 deletions src/utils/ndjson.ts
Original file line number Diff line number Diff line change
@@ -1,79 +1,65 @@
import events from 'events';
import fs from 'fs';
import readline from 'readline';
import stream from 'stream';
import { disassembler } from 'stream-json/Disassembler';
import { stringer } from 'stream-json/Stringer';

/**
* Efficiently map through all lines within the Newline-Delimited JSON (ndjson) file, using streams.
* This won't parse the actual JSON but returns the partial string instead.
* Note, line numbers starts at `1`.
* Iterate through lines of a ndjson/jsonl file using streams.
* This won't parse the actual JSON but invokes the callback for each line.
*
* @note Line numbers starts at `1`
*/
export async function mapNDJson(
export async function forEachJsonLines(
filePath: string,
callback: (line: number, contents: string) => any
callback: (lineContent: string, lineNumber: number, reader: readline.Interface) => any
) {
const stream = fs.createReadStream(filePath);
const reader = readline.createInterface({ input: stream });
const input = fs.createReadStream(filePath);
const reader = readline.createInterface({ input });
let lineNumber = 1;

reader.on('error', (error) => {
throw error;
});

reader.on('line', (contents) => {
callback(lineNumber++, contents);
callback(contents, lineNumber++, reader);
});

await events.once(reader, 'close');
stream.close();
}

/**
* Efficiently parse a single line from a Newline-Delimited JSON (ndjson) file, using streams.
* Note, line numbers starts at `1`.
* Parse a single line of a jsonl/ndjson file using streams.
* Once the line is found, iteration is stopped and the parsed JSON is returned.
*
* @note Line numbers starts at `1`
*/
export async function parseNDJsonAtLine<T = any>(filePath: string, line: number): Promise<T> {
const stream = fs.createReadStream(filePath);
const reader = readline.createInterface({ input: stream });
export async function parseJsonLine<T = any>(filePath: string, lineNumber: number): Promise<T> {
let lineContent = '';

let lineContents;
let lineNumber = 1;

reader.on('error', (error) => {
throw error;
});

reader.on('line', (contents) => {
if (lineNumber++ === line) {
lineContents = contents;
await forEachJsonLines(filePath, (content, line, reader) => {
if (line === lineNumber) {
lineContent = content;
reader.close();
}
});

await events.once(reader, 'close');
stream.close();

if (!lineContents) {
throw new Error(`Line ${line} not found in file: ${filePath}`);
if (!lineContent) {
throw new Error(`Line ${lineNumber} not found in file: ${filePath}`);
}

return JSON.parse(lineContents);
return JSON.parse(lineContent);
}

/** Efficiently append a new line to a Newline-Delimited JSON (ndjson) file, using streams. */
export async function appendNDJsonToFile(filePath: string, data: unknown): Promise<void> {
// Note(cedric): keep this dependency inlined to avoid loading it in the WebUI
const bfj = require('bfj');
await bfj.write(filePath, data, {
// Force stream to append to file
flags: 'a',
// Ignore all complex data types, which shouldn't exist in the data
buffers: 'ignore',
circular: 'ignore',
iterables: 'ignore',
promises: 'ignore',
// Only enable maps, as the graph dependencies are stored as a map
maps: 'object',
});
/** Append a single line of json data to a jsonl/ndjson file using streams. */
export async function appendJsonLine(filePath: string, data: unknown): Promise<void> {
const input = stream.Readable.from([data] as any, { objectMode: true });
const output = fs.createWriteStream(filePath, { flags: 'a' });

input.pipe(disassembler()).pipe(stringer()).pipe(output);

await events.once(output, 'finish');
await fs.promises.appendFile(filePath, '\n', 'utf-8');
}
4 changes: 2 additions & 2 deletions src/utils/stats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import path from 'path';
import { name, version } from '../../package.json';
import { env } from '../utils/env';
import { AtlasValidationError } from '../utils/errors';
import { parseNDJsonAtLine } from '../utils/ndjson';
import { parseJsonLine } from '../utils/ndjson';

export type StatsMetadata = { name: string; version: string };

Expand All @@ -28,7 +28,7 @@ export async function validateStatsFile(statsFile: string, metadata = getStatsMe
return;
}

const data = await parseNDJsonAtLine(statsFile, 1);
const data = await parseJsonLine(statsFile, 1);

if (data.name !== metadata.name || data.version !== metadata.version) {
throw new AtlasValidationError('STATS_FILE_INCOMPATIBLE', statsFile, data.version);
Expand Down