From e83d7de44a011a8d8e3647c600b7d8ccc0d690e3 Mon Sep 17 00:00:00 2001 From: Dzmitry Kalabuk Date: Tue, 17 Sep 2024 18:04:14 +0300 Subject: [PATCH 01/23] Use streaming API (WIP) --- util/http-client/src/client.ts | 1 + .../util-internal-archive-client/package.json | 1 + .../src/client.ts | 66 +++++++++++++++++-- .../src/layout.ts | 2 +- .../util-internal-ingest-tools/src/archive.ts | 24 +++---- 5 files changed, 76 insertions(+), 18 deletions(-) diff --git a/util/http-client/src/client.ts b/util/http-client/src/client.ts index 505bf6fed..e4b7c81d6 100644 --- a/util/http-client/src/client.ts +++ b/util/http-client/src/client.ts @@ -331,6 +331,7 @@ export class HttpClient { if (error instanceof HttpResponse) { switch(error.status) { case 429: + case 500: case 502: case 503: case 504: diff --git a/util/util-internal-archive-client/package.json b/util/util-internal-archive-client/package.json index afb8adb20..8bc047178 100644 --- a/util/util-internal-archive-client/package.json +++ b/util/util-internal-archive-client/package.json @@ -17,6 +17,7 @@ }, "dependencies": { "@subsquid/util-internal": "^3.2.0", + "@subsquid/util-internal-archive-layout": "^0.4.1", "@subsquid/util-internal-range": "^0.3.0" }, "peerDependencies": { diff --git a/util/util-internal-archive-client/src/client.ts b/util/util-internal-archive-client/src/client.ts index 73fbc8b19..f9e9cbbaf 100644 --- a/util/util-internal-archive-client/src/client.ts +++ b/util/util-internal-archive-client/src/client.ts @@ -1,8 +1,11 @@ -import {HttpClient, HttpTimeoutError} from '@subsquid/http-client' -import type {Logger} from '@subsquid/logger' -import {wait, withErrorContext} from '@subsquid/util-internal' +import { HttpClient, HttpTimeoutError } from '@subsquid/http-client' +import type { Logger } from '@subsquid/logger' +import { concurrentWriter, wait, withErrorContext } from '@subsquid/util-internal' +import { splitLines } from '@subsquid/util-internal-archive-layout' import assert from 'assert' - +import { 
pipeline } from 'node:stream/promises' +import zlib from 'node:zlib' +import { Transform, TransformCallback } from 'stream' export interface ArchiveQuery { fromBlock: number @@ -74,7 +77,58 @@ export class ArchiveClient { httpTimeout: this.queryTimeout }).catch(withErrorContext({ archiveQuery: query - })) + })).then(body => { + // TODO: move the conversion to the server + let blocks = (body as string).trimEnd().split('\n').map(line => JSON.parse(line)) + return blocks + }) + + }) + } + + stream(query: Q): Promise> { + return this.retry(async () => { + return this.http.request('POST', this.getRouterUrl(`stream`), { + json: query, + retryAttempts: 0, + httpTimeout: this.queryTimeout, + stream: true, + }).catch(withErrorContext({ + archiveQuery: query + })).then(res => { + // Stream of JSON lines. For some reason it's already ungziped + let stream = res.body as NodeJS.ReadableStream + return concurrentWriter(1, async write => { + let blocks: B[] = [] + let buffer_size = 0 + await pipeline( + stream, + async function* (chunks) { + for await (let chunk of chunks) { + yield chunk as Buffer + } + }, + // zlib.createGunzip(), + async dataChunks => { + for await (let lines of splitLines(dataChunks)) { + for (let line of lines) { + buffer_size += line.length + let block: B = JSON.parse(line) + blocks.push(block) + if (buffer_size > 10 * 1024 * 1024) { + await write(blocks) + blocks = [] + buffer_size = 0 + } + } + } + } + ) + if (blocks.length) { + await write(blocks) + } + }) + }) }) } @@ -83,7 +137,7 @@ export class ArchiveClient { while (true) { try { return await request() - } catch(err: any) { + } catch (err: any) { if (this.http.isRetryableError(err)) { let pause = this.retrySchedule[Math.min(retries, this.retrySchedule.length - 1)] if (this.log?.isWarn()) { diff --git a/util/util-internal-archive-layout/src/layout.ts b/util/util-internal-archive-layout/src/layout.ts index 7aa938f18..4341af401 100644 --- a/util/util-internal-archive-layout/src/layout.ts +++ 
b/util/util-internal-archive-layout/src/layout.ts @@ -324,7 +324,7 @@ export class ArchiveLayout { } } -async function* splitLines(chunks: AsyncIterable) { +export async function* splitLines(chunks: AsyncIterable) { let splitter = new LineSplitter() for await (let chunk of chunks) { let lines = splitter.push(chunk) diff --git a/util/util-internal-ingest-tools/src/archive.ts b/util/util-internal-ingest-tools/src/archive.ts index b470c7d96..0034f8b15 100644 --- a/util/util-internal-ingest-tools/src/archive.ts +++ b/util/util-internal-ingest-tools/src/archive.ts @@ -36,21 +36,23 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn if (stopOnHead) return top = await height.call() } - let blocks = await client.query({ + let stream = await client.stream({ fromBlock: beg, toBlock: req.range.to, ...req.request }) - assert(blocks.length > 0, 'boundary blocks are expected to be included') - let lastBlock = last(blocks).header.number - assert(lastBlock >= beg) - beg = lastBlock + 1 - if (beg > top) { - top = await height.get() - } - yield { - blocks, - isHead: beg > top + for await (let blocks of stream) { + assert(blocks.length > 0, 'boundary blocks are expected to be included') + let lastBlock = last(blocks).header.number + assert(lastBlock >= beg) + beg = lastBlock + 1 + if (beg > top) { + top = await height.get() + } + yield { + blocks, + isHead: beg > top + } } } } From ab5a3b3f13d2317d3f5d71632c24d222f89b5021 Mon Sep 17 00:00:00 2001 From: belopash Date: Tue, 29 Oct 2024 18:16:21 +0500 Subject: [PATCH 02/23] mock changes --- .../http-client/portal-api_2024-10-29-13-16.json | 10 ++++++++++ .../portal-api_2024-10-29-13-16.json | 10 ++++++++++ .../portal-api_2024-10-29-13-16.json | 10 ++++++++++ .../portal-api_2024-10-29-13-16.json | 10 ++++++++++ test/erc20-transfers/src/processor.ts | 8 ++++---- 5 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 common/changes/@subsquid/http-client/portal-api_2024-10-29-13-16.json create mode 100644 
common/changes/@subsquid/util-internal-archive-client/portal-api_2024-10-29-13-16.json create mode 100644 common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-10-29-13-16.json create mode 100644 common/changes/@subsquid/util-internal-ingest-tools/portal-api_2024-10-29-13-16.json diff --git a/common/changes/@subsquid/http-client/portal-api_2024-10-29-13-16.json b/common/changes/@subsquid/http-client/portal-api_2024-10-29-13-16.json new file mode 100644 index 000000000..69eb7d1fe --- /dev/null +++ b/common/changes/@subsquid/http-client/portal-api_2024-10-29-13-16.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@subsquid/http-client", + "comment": "add streaming support", + "type": "minor" + } + ], + "packageName": "@subsquid/http-client" +} \ No newline at end of file diff --git a/common/changes/@subsquid/util-internal-archive-client/portal-api_2024-10-29-13-16.json b/common/changes/@subsquid/util-internal-archive-client/portal-api_2024-10-29-13-16.json new file mode 100644 index 000000000..fdcc14174 --- /dev/null +++ b/common/changes/@subsquid/util-internal-archive-client/portal-api_2024-10-29-13-16.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@subsquid/util-internal-archive-client", + "comment": "add streaming support", + "type": "minor" + } + ], + "packageName": "@subsquid/util-internal-archive-client" +} \ No newline at end of file diff --git a/common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-10-29-13-16.json b/common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-10-29-13-16.json new file mode 100644 index 000000000..71159385a --- /dev/null +++ b/common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-10-29-13-16.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@subsquid/util-internal-archive-layout", + "comment": "add streaming support", + "type": "minor" + } + ], + "packageName": "@subsquid/util-internal-archive-layout" +} \ No newline at end of file 
diff --git a/common/changes/@subsquid/util-internal-ingest-tools/portal-api_2024-10-29-13-16.json b/common/changes/@subsquid/util-internal-ingest-tools/portal-api_2024-10-29-13-16.json new file mode 100644 index 000000000..ccad9f0c2 --- /dev/null +++ b/common/changes/@subsquid/util-internal-ingest-tools/portal-api_2024-10-29-13-16.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@subsquid/util-internal-ingest-tools", + "comment": "add streaming support", + "type": "minor" + } + ], + "packageName": "@subsquid/util-internal-ingest-tools" +} \ No newline at end of file diff --git a/test/erc20-transfers/src/processor.ts b/test/erc20-transfers/src/processor.ts index 29acb7fdd..e6385d5ec 100644 --- a/test/erc20-transfers/src/processor.ts +++ b/test/erc20-transfers/src/processor.ts @@ -4,14 +4,14 @@ import * as erc20 from './abi/erc20' import {Transfer} from './model' -const CONTRACT = '0xFd086bC7CD5C481DCC9C85ebE478A1C0b69FCbb9'.toLowerCase() +const CONTRACT = '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48'.toLowerCase() const processor = new EvmBatchProcessor() - .setGateway('https://v2.archive.subsquid.io/network/arbitrum-one') - .setRpcEndpoint(process.env.ARB_NODE_WS) + .setGateway('http://localhost:8080/datasets/ethereum-mainnet') + // .setRpcEndpoint(process.env.ARB_NODE_WS) .setFinalityConfirmation(500) - .setBlockRange({from: 190000000}) + .setBlockRange({from: 0}) .setFields({ block: {size: true}, log: {transactionHash: true}, From 4848623ae5c31caff9ee806ee7060f9e513c75b6 Mon Sep 17 00:00:00 2001 From: belopash Date: Tue, 29 Oct 2024 18:24:01 +0500 Subject: [PATCH 03/23] update prerelease workflow --- .github/workflows/prerelease.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 261322de4..a6bb02195 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -9,7 +9,7 @@ jobs: steps: - uses: actions/checkout@v3 with: - ref: beta 
+ ref: ${{ github.ref_name }} fetch-depth: 0 - name: Configure git user @@ -26,6 +26,6 @@ jobs: - name: Publish npm packages run: | commit_hash=$(git rev-parse HEAD) - node common/scripts/install-run-rush.js publish --apply --version-policy npm --partial-prerelease --publish --tag beta --prerelease-name beta.${commit_hash::6} + node common/scripts/install-run-rush.js publish --apply --version-policy npm --partial-prerelease --publish --tag ${{ github.ref_name }} --prerelease-name ${{ github.ref_name }}.${commit_hash::6} env: NPM_AUTH_TOKEN: ${{ secrets.NPM_AUTH_TOKEN }} \ No newline at end of file From 2743ea97c11b6e65d2ff98e70bdac6cda13db601 Mon Sep 17 00:00:00 2001 From: belopash Date: Tue, 29 Oct 2024 18:49:49 +0500 Subject: [PATCH 04/23] fix package inconsistency issue --- util/util-internal-archive-client/package.json | 4 ++-- util/util-internal-archive-client/src/client.ts | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/util/util-internal-archive-client/package.json b/util/util-internal-archive-client/package.json index 8bc047178..a48eaf5ff 100644 --- a/util/util-internal-archive-client/package.json +++ b/util/util-internal-archive-client/package.json @@ -17,8 +17,8 @@ }, "dependencies": { "@subsquid/util-internal": "^3.2.0", - "@subsquid/util-internal-archive-layout": "^0.4.1", - "@subsquid/util-internal-range": "^0.3.0" + "@subsquid/util-internal-range": "^0.3.0", + "@subsquid/util-internal-archive-layout": "^1.0.0" }, "peerDependencies": { "@subsquid/http-client": "^1.6.0", diff --git a/util/util-internal-archive-client/src/client.ts b/util/util-internal-archive-client/src/client.ts index f9e9cbbaf..fe0ee079a 100644 --- a/util/util-internal-archive-client/src/client.ts +++ b/util/util-internal-archive-client/src/client.ts @@ -4,8 +4,6 @@ import { concurrentWriter, wait, withErrorContext } from '@subsquid/util-interna import { splitLines } from '@subsquid/util-internal-archive-layout' import assert from 'assert' import { pipeline } from 
'node:stream/promises' -import zlib from 'node:zlib' -import { Transform, TransformCallback } from 'stream' export interface ArchiveQuery { fromBlock: number From 505926d6fd8331c3952423c55b94d35311dcffd0 Mon Sep 17 00:00:00 2001 From: belopash Date: Tue, 29 Oct 2024 19:56:26 +0500 Subject: [PATCH 05/23] convert buffer to utf8 stream --- util/util-internal-archive-client/src/client.ts | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/util/util-internal-archive-client/src/client.ts b/util/util-internal-archive-client/src/client.ts index fe0ee079a..4ecbbecc4 100644 --- a/util/util-internal-archive-client/src/client.ts +++ b/util/util-internal-archive-client/src/client.ts @@ -65,28 +65,24 @@ export class ArchiveClient { query(query: Q): Promise { return this.retry(async () => { - let worker: string = await this.http.get(this.getRouterUrl(`${query.fromBlock}/worker`), { - retryAttempts: 0, - httpTimeout: 10_000 - }) - return this.http.post(worker, { + return this.http.request('POST', this.getRouterUrl(`stream`), { json: query, retryAttempts: 0, httpTimeout: this.queryTimeout + }).catch(withErrorContext({ archiveQuery: query - })).then(body => { + })).then(res => { // TODO: move the conversion to the server - let blocks = (body as string).trimEnd().split('\n').map(line => JSON.parse(line)) + let blocks = res.body.toString('utf8').trimEnd().split('\n').map(line => JSON.parse(line)) return blocks }) - }) } stream(query: Q): Promise> { return this.retry(async () => { - return this.http.request('POST', this.getRouterUrl(`stream`), { + return this.http.request('POST', this.getRouterUrl(`stream`), { json: query, retryAttempts: 0, httpTimeout: this.queryTimeout, @@ -95,12 +91,11 @@ export class ArchiveClient { archiveQuery: query })).then(res => { // Stream of JSON lines.
For some reason it's already ungziped - let stream = res.body as NodeJS.ReadableStream return concurrentWriter(1, async write => { let blocks: B[] = [] let buffer_size = 0 await pipeline( - stream, + res.body, async function* (chunks) { for await (let chunk of chunks) { yield chunk as Buffer From c83122f870fe58e3b6251cd6a92360cfb44a7098 Mon Sep 17 00:00:00 2001 From: belopash Date: Wed, 30 Oct 2024 23:17:31 +0500 Subject: [PATCH 06/23] save --- test/erc20-transfers/src/processor.ts | 24 +-- .../src/client.ts | 164 +++++++++++------- .../util-internal-ingest-tools/src/archive.ts | 7 +- util/util-internal/src/async.ts | 8 + 4 files changed, 122 insertions(+), 81 deletions(-) diff --git a/test/erc20-transfers/src/processor.ts b/test/erc20-transfers/src/processor.ts index e6385d5ec..5f96cf513 100644 --- a/test/erc20-transfers/src/processor.ts +++ b/test/erc20-transfers/src/processor.ts @@ -11,7 +11,7 @@ const processor = new EvmBatchProcessor() .setGateway('http://localhost:8080/datasets/ethereum-mainnet') // .setRpcEndpoint(process.env.ARB_NODE_WS) .setFinalityConfirmation(500) - .setBlockRange({from: 0}) + .setBlockRange({from: 20801368}) .setFields({ block: {size: true}, log: {transactionHash: true}, @@ -29,19 +29,19 @@ processor.run(new TypeormDatabase({supportHotBlocks: true}), async ctx => { for (let block of ctx.blocks) { for (let log of block.logs) { if (log.address == CONTRACT && erc20.events.Transfer.is(log)) { - let {from, to, value} = erc20.events.Transfer.decode(log) - transfers.push(new Transfer({ - id: log.id, - blockNumber: block.header.height, - timestamp: new Date(block.header.timestamp), - tx: log.transactionHash, - from, - to, - amount: value - })) + // let {from, to, value} = erc20.events.Transfer.decode(log) + // transfers.push(new Transfer({ + // id: log.id, + // blockNumber: block.header.height, + // timestamp: new Date(block.header.timestamp), + // tx: log.transactionHash, + // from, + // to, + // amount: value + // })) } } } - await 
ctx.store.insert(transfers) + // await ctx.store.insert(transfers) }) diff --git a/util/util-internal-archive-client/src/client.ts b/util/util-internal-archive-client/src/client.ts index 4ecbbecc4..5a41a9128 100644 --- a/util/util-internal-archive-client/src/client.ts +++ b/util/util-internal-archive-client/src/client.ts @@ -1,16 +1,15 @@ -import { HttpClient, HttpTimeoutError } from '@subsquid/http-client' -import type { Logger } from '@subsquid/logger' -import { concurrentWriter, wait, withErrorContext } from '@subsquid/util-internal' -import { splitLines } from '@subsquid/util-internal-archive-layout' +import {HttpClient, HttpTimeoutError} from '@subsquid/http-client' +import type {Logger} from '@subsquid/logger' +import {AsyncQueue, concurrentWriter, ensureError, wait, withErrorContext} from '@subsquid/util-internal' +import {splitLines} from '@subsquid/util-internal-archive-layout' import assert from 'assert' -import { pipeline } from 'node:stream/promises' +import {pipeline} from 'node:stream/promises' export interface ArchiveQuery { fromBlock: number toBlock?: number } - export interface Block { header: { number: number @@ -18,7 +17,6 @@ export interface Block { } } - export interface ArchiveClientOptions { http: HttpClient url: string @@ -26,7 +24,6 @@ export interface ArchiveClientOptions { log?: Logger } - export class ArchiveClient { private url: URL private http: HttpClient @@ -55,7 +52,7 @@ export class ArchiveClient { return this.retry(async () => { let res: string = await this.http.get(this.getRouterUrl('height'), { retryAttempts: 0, - httpTimeout: 10_000 + httpTimeout: 10_000, }) let height = parseInt(res) assert(Number.isSafeInteger(height)) @@ -65,64 +62,94 @@ export class ArchiveClient { query(query: Q): Promise { return this.retry(async () => { - return this.http.request('POST', this.getRouterUrl(`stream`), { - json: query, - retryAttempts: 0, - httpTimeout: this.queryTimeout - - }).catch(withErrorContext({ - archiveQuery: query - })).then(res 
=> { - // TODO: move the conversion to the server - let blocks = res.body.toString('utf8').trimEnd().split('\n').map(line => JSON.parse(line)) - return blocks - }) + return this.http + .request('POST', this.getRouterUrl(`stream`), { + json: query, + retryAttempts: 0, + httpTimeout: this.queryTimeout, + }) + .catch( + withErrorContext({ + archiveQuery: query, + }) + ) + .then((res) => { + // TODO: move the conversion to the server + let blocks = res.body + .toString('utf8') + .trimEnd() + .split('\n') + .map((line) => JSON.parse(line)) + return blocks + }) }) } - stream(query: Q): Promise> { - return this.retry(async () => { - return this.http.request('POST', this.getRouterUrl(`stream`), { - json: query, - retryAttempts: 0, - httpTimeout: this.queryTimeout, - stream: true, - }).catch(withErrorContext({ - archiveQuery: query - })).then(res => { - // Stream of JSON lines. For some reason it's already ungziped - return concurrentWriter(1, async write => { - let blocks: B[] = [] - let buffer_size = 0 - await pipeline( - res.body, - async function* (chunks) { - for await (let chunk of chunks) { - yield chunk as Buffer - } - }, - // zlib.createGunzip(), - async dataChunks => { - for await (let lines of splitLines(dataChunks)) { - for (let line of lines) { - buffer_size += line.length - let block: B = JSON.parse(line) - blocks.push(block) - if (buffer_size > 10 * 1024 * 1024) { - await write(blocks) - blocks = [] - buffer_size = 0 - } - } - } - } + async *stream(query: Q): AsyncIterable { + let queue = new AsyncQueue(1) + + const ingest = async () => { + let bufferSize = 0 + let fromBlock = query.fromBlock + let toBlock = query.toBlock ?? 
Infinity + + while (fromBlock <= toBlock) { + let stream = await this.http + .post(this.getRouterUrl(`stream`), { + json: {...query, fromBlock}, + retryAttempts: 3, + httpTimeout: this.queryTimeout, + stream: true, + }) + .catch( + withErrorContext({ + archiveQuery: query, + }) ) - if (blocks.length) { - await write(blocks) + + for await (let lines of splitLines(stream as AsyncIterable)) { + let batch = queue.peek() + if (batch instanceof Error) break + + if (!batch) { + bufferSize = 0 } - }) - }) - }) + + if (lines.length === 0) continue + + let blocks = lines.map((line) => { + bufferSize += line.length + return JSON.parse(line) as B + }) + + if (batch) { + // FIXME: won't it overflow stack? + batch.push(...blocks) + if (bufferSize > 10 * 1024 * 1024) { + await queue.wait() + } + } else { + await queue.put(blocks) + } + + fromBlock = blocks[blocks.length - 1].header.number + 1 + } + } + } + + ingest().then( + () => queue.close(), + (err) => { + if (!queue.isClosed()) { + queue.forcePut(ensureError(err)) + } + } + ) + + for await (let valueOrError of queue.iterate()) { + if (valueOrError instanceof Error) throw valueOrError + yield valueOrError + } } private async retry(request: () => Promise): Promise { @@ -134,12 +161,15 @@ export class ArchiveClient { if (this.http.isRetryableError(err)) { let pause = this.retrySchedule[Math.min(retries, this.retrySchedule.length - 1)] if (this.log?.isWarn()) { - let warn = retries > 3 || err instanceof HttpTimeoutError && err.ms > 10_000 + let warn = retries > 3 || (err instanceof HttpTimeoutError && err.ms > 10_000) if (warn) { - this.log.warn({ - reason: err.message, - ...err - }, `archive request failed, will retry in ${Math.round(pause / 1000)} secs`) + this.log.warn( + { + reason: err.message, + ...err, + }, + `archive request failed, will retry in ${Math.round(pause / 1000)} secs` + ) } } retries += 1 diff --git a/util/util-internal-ingest-tools/src/archive.ts b/util/util-internal-ingest-tools/src/archive.ts index 
0034f8b15..ef79c048d 100644 --- a/util/util-internal-ingest-tools/src/archive.ts +++ b/util/util-internal-ingest-tools/src/archive.ts @@ -10,6 +10,7 @@ export interface ArchiveIngestOptions { requests: RangeRequestList stopOnHead?: boolean pollInterval?: number + concurrency?: number } @@ -18,7 +19,7 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn client, requests, stopOnHead = false, - pollInterval = 20_000 + pollInterval = 20_000, } = args let height = new Throttler(() => client.getHeight(), pollInterval) @@ -36,11 +37,13 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn if (stopOnHead) return top = await height.call() } - let stream = await client.stream({ + + let stream = client.stream({ fromBlock: beg, toBlock: req.range.to, ...req.request }) + for await (let blocks of stream) { assert(blocks.length > 0, 'boundary blocks are expected to be included') let lastBlock = last(blocks).header.number diff --git a/util/util-internal/src/async.ts b/util/util-internal/src/async.ts index 12794ee1d..c5d2f3933 100644 --- a/util/util-internal/src/async.ts +++ b/util/util-internal/src/async.ts @@ -111,6 +111,14 @@ export class AsyncQueue { return this.buf[this.pos] } + async wait(): Promise { + if (this.closed) throw new ClosedQueueError() + if (this.size < this.buf.length) return + assert(this.putFuture == null, 'concurrent puts and waits are not allowed') + this.putFuture = createFuture() + await this.putFuture.promise() + } + close(): void { this.closed = true if (this.putFuture) { From b3f82eb10c9ccdd476be487093e6524daf18fe82 Mon Sep 17 00:00:00 2001 From: belopash Date: Thu, 31 Oct 2024 20:45:41 +0500 Subject: [PATCH 07/23] save --- evm/evm-processor/package.json | 1 + evm/evm-processor/src/ds-archive/client.ts | 3 +- evm/evm-processor/src/processor.ts | 55 +++++- rush.json | 6 + test/erc20-transfers/src/processor.ts | 4 +- util/portal-client/README.md | 3 + util/portal-client/package.json | 38 +++++ util/portal-client/src/client.ts | 
161 ++++++++++++++++++ util/portal-client/tsconfig.json | 21 +++ .../src/client.ts | 125 +++----------- util/util-internal-ingest-tools/package.json | 9 - .../util-internal-ingest-tools/src/archive.ts | 72 ++++++-- 12 files changed, 362 insertions(+), 136 deletions(-) create mode 100644 util/portal-client/README.md create mode 100644 util/portal-client/package.json create mode 100644 util/portal-client/src/client.ts create mode 100644 util/portal-client/tsconfig.json diff --git a/evm/evm-processor/package.json b/evm/evm-processor/package.json index 431ff8de4..b5e04ea8c 100644 --- a/evm/evm-processor/package.json +++ b/evm/evm-processor/package.json @@ -20,6 +20,7 @@ "@subsquid/http-client": "^1.6.0", "@subsquid/logger": "^1.3.3", "@subsquid/rpc-client": "^4.11.0", + "@subsquid/portal-client": "^0.0.0", "@subsquid/util-internal": "^3.2.0", "@subsquid/util-internal-archive-client": "^0.1.2", "@subsquid/util-internal-hex": "^1.2.2", diff --git a/evm/evm-processor/src/ds-archive/client.ts b/evm/evm-processor/src/ds-archive/client.ts index 2dc033d67..1a1148165 100644 --- a/evm/evm-processor/src/ds-archive/client.ts +++ b/evm/evm-processor/src/ds-archive/client.ts @@ -1,6 +1,5 @@ import {addErrorContext, assertNotNull, unexpectedCase} from '@subsquid/util-internal' -import {ArchiveClient} from '@subsquid/util-internal-archive-client' -import {archiveIngest} from '@subsquid/util-internal-ingest-tools' +import {archiveIngest, ArchiveClient} from '@subsquid/util-internal-ingest-tools' import {Batch, DataSource} from '@subsquid/util-internal-processor-tools' import {getRequestAt, RangeRequest} from '@subsquid/util-internal-range' import {cast} from '@subsquid/util-internal-validation' diff --git a/evm/evm-processor/src/processor.ts b/evm/evm-processor/src/processor.ts index 96f2aac0a..02f978c70 100644 --- a/evm/evm-processor/src/processor.ts +++ b/evm/evm-processor/src/processor.ts @@ -3,6 +3,7 @@ import {createLogger, Logger} from '@subsquid/logger' import {RpcClient} from 
'@subsquid/rpc-client' import {assertNotNull, def, runProgram} from '@subsquid/util-internal' import {ArchiveClient} from '@subsquid/util-internal-archive-client' +import {PortalClient} from '@subsquid/portal-client' import {Database, getOrGenerateSquidId, PrometheusServer, Runner} from '@subsquid/util-internal-processor-tools' import {applyRangeBound, mergeRangeRequests, Range, RangeRequest} from '@subsquid/util-internal-range' import {cast} from '@subsquid/util-internal-validation' @@ -107,6 +108,20 @@ export interface GatewaySettings { } +export interface PortalSettings { + /** + * Subsquid Network Gateway url + */ + url: string + /** + * Request timeout in ms + */ + requestTimeout?: number + + bufferThreshold?: number +} + + /** * @deprecated */ @@ -189,7 +204,7 @@ export class EvmBatchProcessor { private blockRange?: Range private fields?: FieldSelection private finalityConfirmation?: number - private archive?: GatewaySettings + private archive?: GatewaySettings & {type: 'gateway'} | PortalSettings & {type: 'portal'} private rpcIngestSettings?: RpcDataIngestionSettings private rpcEndpoint?: RpcEndpointSettings private running = false @@ -211,15 +226,29 @@ export class EvmBatchProcessor { * processor.setGateway('https://v2.archive.subsquid.io/network/ethereum-mainnet') */ setGateway(url: string | GatewaySettings): this { + assert(this.archive?.type !== 'gateway', 'setGateway() can not be used together with setPortal()') this.assertNotRunning() if (typeof url == 'string') { - this.archive = {url} + this.archive = {type: 'gateway', url} } else { - this.archive = url + this.archive = {type: 'gateway', ...url} } return this } + + setPortal(url: string | PortalSettings): this { + assert(this.archive?.type !== 'gateway', 'setPortal() can not be used together with setGateway()') + this.assertNotRunning() + if (typeof url == 'string') { + this.archive = {type: 'portal', url} + } else { + this.archive = {type: 'portal', ...url} + } + return this + } + + /** * Set chain 
RPC endpoint * @@ -500,12 +529,20 @@ export class EvmBatchProcessor { }) return new EvmArchive( - new ArchiveClient({ - http, - url: archive.url, - queryTimeout: archive.requestTimeout, - log - }) + archive.type === 'gateway' + ? new ArchiveClient({ + http, + url: archive.url, + queryTimeout: archive.requestTimeout, + log, + }) + : new PortalClient({ + http, + url: archive.url, + queryTimeout: archive.requestTimeout, + bufferThreshold: archive.bufferThreshold, + log, + }) ) } diff --git a/rush.json b/rush.json index d68c165df..4008d4e62 100644 --- a/rush.json +++ b/rush.json @@ -902,6 +902,12 @@ "shouldPublish": true, "versionPolicyName": "npm" }, + { + "packageName": "@subsquid/portal-client", + "projectFolder": "util/portal-client", + "shouldPublish": true, + "versionPolicyName": "npm" + }, { "packageName": "balances", "projectFolder": "test/balances", diff --git a/test/erc20-transfers/src/processor.ts b/test/erc20-transfers/src/processor.ts index 5f96cf513..f90588989 100644 --- a/test/erc20-transfers/src/processor.ts +++ b/test/erc20-transfers/src/processor.ts @@ -8,10 +8,10 @@ const CONTRACT = '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48'.toLowerCase() const processor = new EvmBatchProcessor() - .setGateway('http://localhost:8080/datasets/ethereum-mainnet') + .setPortal('https://portal.sqd.dev/datasets/ethereum-mainnet') // .setRpcEndpoint(process.env.ARB_NODE_WS) .setFinalityConfirmation(500) - .setBlockRange({from: 20801368}) + .setBlockRange({from: 0}) .setFields({ block: {size: true}, log: {transactionHash: true}, diff --git a/util/portal-client/README.md b/util/portal-client/README.md new file mode 100644 index 000000000..87121cced --- /dev/null +++ b/util/portal-client/README.md @@ -0,0 +1,3 @@ +# @subsquid/portal-client + +API for SQD Portal. 
diff --git a/util/portal-client/package.json b/util/portal-client/package.json new file mode 100644 index 000000000..4e822a9c6 --- /dev/null +++ b/util/portal-client/package.json @@ -0,0 +1,38 @@ +{ + "name": "@subsquid/portal-client", + "version": "0.0.0", + "description": "SQD Portal API", + "license": "GPL-3.0-or-later", + "repository": "git@github.com:subsquid/squid.git", + "publishConfig": { + "access": "public" + }, + "main": "lib/client.js", + "files": [ + "lib", + "src" + ], + "scripts": { + "build": "rm -rf lib && tsc" + }, + "dependencies": { + "@subsquid/util-internal": "^3.2.0", + "@subsquid/util-internal-range": "^0.3.0", + "@subsquid/util-internal-archive-layout": "^1.0.0" + }, + "peerDependencies": { + "@subsquid/http-client": "^1.5.0", + "@subsquid/logger": "^1.3.3" + }, + "peerDependenciesMeta": { + "@subsquid/logger": { + "optional": true + } + }, + "devDependencies": { + "@subsquid/http-client": "^1.5.0", + "@subsquid/logger": "^1.3.3", + "@types/node": "^18.18.14", + "typescript": "~5.3.2" + } +} diff --git a/util/portal-client/src/client.ts b/util/portal-client/src/client.ts new file mode 100644 index 000000000..4b6e4c361 --- /dev/null +++ b/util/portal-client/src/client.ts @@ -0,0 +1,161 @@ +import {HttpClient} from '@subsquid/http-client' +import type {Logger} from '@subsquid/logger' +import {AsyncQueue, ensureError, last, wait, withErrorContext} from '@subsquid/util-internal' +import {splitLines} from '@subsquid/util-internal-archive-layout' +import assert from 'assert' + + +export interface PortalQuery { + fromBlock: number + toBlock?: number +} + + +export interface Block { + header: { + number: number + hash: string + } +} + + +export interface PortalClientOptions { + url: string + http?: HttpClient + log?: Logger + queryTimeout?: number + bufferThreshold?: number +} + + +export class PortalClient { + private url: URL + private http: HttpClient + private queryTimeout: number + private bufferThreshold: number + + constructor(options: 
PortalClientOptions) { + this.url = new URL(options.url) + this.http = options.http || new HttpClient({log: options.log}) + this.queryTimeout = options.queryTimeout ?? 180_000 + this.bufferThreshold = options.bufferThreshold ?? 10 * 1024 * 1024 + } + + private getRouterUrl(path: string): string { + let u = new URL(this.url) + if (this.url.pathname.endsWith('/')) { + u.pathname += path + } else { + u.pathname += '/' + path + } + return u.toString() + } + + async getHeight(): Promise { + let res: string = await this.http.get(this.getRouterUrl('height'), { + retryAttempts: 3, + httpTimeout: 10_000, + }) + let height = parseInt(res) + assert(Number.isSafeInteger(height)) + return height + } + + query(query: Q): Promise { + return this.http + .request('POST', this.getRouterUrl(`stream`), { + json: query, + retryAttempts: 3, + httpTimeout: this.queryTimeout, + }) + .catch( + withErrorContext({ + archiveQuery: query, + }) + ) + .then((res) => { + // TODO: move the conversion to the server + let blocks = res.body + .toString('utf8') + .trimEnd() + .split('\n') + .map((line) => JSON.parse(line)) + return blocks + }) + } + + async *stream(query: Q): AsyncIterable { + let queue = new AsyncQueue(1) + + const ingest = async () => { + let bufferSize = 0 + let fromBlock = query.fromBlock + let toBlock = query.toBlock ?? Infinity + + while (fromBlock <= toBlock) { + let res = await this.http + .request('POST', this.getRouterUrl(`stream`), { + json: {...query, fromBlock}, + retryAttempts: 3, + httpTimeout: this.queryTimeout, + stream: true, + }) + .catch( + withErrorContext({ + archiveQuery: query, + }) + ) + + for await (let lines of splitLines(res.body as AsyncIterable)) { + let batch = queue.peek() + if (batch instanceof Error) return + + if (!batch) { + bufferSize = 0 + } + + let blocks = lines.map((line) => { + bufferSize += line.length + return JSON.parse(line) as B + }) + + if (batch) { + // FIXME: won't it overflow stack? 
+ batch.push(...blocks) + if (bufferSize > this.bufferThreshold) { + await queue.wait() + } + } else { + await queue.put(blocks) + } + + fromBlock = last(blocks).header.number + 1 + } + + // no blocks left + if (res.status == 204) { + await wait(1000) + } + } + } + + ingest().then( + () => queue.close(), + (err) => { + if (!queue.isClosed()) { + queue.forcePut(ensureError(err)) + } + } + ) + + for await (let valueOrError of queue.iterate()) { + if (valueOrError instanceof Error) throw valueOrError + yield valueOrError + } + } +} + +export function portal(url: string | PortalClientOptions) { + let options = typeof url == 'string' ? {url} : url + return new PortalClient(options) +} \ No newline at end of file diff --git a/util/portal-client/tsconfig.json b/util/portal-client/tsconfig.json new file mode 100644 index 000000000..deee9f66b --- /dev/null +++ b/util/portal-client/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "es2020", + "outDir": "lib", + "rootDir": "src", + "allowJs": true, + "strict": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "esModuleInterop": true, + "experimentalDecorators": true, + "emitDecoratorMetadata": true, + "skipLibCheck": true + }, + "include": ["src"], + "exclude": [ + "node_modules" + ] +} diff --git a/util/util-internal-archive-client/src/client.ts b/util/util-internal-archive-client/src/client.ts index 5a41a9128..4af638504 100644 --- a/util/util-internal-archive-client/src/client.ts +++ b/util/util-internal-archive-client/src/client.ts @@ -1,15 +1,15 @@ import {HttpClient, HttpTimeoutError} from '@subsquid/http-client' import type {Logger} from '@subsquid/logger' -import {AsyncQueue, concurrentWriter, ensureError, wait, withErrorContext} from '@subsquid/util-internal' -import {splitLines} from '@subsquid/util-internal-archive-layout' +import {wait, withErrorContext} from '@subsquid/util-internal' import assert from 'assert' -import {pipeline} from 
'node:stream/promises' + export interface ArchiveQuery { fromBlock: number toBlock?: number } + export interface Block { header: { number: number @@ -17,6 +17,7 @@ export interface Block { } } + export interface ArchiveClientOptions { http: HttpClient url: string @@ -24,6 +25,7 @@ export interface ArchiveClientOptions { log?: Logger } + export class ArchiveClient { private url: URL private http: HttpClient @@ -52,7 +54,7 @@ export class ArchiveClient { return this.retry(async () => { let res: string = await this.http.get(this.getRouterUrl('height'), { retryAttempts: 0, - httpTimeout: 10_000, + httpTimeout: 10_000 }) let height = parseInt(res) assert(Number.isSafeInteger(height)) @@ -62,114 +64,35 @@ export class ArchiveClient { query(query: Q): Promise { return this.retry(async () => { - return this.http - .request('POST', this.getRouterUrl(`stream`), { - json: query, - retryAttempts: 0, - httpTimeout: this.queryTimeout, - }) - .catch( - withErrorContext({ - archiveQuery: query, - }) - ) - .then((res) => { - // TODO: move the conversion to the server - let blocks = res.body - .toString('utf8') - .trimEnd() - .split('\n') - .map((line) => JSON.parse(line)) - return blocks - }) + let worker: string = await this.http.get(this.getRouterUrl(`${query.fromBlock}/worker`), { + retryAttempts: 0, + httpTimeout: 10_000 + }) + return this.http.post(worker, { + json: query, + retryAttempts: 0, + httpTimeout: this.queryTimeout + }).catch(withErrorContext({ + archiveQuery: query + })) }) } - async *stream(query: Q): AsyncIterable { - let queue = new AsyncQueue(1) - - const ingest = async () => { - let bufferSize = 0 - let fromBlock = query.fromBlock - let toBlock = query.toBlock ?? 
Infinity - - while (fromBlock <= toBlock) { - let stream = await this.http - .post(this.getRouterUrl(`stream`), { - json: {...query, fromBlock}, - retryAttempts: 3, - httpTimeout: this.queryTimeout, - stream: true, - }) - .catch( - withErrorContext({ - archiveQuery: query, - }) - ) - - for await (let lines of splitLines(stream as AsyncIterable)) { - let batch = queue.peek() - if (batch instanceof Error) break - - if (!batch) { - bufferSize = 0 - } - - if (lines.length === 0) continue - - let blocks = lines.map((line) => { - bufferSize += line.length - return JSON.parse(line) as B - }) - - if (batch) { - // FIXME: won't it overflow stack? - batch.push(...blocks) - if (bufferSize > 10 * 1024 * 1024) { - await queue.wait() - } - } else { - await queue.put(blocks) - } - - fromBlock = blocks[blocks.length - 1].header.number + 1 - } - } - } - - ingest().then( - () => queue.close(), - (err) => { - if (!queue.isClosed()) { - queue.forcePut(ensureError(err)) - } - } - ) - - for await (let valueOrError of queue.iterate()) { - if (valueOrError instanceof Error) throw valueOrError - yield valueOrError - } - } - private async retry(request: () => Promise): Promise { let retries = 0 while (true) { try { return await request() - } catch (err: any) { + } catch(err: any) { if (this.http.isRetryableError(err)) { let pause = this.retrySchedule[Math.min(retries, this.retrySchedule.length - 1)] if (this.log?.isWarn()) { - let warn = retries > 3 || (err instanceof HttpTimeoutError && err.ms > 10_000) + let warn = retries > 3 || err instanceof HttpTimeoutError && err.ms > 10_000 if (warn) { - this.log.warn( - { - reason: err.message, - ...err, - }, - `archive request failed, will retry in ${Math.round(pause / 1000)} secs` - ) + this.log.warn({ + reason: err.message, + ...err + }, `archive request failed, will retry in ${Math.round(pause / 1000)} secs`) } } retries += 1 @@ -180,4 +103,4 @@ export class ArchiveClient { } } } -} +} \ No newline at end of file diff --git 
a/util/util-internal-ingest-tools/package.json b/util/util-internal-ingest-tools/package.json index 4f8b0ee1e..b6594f990 100644 --- a/util/util-internal-ingest-tools/package.json +++ b/util/util-internal-ingest-tools/package.json @@ -20,16 +20,7 @@ "@subsquid/util-internal": "^3.2.0", "@subsquid/util-internal-range": "^0.3.0" }, - "peerDependencies": { - "@subsquid/util-internal-archive-client": "^0.1.2" - }, - "peerDependenciesMeta": { - "@subsquid/util-internal-archive-client": { - "optional": true - } - }, "devDependencies": { - "@subsquid/util-internal-archive-client": "^0.1.2", "@types/node": "^18.18.14", "typescript": "~5.5.4" } diff --git a/util/util-internal-ingest-tools/src/archive.ts b/util/util-internal-ingest-tools/src/archive.ts index ef79c048d..78463b97d 100644 --- a/util/util-internal-ingest-tools/src/archive.ts +++ b/util/util-internal-ingest-tools/src/archive.ts @@ -1,16 +1,35 @@ import {concurrentMap, last, Throttler} from '@subsquid/util-internal' -import type {ArchiveClient, Block} from '@subsquid/util-internal-archive-client' import {RangeRequestList} from '@subsquid/util-internal-range' import assert from 'assert' import {Batch} from './interfaces' +export interface Block { + header: { + number: number + hash: string + } +} + + +export interface ArchiveQuery { + fromBlock: number + toBlock?: number +} + + +export interface ArchiveClient { + getHeight(): Promise + query(query: Q): Promise + stream?(query: Q): AsyncIterable +} + + export interface ArchiveIngestOptions { client: ArchiveClient requests: RangeRequestList stopOnHead?: boolean pollInterval?: number - concurrency?: number } @@ -29,22 +48,48 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn for (let req of requests) { let beg = req.range.from let end = req.range.to ?? 
Infinity - while (beg <= end) { - if (top < beg) { - top = await height.get() - } - while (top < beg) { - if (stopOnHead) return - top = await height.call() - } - - let stream = client.stream({ - fromBlock: beg, + if (client.stream) { + let stream = client.stream?.({ + fromBlock: req.range.from, toBlock: req.range.to, ...req.request }) + + top = await height.get() for await (let blocks of stream) { + if (blocks.length == 0) continue + + let lastBlock = last(blocks).header.number + assert(beg <= lastBlock && lastBlock <= end, 'blocks are out of range') + beg = lastBlock + 1 + + // FIXME: is it needed here at all? Used only for `isHead` + top = await height.get() + + yield { + blocks, + isHead: lastBlock >= top + } + } + + if (beg < end && stopOnHead) break + + assert(beg === end + 1, 'boundary blocks are expected to be included') + } else { + while (beg <= end) { + if (top < beg) { + top = await height.get() + } + while (top < beg) { + if (stopOnHead) return + top = await height.call() + } + let blocks = await client.query({ + fromBlock: beg, + toBlock: req.range.to, + ...req.request + }) assert(blocks.length > 0, 'boundary blocks are expected to be included') let lastBlock = last(blocks).header.number assert(lastBlock >= beg) @@ -58,6 +103,7 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn } } } + } } From 2bcc382024532a4d963dda8fbb1e4af10ed81821 Mon Sep 17 00:00:00 2001 From: belopash Date: Thu, 31 Oct 2024 21:14:27 +0500 Subject: [PATCH 08/23] save --- .../util-internal-ingest-tools/src/archive.ts | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/util/util-internal-ingest-tools/src/archive.ts b/util/util-internal-ingest-tools/src/archive.ts index 78463b97d..0cfd14b89 100644 --- a/util/util-internal-ingest-tools/src/archive.ts +++ b/util/util-internal-ingest-tools/src/archive.ts @@ -49,33 +49,32 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn let beg = req.range.from let end = req.range.to 
?? Infinity if (client.stream) { - let stream = client.stream?.({ + if (top < beg) { + top = await height.get() + } + if (top < beg && stopOnHead) return + + for await (let blocks of client.stream({ fromBlock: req.range.from, toBlock: req.range.to, ...req.request - }) - - top = await height.get() - - for await (let blocks of stream) { - if (blocks.length == 0) continue - + })) { + assert(blocks.length > 0, 'boundary blocks are expected to be included') let lastBlock = last(blocks).header.number - assert(beg <= lastBlock && lastBlock <= end, 'blocks are out of range') + assert(lastBlock >= beg) beg = lastBlock + 1 - - // FIXME: is it needed here at all? Used only for `isHead` - top = await height.get() + + if (beg > top) { + top = await height.get() + } yield { blocks, - isHead: lastBlock >= top + isHead: beg > top } + + if (top < beg && stopOnHead) return } - - if (beg < end && stopOnHead) break - - assert(beg === end + 1, 'boundary blocks are expected to be included') } else { while (beg <= end) { if (top < beg) { From d1c573defcae2842038252a60c72a8167ec653b4 Mon Sep 17 00:00:00 2001 From: belopash Date: Fri, 1 Nov 2024 12:52:20 +0500 Subject: [PATCH 09/23] save --- test/erc20-transfers/src/processor.ts | 24 +++++++++++-------- util/portal-client/src/client.ts | 6 +++-- .../util-internal-ingest-tools/src/archive.ts | 2 +- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/test/erc20-transfers/src/processor.ts b/test/erc20-transfers/src/processor.ts index f90588989..aede83036 100644 --- a/test/erc20-transfers/src/processor.ts +++ b/test/erc20-transfers/src/processor.ts @@ -8,7 +8,10 @@ const CONTRACT = '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48'.toLowerCase() const processor = new EvmBatchProcessor() - .setPortal('https://portal.sqd.dev/datasets/ethereum-mainnet') + .setPortal({ + url: 'https://portal.sqd.dev/datasets/ethereum-mainnet', + bufferThreshold: 100 * 1024 * 1024 + }) // .setRpcEndpoint(process.env.ARB_NODE_WS) .setFinalityConfirmation(500) 
.setBlockRange({from: 0}) @@ -30,18 +33,19 @@ processor.run(new TypeormDatabase({supportHotBlocks: true}), async ctx => { for (let log of block.logs) { if (log.address == CONTRACT && erc20.events.Transfer.is(log)) { // let {from, to, value} = erc20.events.Transfer.decode(log) - // transfers.push(new Transfer({ - // id: log.id, - // blockNumber: block.header.height, - // timestamp: new Date(block.header.timestamp), - // tx: log.transactionHash, - // from, - // to, - // amount: value - // })) + transfers.push(new Transfer({ + // id: log.id, + // blockNumber: block.header.height, + // timestamp: new Date(block.header.timestamp), + // tx: log.transactionHash, + // from, + // to, + // amount: value + })) } } } + // ctx.log.info(`found ${transfers.length} transfers`) // await ctx.store.insert(transfers) }) diff --git a/util/portal-client/src/client.ts b/util/portal-client/src/client.ts index 4b6e4c361..6046fa6e5 100644 --- a/util/portal-client/src/client.ts +++ b/util/portal-client/src/client.ts @@ -93,16 +93,18 @@ export class PortalClient { let toBlock = query.toBlock ?? 
Infinity while (fromBlock <= toBlock) { + let archiveQuery = {...query, fromBlock} + let res = await this.http .request('POST', this.getRouterUrl(`stream`), { - json: {...query, fromBlock}, + json: archiveQuery, retryAttempts: 3, httpTimeout: this.queryTimeout, stream: true, }) .catch( withErrorContext({ - archiveQuery: query, + archiveQuery, }) ) diff --git a/util/util-internal-ingest-tools/src/archive.ts b/util/util-internal-ingest-tools/src/archive.ts index 0cfd14b89..b0d4872fa 100644 --- a/util/util-internal-ingest-tools/src/archive.ts +++ b/util/util-internal-ingest-tools/src/archive.ts @@ -64,7 +64,7 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn assert(lastBlock >= beg) beg = lastBlock + 1 - if (beg > top) { + if (top < beg) { top = await height.get() } From 2b131a44393e1c79ef5e0e956318148cc2acfdb1 Mon Sep 17 00:00:00 2001 From: belopash Date: Fri, 1 Nov 2024 13:43:25 +0500 Subject: [PATCH 10/23] save --- test/erc20-transfers/src/processor.ts | 5 ++--- util/util-internal-ingest-tools/src/archive.ts | 11 +++-------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/test/erc20-transfers/src/processor.ts b/test/erc20-transfers/src/processor.ts index aede83036..f03ac16f0 100644 --- a/test/erc20-transfers/src/processor.ts +++ b/test/erc20-transfers/src/processor.ts @@ -9,10 +9,10 @@ const CONTRACT = '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48'.toLowerCase() const processor = new EvmBatchProcessor() .setPortal({ - url: 'https://portal.sqd.dev/datasets/ethereum-mainnet', + url: 'http://localhost:8000/datasets/ethereum-mainnet', bufferThreshold: 100 * 1024 * 1024 }) - // .setRpcEndpoint(process.env.ARB_NODE_WS) + .setRpcEndpoint('https://rpc.ankr.com/eth') .setFinalityConfirmation(500) .setBlockRange({from: 0}) .setFields({ @@ -28,7 +28,6 @@ const processor = new EvmBatchProcessor() processor.run(new TypeormDatabase({supportHotBlocks: true}), async ctx => { let transfers: Transfer[] = [] - for (let block of ctx.blocks) { for (let log 
of block.logs) { if (log.address == CONTRACT && erc20.events.Transfer.is(log)) { diff --git a/util/util-internal-ingest-tools/src/archive.ts b/util/util-internal-ingest-tools/src/archive.ts index b0d4872fa..82c16f6aa 100644 --- a/util/util-internal-ingest-tools/src/archive.ts +++ b/util/util-internal-ingest-tools/src/archive.ts @@ -49,9 +49,6 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn let beg = req.range.from let end = req.range.to ?? Infinity if (client.stream) { - if (top < beg) { - top = await height.get() - } if (top < beg && stopOnHead) return for await (let blocks of client.stream({ @@ -63,16 +60,14 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn let lastBlock = last(blocks).header.number assert(lastBlock >= beg) beg = lastBlock + 1 - - if (top < beg) { - top = await height.get() - } - + yield { blocks, isHead: beg > top } + top = await height.get() + if (top < beg && stopOnHead) return } } else { From 0de2e72637a898e14a633f09e149c7325a4cdbfa Mon Sep 17 00:00:00 2001 From: belopash Date: Sat, 2 Nov 2024 14:34:06 +0500 Subject: [PATCH 11/23] save --- .../portal-api_2024-11-02-09-34.json | 10 ++ .../portal-api_2024-11-02-09-34.json | 10 ++ .../portal-api_2024-10-29-13-16.json | 10 -- .../portal-api_2024-10-29-13-16.json | 10 -- .../portal-api_2024-10-29-13-16.json | 10 -- .../portal-api_2024-11-02-09-34.json | 10 ++ evm/evm-processor/src/ds-archive/client.ts | 93 +-------------- evm/evm-processor/src/ds-archive/mapping.ts | 106 ++++++++++++++++++ evm/evm-processor/src/ds-archive/portal.ts | 75 +++++++++++++ evm/evm-processor/src/processor.ts | 15 ++- .../util-internal-archive-client/package.json | 5 +- util/util-internal-ingest-tools/package.json | 11 +- .../util-internal-ingest-tools/src/archive.ts | 89 ++++----------- 13 files changed, 259 insertions(+), 195 deletions(-) create mode 100644 common/changes/@subsquid/evm-processor/portal-api_2024-11-02-09-34.json create mode 100644 
common/changes/@subsquid/portal-client/portal-api_2024-11-02-09-34.json delete mode 100644 common/changes/@subsquid/util-internal-archive-client/portal-api_2024-10-29-13-16.json delete mode 100644 common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-10-29-13-16.json delete mode 100644 common/changes/@subsquid/util-internal-ingest-tools/portal-api_2024-10-29-13-16.json create mode 100644 common/changes/@subsquid/util-internal/portal-api_2024-11-02-09-34.json create mode 100644 evm/evm-processor/src/ds-archive/mapping.ts create mode 100644 evm/evm-processor/src/ds-archive/portal.ts diff --git a/common/changes/@subsquid/evm-processor/portal-api_2024-11-02-09-34.json b/common/changes/@subsquid/evm-processor/portal-api_2024-11-02-09-34.json new file mode 100644 index 000000000..149e8155c --- /dev/null +++ b/common/changes/@subsquid/evm-processor/portal-api_2024-11-02-09-34.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@subsquid/evm-processor", + "comment": "portal api", + "type": "minor" + } + ], + "packageName": "@subsquid/evm-processor" +} \ No newline at end of file diff --git a/common/changes/@subsquid/portal-client/portal-api_2024-11-02-09-34.json b/common/changes/@subsquid/portal-client/portal-api_2024-11-02-09-34.json new file mode 100644 index 000000000..a5a8742fb --- /dev/null +++ b/common/changes/@subsquid/portal-client/portal-api_2024-11-02-09-34.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@subsquid/portal-client", + "comment": "", + "type": "none" + } + ], + "packageName": "@subsquid/portal-client" +} \ No newline at end of file diff --git a/common/changes/@subsquid/util-internal-archive-client/portal-api_2024-10-29-13-16.json b/common/changes/@subsquid/util-internal-archive-client/portal-api_2024-10-29-13-16.json deleted file mode 100644 index fdcc14174..000000000 --- a/common/changes/@subsquid/util-internal-archive-client/portal-api_2024-10-29-13-16.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "changes": [ - { 
- "packageName": "@subsquid/util-internal-archive-client", - "comment": "add streaming support", - "type": "minor" - } - ], - "packageName": "@subsquid/util-internal-archive-client" -} \ No newline at end of file diff --git a/common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-10-29-13-16.json b/common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-10-29-13-16.json deleted file mode 100644 index 71159385a..000000000 --- a/common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-10-29-13-16.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "changes": [ - { - "packageName": "@subsquid/util-internal-archive-layout", - "comment": "add streaming support", - "type": "minor" - } - ], - "packageName": "@subsquid/util-internal-archive-layout" -} \ No newline at end of file diff --git a/common/changes/@subsquid/util-internal-ingest-tools/portal-api_2024-10-29-13-16.json b/common/changes/@subsquid/util-internal-ingest-tools/portal-api_2024-10-29-13-16.json deleted file mode 100644 index ccad9f0c2..000000000 --- a/common/changes/@subsquid/util-internal-ingest-tools/portal-api_2024-10-29-13-16.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "changes": [ - { - "packageName": "@subsquid/util-internal-ingest-tools", - "comment": "add streaming support", - "type": "minor" - } - ], - "packageName": "@subsquid/util-internal-ingest-tools" -} \ No newline at end of file diff --git a/common/changes/@subsquid/util-internal/portal-api_2024-11-02-09-34.json b/common/changes/@subsquid/util-internal/portal-api_2024-11-02-09-34.json new file mode 100644 index 000000000..ec32315de --- /dev/null +++ b/common/changes/@subsquid/util-internal/portal-api_2024-11-02-09-34.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@subsquid/util-internal", + "comment": "", + "type": "none" + } + ], + "packageName": "@subsquid/util-internal" +} \ No newline at end of file diff --git a/evm/evm-processor/src/ds-archive/client.ts 
b/evm/evm-processor/src/ds-archive/client.ts index 1a1148165..703d734fb 100644 --- a/evm/evm-processor/src/ds-archive/client.ts +++ b/evm/evm-processor/src/ds-archive/client.ts @@ -1,5 +1,6 @@ import {addErrorContext, assertNotNull, unexpectedCase} from '@subsquid/util-internal' -import {archiveIngest, ArchiveClient} from '@subsquid/util-internal-ingest-tools' +import {ArchiveClient} from '@subsquid/util-internal-archive-client' +import {archiveIngest} from '@subsquid/util-internal-ingest-tools' import {Batch, DataSource} from '@subsquid/util-internal-processor-tools' import {getRequestAt, RangeRequest} from '@subsquid/util-internal-range' import {cast} from '@subsquid/util-internal-validation' @@ -25,6 +26,7 @@ import { } from '../mapping/entities' import {setUpRelations} from '../mapping/relations' import {getBlockValidator} from './schema' +import {mapBlock} from './mapping' const NO_FIELDS = {} @@ -57,7 +59,7 @@ export class EvmArchive implements DataSource { let blocks = batch.blocks.map(b => { try { - return this.mapBlock(b, fields) + return mapBlock(b, fields) } catch(err: any) { throw addErrorContext(err, { blockHeight: b.header.number, @@ -69,89 +71,4 @@ export class EvmArchive implements DataSource { yield {blocks, isHead: batch.isHead} } } - - private mapBlock(rawBlock: unknown, fields: FieldSelection): Block { - let validator = getBlockValidator(fields) - - let src = cast(validator, rawBlock) - - let {number, hash, parentHash, ...hdr} = src.header - if (hdr.timestamp) { - hdr.timestamp = hdr.timestamp * 1000 // convert to ms - } - - let header = new BlockHeader(number, hash, parentHash) - Object.assign(header, hdr) - - let block = new Block(header) - - if (src.transactions) { - for (let {transactionIndex, ...props} of src.transactions) { - let tx = new Transaction(header, transactionIndex) - Object.assign(tx, props) - block.transactions.push(tx) - } - } - - if (src.logs) { - for (let {logIndex, transactionIndex, ...props} of src.logs) { - let log = new 
Log(header, logIndex, transactionIndex) - Object.assign(log, props) - block.logs.push(log) - } - } - - if (src.traces) { - for (let {transactionIndex, traceAddress, type, ...props} of src.traces) { - transactionIndex = assertNotNull(transactionIndex) - let trace: Trace - switch(type) { - case 'create': - trace = new TraceCreate(header, transactionIndex, traceAddress) - break - case 'call': - trace = new TraceCall(header, transactionIndex, traceAddress) - break - case 'suicide': - trace = new TraceSuicide(header, transactionIndex, traceAddress) - break - case 'reward': - trace = new TraceReward(header, transactionIndex, traceAddress) - break - default: - throw unexpectedCase() - } - Object.assign(trace, props) - block.traces.push(trace) - } - } - - if (src.stateDiffs) { - for (let {transactionIndex, address, key, kind, ...props} of src.stateDiffs) { - let diff: StateDiff - switch(kind) { - case '=': - diff = new StateDiffNoChange(header, transactionIndex, address, key) - break - case '+': - diff = new StateDiffAdd(header, transactionIndex, address, key) - break - case '*': - diff = new StateDiffChange(header, transactionIndex, address, key) - break - case '-': - diff = new StateDiffDelete(header, transactionIndex, address, key) - break - default: - throw unexpectedCase() - } - Object.assign(diff, props) - block.stateDiffs.push(diff) - } - } - - setUpRelations(block) - - return block - } -} +} \ No newline at end of file diff --git a/evm/evm-processor/src/ds-archive/mapping.ts b/evm/evm-processor/src/ds-archive/mapping.ts new file mode 100644 index 000000000..a74f56372 --- /dev/null +++ b/evm/evm-processor/src/ds-archive/mapping.ts @@ -0,0 +1,106 @@ +import {assertNotNull, unexpectedCase} from '@subsquid/util-internal' +import {cast} from '@subsquid/util-internal-validation' +import {FieldSelection} from '../interfaces/data' +import { + Block, + BlockHeader, + Log, + StateDiff, + StateDiffAdd, + StateDiffChange, + StateDiffDelete, + StateDiffNoChange, + Trace, + 
TraceCall, + TraceCreate, + TraceReward, + TraceSuicide, + Transaction, +} from '../mapping/entities' +import {setUpRelations} from '../mapping/relations' +import {getBlockValidator} from './schema' + +export function mapBlock(rawBlock: unknown, fields: FieldSelection): Block { + let validator = getBlockValidator(fields) + + let src = cast(validator, rawBlock) + + let {number, hash, parentHash, ...hdr} = src.header + if (hdr.timestamp) { + hdr.timestamp = hdr.timestamp * 1000 // convert to ms + } + + let header = new BlockHeader(number, hash, parentHash) + Object.assign(header, hdr) + + let block = new Block(header) + + if (src.transactions) { + for (let {transactionIndex, ...props} of src.transactions) { + let tx = new Transaction(header, transactionIndex) + Object.assign(tx, props) + block.transactions.push(tx) + } + } + + if (src.logs) { + for (let {logIndex, transactionIndex, ...props} of src.logs) { + let log = new Log(header, logIndex, transactionIndex) + Object.assign(log, props) + block.logs.push(log) + } + } + + if (src.traces) { + for (let {transactionIndex, traceAddress, type, ...props} of src.traces) { + transactionIndex = assertNotNull(transactionIndex) + let trace: Trace + switch (type) { + case 'create': + trace = new TraceCreate(header, transactionIndex, traceAddress) + break + case 'call': + trace = new TraceCall(header, transactionIndex, traceAddress) + break + case 'suicide': + trace = new TraceSuicide(header, transactionIndex, traceAddress) + break + case 'reward': + trace = new TraceReward(header, transactionIndex, traceAddress) + break + default: + throw unexpectedCase() + } + Object.assign(trace, props) + block.traces.push(trace) + } + } + + if (src.stateDiffs) { + for (let {transactionIndex, address, key, kind, ...props} of src.stateDiffs) { + let diff: StateDiff + switch (kind) { + case '=': + diff = new StateDiffNoChange(header, transactionIndex, address, key) + break + case '+': + diff = new StateDiffAdd(header, transactionIndex, address, 
key) + break + case '*': + diff = new StateDiffChange(header, transactionIndex, address, key) + break + case '-': + diff = new StateDiffDelete(header, transactionIndex, address, key) + break + default: + throw unexpectedCase() + } + Object.assign(diff, props) + block.stateDiffs.push(diff) + } + } + + setUpRelations(block) + + return block +} diff --git a/evm/evm-processor/src/ds-archive/portal.ts b/evm/evm-processor/src/ds-archive/portal.ts new file mode 100644 index 000000000..09393927c --- /dev/null +++ b/evm/evm-processor/src/ds-archive/portal.ts @@ -0,0 +1,75 @@ +import {addErrorContext, last, Throttler} from '@subsquid/util-internal' +import {Batch, DataSource} from '@subsquid/util-internal-processor-tools' +import {getRequestAt, RangeRequest} from '@subsquid/util-internal-range' +import assert from 'assert' +import {Bytes32} from '../interfaces/base' +import {DataRequest} from '../interfaces/data-request' +import {Block} from '../mapping/entities' +import {mapBlock} from './mapping' +import {PortalClient} from '@subsquid/portal-client' + + +const NO_FIELDS = {} + + +export class EvmPortal implements DataSource { + constructor(private client: PortalClient) {} + + getFinalizedHeight(): Promise { + return this.client.getHeight() + } + + async getBlockHash(height: number): Promise { + let blocks = await this.client.query({ + fromBlock: height, + toBlock: height, + includeAllBlocks: true + }) + assert(blocks.length == 1) + return blocks[0].header.hash + } + + async *getFinalizedBlocks(requests: RangeRequest[], stopOnHead?: boolean | undefined): AsyncIterable> { + let height = new Throttler(() => this.client.getHeight(), 20_000) + + let top = await height.get() + for (let req of requests) { + let beg = req.range.from + let fields = getRequestAt(requests, beg)?.fields || NO_FIELDS + + if (top < beg && stopOnHead) return + + for await (let batch of this.client.stream({ + fromBlock: req.range.from, + toBlock: req.range.to, + ...req.request + })) { + 
assert(batch.length > 0, 'boundary blocks are expected to be included') + let lastBlock = last(batch).header.number + assert(lastBlock >= beg) + beg = lastBlock + 1 + + let blocks = batch.map(b => { + try { + return mapBlock(b, fields) + } catch(err: any) { + throw addErrorContext(err, { + blockHeight: b.header.number, + blockHash: b.header.hash + }) + } + }) + + yield { + blocks, + isHead: beg > top + } + + top = await height.get() + + if (top < beg && stopOnHead) return + } + } + } +} + diff --git a/evm/evm-processor/src/processor.ts b/evm/evm-processor/src/processor.ts index 02f978c70..09398edaa 100644 --- a/evm/evm-processor/src/processor.ts +++ b/evm/evm-processor/src/processor.ts @@ -8,6 +8,7 @@ import {Database, getOrGenerateSquidId, PrometheusServer, Runner} from '@subsqui import {applyRangeBound, mergeRangeRequests, Range, RangeRequest} from '@subsquid/util-internal-range' import {cast} from '@subsquid/util-internal-validation' import assert from 'assert' +import {EvmPortal} from './ds-archive/portal' import {EvmArchive} from './ds-archive/client' import {EvmRpcDataSource} from './ds-rpc/client' import {Chain} from './interfaces/chain' @@ -513,7 +514,7 @@ export class EvmBatchProcessor { } @def - private getArchiveDataSource(): EvmArchive { + private getArchiveDataSource(): EvmArchive | EvmPortal { let archive = assertNotNull(this.archive) let log = this.getLogger().child('archive') @@ -528,22 +529,24 @@ export class EvmBatchProcessor { log }) - return new EvmArchive( - archive.type === 'gateway' - ? new ArchiveClient({ + return archive.type === 'gateway' + ? 
new EvmArchive( + new ArchiveClient({ http, url: archive.url, queryTimeout: archive.requestTimeout, log, }) - : new PortalClient({ + ) + : new EvmPortal( + new PortalClient({ http, url: archive.url, queryTimeout: archive.requestTimeout, bufferThreshold: archive.bufferThreshold, log, }) - ) + ) } @def diff --git a/util/util-internal-archive-client/package.json b/util/util-internal-archive-client/package.json index a48eaf5ff..318b13d5b 100644 --- a/util/util-internal-archive-client/package.json +++ b/util/util-internal-archive-client/package.json @@ -17,8 +17,7 @@ }, "dependencies": { "@subsquid/util-internal": "^3.2.0", - "@subsquid/util-internal-range": "^0.3.0", - "@subsquid/util-internal-archive-layout": "^1.0.0" + "@subsquid/util-internal-range": "^0.3.0" }, "peerDependencies": { "@subsquid/http-client": "^1.6.0", @@ -35,4 +34,4 @@ "@types/node": "^18.18.14", "typescript": "~5.5.4" } -} +} \ No newline at end of file diff --git a/util/util-internal-ingest-tools/package.json b/util/util-internal-ingest-tools/package.json index b6594f990..1ccef772d 100644 --- a/util/util-internal-ingest-tools/package.json +++ b/util/util-internal-ingest-tools/package.json @@ -20,8 +20,17 @@ "@subsquid/util-internal": "^3.2.0", "@subsquid/util-internal-range": "^0.3.0" }, + "peerDependencies": { + "@subsquid/util-internal-archive-client": "^0.1.2" + }, + "peerDependenciesMeta": { + "@subsquid/util-internal-archive-client": { + "optional": true + } + }, "devDependencies": { + "@subsquid/util-internal-archive-client": "^0.1.2", "@types/node": "^18.18.14", "typescript": "~5.5.4" } -} +} \ No newline at end of file diff --git a/util/util-internal-ingest-tools/src/archive.ts b/util/util-internal-ingest-tools/src/archive.ts index 82c16f6aa..351a21074 100644 --- a/util/util-internal-ingest-tools/src/archive.ts +++ b/util/util-internal-ingest-tools/src/archive.ts @@ -1,30 +1,10 @@ import {concurrentMap, last, Throttler} from '@subsquid/util-internal' +import type {ArchiveClient, Block} 
from '@subsquid/util-internal-archive-client' import {RangeRequestList} from '@subsquid/util-internal-range' import assert from 'assert' import {Batch} from './interfaces' -export interface Block { - header: { - number: number - hash: string - } -} - - -export interface ArchiveQuery { - fromBlock: number - toBlock?: number -} - - -export interface ArchiveClient { - getHeight(): Promise - query(query: Q): Promise - stream?(query: Q): AsyncIterable -} - - export interface ArchiveIngestOptions { client: ArchiveClient requests: RangeRequestList @@ -38,7 +18,7 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn client, requests, stopOnHead = false, - pollInterval = 20_000, + pollInterval = 20_000 } = args let height = new Throttler(() => client.getHeight(), pollInterval) @@ -48,56 +28,31 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn for (let req of requests) { let beg = req.range.from let end = req.range.to ?? Infinity - if (client.stream) { - if (top < beg && stopOnHead) return - - for await (let blocks of client.stream({ - fromBlock: req.range.from, + while (beg <= end) { + if (top < beg) { + top = await height.get() + } + while (top < beg) { + if (stopOnHead) return + top = await height.call() + } + let blocks = await client.query({ + fromBlock: beg, toBlock: req.range.to, ...req.request - })) { - assert(blocks.length > 0, 'boundary blocks are expected to be included') - let lastBlock = last(blocks).header.number - assert(lastBlock >= beg) - beg = lastBlock + 1 - - yield { - blocks, - isHead: beg > top - } - + }) + assert(blocks.length > 0, 'boundary blocks are expected to be included') + let lastBlock = last(blocks).header.number + assert(lastBlock >= beg) + beg = lastBlock + 1 + if (beg > top) { top = await height.get() - - if (top < beg && stopOnHead) return } - } else { - while (beg <= end) { - if (top < beg) { - top = await height.get() - } - while (top < beg) { - if (stopOnHead) return - top = await height.call() - } - let 
blocks = await client.query({ - fromBlock: beg, - toBlock: req.range.to, - ...req.request - }) - assert(blocks.length > 0, 'boundary blocks are expected to be included') - let lastBlock = last(blocks).header.number - assert(lastBlock >= beg) - beg = lastBlock + 1 - if (beg > top) { - top = await height.get() - } - yield { - blocks, - isHead: beg > top - } + yield { + blocks, + isHead: beg > top } } - } } @@ -106,4 +61,4 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn ingest(), batch => Promise.resolve(batch) ) -} +} \ No newline at end of file From aef1fac0ddb535087c43d8bdaf70c581bfa13c1d Mon Sep 17 00:00:00 2001 From: belopash Date: Mon, 4 Nov 2024 13:06:02 +0500 Subject: [PATCH 12/23] changes --- ....json => portal-api_2024-11-04-08-07.json} | 2 +- .../portal-api_2024-10-29-13-16.json | 10 -------- ....json => portal-api_2024-11-04-08-07.json} | 0 .../portal-api_2024-11-04-08-10.json | 10 ++++++++ ....json => portal-api_2024-11-04-08-10.json} | 4 ++-- evm/evm-processor/src/ds-archive/client.ts | 23 ++----------------- util/http-client/src/client.ts | 1 - .../util-internal-archive-client/package.json | 2 +- .../src/client.ts | 2 +- util/util-internal-ingest-tools/package.json | 2 +- .../util-internal-ingest-tools/src/archive.ts | 2 +- 11 files changed, 19 insertions(+), 39 deletions(-) rename common/changes/@subsquid/evm-processor/{portal-api_2024-11-02-09-34.json => portal-api_2024-11-04-08-07.json} (77%) delete mode 100644 common/changes/@subsquid/http-client/portal-api_2024-10-29-13-16.json rename common/changes/@subsquid/portal-client/{portal-api_2024-11-02-09-34.json => portal-api_2024-11-04-08-07.json} (100%) create mode 100644 common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-11-04-08-10.json rename common/changes/@subsquid/util-internal/{portal-api_2024-11-02-09-34.json => portal-api_2024-11-04-08-10.json} (63%) diff --git a/common/changes/@subsquid/evm-processor/portal-api_2024-11-02-09-34.json 
b/common/changes/@subsquid/evm-processor/portal-api_2024-11-04-08-07.json similarity index 77% rename from common/changes/@subsquid/evm-processor/portal-api_2024-11-02-09-34.json rename to common/changes/@subsquid/evm-processor/portal-api_2024-11-04-08-07.json index 149e8155c..bf92fe610 100644 --- a/common/changes/@subsquid/evm-processor/portal-api_2024-11-02-09-34.json +++ b/common/changes/@subsquid/evm-processor/portal-api_2024-11-04-08-07.json @@ -2,7 +2,7 @@ "changes": [ { "packageName": "@subsquid/evm-processor", - "comment": "portal api", + "comment": "add SQD Portal support", "type": "minor" } ], diff --git a/common/changes/@subsquid/http-client/portal-api_2024-10-29-13-16.json b/common/changes/@subsquid/http-client/portal-api_2024-10-29-13-16.json deleted file mode 100644 index 69eb7d1fe..000000000 --- a/common/changes/@subsquid/http-client/portal-api_2024-10-29-13-16.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "changes": [ - { - "packageName": "@subsquid/http-client", - "comment": "add streaming support", - "type": "minor" - } - ], - "packageName": "@subsquid/http-client" -} \ No newline at end of file diff --git a/common/changes/@subsquid/portal-client/portal-api_2024-11-02-09-34.json b/common/changes/@subsquid/portal-client/portal-api_2024-11-04-08-07.json similarity index 100% rename from common/changes/@subsquid/portal-client/portal-api_2024-11-02-09-34.json rename to common/changes/@subsquid/portal-client/portal-api_2024-11-04-08-07.json diff --git a/common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-11-04-08-10.json b/common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-11-04-08-10.json new file mode 100644 index 000000000..4228d4d46 --- /dev/null +++ b/common/changes/@subsquid/util-internal-archive-layout/portal-api_2024-11-04-08-10.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@subsquid/util-internal-archive-layout", + "comment": "export `.splitLines()`", + "type": "minor" + } + ], + "packageName": 
"@subsquid/util-internal-archive-layout" +} \ No newline at end of file diff --git a/common/changes/@subsquid/util-internal/portal-api_2024-11-02-09-34.json b/common/changes/@subsquid/util-internal/portal-api_2024-11-04-08-10.json similarity index 63% rename from common/changes/@subsquid/util-internal/portal-api_2024-11-02-09-34.json rename to common/changes/@subsquid/util-internal/portal-api_2024-11-04-08-10.json index ec32315de..4bbc96001 100644 --- a/common/changes/@subsquid/util-internal/portal-api_2024-11-02-09-34.json +++ b/common/changes/@subsquid/util-internal/portal-api_2024-11-04-08-10.json @@ -2,8 +2,8 @@ "changes": [ { "packageName": "@subsquid/util-internal", - "comment": "", - "type": "none" + "comment": "add `.wait() to `AsyncQueue``", + "type": "minor" } ], "packageName": "@subsquid/util-internal" diff --git a/evm/evm-processor/src/ds-archive/client.ts b/evm/evm-processor/src/ds-archive/client.ts index 703d734fb..8d2f55466 100644 --- a/evm/evm-processor/src/ds-archive/client.ts +++ b/evm/evm-processor/src/ds-archive/client.ts @@ -1,31 +1,12 @@ -import {addErrorContext, assertNotNull, unexpectedCase} from '@subsquid/util-internal' +import {addErrorContext} from '@subsquid/util-internal' import {ArchiveClient} from '@subsquid/util-internal-archive-client' import {archiveIngest} from '@subsquid/util-internal-ingest-tools' import {Batch, DataSource} from '@subsquid/util-internal-processor-tools' import {getRequestAt, RangeRequest} from '@subsquid/util-internal-range' -import {cast} from '@subsquid/util-internal-validation' import assert from 'assert' import {Bytes32} from '../interfaces/base' -import {FieldSelection} from '../interfaces/data' import {DataRequest} from '../interfaces/data-request' -import { - Block, - BlockHeader, - Log, - StateDiff, - StateDiffAdd, - StateDiffChange, - StateDiffDelete, - StateDiffNoChange, - Trace, - TraceCall, - TraceCreate, - TraceReward, - TraceSuicide, - Transaction -} from '../mapping/entities' -import 
{setUpRelations} from '../mapping/relations' -import {getBlockValidator} from './schema' +import {Block} from '../mapping/entities' import {mapBlock} from './mapping' diff --git a/util/http-client/src/client.ts b/util/http-client/src/client.ts index e4b7c81d6..505bf6fed 100644 --- a/util/http-client/src/client.ts +++ b/util/http-client/src/client.ts @@ -331,7 +331,6 @@ export class HttpClient { if (error instanceof HttpResponse) { switch(error.status) { case 429: - case 500: case 502: case 503: case 504: diff --git a/util/util-internal-archive-client/package.json b/util/util-internal-archive-client/package.json index 318b13d5b..afb8adb20 100644 --- a/util/util-internal-archive-client/package.json +++ b/util/util-internal-archive-client/package.json @@ -34,4 +34,4 @@ "@types/node": "^18.18.14", "typescript": "~5.5.4" } -} \ No newline at end of file +} diff --git a/util/util-internal-archive-client/src/client.ts b/util/util-internal-archive-client/src/client.ts index 4af638504..73fbc8b19 100644 --- a/util/util-internal-archive-client/src/client.ts +++ b/util/util-internal-archive-client/src/client.ts @@ -103,4 +103,4 @@ export class ArchiveClient { } } } -} \ No newline at end of file +} diff --git a/util/util-internal-ingest-tools/package.json b/util/util-internal-ingest-tools/package.json index 1ccef772d..4f8b0ee1e 100644 --- a/util/util-internal-ingest-tools/package.json +++ b/util/util-internal-ingest-tools/package.json @@ -33,4 +33,4 @@ "@types/node": "^18.18.14", "typescript": "~5.5.4" } -} \ No newline at end of file +} diff --git a/util/util-internal-ingest-tools/src/archive.ts b/util/util-internal-ingest-tools/src/archive.ts index 351a21074..b470c7d96 100644 --- a/util/util-internal-ingest-tools/src/archive.ts +++ b/util/util-internal-ingest-tools/src/archive.ts @@ -61,4 +61,4 @@ export function archiveIngest(args: ArchiveIngestOptions): Asyn ingest(), batch => Promise.resolve(batch) ) -} \ No newline at end of file +} From 
0e8c821c576c1ce71240e12ad7c09888db3feacf Mon Sep 17 00:00:00 2001 From: belopash Date: Mon, 4 Nov 2024 13:17:03 +0500 Subject: [PATCH 13/23] metadata --- util/portal-client/src/client.ts | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/util/portal-client/src/client.ts b/util/portal-client/src/client.ts index 6046fa6e5..93380b9d8 100644 --- a/util/portal-client/src/client.ts +++ b/util/portal-client/src/client.ts @@ -19,6 +19,11 @@ export interface Block { } +export interface Metadata { + isRealTime: boolean +} + + export interface PortalClientOptions { url: string http?: HttpClient @@ -41,7 +46,7 @@ export class PortalClient { this.bufferThreshold = options.bufferThreshold ?? 10 * 1024 * 1024 } - private getRouterUrl(path: string): string { + private getDatasetUrl(path: string): string { let u = new URL(this.url) if (this.url.pathname.endsWith('/')) { u.pathname += path @@ -52,7 +57,7 @@ export class PortalClient { } async getHeight(): Promise { - let res: string = await this.http.get(this.getRouterUrl('height'), { + let res: string = await this.http.get(this.getDatasetUrl('height'), { retryAttempts: 3, httpTimeout: 10_000, }) @@ -61,9 +66,19 @@ export class PortalClient { return height } + async getMetadata(): Promise { + let res: {real_time: boolean} = await this.http.get(this.getDatasetUrl('metadata'), { + retryAttempts: 3, + httpTimeout: 10_000, + }) + return { + isRealTime: !!res.real_time + } + } + query(query: Q): Promise { return this.http - .request('POST', this.getRouterUrl(`stream`), { + .request('POST', this.getDatasetUrl(`stream`), { json: query, retryAttempts: 3, httpTimeout: this.queryTimeout, @@ -96,7 +111,7 @@ export class PortalClient { let archiveQuery = {...query, fromBlock} let res = await this.http - .request('POST', this.getRouterUrl(`stream`), { + .request('POST', this.getDatasetUrl(`stream`), { json: archiveQuery, retryAttempts: 3, httpTimeout: this.queryTimeout, @@ -156,8 +171,3 @@ export 
class PortalClient { } } } - -export function portal(url: string | PortalClientOptions) { - let options = typeof url == 'string' ? {url} : url - return new PortalClient(options) -} \ No newline at end of file From 611c8282ae3a2cf267dadd4030235ccbcc466ad2 Mon Sep 17 00:00:00 2001 From: belopash Date: Mon, 4 Nov 2024 17:25:05 +0500 Subject: [PATCH 14/23] fix changes --- ...2024-11-04-08-07.json => portal-api_2024-11-04-12-29.json} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename common/changes/@subsquid/portal-client/{portal-api_2024-11-04-08-07.json => portal-api_2024-11-04-12-29.json} (72%) diff --git a/common/changes/@subsquid/portal-client/portal-api_2024-11-04-08-07.json b/common/changes/@subsquid/portal-client/portal-api_2024-11-04-12-29.json similarity index 72% rename from common/changes/@subsquid/portal-client/portal-api_2024-11-04-08-07.json rename to common/changes/@subsquid/portal-client/portal-api_2024-11-04-12-29.json index a5a8742fb..b14416900 100644 --- a/common/changes/@subsquid/portal-client/portal-api_2024-11-04-08-07.json +++ b/common/changes/@subsquid/portal-client/portal-api_2024-11-04-12-29.json @@ -2,8 +2,8 @@ "changes": [ { "packageName": "@subsquid/portal-client", - "comment": "", - "type": "none" + "comment": "init", + "type": "patch" } ], "packageName": "@subsquid/portal-client" From d58bd6ee324707385037bf04301d49a6c903b93b Mon Sep 17 00:00:00 2001 From: belopash Date: Wed, 6 Nov 2024 16:58:39 +0500 Subject: [PATCH 15/23] add stream timeout --- evm/evm-processor/src/ds-archive/portal.ts | 4 +- evm/evm-processor/src/processor.ts | 3 + test/erc20-transfers/src/processor.ts | 9 ++- util/portal-client/package.json | 3 +- util/portal-client/src/client.ts | 94 +++++++++++++--------- 5 files changed, 69 insertions(+), 44 deletions(-) diff --git a/evm/evm-processor/src/ds-archive/portal.ts b/evm/evm-processor/src/ds-archive/portal.ts index 09393927c..14d87b2d3 100644 --- a/evm/evm-processor/src/ds-archive/portal.ts +++ 
b/evm/evm-processor/src/ds-archive/portal.ts @@ -43,7 +43,7 @@ export class EvmPortal implements DataSource { fromBlock: req.range.from, toBlock: req.range.to, ...req.request - })) { + }, stopOnHead)) { assert(batch.length > 0, 'boundary blocks are expected to be included') let lastBlock = last(batch).header.number assert(lastBlock >= beg) @@ -66,8 +66,6 @@ export class EvmPortal implements DataSource { } top = await height.get() - - if (top < beg && stopOnHead) return } } } diff --git a/evm/evm-processor/src/processor.ts b/evm/evm-processor/src/processor.ts index 09398edaa..598dcfde3 100644 --- a/evm/evm-processor/src/processor.ts +++ b/evm/evm-processor/src/processor.ts @@ -120,6 +120,8 @@ export interface PortalSettings { requestTimeout?: number bufferThreshold?: number + + newBlockTimeout?: number } @@ -544,6 +546,7 @@ export class EvmBatchProcessor { url: archive.url, queryTimeout: archive.requestTimeout, bufferThreshold: archive.bufferThreshold, + newBlockTimeout: archive.newBlockTimeout, log, }) ) diff --git a/test/erc20-transfers/src/processor.ts b/test/erc20-transfers/src/processor.ts index f03ac16f0..9533af597 100644 --- a/test/erc20-transfers/src/processor.ts +++ b/test/erc20-transfers/src/processor.ts @@ -9,12 +9,13 @@ const CONTRACT = '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48'.toLowerCase() const processor = new EvmBatchProcessor() .setPortal({ - url: 'http://localhost:8000/datasets/ethereum-mainnet', - bufferThreshold: 100 * 1024 * 1024 + url: 'https://portal.sqd.dev/datasets/ethereum-mainnet', + bufferThreshold: 100 * 1024 * 1024, + newBlockTimeout: 5000, }) .setRpcEndpoint('https://rpc.ankr.com/eth') .setFinalityConfirmation(500) - .setBlockRange({from: 0}) + .setBlockRange({from: 20801368}) .setFields({ block: {size: true}, log: {transactionHash: true}, @@ -45,6 +46,6 @@ processor.run(new TypeormDatabase({supportHotBlocks: true}), async ctx => { } } - // ctx.log.info(`found ${transfers.length} transfers`) + ctx.log.info(`found ${transfers.length} 
transfers`) // await ctx.store.insert(transfers) }) diff --git a/util/portal-client/package.json b/util/portal-client/package.json index 4e822a9c6..c838d681b 100644 --- a/util/portal-client/package.json +++ b/util/portal-client/package.json @@ -18,7 +18,8 @@ "dependencies": { "@subsquid/util-internal": "^3.2.0", "@subsquid/util-internal-range": "^0.3.0", - "@subsquid/util-internal-archive-layout": "^1.0.0" + "@subsquid/util-internal-archive-layout": "^1.0.0", + "@subsquid/util-timeout": "^2.3.2" }, "peerDependencies": { "@subsquid/http-client": "^1.5.0", diff --git a/util/portal-client/src/client.ts b/util/portal-client/src/client.ts index 93380b9d8..7cc7c5351 100644 --- a/util/portal-client/src/client.ts +++ b/util/portal-client/src/client.ts @@ -2,15 +2,15 @@ import {HttpClient} from '@subsquid/http-client' import type {Logger} from '@subsquid/logger' import {AsyncQueue, ensureError, last, wait, withErrorContext} from '@subsquid/util-internal' import {splitLines} from '@subsquid/util-internal-archive-layout' +import {addTimeout, TimeoutError} from '@subsquid/util-timeout' import assert from 'assert' - +import {Readable} from 'stream' export interface PortalQuery { fromBlock: number toBlock?: number } - export interface Block { header: { number: number @@ -18,32 +18,34 @@ export interface Block { } } - export interface Metadata { isRealTime: boolean } - export interface PortalClientOptions { url: string - http?: HttpClient + http: HttpClient log?: Logger queryTimeout?: number bufferThreshold?: number + newBlockTimeout?: number } - export class PortalClient { private url: URL private http: HttpClient private queryTimeout: number private bufferThreshold: number + private newBlockTimeout: number + private log?: Logger constructor(options: PortalClientOptions) { this.url = new URL(options.url) - this.http = options.http || new HttpClient({log: options.log}) + this.log = options.log + this.http = options.http this.queryTimeout = options.queryTimeout ?? 
180_000 this.bufferThreshold = options.bufferThreshold ?? 10 * 1024 * 1024 + this.newBlockTimeout = options.newBlockTimeout ?? 120_000 } private getDatasetUrl(path: string): string { @@ -72,7 +74,7 @@ export class PortalClient { httpTimeout: 10_000, }) return { - isRealTime: !!res.real_time + isRealTime: !!res.real_time, } } @@ -99,7 +101,10 @@ export class PortalClient { }) } - async *stream(query: Q): AsyncIterable { + async *stream( + query: Q, + stopOnHead = false + ): AsyncIterable { let queue = new AsyncQueue(1) const ingest = async () => { @@ -111,7 +116,7 @@ export class PortalClient { let archiveQuery = {...query, fromBlock} let res = await this.http - .request('POST', this.getDatasetUrl(`stream`), { + .request('POST', this.getDatasetUrl(`stream`), { json: archiveQuery, retryAttempts: 3, httpTimeout: this.queryTimeout, @@ -123,35 +128,52 @@ export class PortalClient { }) ) - for await (let lines of splitLines(res.body as AsyncIterable)) { - let batch = queue.peek() - if (batch instanceof Error) return - - if (!batch) { - bufferSize = 0 - } - - let blocks = lines.map((line) => { - bufferSize += line.length - return JSON.parse(line) as B - }) - - if (batch) { - // FIXME: won't it overflow stack? - batch.push(...blocks) - if (bufferSize > this.bufferThreshold) { - await queue.wait() - } - } else { - await queue.put(blocks) - } - - fromBlock = last(blocks).header.number + 1 - } - // no blocks left if (res.status == 204) { + if (stopOnHead) return await wait(1000) + } else { + try { + let stream = splitLines(res.body) + + while (true) { + let lines = await addTimeout(stream.next(), this.newBlockTimeout) + if (lines.done) break + + let batch = queue.peek() + if (batch instanceof Error) return + + if (!batch) { + bufferSize = 0 + } + + let blocks = lines.value.map((line) => { + bufferSize += line.length + return JSON.parse(line) as B + }) + + if (batch) { + // FIXME: won't it overflow stack? 
+ batch.push(...blocks) + if (bufferSize > this.bufferThreshold) { + await queue.wait() + } + } else { + await queue.put(blocks) + } + + fromBlock = last(blocks).header.number + 1 + } + } catch (err) { + if (err instanceof TimeoutError) { + this.log?.warn( + `resetting stream, because we haven't seen a new blocks for ${this.newBlockTimeout} ms` + ) + res.body.destroy() + } else { + throw err + } + } } } } From ce988d6557ea9a281485e978b810b299ebed9e2f Mon Sep 17 00:00:00 2001 From: belopash Date: Tue, 12 Nov 2024 16:50:51 +0500 Subject: [PATCH 16/23] update paths --- util/portal-client/src/client.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/util/portal-client/src/client.ts b/util/portal-client/src/client.ts index 7cc7c5351..3299898c6 100644 --- a/util/portal-client/src/client.ts +++ b/util/portal-client/src/client.ts @@ -59,7 +59,7 @@ export class PortalClient { } async getHeight(): Promise { - let res: string = await this.http.get(this.getDatasetUrl('height'), { + let res: string = await this.http.get(this.getDatasetUrl('finalized-stream/height'), { retryAttempts: 3, httpTimeout: 10_000, }) @@ -80,7 +80,7 @@ export class PortalClient { query(query: Q): Promise { return this.http - .request('POST', this.getDatasetUrl(`stream`), { + .request('POST', this.getDatasetUrl(`finalized-stream`), { json: query, retryAttempts: 3, httpTimeout: this.queryTimeout, @@ -116,7 +116,7 @@ export class PortalClient { let archiveQuery = {...query, fromBlock} let res = await this.http - .request('POST', this.getDatasetUrl(`stream`), { + .request('POST', this.getDatasetUrl(`finalized-stream`), { json: archiveQuery, retryAttempts: 3, httpTimeout: this.queryTimeout, From 08236e43875ad31bf9da9c45dba8fa64a75d0aab Mon Sep 17 00:00:00 2001 From: belopash Date: Fri, 15 Nov 2024 16:57:57 +0500 Subject: [PATCH 17/23] add always selected fields --- evm/evm-processor/src/ds-archive/portal.ts | 60 ++++++++++++++++++---- 1 file changed, 49 insertions(+), 11 deletions(-) 
diff --git a/evm/evm-processor/src/ds-archive/portal.ts b/evm/evm-processor/src/ds-archive/portal.ts index 14d87b2d3..baa106ef8 100644 --- a/evm/evm-processor/src/ds-archive/portal.ts +++ b/evm/evm-processor/src/ds-archive/portal.ts @@ -1,15 +1,50 @@ import {addErrorContext, last, Throttler} from '@subsquid/util-internal' import {Batch, DataSource} from '@subsquid/util-internal-processor-tools' -import {getRequestAt, RangeRequest} from '@subsquid/util-internal-range' +import {RangeRequest} from '@subsquid/util-internal-range' import assert from 'assert' import {Bytes32} from '../interfaces/base' import {DataRequest} from '../interfaces/data-request' import {Block} from '../mapping/entities' import {mapBlock} from './mapping' import {PortalClient} from '@subsquid/portal-client' +import {FieldSelection} from '../interfaces/data' -const NO_FIELDS = {} +const ALWAYS_SELECTED_FIELDS = { + block: { + number: true, + hash: true, + parentHash: true, + }, + transaction: { + transactionIndex: true, + }, + log: { + logIndex: true, + transactionIndex: true, + }, + trace: { + transactionIndex: true, + traceAddress: true, + type: true, + }, + stateDiff: { + transactionIndex: true, + address: true, + key: true, + }, +} as const + + +function addAlwaysSelectedFields(fields?: FieldSelection): FieldSelection { + return { + block: {...fields?.block, ...ALWAYS_SELECTED_FIELDS.block}, + transaction: {...fields?.transaction, ...ALWAYS_SELECTED_FIELDS.transaction}, + log: {...fields?.log, ...ALWAYS_SELECTED_FIELDS.log}, + trace: {...fields?.trace, ...ALWAYS_SELECTED_FIELDS.trace}, + stateDiff: {...fields?.stateDiff, ...ALWAYS_SELECTED_FIELDS.stateDiff, kind: true} + } +} export class EvmPortal implements DataSource { @@ -34,20 +69,23 @@ export class EvmPortal implements DataSource { let top = await height.get() for (let req of requests) { - let beg = req.range.from - let fields = getRequestAt(requests, beg)?.fields || NO_FIELDS + let fromBlock = req.range.from + let toBlock = 
req.range.from + let fields = addAlwaysSelectedFields(req.request.fields) - if (top < beg && stopOnHead) return + if (top < fromBlock && stopOnHead) return for await (let batch of this.client.stream({ - fromBlock: req.range.from, - toBlock: req.range.to, - ...req.request + ...req.request, + type: 'evm', + fromBlock, + toBlock, + fields, }, stopOnHead)) { assert(batch.length > 0, 'boundary blocks are expected to be included') let lastBlock = last(batch).header.number - assert(lastBlock >= beg) - beg = lastBlock + 1 + assert(lastBlock >= fromBlock) + fromBlock = lastBlock + 1 let blocks = batch.map(b => { try { @@ -62,7 +100,7 @@ export class EvmPortal implements DataSource { yield { blocks, - isHead: beg > top + isHead: fromBlock > top } top = await height.get() From 12304ee000c273e86eee725c497a16b532a48ad4 Mon Sep 17 00:00:00 2001 From: Eugene Formanenko Date: Thu, 28 Nov 2024 13:16:03 +0400 Subject: [PATCH 18/23] fix: typo --- evm/evm-processor/src/ds-archive/portal.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/evm-processor/src/ds-archive/portal.ts b/evm/evm-processor/src/ds-archive/portal.ts index baa106ef8..f2c9dedfc 100644 --- a/evm/evm-processor/src/ds-archive/portal.ts +++ b/evm/evm-processor/src/ds-archive/portal.ts @@ -70,7 +70,7 @@ export class EvmPortal implements DataSource { let top = await height.get() for (let req of requests) { let fromBlock = req.range.from - let toBlock = req.range.from + let toBlock = req.range.to let fields = addAlwaysSelectedFields(req.request.fields) if (top < fromBlock && stopOnHead) return From a61c87412e63ea96b0fc8c163d546b5b354a4558 Mon Sep 17 00:00:00 2001 From: belopash Date: Mon, 2 Dec 2024 21:43:30 +0500 Subject: [PATCH 19/23] don't forget to add type and default query to old request --- evm/evm-processor/src/ds-archive/portal.ts | 67 ++++++++++++---------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/evm/evm-processor/src/ds-archive/portal.ts 
b/evm/evm-processor/src/ds-archive/portal.ts index f2c9dedfc..6c0895e62 100644 --- a/evm/evm-processor/src/ds-archive/portal.ts +++ b/evm/evm-processor/src/ds-archive/portal.ts @@ -9,7 +9,6 @@ import {mapBlock} from './mapping' import {PortalClient} from '@subsquid/portal-client' import {FieldSelection} from '../interfaces/data' - const ALWAYS_SELECTED_FIELDS = { block: { number: true, @@ -35,17 +34,27 @@ const ALWAYS_SELECTED_FIELDS = { }, } as const - -function addAlwaysSelectedFields(fields?: FieldSelection): FieldSelection { +function getFields(fields?: FieldSelection): FieldSelection { return { block: {...fields?.block, ...ALWAYS_SELECTED_FIELDS.block}, transaction: {...fields?.transaction, ...ALWAYS_SELECTED_FIELDS.transaction}, log: {...fields?.log, ...ALWAYS_SELECTED_FIELDS.log}, trace: {...fields?.trace, ...ALWAYS_SELECTED_FIELDS.trace}, - stateDiff: {...fields?.stateDiff, ...ALWAYS_SELECTED_FIELDS.stateDiff, kind: true} + stateDiff: {...fields?.stateDiff, ...ALWAYS_SELECTED_FIELDS.stateDiff, kind: true}, } } +function makeQuery(req: RangeRequest) { + let {fields, ...request} = req.request + + return { + type: 'evm', + fromBlock: req.range.from, + toBlock: req.range.to, + fields: getFields(fields), + ...request, + } +} export class EvmPortal implements DataSource { constructor(private client: PortalClient) {} @@ -55,52 +64,51 @@ export class EvmPortal implements DataSource { } async getBlockHash(height: number): Promise { - let blocks = await this.client.query({ - fromBlock: height, - toBlock: height, - includeAllBlocks: true + let query = makeQuery({ + range: {from: height, to: height}, + request: {includeAllBlocks: true}, }) + let blocks = await this.client.query(query) assert(blocks.length == 1) return blocks[0].header.hash } - async *getFinalizedBlocks(requests: RangeRequest[], stopOnHead?: boolean | undefined): AsyncIterable> { + async *getFinalizedBlocks( + requests: RangeRequest[], + stopOnHead?: boolean | undefined + ): AsyncIterable> { let height 
= new Throttler(() => this.client.getHeight(), 20_000) - + let top = await height.get() for (let req of requests) { - let fromBlock = req.range.from - let toBlock = req.range.to - let fields = addAlwaysSelectedFields(req.request.fields) + let from = req.range.from + let to = req.range.to + if (top < from && stopOnHead) return - if (top < fromBlock && stopOnHead) return - - for await (let batch of this.client.stream({ - ...req.request, - type: 'evm', - fromBlock, - toBlock, - fields, - }, stopOnHead)) { + let query = makeQuery({ + ...req, + range: {from, to}, + }) + for await (let batch of this.client.stream(query, stopOnHead)) { assert(batch.length > 0, 'boundary blocks are expected to be included') let lastBlock = last(batch).header.number - assert(lastBlock >= fromBlock) - fromBlock = lastBlock + 1 + assert(lastBlock >= from) + from = lastBlock + 1 - let blocks = batch.map(b => { + let blocks = batch.map((b) => { try { - return mapBlock(b, fields) - } catch(err: any) { + return mapBlock(b, req.request.fields || {}) + } catch (err: any) { throw addErrorContext(err, { blockHeight: b.header.number, - blockHash: b.header.hash + blockHash: b.header.hash, }) } }) yield { blocks, - isHead: fromBlock > top + isHead: from > top, } top = await height.get() @@ -108,4 +116,3 @@ export class EvmPortal implements DataSource { } } } - From f8d8f196c66ede164b6fc3437eb7e4db782cb54e Mon Sep 17 00:00:00 2001 From: belopash Date: Fri, 6 Dec 2024 11:12:27 +0500 Subject: [PATCH 20/23] add portal support for substrate --- .github/workflows/prerelease.yml | 4 + .../portal-api_2024-12-06-06-12.json | 10 + evm/evm-processor/src/ds-archive/portal.ts | 26 +-- evm/evm-processor/src/processor.ts | 6 +- substrate/substrate-processor/package.json | 1 + .../substrate-processor/src/ds-archive.ts | 194 +++++++++--------- .../substrate-processor/src/ds-portal.ts | 145 +++++++++++++ .../substrate-processor/src/processor.ts | 71 +++++-- .../substrate-processor/src/selection.ts | 15 ++ 
test/balances/src/processor.ts | 7 +- 10 files changed, 345 insertions(+), 134 deletions(-) create mode 100644 common/changes/@subsquid/substrate-processor/portal-api_2024-12-06-06-12.json create mode 100644 substrate/substrate-processor/src/ds-portal.ts diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index a6bb02195..375b1e8d9 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -2,6 +2,10 @@ name: prerelease on: workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: release: name: release diff --git a/common/changes/@subsquid/substrate-processor/portal-api_2024-12-06-06-12.json b/common/changes/@subsquid/substrate-processor/portal-api_2024-12-06-06-12.json new file mode 100644 index 000000000..0cddc0429 --- /dev/null +++ b/common/changes/@subsquid/substrate-processor/portal-api_2024-12-06-06-12.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@subsquid/substrate-processor", + "comment": "add portal api support", + "type": "minor" + } + ], + "packageName": "@subsquid/substrate-processor" +} \ No newline at end of file diff --git a/evm/evm-processor/src/ds-archive/portal.ts b/evm/evm-processor/src/ds-archive/portal.ts index 6c0895e62..a9bf8854c 100644 --- a/evm/evm-processor/src/ds-archive/portal.ts +++ b/evm/evm-processor/src/ds-archive/portal.ts @@ -79,21 +79,15 @@ export class EvmPortal implements DataSource { ): AsyncIterable> { let height = new Throttler(() => this.client.getHeight(), 20_000) - let top = await height.get() + let top = await height.call() for (let req of requests) { - let from = req.range.from - let to = req.range.to - if (top < from && stopOnHead) return + let lastBlock = req.range.from - 1 + let endBlock = req.range.to || Infinity + let query = makeQuery(req) - let query = makeQuery({ - ...req, - range: {from, to}, - }) for await (let batch of this.client.stream(query, stopOnHead)) { 
assert(batch.length > 0, 'boundary blocks are expected to be included') - let lastBlock = last(batch).header.number - assert(lastBlock >= from) - from = lastBlock + 1 + lastBlock = last(batch).header.number let blocks = batch.map((b) => { try { @@ -108,11 +102,19 @@ export class EvmPortal implements DataSource { yield { blocks, - isHead: from > top, + isHead: lastBlock > top, } top = await height.get() } + + // stream ended before requested range, + // which means we reached the last available block + // should not happen if stopOnHead is set to false + if (lastBlock < endBlock) { + assert(stopOnHead, 'unexpected end of stream') + break + } } } } diff --git a/evm/evm-processor/src/processor.ts b/evm/evm-processor/src/processor.ts index 598dcfde3..c7c15f5f3 100644 --- a/evm/evm-processor/src/processor.ts +++ b/evm/evm-processor/src/processor.ts @@ -229,7 +229,7 @@ export class EvmBatchProcessor { * processor.setGateway('https://v2.archive.subsquid.io/network/ethereum-mainnet') */ setGateway(url: string | GatewaySettings): this { - assert(this.archive?.type !== 'gateway', 'setGateway() can not be used together with setPortal()') + assert(this.archive?.type !== 'gateway', '.setGateway() can not be used together with setPortal()') this.assertNotRunning() if (typeof url == 'string') { this.archive = {type: 'gateway', url} @@ -239,9 +239,8 @@ export class EvmBatchProcessor { return this } - setPortal(url: string | PortalSettings): this { - assert(this.archive?.type !== 'gateway', 'setPortal() can not be used together with setGateway()') + assert(this.archive?.type !== 'gateway', '.setPortal() can not be used together with setGateway()') this.assertNotRunning() if (typeof url == 'string') { this.archive = {type: 'portal', url} @@ -251,7 +250,6 @@ export class EvmBatchProcessor { return this } - /** * Set chain RPC endpoint * diff --git a/substrate/substrate-processor/package.json b/substrate/substrate-processor/package.json index 6bc42d2c7..fb795a03f 100644 --- 
a/substrate/substrate-processor/package.json +++ b/substrate/substrate-processor/package.json @@ -19,6 +19,7 @@ "@subsquid/http-client": "^1.6.0", "@subsquid/logger": "^1.3.3", "@subsquid/rpc-client": "^4.11.0", + "@subsquid/portal-client": "^0.0.0", "@subsquid/substrate-data": "^4.2.1", "@subsquid/substrate-data-raw": "^1.2.0", "@subsquid/util-internal": "^3.2.0", diff --git a/substrate/substrate-processor/src/ds-archive.ts b/substrate/substrate-processor/src/ds-archive.ts index 239efe0d2..550130559 100644 --- a/substrate/substrate-processor/src/ds-archive.ts +++ b/substrate/substrate-processor/src/ds-archive.ts @@ -10,6 +10,7 @@ import {DEFAULT_FIELDS, FieldSelection} from './interfaces/data' import {ArchiveBlock, ArchiveBlockHeader} from './interfaces/data-partial' import {DataRequest} from './interfaces/data-request' import {Block, BlockHeader, Call, Event, Extrinsic, setUpItems} from './mapping' +import {mergeFields} from './selection' interface ArchiveQuery extends DataRequest { @@ -44,7 +45,6 @@ export class SubstrateArchive implements DataSource { } async *getFinalizedBlocks(requests: RangeRequestList, stopOnHead?: boolean): AsyncIterable> { - let runtimeTracker = new RuntimeTracker( this.rpc, hdr => ({height: hdr.number, hash: hdr.hash, parentHash: hdr.parentHash}), @@ -80,108 +80,8 @@ export class SubstrateArchive implements DataSource { @annotateSyncError((src: ArchiveBlock) => ({blockHeight: src.header.number, blockHash: src.header.hash})) private mapBlock(src: ArchiveBlock): Block { - let block = new Block(new BlockHeader( - assertNotNull(src.header.runtime), - assertNotNull(src.header.runtimeOfPrevBlock), - { - height: src.header.number, - ...src.header - } - )) - - if (src.extrinsics) { - for (let s of src.extrinsics) { - let extrinsic = new Extrinsic(block.header, s.index) - if (s.version != null) { - extrinsic.version = s.version - } - if (s.signature != null) { - extrinsic.signature = s.signature - } - if (s.fee != null) { - extrinsic.fee = 
BigInt(s.fee) - } - if (s.tip != null) { - extrinsic.tip = BigInt(s.tip) - } - if (s.error != null) { - extrinsic.error = s.error - } - if (s.success != null) { - extrinsic.success = s.success - } - if (s.hash != null) { - extrinsic.hash = s.hash - } - block.extrinsics.push(extrinsic) - } - } - - if (src.calls) { - for (let s of src.calls) { - let call = new Call(block.header, s.extrinsicIndex, s.address) - if (s.name) { - call.name = s.name - } - if (s.args != null) { - call.args = s.args - } - if (s.origin != null) { - call.origin = s.origin - } - if (s.error != null) { - call.error = s.error - } - if (s.success != null) { - call.success = s.success - } - block.calls.push(call) - } - } - - if (src.events) { - for (let s of src.events) { - let event = new Event(block.header, s.index) - if (s.name != null) { - event.name = s.name - } - if (s.args != null) { - event.args = s.args - } - if (s.phase != null) { - event.phase = s.phase - } - if (s.extrinsicIndex != null) { - event.extrinsicIndex = s.extrinsicIndex - } - if (s.callAddress != null) { - event.callAddress = s.callAddress - } - if (s.topics != null) { - event.topics = s.topics - } - block.events.push(event) - } - } - - setUpItems(block) - return block - } -} - - -type Selector = { - [K in Keys]?: boolean -} - - -function mergeFields(def: Selector, requested?: Selector, required?: Selector): Selector { - let fields: Selector = {...def} - for (let key in requested) { - fields[key] = requested[key] + return mapBlock(src) } - Object.assign(fields, required) - return fields } @@ -198,3 +98,93 @@ function getFields(fields: FieldSelection | undefined): FieldSelection { extrinsic: mergeFields(DEFAULT_FIELDS.extrinsic, fields?.extrinsic) } } + + +export function mapBlock(src: ArchiveBlock): Block { + let block = new Block(new BlockHeader( + assertNotNull(src.header.runtime), + assertNotNull(src.header.runtimeOfPrevBlock), + { + height: src.header.number, + ...src.header + } + )) + + if (src.extrinsics) { + for (let s 
of src.extrinsics) { + let extrinsic = new Extrinsic(block.header, s.index) + if (s.version != null) { + extrinsic.version = s.version + } + if (s.signature != null) { + extrinsic.signature = s.signature + } + if (s.fee != null) { + extrinsic.fee = BigInt(s.fee) + } + if (s.tip != null) { + extrinsic.tip = BigInt(s.tip) + } + if (s.error != null) { + extrinsic.error = s.error + } + if (s.success != null) { + extrinsic.success = s.success + } + if (s.hash != null) { + extrinsic.hash = s.hash + } + block.extrinsics.push(extrinsic) + } + } + + if (src.calls) { + for (let s of src.calls) { + let call = new Call(block.header, s.extrinsicIndex, s.address) + if (s.name) { + call.name = s.name + } + if (s.args != null) { + call.args = s.args + } + if (s.origin != null) { + call.origin = s.origin + } + if (s.error != null) { + call.error = s.error + } + if (s.success != null) { + call.success = s.success + } + block.calls.push(call) + } + } + + if (src.events) { + for (let s of src.events) { + let event = new Event(block.header, s.index) + if (s.name != null) { + event.name = s.name + } + if (s.args != null) { + event.args = s.args + } + if (s.phase != null) { + event.phase = s.phase + } + if (s.extrinsicIndex != null) { + event.extrinsicIndex = s.extrinsicIndex + } + if (s.callAddress != null) { + event.callAddress = s.callAddress + } + if (s.topics != null) { + event.topics = s.topics + } + block.events.push(event) + } + } + + setUpItems(block) + return block +} \ No newline at end of file diff --git a/substrate/substrate-processor/src/ds-portal.ts b/substrate/substrate-processor/src/ds-portal.ts new file mode 100644 index 000000000..bd5d54df5 --- /dev/null +++ b/substrate/substrate-processor/src/ds-portal.ts @@ -0,0 +1,145 @@ +import {addErrorContext, annotateSyncError, last, Throttler} from '@subsquid/util-internal' +import {Batch, DataSource} from '@subsquid/util-internal-processor-tools' +import {RangeRequest} from '@subsquid/util-internal-range' +import assert from 
'assert' +import {mapBlock} from './ds-archive' +import {PortalClient} from '@subsquid/portal-client' +import {Rpc, RuntimeTracker, WithRuntime} from '@subsquid/substrate-data' +import {ArchiveBlock, ArchiveBlockHeader} from './interfaces/data-partial' +import {RpcClient} from '@subsquid/rpc-client' +import {OldSpecsBundle, OldTypesBundle} from '@subsquid/substrate-runtime' +import {DataRequest} from './interfaces/data-request' +import {Block} from './mapping' +import {assertIsValid, IsInvalid} from '@subsquid/util-internal-ingest-tools' +import {DEFAULT_FIELDS, FieldSelection} from './interfaces/data' +import {mergeFields} from './selection' + +function getFields(fields: FieldSelection | undefined): FieldSelection { + return { + block: mergeFields(DEFAULT_FIELDS.block, fields?.block, { + number: true, + hash: true, + parentHash: true, + specName: true, + specVersion: true, + implName: true, + implVersion: true, + }), + event: mergeFields(DEFAULT_FIELDS.event, fields?.event, { + index: true, + extrinsicIndex: true, + callAddress: true, + }), + call: mergeFields(DEFAULT_FIELDS.call, fields?.call, { + extrinsicIndex: true, + address: true, + }), + extrinsic: mergeFields(DEFAULT_FIELDS.extrinsic, fields?.extrinsic, { + index: true + }), + } +} + +function makeQuery(req: RangeRequest) { + let {fields, ...request} = req.request + + return { + type: 'substrate', + fromBlock: req.range.from, + toBlock: req.range.to, + fields: getFields(fields), + ...request, + } +} + +export interface SubstratePortalOptions { + client: PortalClient + rpc: RpcClient + typesBundle?: OldTypesBundle | OldSpecsBundle +} + +export class SubstratePortal implements DataSource { + private client: PortalClient + private rpc: Rpc + private typesBundle?: OldTypesBundle | OldSpecsBundle + + constructor(options: SubstratePortalOptions) { + this.client = options.client + this.rpc = new Rpc(options.rpc) + this.typesBundle = options.typesBundle + } + + getFinalizedHeight(): Promise { + return 
this.client.getHeight() + } + + async getBlockHash(height: number): Promise { + let query = makeQuery({ + range: {from: height, to: height}, + request: {includeAllBlocks: true}, + }) + let blocks = await this.client.query(query) + return blocks[0]?.header?.hash || null + } + + async *getFinalizedBlocks( + requests: RangeRequest[], + stopOnHead?: boolean | undefined + ): AsyncIterable> { + let height = new Throttler(() => this.client.getHeight(), 20_000) + + let runtimeTracker = new RuntimeTracker( + this.rpc, + (hdr) => ({height: hdr.number, hash: hdr.hash, parentHash: hdr.parentHash}), + (hdr) => hdr, + this.typesBundle + ) + + let top = await height.call() + for (let req of requests) { + let lastBlock = req.range.from - 1 + let endBlock = req.range.to || Infinity + let query = makeQuery(req) + + for await (let batch of this.client.stream(query, stopOnHead)) { + assert(batch.length > 0, 'boundary blocks are expected to be included') + lastBlock = last(batch).header.number + + let headers: (ArchiveBlockHeader & IsInvalid)[] = batch.map((b) => b.header) + await runtimeTracker.setRuntime(headers) + assertIsValid(headers) + + let blocks = batch.map((b) => { + try { + return this.mapBlock(b) + } catch (err: any) { + throw addErrorContext(err, { + blockHeight: b.header.number, + blockHash: b.header.hash, + }) + } + }) + + yield { + blocks, + isHead: lastBlock > top, + } + + top = await height.get() + } + + // stream ended before requested range, + // which means we reached the last available block + // should not happen if stopOnHead is set to false + if (lastBlock < endBlock) { + assert(stopOnHead, 'unexpected end of stream') + break + } + } + } + + @annotateSyncError((src: ArchiveBlock) => ({blockHeight: src.header.number, blockHash: src.header.hash})) + private mapBlock(src: ArchiveBlock): Block { + return mapBlock(src) + } +} diff --git a/substrate/substrate-processor/src/processor.ts b/substrate/substrate-processor/src/processor.ts index 1349eb08a..c8a40fd46 100644 
--- a/substrate/substrate-processor/src/processor.ts +++ b/substrate/substrate-processor/src/processor.ts @@ -32,6 +32,8 @@ import { GearUserMessageSentRequest } from './interfaces/data-request' import {getFieldSelectionValidator} from './selection' +import {SubstratePortal} from './ds-portal' +import {PortalClient} from '@subsquid/portal-client' export interface RpcEndpointSettings { @@ -94,6 +96,22 @@ export interface GatewaySettings { } +export interface PortalSettings { + /** + * Subsquid Network Gateway url + */ + url: string + /** + * Request timeout in ms + */ + requestTimeout?: number + + bufferThreshold?: number + + newBlockTimeout?: number +} + + /** * @deprecated */ @@ -160,7 +178,7 @@ export class SubstrateBatchProcessor { private fields?: FieldSelection private blockRange?: Range private finalityConfirmation?: number - private archive?: GatewaySettings + private archive?: GatewaySettings & {type: 'gateway'} | PortalSettings & {type: 'portal'} private rpcEndpoint?: RpcEndpointSettings private rpcIngestSettings?: RpcDataIngestionSettings private typesBundle?: OldTypesBundle | OldSpecsBundle @@ -184,11 +202,23 @@ export class SubstrateBatchProcessor { * processor.setGateway('https://v2.archive.subsquid.io/network/kusama') */ setGateway(url: string | GatewaySettings): this { + assert(this.archive?.type !== 'gateway', '.setGateway() can not be used together with setPortal()') this.assertNotRunning() if (typeof url == 'string') { - this.archive = {url} + this.archive = {type: 'gateway', url} } else { - this.archive = url + this.archive = {type: 'gateway', ...url} + } + return this + } + + setPortal(url: string | PortalSettings): this { + assert(this.archive?.type !== 'gateway', '.setPortal() can not be used together with setGateway()') + this.assertNotRunning() + if (typeof url == 'string') { + this.archive = {type: 'portal', url} + } else { + this.archive = {type: 'portal', ...url} } return this } @@ -475,7 +505,7 @@ export class SubstrateBatchProcessor { } 
@def - private getArchiveDataSource(): SubstrateArchive { + private getArchiveDataSource(): SubstrateArchive | SubstratePortal { let options = assertNotNull(this.archive) let log = this.getLogger().child('archive') @@ -490,16 +520,29 @@ export class SubstrateBatchProcessor { log }) - return new SubstrateArchive({ - client: new ArchiveClient({ - http, - url: options.url, - queryTimeout: options.requestTimeout, - log - }), - rpc: this.getChainRpcClient(), - typesBundle: this.typesBundle - }) + return options.type === 'gateway' + ? new SubstrateArchive({ + client: new ArchiveClient({ + http, + url: options.url, + queryTimeout: options.requestTimeout, + log + }), + rpc: this.getChainRpcClient(), + typesBundle: this.typesBundle + }) + : new SubstratePortal({ + client: new PortalClient({ + http, + url: options.url, + queryTimeout: options.requestTimeout, + bufferThreshold: options.bufferThreshold, + newBlockTimeout: options.newBlockTimeout, + log, + }), + rpc: this.getChainRpcClient(), + typesBundle: this.typesBundle + }) } @def diff --git a/substrate/substrate-processor/src/selection.ts b/substrate/substrate-processor/src/selection.ts index cc76b6286..311505fcc 100644 --- a/substrate/substrate-processor/src/selection.ts +++ b/substrate/substrate-processor/src/selection.ts @@ -64,4 +64,19 @@ export function getFieldSelectionValidator() { call: option(getCallSelectionValidator()), event: option(getEventSelectionValidator()), }) +} + + +type Selector = { + [K in Keys]?: boolean +} + + +export function mergeFields(def: Selector, requested?: Selector, required?: Selector): Selector { + let fields: Selector = {...def} + for (let key in requested) { + fields[key] = requested[key] + } + Object.assign(fields, required) + return fields } \ No newline at end of file diff --git a/test/balances/src/processor.ts b/test/balances/src/processor.ts index e8ddb7c17..f3b93d821 100644 --- a/test/balances/src/processor.ts +++ b/test/balances/src/processor.ts @@ -8,14 +8,17 @@ import {events} 
from './types' const processor = new SubstrateBatchProcessor() - .setGateway('https://v2.archive.subsquid.io/network/kusama') + .setPortal('https://portal.sqd.dev/datasets/kusama') .setRpcEndpoint(process.env.KUSAMA_NODE_WS || 'wss://kusama-rpc.polkadot.io') + .setRpcDataIngestionSettings({ + // disabled: true, + }) .setFields({ block: { timestamp: true } }) - .setBlockRange({from: 19_666_100}) + .setBlockRange({from: 0}) .addEvent({ name: [events.balances.transfer.name] }) From 69ecabd1a4e83ed01ed993b220c12251af38440a Mon Sep 17 00:00:00 2001 From: belopash Date: Tue, 10 Dec 2024 19:59:32 +0500 Subject: [PATCH 21/23] try to improve portal ingesting --- evm/evm-processor/src/ds-archive/portal.ts | 8 +- .../substrate-processor/src/ds-portal.ts | 8 +- test/balances/src/processor.ts | 8 +- util/portal-client/src/client.ts | 152 +++++++++++------- 4 files changed, 109 insertions(+), 67 deletions(-) diff --git a/evm/evm-processor/src/ds-archive/portal.ts b/evm/evm-processor/src/ds-archive/portal.ts index a9bf8854c..44d12ae6f 100644 --- a/evm/evm-processor/src/ds-archive/portal.ts +++ b/evm/evm-processor/src/ds-archive/portal.ts @@ -60,7 +60,7 @@ export class EvmPortal implements DataSource { constructor(private client: PortalClient) {} getFinalizedHeight(): Promise { - return this.client.getHeight() + return this.client.getFinalizedHeight() } async getBlockHash(height: number): Promise { @@ -68,7 +68,7 @@ export class EvmPortal implements DataSource { range: {from: height, to: height}, request: {includeAllBlocks: true}, }) - let blocks = await this.client.query(query) + let blocks = await this.client.finalizedQuery(query) assert(blocks.length == 1) return blocks[0].header.hash } @@ -77,7 +77,7 @@ export class EvmPortal implements DataSource { requests: RangeRequest[], stopOnHead?: boolean | undefined ): AsyncIterable> { - let height = new Throttler(() => this.client.getHeight(), 20_000) + let height = new Throttler(() => this.client.getFinalizedHeight(), 20_000) let top 
= await height.call() for (let req of requests) { @@ -85,7 +85,7 @@ export class EvmPortal implements DataSource { let endBlock = req.range.to || Infinity let query = makeQuery(req) - for await (let batch of this.client.stream(query, stopOnHead)) { + for await (let batch of this.client.finalizedStream(query, stopOnHead)) { assert(batch.length > 0, 'boundary blocks are expected to be included') lastBlock = last(batch).header.number diff --git a/substrate/substrate-processor/src/ds-portal.ts b/substrate/substrate-processor/src/ds-portal.ts index bd5d54df5..e065a02af 100644 --- a/substrate/substrate-processor/src/ds-portal.ts +++ b/substrate/substrate-processor/src/ds-portal.ts @@ -70,7 +70,7 @@ export class SubstratePortal implements DataSource { } getFinalizedHeight(): Promise { - return this.client.getHeight() + return this.client.getFinalizedHeight() } async getBlockHash(height: number): Promise { @@ -78,7 +78,7 @@ export class SubstratePortal implements DataSource { range: {from: height, to: height}, request: {includeAllBlocks: true}, }) - let blocks = await this.client.query(query) + let blocks = await this.client.finalizedQuery(query) return blocks[0]?.header?.hash || null } @@ -86,7 +86,7 @@ export class SubstratePortal implements DataSource { requests: RangeRequest[], stopOnHead?: boolean | undefined ): AsyncIterable> { - let height = new Throttler(() => this.client.getHeight(), 20_000) + let height = new Throttler(() => this.client.getFinalizedHeight(), 20_000) let runtimeTracker = new RuntimeTracker( this.rpc, @@ -101,7 +101,7 @@ export class SubstratePortal implements DataSource { let endBlock = req.range.to || Infinity let query = makeQuery(req) - for await (let batch of this.client.stream(query, stopOnHead)) { + for await (let batch of this.client.finalizedStream(query, stopOnHead)) { assert(batch.length > 0, 'boundary blocks are expected to be included') lastBlock = last(batch).header.number diff --git a/test/balances/src/processor.ts 
b/test/balances/src/processor.ts index f3b93d821..24179ba3d 100644 --- a/test/balances/src/processor.ts +++ b/test/balances/src/processor.ts @@ -8,7 +8,10 @@ import {events} from './types' const processor = new SubstrateBatchProcessor() - .setPortal('https://portal.sqd.dev/datasets/kusama') + .setPortal({ + url: 'https://portal.sqd.dev/datasets/kusama', + bufferThreshold: 50 * 1024 * 1024, + }) .setRpcEndpoint(process.env.KUSAMA_NODE_WS || 'wss://kusama-rpc.polkadot.io') .setRpcDataIngestionSettings({ // disabled: true, @@ -20,11 +23,12 @@ const processor = new SubstrateBatchProcessor() }) .setBlockRange({from: 0}) .addEvent({ - name: [events.balances.transfer.name] + name: [events.balances.transfer.name], }) processor.run(new TypeormDatabase(), async ctx => { + let transfers: Transfer[] = [] for (let block of ctx.blocks) { diff --git a/util/portal-client/src/client.ts b/util/portal-client/src/client.ts index 3299898c6..1abb7a025 100644 --- a/util/portal-client/src/client.ts +++ b/util/portal-client/src/client.ts @@ -58,16 +58,6 @@ export class PortalClient { return u.toString() } - async getHeight(): Promise { - let res: string = await this.http.get(this.getDatasetUrl('finalized-stream/height'), { - retryAttempts: 3, - httpTimeout: 10_000, - }) - let height = parseInt(res) - assert(Number.isSafeInteger(height)) - return height - } - async getMetadata(): Promise { let res: {real_time: boolean} = await this.http.get(this.getDatasetUrl('metadata'), { retryAttempts: 3, @@ -78,7 +68,18 @@ export class PortalClient { } } - query(query: Q): Promise { + async getFinalizedHeight(): Promise { + let res: string = await this.http.get(this.getDatasetUrl('finalized-stream/height'), { + retryAttempts: 3, + httpTimeout: 10_000, + }) + let height = parseInt(res) + assert(Number.isSafeInteger(height)) + return height + } + + finalizedQuery(query: Q): Promise { + // FIXME: is it needed or it is better to always use stream? 
return this.http .request('POST', this.getDatasetUrl(`finalized-stream`), { json: query, @@ -91,7 +92,6 @@ export class PortalClient { }) ) .then((res) => { - // TODO: move the conversion to the server let blocks = res.body .toString('utf8') .trimEnd() @@ -101,14 +101,50 @@ export class PortalClient { }) } - async *stream( + async *finalizedStream( query: Q, stopOnHead = false ): AsyncIterable { let queue = new AsyncQueue(1) + let bufferSize = 0 + let isReady = false + let cache: B[] = [] + + const getBuffer = () => { + if (queue.isClosed()) return + let peeked = queue.peek() + // FIXME: is it a valid case? + if (peeked instanceof Error) return + + // buffer has been consumed, we need to reset + if (isReady && !peeked) { + reset() + } + + return peeked ?? cache + } + + const reset = () => { + bufferSize = 0 + isReady = false + cache.length = 0 + } + + const setReady = () => { + if (queue.isClosed()) return + if (isReady) return + queue.forcePut(cache) + isReady = true + cache = [] + } + + const waitForReset = async () => { + if (queue.isClosed()) return + await queue.wait() + reset() + } const ingest = async () => { - let bufferSize = 0 let fromBlock = query.fromBlock let toBlock = query.toBlock ?? Infinity @@ -131,49 +167,51 @@ export class PortalClient { // no blocks left if (res.status == 204) { if (stopOnHead) return + await wait(1000) - } else { - try { - let stream = splitLines(res.body) - - while (true) { - let lines = await addTimeout(stream.next(), this.newBlockTimeout) - if (lines.done) break - - let batch = queue.peek() - if (batch instanceof Error) return - - if (!batch) { - bufferSize = 0 - } - - let blocks = lines.value.map((line) => { - bufferSize += line.length - return JSON.parse(line) as B - }) - - if (batch) { - // FIXME: won't it overflow stack? 
- batch.push(...blocks) - if (bufferSize > this.bufferThreshold) { - await queue.wait() - } - } else { - await queue.put(blocks) - } - - fromBlock = last(blocks).header.number + 1 - } - } catch (err) { - if (err instanceof TimeoutError) { - this.log?.warn( - `resetting stream, because we haven't seen a new blocks for ${this.newBlockTimeout} ms` - ) - res.body.destroy() - } else { - throw err + continue + } + + try { + let stream = splitLines(res.body) + + while (true) { + let lines = await addTimeout(stream.next(), this.newBlockTimeout) + if (lines.done) break + + let buffer = getBuffer() + if (buffer == null) break + + let blocks = lines.value.map((line) => { + bufferSize += line.length + return JSON.parse(line) as B + }) + + // FIXME: won't it overflow stack? + buffer.push(...blocks) + + fromBlock = last(blocks).header.number + 1 + + if (bufferSize > this.bufferThreshold) { + setReady() + await waitForReset() } } + + if (bufferSize > 0) { + setReady() + } + } catch (err) { + if (err instanceof TimeoutError) { + this.log?.warn( + `resetting stream, because we haven't seen a new blocks for ${this.newBlockTimeout} ms` + ) + } else { + throw err + } + } finally { + // FIXME: is it needed? 
+ res.body.destroy() } } } @@ -181,9 +219,9 @@ export class PortalClient { ingest().then( () => queue.close(), (err) => { - if (!queue.isClosed()) { - queue.forcePut(ensureError(err)) - } + if (queue.isClosed()) return + queue.forcePut(ensureError(err)) + queue.close() } ) From 5a51523b6a6e31d38dd6ba406490ccb5da727093 Mon Sep 17 00:00:00 2001 From: belopash Date: Tue, 17 Dec 2024 00:07:52 +0300 Subject: [PATCH 22/23] add support for retry after header --- .../portal-api_2024-12-16-21-16.json | 10 ++++++++ evm/evm-processor/src/processor.ts | 5 +++- .../substrate-processor/src/processor.ts | 5 +++- util/portal-client/src/client.ts | 25 +++++++++++-------- 4 files changed, 32 insertions(+), 13 deletions(-) create mode 100644 common/changes/@subsquid/http-client/portal-api_2024-12-16-21-16.json diff --git a/common/changes/@subsquid/http-client/portal-api_2024-12-16-21-16.json b/common/changes/@subsquid/http-client/portal-api_2024-12-16-21-16.json new file mode 100644 index 000000000..ff35dda8c --- /dev/null +++ b/common/changes/@subsquid/http-client/portal-api_2024-12-16-21-16.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@subsquid/http-client", + "comment": "add `retry-after` header support", + "type": "minor" + } + ], + "packageName": "@subsquid/http-client" +} \ No newline at end of file diff --git a/evm/evm-processor/src/processor.ts b/evm/evm-processor/src/processor.ts index c7c15f5f3..61bfa63ca 100644 --- a/evm/evm-processor/src/processor.ts +++ b/evm/evm-processor/src/processor.ts @@ -118,6 +118,8 @@ export interface PortalSettings { * Request timeout in ms */ requestTimeout?: number + + retryAttempts?: number bufferThreshold?: number @@ -542,7 +544,8 @@ export class EvmBatchProcessor { new PortalClient({ http, url: archive.url, - queryTimeout: archive.requestTimeout, + requestTimeout: archive.requestTimeout, + retryAttempts: archive.retryAttempts, bufferThreshold: archive.bufferThreshold, newBlockTimeout: archive.newBlockTimeout, log, diff 
--git a/substrate/substrate-processor/src/processor.ts b/substrate/substrate-processor/src/processor.ts index c8a40fd46..59b2152f5 100644 --- a/substrate/substrate-processor/src/processor.ts +++ b/substrate/substrate-processor/src/processor.ts @@ -105,6 +105,8 @@ export interface PortalSettings { * Request timeout in ms */ requestTimeout?: number + + retryAttempts?: number bufferThreshold?: number @@ -535,7 +537,8 @@ export class SubstrateBatchProcessor { client: new PortalClient({ http, url: options.url, - queryTimeout: options.requestTimeout, + requestTimeout: options.requestTimeout, + retryAttempts: options.retryAttempts, bufferThreshold: options.bufferThreshold, newBlockTimeout: options.newBlockTimeout, log, diff --git a/util/portal-client/src/client.ts b/util/portal-client/src/client.ts index 1abb7a025..8a33d4ec5 100644 --- a/util/portal-client/src/client.ts +++ b/util/portal-client/src/client.ts @@ -26,7 +26,8 @@ export interface PortalClientOptions { url: string http: HttpClient log?: Logger - queryTimeout?: number + requestTimeout?: number + retryAttempts?: number bufferThreshold?: number newBlockTimeout?: number } @@ -34,18 +35,20 @@ export interface PortalClientOptions { export class PortalClient { private url: URL private http: HttpClient - private queryTimeout: number + private requestTimeout: number private bufferThreshold: number private newBlockTimeout: number + private retryAttempts: number private log?: Logger constructor(options: PortalClientOptions) { this.url = new URL(options.url) this.log = options.log this.http = options.http - this.queryTimeout = options.queryTimeout ?? 180_000 + this.requestTimeout = options.requestTimeout ?? 180_000 this.bufferThreshold = options.bufferThreshold ?? 10 * 1024 * 1024 this.newBlockTimeout = options.newBlockTimeout ?? 120_000 + this.retryAttempts = options.retryAttempts ?? 
Infinity } private getDatasetUrl(path: string): string { @@ -60,8 +63,8 @@ export class PortalClient { async getMetadata(): Promise { let res: {real_time: boolean} = await this.http.get(this.getDatasetUrl('metadata'), { - retryAttempts: 3, - httpTimeout: 10_000, + retryAttempts: this.retryAttempts, + httpTimeout: this.requestTimeout, }) return { isRealTime: !!res.real_time, @@ -70,8 +73,8 @@ export class PortalClient { async getFinalizedHeight(): Promise { let res: string = await this.http.get(this.getDatasetUrl('finalized-stream/height'), { - retryAttempts: 3, - httpTimeout: 10_000, + retryAttempts: this.retryAttempts, + httpTimeout: this.requestTimeout, }) let height = parseInt(res) assert(Number.isSafeInteger(height)) @@ -83,8 +86,8 @@ export class PortalClient { return this.http .request('POST', this.getDatasetUrl(`finalized-stream`), { json: query, - retryAttempts: 3, - httpTimeout: this.queryTimeout, + retryAttempts: this.retryAttempts, + httpTimeout: this.requestTimeout, }) .catch( withErrorContext({ @@ -154,8 +157,8 @@ export class PortalClient { let res = await this.http .request('POST', this.getDatasetUrl(`finalized-stream`), { json: archiveQuery, - retryAttempts: 3, - httpTimeout: this.queryTimeout, + retryAttempts: this.retryAttempts, + httpTimeout: this.requestTimeout, stream: true, }) .catch( From cc6baca8ee48c766e96f8858ef5d6cd8bd893478 Mon Sep 17 00:00:00 2001 From: belopash Date: Fri, 20 Dec 2024 21:15:53 +0300 Subject: [PATCH 23/23] use ReadableStream for portal client stream --- .../portal-api_2024-12-16-21-16.json | 10 - common/config/rush/pnpm-lock.yaml | 44 ++- test/erc20-transfers/src/processor.ts | 22 +- util/portal-client/package.json | 2 +- util/portal-client/src/client.ts | 279 ++++++++++++------ 5 files changed, 222 insertions(+), 135 deletions(-) delete mode 100644 common/changes/@subsquid/http-client/portal-api_2024-12-16-21-16.json diff --git a/common/changes/@subsquid/http-client/portal-api_2024-12-16-21-16.json 
b/common/changes/@subsquid/http-client/portal-api_2024-12-16-21-16.json deleted file mode 100644 index ff35dda8c..000000000 --- a/common/changes/@subsquid/http-client/portal-api_2024-12-16-21-16.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "changes": [ - { - "packageName": "@subsquid/http-client", - "comment": "add `retry-after` header support", - "type": "minor" - } - ], - "packageName": "@subsquid/http-client" -} \ No newline at end of file diff --git a/common/config/rush/pnpm-lock.yaml b/common/config/rush/pnpm-lock.yaml index 901ed1a9b..9614d4bd8 100644 --- a/common/config/rush/pnpm-lock.yaml +++ b/common/config/rush/pnpm-lock.yaml @@ -122,6 +122,9 @@ dependencies: '@rush-temp/ops-xcm-typegen': specifier: file:./projects/ops-xcm-typegen.tgz version: file:projects/ops-xcm-typegen.tgz + '@rush-temp/portal-client': + specifier: file:./projects/portal-client.tgz + version: file:projects/portal-client.tgz '@rush-temp/raw-archive-validator': specifier: file:./projects/raw-archive-validator.tgz version: file:projects/raw-archive-validator.tgz @@ -6991,7 +6994,7 @@ packages: dev: false file:projects/data-test.tgz: - resolution: {integrity: sha512-vfocRZTAM/R/+T+dR+5OwAzOim5k+JWu+uWq5NqTINBuin6gQ4H6ufbTXjXeAvD3OvAyaUqS2TPYENntSFAHdA==, tarball: file:projects/data-test.tgz} + resolution: {integrity: sha512-s+hwonZVmCEShGErXBavkrZKxMnQxPGBIsraWM+8LK74aGUAD1nYqshUh9jh05RlwavSWEilNTo+BbI81fFriw==, tarball: file:projects/data-test.tgz} name: '@rush-temp/data-test' version: 0.0.0 dependencies: @@ -7100,7 +7103,7 @@ packages: dev: false file:projects/evm-processor.tgz: - resolution: {integrity: sha512-JEYc/4KCDHotB8lMj+SjVPzNPOPpzsWXWwreQy+hAjZQes/fWnMg4FGQ71NFQOYwZ/zL0k+imKUpvvcTQ69ALw==, tarball: file:projects/evm-processor.tgz} + resolution: {integrity: sha512-ZgPmppneDuwcDlXNnE+VhlGQxlqHoG/WtfYidVuwqgUKbX4jEKYKXJE3hzkBlNegrmIOvOnF3FStRgcuWucnvQ==, tarball: file:projects/evm-processor.tgz} name: '@rush-temp/evm-processor' version: 0.0.0 dependencies: @@ -7109,7 +7112,7 @@ 
packages: dev: false file:projects/evm-typegen.tgz: - resolution: {integrity: sha512-zze/6ha5YyYINCNglygQHh63jV8/nsVqPbKpZeeznnnEldNG2n6/WMajR0vdAnGDRTMR+nmP9/TveOPbDTX+mg==, tarball: file:projects/evm-typegen.tgz} + resolution: {integrity: sha512-3DiW6rLdLmAUqUB+P1iYHfRHi9123lAVabeIrJB0klQj16q70mvel+mrY9hCk1nUvxiEVEnPCueay3Ldf2ZQTg==, tarball: file:projects/evm-typegen.tgz} name: '@rush-temp/evm-typegen' version: 0.0.0 dependencies: @@ -7140,7 +7143,7 @@ packages: dev: false file:projects/fuel-data.tgz: - resolution: {integrity: sha512-6XW1Dvmzit570V/bEwNjiD5jJ9ZHbvJ3gyNoS6HE2qoR+C2/NPqPjMcev6xhOd/XS2/raaw6y8c6ywT3lTWTUw==, tarball: file:projects/fuel-data.tgz} + resolution: {integrity: sha512-LZwxHIPqKJw/590kj/j7tba2SzlHejChA/x2JTnSD9HW1ijOrrmit8TYH9zPf4mcW4gZTM9hcZFFtjIOfPLsCw==, tarball: file:projects/fuel-data.tgz} name: '@rush-temp/fuel-data' version: 0.0.0 dependencies: @@ -7149,7 +7152,7 @@ packages: dev: false file:projects/fuel-dump.tgz: - resolution: {integrity: sha512-P93UXjAjSIwTD/ZyemYajxts+j65qTByNGxAApyDXZOWywHdt0K3EPnDyLDz4/gm8LG6OtalcJfOhyPU/iKk2A==, tarball: file:projects/fuel-dump.tgz} + resolution: {integrity: sha512-ImKKGnItSKiCytmEqLaz148rhsH0b2kxYWkAMAR+JN3s89/rVDLKcFrqOh6CJHinIeZ1/zcT4ZoFzVXyYIS9mQ==, tarball: file:projects/fuel-dump.tgz} name: '@rush-temp/fuel-dump' version: 0.0.0 dependencies: @@ -7216,7 +7219,7 @@ packages: dev: false file:projects/fuel-stream.tgz: - resolution: {integrity: sha512-iEuYdfArMof7F87gKvpnuGx4j/PzsgkMrruS29W2UTx+7FhwkUnujQxqLQx2E2++SuKSb18Ga+eI+GP0lsl07g==, tarball: file:projects/fuel-stream.tgz} + resolution: {integrity: sha512-6J2VBe48XWIqSZ3M8lZtBrsHZr+p8zRTofEkolzhhT4kighSSCG/HVPp3tp1ZXc5rPGpQGT/JZeCArOsGo3EhQ==, tarball: file:projects/fuel-stream.tgz} name: '@rush-temp/fuel-stream' version: 0.0.0 dependencies: @@ -7225,7 +7228,7 @@ packages: dev: false file:projects/gql-test-client.tgz(graphql@15.8.0): - resolution: {integrity: 
sha512-JGE+gV8EgQ8u90IfJW06yab+oC1gw+HsJdwvSj6LdzY6dbtlIDXNRFeJMV988eLN8IxLSdDhd98h4YYt6n3suw==, tarball: file:projects/gql-test-client.tgz} + resolution: {integrity: sha512-bivyHNQZ2H4YrNdCo4dwEc53cIe+iXpU2UF6vMjBwbgHqiIbrhGpMbtzgKoZLdNAkbqchh0E543pkDQy318iow==, tarball: file:projects/gql-test-client.tgz} id: file:projects/gql-test-client.tgz name: '@rush-temp/gql-test-client' version: 0.0.0 @@ -7393,6 +7396,15 @@ packages: typescript: 5.5.4 dev: false + file:projects/portal-client.tgz: + resolution: {integrity: sha512-pbgQKxXzHf+VnuzfaxJ6HPz0cHEdqcufxV+ztN/6aTP4tDaAaHGnS7iKdUZNmj/ZOxhkeMRPrxdjsJQQyuy+Iw==, tarball: file:projects/portal-client.tgz} + name: '@rush-temp/portal-client' + version: 0.0.0 + dependencies: + '@types/node': 18.19.0 + typescript: 5.5.4 + dev: false + file:projects/raw-archive-validator.tgz: resolution: {integrity: sha512-ySuPCdXOui/7IYLbvb6mbtiZaYR6jo96wxUe+TcVy9RsNpXKCjKMESmmaIbL2rHFty95SClZrwKkchoKdNScWw==, tarball: file:projects/raw-archive-validator.tgz} name: '@rush-temp/raw-archive-validator' @@ -7404,7 +7416,7 @@ packages: dev: false file:projects/rpc-client.tgz: - resolution: {integrity: sha512-B54boDboO5+HelVasKnnBZs/VahkkLvB9ETLN4nlILcTBwv2SZP0UbC4VYNaSRm5n1PtYb52954pqKlNkEecmA==, tarball: file:projects/rpc-client.tgz} + resolution: {integrity: sha512-T49gc/C/cmRUB956khwoKu41Izdk4BPYLlFBZAWkPn0xDje0kbmmjb76WyueHTu7mrjvSdjVNqrIkg/RYxGV8Q==, tarball: file:projects/rpc-client.tgz} name: '@rush-temp/rpc-client' version: 0.0.0 dependencies: @@ -7580,7 +7592,7 @@ packages: dev: false file:projects/solana-stream.tgz: - resolution: {integrity: sha512-xt4F3+TWZqYkHFwWyxO9FFeCi1ZwfDjgt3AkM9qPwhnxBfyawaGdEeTUwfJtPJsNrxaroMLXmLRFmKp65iXO8w==, tarball: file:projects/solana-stream.tgz} + resolution: {integrity: sha512-krZzJP5QBey2MWE6BMGBn4fRnV4P1weoVts3iAyfNNYs+ukKIZLn76qFrzJ8yIpYlZUF4tq49CK0v0jnZ+XHEA==, tarball: file:projects/solana-stream.tgz} name: '@rush-temp/solana-stream' version: 0.0.0 dependencies: @@ -7590,7 +7602,7 @@ packages: dev: 
false file:projects/solana-typegen.tgz: - resolution: {integrity: sha512-Qva321wiPxpOzXBUf6Qj5vlQ2eZILvwFdBCfchCmxQY7EqzLILY+FjMfNcj/VzaA3tTnfp8adYgO1YfoKHdCMw==, tarball: file:projects/solana-typegen.tgz} + resolution: {integrity: sha512-P2R//97dE6hHXv65zV5e3SuD+ArfjY1jxt9Ct2j5ckibvec646B47UHZsBRTlu6u9tVJSJIFth4OvWa7CogkYw==, tarball: file:projects/solana-typegen.tgz} name: '@rush-temp/solana-typegen' version: 0.0.0 dependencies: @@ -7687,7 +7699,7 @@ packages: dev: false file:projects/starknet-stream.tgz: - resolution: {integrity: sha512-WSp/9Dd2y5+GeN0F01Az1HS5BtnOAjlDQ6o1qwlUptfoYMB6DBf4udXGAA7i2Sj3AijE+t3pAxoRXkX82Y3GmA==, tarball: file:projects/starknet-stream.tgz} + resolution: {integrity: sha512-xOStr+km+Cj7Q9yY8YNp0ledJQhcYDzparQYEVgg55Na1fPHHJls2qvHiGb5mqofRjVfQlYsmWkD1QeyXpojUg==, tarball: file:projects/starknet-stream.tgz} name: '@rush-temp/starknet-stream' version: 0.0.0 dependencies: @@ -7760,7 +7772,7 @@ packages: dev: false file:projects/substrate-processor.tgz: - resolution: {integrity: sha512-bv9yCssIgRcDJfSIiJqw97p9t03FHIBMk5Qvfs3cQuN9MOOCB4retTUPhzT+yhgtD1czIriXIcAgmGBgcEeOHw==, tarball: file:projects/substrate-processor.tgz} + resolution: {integrity: sha512-i4cnuhfZZGKSH8as/BE4usnpoRdUwlB+LS6PONw4c8zpX5pl1yATKfDZiBOwUudAuZyFv514fXc0ejOx5y3/zw==, tarball: file:projects/substrate-processor.tgz} name: '@rush-temp/substrate-processor' version: 0.0.0 dependencies: @@ -7782,7 +7794,7 @@ packages: dev: false file:projects/substrate-typegen.tgz: - resolution: {integrity: sha512-HDrlRIPga2gKlNtr8I+7jJPb4UMygW8+7BEIBJsIOVjpk/jPBQX5EjWOg6bmuiL6PtODQTN4gDutgrmptMOLZg==, tarball: file:projects/substrate-typegen.tgz} + resolution: {integrity: sha512-hYFLl7+BgkKy15M2J31XXmpXDo67jmhmh1/j/C0uJbDPwiTKkLleFaDM9bPBgynB+WF3AmGv9XUkvCGvLawzbQ==, tarball: file:projects/substrate-typegen.tgz} name: '@rush-temp/substrate-typegen' version: 0.0.0 dependencies: @@ -7792,7 +7804,7 @@ packages: dev: false file:projects/tron-data.tgz: - resolution: {integrity: 
sha512-TSoknC0M1/aV4Jg070uz8i/V4YBRBcXkZzrSZwc1SaTQUOYVtQh8UpBdQyJLcRlVTi+K6SZtUM99wsE/VJgB3Q==, tarball: file:projects/tron-data.tgz} + resolution: {integrity: sha512-AnZ2mjIhDmxqQ6je661F2AzDRvUJHoZTkdqa0lWo5QScjjvvdEOGv42VAGXqPuYe4n4gY4Dj+K/eFzfeDCAQ+Q==, tarball: file:projects/tron-data.tgz} name: '@rush-temp/tron-data' version: 0.0.0 dependencies: @@ -7840,7 +7852,7 @@ packages: dev: false file:projects/tron-processor.tgz: - resolution: {integrity: sha512-jxOST5hrGQAEkbehlZuOZPDQ47SX+nQ1u0vp0UAmaj5XcgxoRm3u9oX807X2YOWxAfgrjX7fBWeIassnFewLVg==, tarball: file:projects/tron-processor.tgz} + resolution: {integrity: sha512-hyWAMCO/NbpJPmnw4Jo+CpML5m6wcS1j3gXe+IL7nZYUAjZzvNEa4NCtsaKaVRSNR1qr8kNz1++56Kt2VvQQ1A==, tarball: file:projects/tron-processor.tgz} name: '@rush-temp/tron-processor' version: 0.0.0 dependencies: @@ -7995,7 +8007,7 @@ packages: dev: false file:projects/util-internal-archive-client.tgz: - resolution: {integrity: sha512-cq/2eIibQ3DuQCV1bCylf0IPexvdRTf27x7BhOjR4lkGMS6DeqlF0qgVfct9WY4TCHljIey09EMnRwTJzn+7uQ==, tarball: file:projects/util-internal-archive-client.tgz} + resolution: {integrity: sha512-aTWT++wNI2gux8EwA+nSZgiaU6tM7Rh+DVnumLCpJF3z44ujTs0ipFTbnktt0pVOtkbN7G7oyTrZUXUMQgjthA==, tarball: file:projects/util-internal-archive-client.tgz} name: '@rush-temp/util-internal-archive-client' version: 0.0.0 dependencies: diff --git a/test/erc20-transfers/src/processor.ts b/test/erc20-transfers/src/processor.ts index 9533af597..fadd48850 100644 --- a/test/erc20-transfers/src/processor.ts +++ b/test/erc20-transfers/src/processor.ts @@ -10,12 +10,12 @@ const CONTRACT = '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48'.toLowerCase() const processor = new EvmBatchProcessor() .setPortal({ url: 'https://portal.sqd.dev/datasets/ethereum-mainnet', - bufferThreshold: 100 * 1024 * 1024, + bufferThreshold: 10 * 1024 * 1024, newBlockTimeout: 5000, }) .setRpcEndpoint('https://rpc.ankr.com/eth') .setFinalityConfirmation(500) - .setBlockRange({from: 20801368}) + 
.setBlockRange({from: 0}) .setFields({ block: {size: true}, log: {transactionHash: true}, @@ -32,20 +32,20 @@ processor.run(new TypeormDatabase({supportHotBlocks: true}), async ctx => { for (let block of ctx.blocks) { for (let log of block.logs) { if (log.address == CONTRACT && erc20.events.Transfer.is(log)) { - // let {from, to, value} = erc20.events.Transfer.decode(log) + let {from, to, value} = erc20.events.Transfer.decode(log) transfers.push(new Transfer({ - // id: log.id, - // blockNumber: block.header.height, - // timestamp: new Date(block.header.timestamp), - // tx: log.transactionHash, - // from, - // to, - // amount: value + id: log.id, + blockNumber: block.header.height, + timestamp: new Date(block.header.timestamp), + tx: log.transactionHash, + from, + to, + amount: value })) } } } ctx.log.info(`found ${transfers.length} transfers`) - // await ctx.store.insert(transfers) + await ctx.store.insert(transfers) }) diff --git a/util/portal-client/package.json b/util/portal-client/package.json index c838d681b..3cd2c4134 100644 --- a/util/portal-client/package.json +++ b/util/portal-client/package.json @@ -34,6 +34,6 @@ "@subsquid/http-client": "^1.5.0", "@subsquid/logger": "^1.3.3", "@types/node": "^18.18.14", - "typescript": "~5.3.2" + "typescript": "~5.5.4" } } diff --git a/util/portal-client/src/client.ts b/util/portal-client/src/client.ts index 8a33d4ec5..607f4b1d1 100644 --- a/util/portal-client/src/client.ts +++ b/util/portal-client/src/client.ts @@ -1,6 +1,6 @@ import {HttpClient} from '@subsquid/http-client' import type {Logger} from '@subsquid/logger' -import {AsyncQueue, ensureError, last, wait, withErrorContext} from '@subsquid/util-internal' +import {AsyncQueue, last, wait, withErrorContext} from '@subsquid/util-internal' import {splitLines} from '@subsquid/util-internal-archive-layout' import {addTimeout, TimeoutError} from '@subsquid/util-timeout' import assert from 'assert' @@ -29,14 +29,20 @@ export interface PortalClientOptions { 
requestTimeout?: number retryAttempts?: number bufferThreshold?: number + newBlockThreshold?: number + durationThreshold?: number newBlockTimeout?: number + headPollInterval?: number } export class PortalClient { private url: URL private http: HttpClient private requestTimeout: number + private headPollInterval: number private bufferThreshold: number + private newBlockThreshold: number + private durationThreshold: number private newBlockTimeout: number private retryAttempts: number private log?: Logger @@ -46,7 +52,10 @@ export class PortalClient { this.log = options.log this.http = options.http this.requestTimeout = options.requestTimeout ?? 180_000 + this.headPollInterval = options.headPollInterval ?? 5_000 this.bufferThreshold = options.bufferThreshold ?? 10 * 1024 * 1024 + this.newBlockThreshold = options.newBlockThreshold ?? 500 + this.durationThreshold = options.durationThreshold ?? 5_000 this.newBlockTimeout = options.newBlockTimeout ?? 120_000 this.retryAttempts = options.retryAttempts ?? Infinity } @@ -104,133 +113,209 @@ export class PortalClient { }) } - async *finalizedStream( + finalizedStream( query: Q, stopOnHead = false - ): AsyncIterable { - let queue = new AsyncQueue(1) - let bufferSize = 0 - let isReady = false - let cache: B[] = [] - - const getBuffer = () => { - if (queue.isClosed()) return - let peeked = queue.peek() - // FIXME: is it a valid case? - if (peeked instanceof Error) return - - // buffer has been consumed, we need to reset - if (isReady && !peeked) { - reset() - } + ): ReadableStream { + let headPollInterval = this.headPollInterval + let newBlockThreshold = this.newBlockThreshold + let durationThreshold = this.durationThreshold + let bufferThreshold = this.bufferThreshold + let newBlockTimeout = this.newBlockTimeout - return peeked ?? 
cache - } + let buffer = new BlocksBuffer(bufferThreshold) + let abortStream = new AbortController() - const reset = () => { - bufferSize = 0 - isReady = false - cache.length = 0 - } + const ingest = async () => { + let startBlock = query.fromBlock + let endBlock = query.toBlock ?? Infinity - const setReady = () => { - if (queue.isClosed()) return - if (isReady) return - queue.forcePut(cache) - isReady = true - cache = [] - } + let heartbeat: HeartBeat | undefined + let timeout: ReturnType | undefined + let reader: ReadableStreamDefaultReader | undefined - const waitForReset = async () => { - if (queue.isClosed()) return - await queue.wait() - reset() - } + function abort() { + return reader?.cancel() + } - const ingest = async () => { - let fromBlock = query.fromBlock - let toBlock = query.toBlock ?? Infinity - - while (fromBlock <= toBlock) { - let archiveQuery = {...query, fromBlock} - - let res = await this.http - .request('POST', this.getDatasetUrl(`finalized-stream`), { - json: archiveQuery, - retryAttempts: this.retryAttempts, - httpTimeout: this.requestTimeout, - stream: true, - }) - .catch( - withErrorContext({ - archiveQuery, + while (startBlock <= endBlock && !abortStream.signal.aborted) { + try { + let archiveQuery = {...query, fromBlock: startBlock} + let res = await this.http + .request('POST', this.getDatasetUrl('finalized-stream'), { + json: archiveQuery, + httpTimeout: this.requestTimeout, + retryAttempts: this.retryAttempts, + stream: true, + abort: abortStream.signal, }) - ) + .catch( + withErrorContext({ + query: archiveQuery, + }) + ) - // no blocks left - if (res.status == 204) { - if (stopOnHead) return + if (res.status == 204) { + if (stopOnHead) break + await wait(headPollInterval, abortStream.signal) + continue + } - await wait(1000) - continue - } + abortStream.signal.addEventListener('abort', abort, {once: true}) - try { - let stream = splitLines(res.body) + let heartbeatInterval = Math.ceil(newBlockThreshold / 4) + heartbeat = new 
HeartBeat((diff) => { + if (diff > newBlockThreshold) { + buffer.ready() + } + }, heartbeatInterval) + timeout = setTimeout(() => buffer.ready(), durationThreshold) + + let stream = addStreamTimeout(Readable.toWeb(res.body) as ReadableStream, newBlockTimeout) + let reader = splitLines(stream) while (true) { - let lines = await addTimeout(stream.next(), this.newBlockTimeout) + let lines = await reader.next() if (lines.done) break - let buffer = getBuffer() - if (buffer == null) break + heartbeat.pulse() + let size = 0 let blocks = lines.value.map((line) => { - bufferSize += line.length - return JSON.parse(line) as B + let block = JSON.parse(line) as B + size += line.length + return block }) - // FIXME: won't it overflow stack? - buffer.push(...blocks) - - fromBlock = last(blocks).header.number + 1 - - if (bufferSize > this.bufferThreshold) { - setReady() - await waitForReset() - } - } + await buffer.put(blocks, size) - if (bufferSize > 0) { - setReady() + let lastBlock = last(blocks).header.number + startBlock = lastBlock + 1 } } catch (err) { - if (err instanceof TimeoutError) { - this.log?.warn( - `resetting stream, because we haven't seen a new blocks for ${this.newBlockTimeout} ms` - ) + if (abortStream.signal.aborted) { + // FIXME: should we do anything here? + } else if (err instanceof TimeoutError) { + this.log?.warn(`resetting stream due to inactivity for ${this.newBlockTimeout} ms`) } else { throw err } } finally { - // FIXME: is it needed? 
- res.body.destroy() + await reader?.cancel().catch(() => {}) + heartbeat?.stop() + buffer.ready() + clearTimeout(timeout) + abortStream.signal.removeEventListener('abort', abort) } } } - ingest().then( - () => queue.close(), - (err) => { - if (queue.isClosed()) return - queue.forcePut(ensureError(err)) - queue.close() - } - ) + return new ReadableStream({ + start: async (controller) => { + ingest() + .then(() => { + buffer.close() + }) + .catch((error) => { + if (buffer.isClosed()) return + controller.error(error) + buffer.close() + }) + }, + pull: async (controller) => { + let value = await buffer.take() + if (value) { + controller.enqueue(value) + } else { + controller.close() + } + }, + cancel: () => { + abortStream.abort() + }, + }) + } +} + +class BlocksBuffer { + private blocks: B[] = [] + private queue = new AsyncQueue(1) + private size = 0 + + constructor(private bufferSizeThreshold: number) {} - for await (let valueOrError of queue.iterate()) { - if (valueOrError instanceof Error) throw valueOrError - yield valueOrError + async put(blocks: B[], size: number) { + this.blocks.push(...blocks) + this.size += size + + if (this.size > this.bufferSizeThreshold) { + this.ready() + await this.queue.wait() } } + + async take() { + let value = await this.queue.take() + this.blocks = [] + this.size = 0 + return value + } + + ready() { + if (this.blocks.length == 0) return + this.queue.forcePut(this.blocks) + } + + close() { + return this.queue.close() + } + + isClosed() { + return this.queue.isClosed() + } +} + +class HeartBeat { + private interval: ReturnType | undefined + private timestamp: number + + constructor(fn: (diff: number) => void, ms?: number) { + this.timestamp = Date.now() + this.interval = setInterval(() => fn(Date.now() - this.timestamp), ms) + } + + pulse() { + this.timestamp = Date.now() + } + + stop() { + clearInterval(this.interval) + } +} + +function addStreamTimeout( + stream: ReadableStream, + ms: number, + onTimeout?: () => Error | undefined | 
void +): ReadableStream { + const reader = stream.getReader() + + return new ReadableStream({ + pull: async (c) => { + try { + let data = await addTimeout(reader.read(), ms, onTimeout) + if (data.done) { + c.close() + } else { + c.enqueue(data.value) + } + } catch (e) { + c.error(e) + await reader.cancel() + } + }, + cancel: async (reason) => { + await reader.cancel(reason) + }, + }) }