Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add import data from gql script #247

Open
wants to merge 16 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
/vendor
/wallets
test-results.xml
/scripts/import-data/bundles
/scripts/import-data/transactions

# Generated docs
/docs/sqlite/bundles
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ services:
- ENABLE_BACKGROUND_DATA_VERIFICATION=${ENABLE_BACKGROUND_DATA_VERIFICATION:-}
- BACKGROUND_DATA_VERIFICATION_INTERVAL_SECONDS=${BACKGROUND_DATA_VERIFICATION_INTERVAL_SECONDS:-}
- CLICKHOUSE_URL=${CLICKHOUSE_URL:-}
- BUNDLE_DATA_IMPORTER_QUEUE_SIZE=${BUNDLE_DATA_IMPORTER_QUEUE_SIZE:-}
networks:
- ar-io-network
depends_on:
Expand Down
366 changes: 366 additions & 0 deletions scripts/import-data/fetch-data-gql.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,366 @@
/**
* AR.IO Gateway
* Copyright (C) 2022-2023 Permanent Data Solutions, Inc. All Rights Reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import * as fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { fetchLatestBlockHeight, fetchWithRetry } from './utils.js';
const args = process.argv.slice(2);
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

let GQL_ENDPOINT = 'https://arweave-search.goldsky.com/graphql';
let MIN_BLOCK_HEIGHT = 0;
let MAX_BLOCK_HEIGHT: number | undefined;
let BLOCK_RANGE_SIZE = 100;
let BUNDLES_FETCH_ROOT_TX = true;
let GQL_TAGS = `[
{
name: "App-Name"
values: [
"ArDrive-App"
"ArDrive-Web"
"ArDrive-CLI"
"ArDrive-Desktop"
"ArDrive-Mobile"
"ArDrive-Core"
"ArDrive-Sync"
]
}
]`;
djwhitt marked this conversation as resolved.
Show resolved Hide resolved

args.forEach((arg, index) => {
djwhitt marked this conversation as resolved.
Show resolved Hide resolved
switch (arg) {
case '--gqlEndpoint':
if (args[index + 1]) {
GQL_ENDPOINT = args[index + 1];
} else {
console.error('Missing value for --gqlEndpoint');
process.exit(1);
}
break;
case '--minHeight':
if (args[index + 1]) {
MIN_BLOCK_HEIGHT = parseInt(args[index + 1], 10);
} else {
console.error('Missing value for --minHeight');
process.exit(1);
}
break;
case '--maxHeight':
if (args[index + 1]) {
MAX_BLOCK_HEIGHT = parseInt(args[index + 1], 10);
} else {
console.error('Missing value for --maxHeight');
process.exit(1);
}
break;
case '--blockRangeSize':
if (args[index + 1]) {
BLOCK_RANGE_SIZE = parseInt(args[index + 1], 10);
} else {
console.error('Missing value for --blockRangeSize');
process.exit(1);
}
break;
case '--fetchOnlyRootTx':
if (args[index + 1]) {
BUNDLES_FETCH_ROOT_TX = args[index + 1] === 'true';
} else {
console.error('Missing value for --fetchOnlyRootTx');
process.exit(1);
}
break;
case '--gqlTags':
if (args[index + 1]) {
GQL_TAGS = args[index + 1];
} else {
console.error('Missing value for --gqlTags');
process.exit(1);
}
break;
default:
break;
}
});

type BlockRange = { min: number; max: number };
const getBlockRanges = ({
minBlock,
maxBlock,
rangeSize,
}: {
minBlock: number;
maxBlock: number;
rangeSize: number;
}) => {
if (minBlock >= maxBlock || rangeSize <= 0) {
throw new Error(
'Invalid input: ensure minBlock < maxBlock and rangeSize > 0',
);
}
karlprieb marked this conversation as resolved.
Show resolved Hide resolved

const ranges: BlockRange[] = [];
let currentMin = minBlock;

while (currentMin < maxBlock) {
const currentMax = Math.min(currentMin + rangeSize - 1, maxBlock);
ranges.push({ min: currentMin, max: currentMax });
currentMin = currentMax + 1;
}

return ranges;
};

/**
 * Build one page of the transactions GraphQL query: transactions within the
 * inclusive block range [minBlock, maxBlock] matching GQL_TAGS, 100 edges
 * per page, sorted by ascending height. `cursor` resumes after the last
 * edge of a previous page; an empty string starts from the beginning.
 */
const txsGqlQuery = ({
  minBlock,
  maxBlock,
  cursor,
}: {
  minBlock: number;
  maxBlock: number;
  cursor?: string;
}) => `
query {
  transactions(
    block: {
      min: ${minBlock}
      max: ${maxBlock}
    }
    tags: ${GQL_TAGS}
    first: 100
    sort: HEIGHT_ASC
    after: "${cursor ?? ''}"
  ) {
    pageInfo {
      hasNextPage
    }
    edges {
      cursor
      node {
        id
        bundledIn {
          id
        }
        block {
          height
        }
      }
    }
  }
}
`;

// GraphQL query resolving a single transaction's direct parent bundle
// (bundledIn is null for an L1 transaction).
const rootTxGqlQuery = (id: string) => `
query {
  transaction(
    id: "${id}"
  ) {
    bundledIn {
      id
    }
  }
}
`;

/**
 * Walk up the bundle hierarchy starting from `txId` until a transaction
 * with no parent bundle is found, and return that root (L1) transaction id.
 *
 * @param txId - transaction (or bundle) id to start from
 * @returns id of the topmost transaction that is not itself bundled
 * @throws on GraphQL errors, when a transaction id cannot be found on the
 *   gateway (previously a raw TypeError on `data.transaction.bundledIn`),
 *   or when the parent chain contains a cycle (previously an infinite loop).
 */
const getRootTxId = async (txId: string) => {
  const seen = new Set<string>();
  let currentId = txId;

  for (;;) {
    // A repeated id means the gateway reported a cyclic parent chain.
    if (seen.has(currentId)) {
      throw new Error(`Cycle detected while resolving root tx of ${txId}`);
    }
    seen.add(currentId);

    const response = await fetchWithRetry(GQL_ENDPOINT, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        query: rootTxGqlQuery(currentId),
      }),
    });

    const result = await response.json();

    if (result.errors) {
      throw new Error(`GraphQL error: ${JSON.stringify(result.errors)}`);
    }

    // Unknown ids come back as `transaction: null`; fail with context
    // instead of crashing on property access.
    if (result.data?.transaction == null) {
      throw new Error(`Transaction ${currentId} not found on gateway`);
    }

    const bundleId = result.data.transaction.bundledIn?.id;

    // No parent bundle: currentId is the root transaction.
    if (bundleId === undefined) {
      return currentId;
    }
    currentId = bundleId;
  }
};

/**
 * Execute one page of the transactions query against GQL_ENDPOINT and
 * return the parsed `data` payload.
 *
 * @throws when the response body carries a GraphQL `errors` array.
 */
const fetchGql = async ({
  minBlock,
  maxBlock,
  cursor,
}: {
  minBlock: number;
  maxBlock: number;
  cursor?: string;
}) => {
  const body = JSON.stringify({
    query: txsGqlQuery({ minBlock, maxBlock, cursor }),
  });

  const response = await fetchWithRetry(GQL_ENDPOINT, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body,
  });

  const payload = await response.json();

  if (payload.errors) {
    throw new Error(`GraphQL error: ${JSON.stringify(payload.errors)}`);
  }

  return payload.data;
};

type BlockTransactions = Map<number, Set<string>>;
const getTransactionsForRange = async ({ min, max }: BlockRange) => {
let cursor: string | undefined;
let hasNextPage = true;
const transactions: BlockTransactions = new Map();
const bundles: BlockTransactions = new Map();

while (hasNextPage) {
const {
transactions: { edges, pageInfo },
} = await fetchGql({
minBlock: min,
maxBlock: max,
cursor,
});

hasNextPage = pageInfo.hasNextPage;
cursor = hasNextPage ? edges[edges.length - 1].cursor : undefined;

karlprieb marked this conversation as resolved.
Show resolved Hide resolved
for (const edge of edges) {
const blockHeight = edge.node.block.height;
const bundleId = edge.node.bundledIn?.id;
const id = edge.node.id;

if (!transactions.has(blockHeight)) {
transactions.set(blockHeight, new Set());
}
if (!bundles.has(blockHeight)) {
bundles.set(blockHeight, new Set());
}

if (bundleId !== undefined) {
if (BUNDLES_FETCH_ROOT_TX) {
const rootTxId = await getRootTxId(bundleId);
bundles.get(blockHeight)?.add(rootTxId);
} else {
bundles.get(blockHeight)?.add(bundleId);
}
} else {
transactions.get(blockHeight)?.add(id);
}
}
}

return { transactions, bundles };
};

/**
 * Persist each height's id set as `<outputDir>/<height>.json` containing a
 * JSON array of ids. Heights with empty sets are skipped. Creates
 * `outputDir` (including parents) when missing.
 *
 * @throws after logging when the directory or any file cannot be written.
 */
const writeTransactionsToFile = async ({
  outputDir,
  transactions,
}: {
  outputDir: string;
  transactions: BlockTransactions;
}) => {
  try {
    await fs.mkdir(outputDir, { recursive: true });
  } catch (error) {
    console.error(`Failed to create directory: ${error}`);
    throw error;
  }

  for (const [height, ids] of transactions.entries()) {
    if (ids.size === 0) {
      continue;
    }

    const target = path.join(outputDir, `${height}.json`);

    try {
      await fs.writeFile(target, JSON.stringify([...ids]));
    } catch (error) {
      console.error(`Failed to write ${target}: ${error}`);
      throw error;
    }
  }
};

// Total number of ids across every height in the map.
const countTransactions = (map: BlockTransactions) =>
  [...map.values()].reduce((total, ids) => total + ids.size, 0);

/**
 * Entry point: resolve the maximum height when not supplied, split the
 * window into ranges, then fetch and persist matching transaction and
 * bundle ids range by range, logging cumulative progress.
 */
(async () => {
  if (MAX_BLOCK_HEIGHT === undefined) {
    MAX_BLOCK_HEIGHT = await fetchLatestBlockHeight();
  }

  console.log(
    `Starting to fetch transactions and bundles from block ${MIN_BLOCK_HEIGHT} to ${MAX_BLOCK_HEIGHT}`,
  );

  const blockRanges = getBlockRanges({
    minBlock: MIN_BLOCK_HEIGHT,
    maxBlock: MAX_BLOCK_HEIGHT,
    rangeSize: BLOCK_RANGE_SIZE,
  });

  // Output locations are fixed relative to this script's directory.
  const txDir = path.join(__dirname, 'transactions');
  const bundleDir = path.join(__dirname, 'bundles');

  let txCount = 0;
  let bundleCount = 0;

  for (const range of blockRanges) {
    const { transactions, bundles } = await getTransactionsForRange(range);

    await writeTransactionsToFile({ outputDir: txDir, transactions });
    await writeTransactionsToFile({
      outputDir: bundleDir,
      transactions: bundles,
    });

    txCount += countTransactions(transactions);
    bundleCount += countTransactions(bundles);

    // Only log for ranges that actually produced data.
    if (transactions.size !== 0 || bundles.size !== 0) {
      console.log(
        `Transactions and bundles from block ${range.min} to ${range.max} saved!`,
      );
      console.log(
        `Saved transactions: ${txCount}, Saved bundles: ${bundleCount}`,
      );
    }
  }
})();
Loading
Loading