Skip to content

Commit

Permalink
Load data in batches
Browse files Browse the repository at this point in the history
  • Loading branch information
samwillis committed Dec 2, 2024
1 parent a28c961 commit fb18325
Showing 1 changed file with 25 additions and 17 deletions.
42 changes: 25 additions & 17 deletions demos/linearlite/db/load_data.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ if (!process.env.DATABASE_URL) {

// Postgres connection string, taken from the environment.
const DATABASE_URL = process.env.DATABASE_URL
// Number of issues to generate. Environment variables are always strings, so
// coerce explicitly to a number; an unset or empty variable falls back to 512.
const ISSUES_TO_LOAD = Number(process.env.ISSUES_TO_LOAD || 512)
// Number of issues handled per transaction; also passed to batchInsert as its
// batch-size argument.
const BATCH_SIZE = 1000
const issues = generateIssues(ISSUES_TO_LOAD)

console.info(`Connecting to Postgres at ${DATABASE_URL}`)
Expand All @@ -30,23 +31,30 @@ const issueCount = issues.length
let commentCount = 0

try {
// Load every issue and every comment inside a single transaction.
await sql.begin(async (tx) => {
  // Postpone deferrable constraint checks (e.g. foreign keys) until commit.
  // NOTE(review): this only affects constraints declared DEFERRABLE - confirm
  // against the schema.
  await tx`SET CONSTRAINTS ALL DEFERRED`

  // Insert issues, stripped of their nested `comments` arrays.
  const issuesData = issues.map(({ comments: _, ...rest }) => rest)
  // Column names are read from the first row, so skip the insert entirely when
  // there are no rows (Object.keys(undefined) would throw).
  if (issuesData.length > 0) {
    const issueColumns = Object.keys(issuesData[0])
    await batchInsert(tx, 'issue', issueColumns, issuesData)
  }

  // Insert comments, flattened across all issues.
  const allComments = issues.flatMap((issue) => issue.comments)
  commentCount = allComments.length
  // Same guard: a data set without any comments must not abort the load.
  if (allComments.length > 0) {
    const commentColumns = Object.keys(allComments[0])
    await batchInsert(tx, 'comment', commentColumns, allComments)
  }
})

process.stdout.write(`\n`)
// Process data in batches: each slice of up to BATCH_SIZE issues, together
// with the comments belonging to those issues, is written in its own
// transaction.
for (let i = 0; i < issues.length; i += BATCH_SIZE) {
  // Never empty: the loop condition guarantees at least one issue at index i.
  const issueBatch = issues.slice(i, i + BATCH_SIZE)

  await sql.begin(async (tx) => {
    // Postpone deferrable constraint checks (e.g. foreign keys) until commit.
    // NOTE(review): this only affects constraints declared DEFERRABLE -
    // confirm against the schema.
    await tx`SET CONSTRAINTS ALL DEFERRED`

    // Insert issues, stripped of their nested `comments` arrays.
    const issuesData = issueBatch.map(({ comments: _, ...rest }) => rest)
    const issueColumns = Object.keys(issuesData[0])
    await batchInsert(tx, 'issue', issueColumns, issuesData, BATCH_SIZE)

    // Insert related comments. Column names are read from the first comment,
    // so skip the insert when this batch has none; otherwise
    // Object.keys(undefined) would throw and abort the batch's transaction.
    const batchComments = issueBatch.flatMap((issue) => issue.comments)
    if (batchComments.length > 0) {
      const commentColumns = Object.keys(batchComments[0])
      await batchInsert(tx, 'comment', commentColumns, batchComments, BATCH_SIZE)
    }

    commentCount += batchComments.length
  })

  const batchNumber = Math.floor(i / BATCH_SIZE) + 1
  const processedIssues = Math.min(i + BATCH_SIZE, issues.length)
  process.stdout.write(`\nProcessed batch ${batchNumber}: ${processedIssues} of ${issues.length} issues\n`)
}

console.info(`Loaded ${issueCount} issues with ${commentCount} comments.`)
} catch (error) {
console.error('Error loading data:', error)
Expand Down

0 comments on commit fb18325

Please sign in to comment.