From a657629679534c0032228442a83cc1d75cac24e2 Mon Sep 17 00:00:00 2001 From: johnmeshulam <55348702+johnmeshulam@users.noreply.github.com> Date: Sat, 21 Dec 2024 22:32:40 +0200 Subject: [PATCH 1/3] Fix memory leaks in puppteer --- apps/backend/src/lib/export.ts | 274 +++++++++++++++++++++++++++++---- 1 file changed, 247 insertions(+), 27 deletions(-) diff --git a/apps/backend/src/lib/export.ts b/apps/backend/src/lib/export.ts index ffc36e35..7ec785fe 100644 --- a/apps/backend/src/lib/export.ts +++ b/apps/backend/src/lib/export.ts @@ -1,25 +1,189 @@ -import puppeteer from 'puppeteer'; +import puppeteer, { Browser, Page, PDFOptions } from 'puppeteer'; import jwt from 'jsonwebtoken'; import * as db from '@lems/database'; +import { WithId } from 'mongodb'; +import { SafeUser } from '@lems/types'; -export const getLemsWebpageAsPdf = async (path: string) => { - const url = process.env.LEMS_DOMAIN + path; - const user = await db.getUser({ username: 'admin' }); - const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] }); - const page = await browser.newPage(); +class BrowserManager { + private static instance: Browser | null = null; + private static activePages: Set = new Set(); + private static isShuttingDown = false; + private static initializationPromise: Promise | null = null; + private static pageTimeouts: Map = new Map(); + private static readonly MAX_PAGES = 25; // Limit concurrent pages + private static readonly PAGE_TIMEOUT = 5 * 60 * 1000; // 5 minutes - const jwtSecret = process.env.JWT_SECRET; - const token = jwt.sign( - { - userId: user._id - }, - jwtSecret, - { - issuer: 'FIRST', - expiresIn: 60 + private static readonly PUPPETEER_ARGS = [ + // Security + '--no-sandbox', // Disables Chrome sandbox. Required when running as root/in Docker. Security implications - use in trusted environments only. + + // Memory Management + '--disable-dev-shm-usage', // Prevents running out of memory in containers by not using /dev/shm. Essential for Docker. + '--disable-gpu', // Disables GPU hardware acceleration. Reduces memory usage and prevents issues in headless environments. + + // Performance Critical + '--disable-extensions', // Disables Chrome extensions. Reduces memory footprint and prevents interference. + '--disable-background-networking', // Prevents background network requests that can leak memory. + '--disable-background-timer-throttling', // Prevents timers being throttled in background tabs, ensuring consistent cleanup. + + // Recommended Additional Flags + '--disable-translate', // Disables the translation feature. Removes unnecessary overhead. + '--disable-sync', // Disables Chrome sync features. Removes unnecessary overhead. + '--disable-notifications' // Disables notification features. Removes unnecessary overhead. + ]; + + private static async initialize(): Promise { + if (!this.initializationPromise) { + this.initializationPromise = puppeteer + .launch({ + headless: true, + args: BrowserManager.PUPPETEER_ARGS + }) + .catch(error => { + this.initializationPromise = null; + throw error; + }); + } + return this.initializationPromise; + } + + public static async getBrowser(): Promise { + if (this.isShuttingDown) { + throw new Error('Browser manager is shutting down'); + } + + try { + if (!this.instance || !this.instance.isConnected()) { + this.instance = await this.initialize(); + } + return this.instance; + } catch (error) { + this.instance = null; + this.initializationPromise = null; + throw error; + } + } + + public static async createPage(): Promise { + if (this.activePages.size >= this.MAX_PAGES) { + throw new Error('Maximum number of concurrent pages reached'); + } + + const browser = await this.getBrowser(); + const page = await browser.newPage(); + + const timeout = setTimeout(() => { + this.closePage(page).catch(console.error); + }, this.PAGE_TIMEOUT); + + this.pageTimeouts.set(page, timeout); + this.activePages.add(page); + + return page; + } + + public static async closePage(page: Page): Promise { + try { + const timeout = this.pageTimeouts.get(page); + if (timeout) { + clearTimeout(timeout); + this.pageTimeouts.delete(page); + } + + if (!page.isClosed()) { + await page + .evaluate(() => { + window.stop(); + const elements = document.getElementsByTagName('*'); + for (let i = 0; i < elements.length; i++) { + const element = elements[i]; + element.remove(); + } + }) + .catch(console.error); + + await page.close(); + } + } catch (error) { + console.error('Error cleaning up page:', error); + } finally { + this.activePages.delete(page); + } + } + + public static async shutdown(): Promise { + if (this.isShuttingDown) return; + + this.isShuttingDown = true; + + try { + // Clear all timeouts + for (const timeout of this.pageTimeouts.values()) { + clearTimeout(timeout); + } + this.pageTimeouts.clear(); + + // Close all active pages + const closePagePromises = Array.from(this.activePages).map(page => + this.closePage(page).catch(console.error) + ); + await Promise.all(closePagePromises); + this.activePages.clear(); + + // Close browser instance + if (this.instance) { + await this.instance.close().catch(console.error); + this.instance = null; + } + + this.initializationPromise = null; + } finally { + this.isShuttingDown = false; + } + } +} + +// Graceful application shutdown handlers +const shutdownHandlers = ['exit', 'SIGINT', 'SIGTERM', 'uncaughtException', 'unhandledRejection']; +let isShuttingDown = false; + +shutdownHandlers.forEach(event => { + process.on(event, async error => { + if (isShuttingDown) return; + isShuttingDown = true; + + console.error(`Received ${event} signal`, error); + try { + await BrowserManager.shutdown(); + } catch (shutdownError) { + console.error('Error during shutdown:', shutdownError); + } finally { + if (event !== 'exit') { + process.exit(1); + } } - ); + }); +}); + +const withTimeout = async (promise: Promise, ms: number): Promise => { + let timeoutId: NodeJS.Timeout; + const timeoutPromise = new Promise((_, reject) => { + timeoutId = setTimeout(() => reject(new Error('Operation timed out')), ms); + }); + return Promise.race([promise, timeoutPromise]).finally(() => clearTimeout(timeoutId)); +}; + +const createAuthToken = async (user: WithId): Promise => { + const jwtSecret = process.env.JWT_SECRET; + if (!jwtSecret) throw new Error('JWT_SECRET is not configured'); + + return jwt.sign({ userId: user._id }, jwtSecret, { + issuer: 'FIRST', + expiresIn: 60 + }); +}; +const setupPageAuthentication = async (page: Page, url: string, token: string): Promise => { await page.setExtraHTTPHeaders({ Authorization: `Bearer ${token}` }); await page.setCookie({ url, @@ -29,19 +193,75 @@ export const getLemsWebpageAsPdf = async (path: string) => { secure: true, httpOnly: true }); +}; - await page.goto(url, { - waitUntil: ['load', 'domcontentloaded'] - }); - - await page.waitForNetworkIdle({ concurrency: 0, idleTime: 2000, timeout: 30000 }); - - const data = await page.pdf({ +export async function getLemsWebpageAsPdf( + path: string, + options: PDFOptions = { format: 'A4', margin: { top: '0.18in', bottom: '0.18in', right: '0.18in', left: '0.18in' }, printBackground: true - }); + } +): Promise { + let page: Page | null = null; - await browser.close(); - return data; -}; + try { + const domain = process.env.LEMS_DOMAIN; + if (!domain) { + throw new Error('LEMS_DOMAIN is not configured'); + } + + const url = domain + path; + const user = await db.getUser({ username: 'admin' }); + const token = await createAuthToken(user); + + page = await BrowserManager.createPage(); + + page.setDefaultNavigationTimeout(30000); + page.setDefaultTimeout(30000); + + await setupPageAuthentication(page, url, token); + + await page.setRequestInterception(true); + page.on('request', request => { + if (!page.isClosed()) { + // Only block media, websockets, and other non-essential resources + // Allow images, fonts, and stylesheets for proper PDF rendering + if (['media', 'websocket', 'other'].includes(request.resourceType())) { + request.abort(); + } else { + request.continue(); + } + } else { + request.abort(); + } + }); + + // Wait for network to be idle and content to load + await page.goto(url, { + waitUntil: ['networkidle0', 'domcontentloaded'], + timeout: 30000 + }); + + // Wait for fonts to load (important for PDF rendering) + await page.evaluate(() => document.fonts.ready); + + // Generate PDF with proper timeout + const data = await withTimeout( + page.pdf({ + ...options, + timeout: 60000 + }), + 75000 + ); + + return Buffer.from(data); + } catch (error) { + console.error('Error generating PDF:', error); + throw error; + } finally { + if (page) { + await BrowserManager.closePage(page); + } + } +} From 342be446de88ddf98b1f3012ee70911397be924e Mon Sep 17 00:00:00 2001 From: johnmeshulam <55348702+johnmeshulam@users.noreply.github.com> Date: Sat, 21 Dec 2024 22:35:19 +0200 Subject: [PATCH 2/3] Cleanup --- apps/backend/src/lib/export.ts | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/apps/backend/src/lib/export.ts b/apps/backend/src/lib/export.ts index 7ec785fe..0b12ef36 100644 --- a/apps/backend/src/lib/export.ts +++ b/apps/backend/src/lib/export.ts @@ -10,8 +10,8 @@ class BrowserManager { private static isShuttingDown = false; private static initializationPromise: Promise | null = null; private static pageTimeouts: Map = new Map(); - private static readonly MAX_PAGES = 25; // Limit concurrent pages - private static readonly PAGE_TIMEOUT = 5 * 60 * 1000; // 5 minutes + private static readonly MAX_PAGES = 25; + private static readonly PAGE_TIMEOUT = 5 * 60 * 1000; private static readonly PUPPETEER_ARGS = [ // Security @@ -117,20 +117,17 @@ class BrowserManager { this.isShuttingDown = true; try { - // Clear all timeouts for (const timeout of this.pageTimeouts.values()) { clearTimeout(timeout); } this.pageTimeouts.clear(); - // Close all active pages const closePagePromises = Array.from(this.activePages).map(page => this.closePage(page).catch(console.error) ); await Promise.all(closePagePromises); this.activePages.clear(); - // Close browser instance if (this.instance) { await this.instance.close().catch(console.error); this.instance = null; @@ -237,16 +234,13 @@ export async function getLemsWebpageAsPdf( } }); - // Wait for network to be idle and content to load await page.goto(url, { waitUntil: ['networkidle0', 'domcontentloaded'], timeout: 30000 }); - // Wait for fonts to load (important for PDF rendering) await page.evaluate(() => document.fonts.ready); - // Generate PDF with proper timeout const data = await withTimeout( page.pdf({ ...options, From 3b3d78d61aa83fc871b93495a488d894b4525d38 Mon Sep 17 00:00:00 2001 From: johnmeshulam Date: Sat, 28 Dec 2024 21:49:09 +0200 Subject: [PATCH 3/3] Review and test memory leak code --- apps/backend/src/lib/export.ts | 91 +++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 39 deletions(-) diff --git a/apps/backend/src/lib/export.ts b/apps/backend/src/lib/export.ts index 0b12ef36..7ea6a019 100644 --- a/apps/backend/src/lib/export.ts +++ b/apps/backend/src/lib/export.ts @@ -1,4 +1,4 @@ -import puppeteer, { Browser, Page, PDFOptions } from 'puppeteer'; +import puppeteer, { Browser, BrowserContext, Page, PDFOptions } from 'puppeteer'; import jwt from 'jsonwebtoken'; import * as db from '@lems/database'; import { WithId } from 'mongodb'; @@ -7,6 +7,7 @@ import { SafeUser } from '@lems/types'; class BrowserManager { private static instance: Browser | null = null; private static activePages: Set = new Set(); + private static activeContexts: Set = new Set(); private static isShuttingDown = false; private static initializationPromise: Promise | null = null; private static pageTimeouts: Map = new Map(); @@ -70,7 +71,9 @@ class BrowserManager { } const browser = await this.getBrowser(); - const page = await browser.newPage(); + const context = await browser.createBrowserContext(); + this.activeContexts.add(context); + const page = await context.newPage(); const timeout = setTimeout(() => { this.closePage(page).catch(console.error); @@ -83,26 +86,27 @@ class BrowserManager { } public static async closePage(page: Page): Promise { - try { - const timeout = this.pageTimeouts.get(page); - if (timeout) { - clearTimeout(timeout); - this.pageTimeouts.delete(page); - } + const timeout = this.pageTimeouts.get(page); + if (timeout) { + clearTimeout(timeout); + this.pageTimeouts.delete(page); + } + try { if (!page.isClosed()) { + const context = page.browserContext(); + await page .evaluate(() => { window.stop(); - const elements = document.getElementsByTagName('*'); - for (let i = 0; i < elements.length; i++) { - const element = elements[i]; - element.remove(); - } + document.documentElement.innerHTML = ''; }) .catch(console.error); await page.close(); + await context.close(); + + this.activeContexts.delete(context); } } catch (error) { console.error('Error cleaning up page:', error); @@ -128,6 +132,12 @@ class BrowserManager { await Promise.all(closePagePromises); this.activePages.clear(); + const closeContextPromises = Array.from(this.activeContexts).map(context => + context.close().catch(console.error) + ); + await Promise.all(closeContextPromises); + this.activeContexts.clear(); + if (this.instance) { await this.instance.close().catch(console.error); this.instance = null; @@ -180,10 +190,12 @@ const createAuthToken = async (user: WithId): Promise => { }); }; -const setupPageAuthentication = async (page: Page, url: string, token: string): Promise => { +const setupPageAuthentication = async (page: Page, url: URL, token: string): Promise => { await page.setExtraHTTPHeaders({ Authorization: `Bearer ${token}` }); - await page.setCookie({ - url, + + const context = page.browserContext(); + await context.setCookie({ + domain: url.hostname, path: '/', name: 'auth-token', value: token, @@ -201,14 +213,13 @@ export async function getLemsWebpageAsPdf( } ): Promise { let page: Page | null = null; + let pdfBuffer: Buffer | null = null; try { const domain = process.env.LEMS_DOMAIN; - if (!domain) { - throw new Error('LEMS_DOMAIN is not configured'); - } + if (!domain) throw new Error('LEMS_DOMAIN is not configured'); - const url = domain + path; + const url = new URL(domain + path); const user = await db.getUser({ username: 'admin' }); const token = await createAuthToken(user); @@ -219,22 +230,7 @@ export async function getLemsWebpageAsPdf( await setupPageAuthentication(page, url, token); - await page.setRequestInterception(true); - page.on('request', request => { - if (!page.isClosed()) { - // Only block media, websockets, and other non-essential resources - // Allow images, fonts, and stylesheets for proper PDF rendering - if (['media', 'websocket', 'other'].includes(request.resourceType())) { - request.abort(); - } else { - request.continue(); - } - } else { - request.abort(); - } - }); - - await page.goto(url, { + await page.goto(url.toString(), { waitUntil: ['networkidle0', 'domcontentloaded'], timeout: 30000 }); @@ -249,13 +245,30 @@ export async function getLemsWebpageAsPdf( 75000 ); - return Buffer.from(data); + pdfBuffer = Buffer.from(data); + return pdfBuffer; } catch (error) { console.error('Error generating PDF:', error); throw error; } finally { - if (page) { - await BrowserManager.closePage(page); + try { + if (page) { + await page + .evaluate(() => { + window.stop(); + document.documentElement.innerHTML = ''; + }) + .catch(console.error); + + await BrowserManager.closePage(page); + } + + page = null; + pdfBuffer = null; + + if (global.gc) global.gc(); + } catch (cleanupError) { + console.error('Cleanup error:', cleanupError); } } }