From 6e041d07eb0a48de73612ffa3f7c7af1be236531 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Thu, 2 Jan 2025 10:56:01 +0100 Subject: [PATCH] page cache --- pdf2htmlEX/src/CoveredTextDetector.cc | 14 ++++++------ pdf2htmlEX/src/CoveredTextDetector.h | 2 +- pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h | 5 +++++ pdf2htmlEX/src/HTMLRenderer/general.cc | 26 ++++++++++++++++++++++ 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/pdf2htmlEX/src/CoveredTextDetector.cc b/pdf2htmlEX/src/CoveredTextDetector.cc index 0792c528..f46817cb 100644 --- a/pdf2htmlEX/src/CoveredTextDetector.cc +++ b/pdf2htmlEX/src/CoveredTextDetector.cc @@ -14,7 +14,7 @@ namespace pdf2htmlEX { -CoveredTextDetector::CoveredTextDetector(Param & param): param(param) +CoveredTextDetector::CoveredTextDetector(Param & param): param(&param) /* Param is held by pointer: a reference member would delete copy assignment, which the HTMLRenderer page cache uses on detectors */ { } @@ -41,10 +41,10 @@ void CoveredTextDetector::add_char_bbox_clipped(cairo_t *cairo, double * bbox, i char_pts_visible.push_back(pts_visible); // DCRH: Hide if no points are visible, or if some points are visible and correct_text_visibility == 2 - if (pts_visible == 0 || param.correct_text_visibility == 2) { + if (pts_visible == 0 || param->correct_text_visibility == 2) { chars_covered.push_back(true); - if (pts_visible > 0 && param.correct_text_visibility == 2) { - param.actual_dpi = std::min(param.text_dpi, param.max_dpi); // Char partially covered so increase background resolution + if (pts_visible > 0 && param->correct_text_visibility == 2) { + param->actual_dpi = std::min(param->text_dpi, param->max_dpi); // Char partially covered so increase background resolution } } else { chars_covered.push_back(false); @@ -98,13 +98,13 @@ printf("pts_visible=%x\n", pts_visible); printf("pts_visible=%x\n", pts_visible); #endif char_pts_visible[i] = pts_visible; - if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param.correct_text_visibility == 2)) { + if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param->correct_text_visibility == 2)) { #ifdef DEBUG 
printf("Char covered\n"); #endif chars_covered[i] = true; - if (pts_visible > 0 && param.correct_text_visibility == 2) { // Partially visible text => increase rendering DPI - param.actual_dpi = std::min(param.text_dpi, param.max_dpi); + if (pts_visible > 0 && param->correct_text_visibility == 2) { // Partially visible text => increase rendering DPI + param->actual_dpi = std::min(param->text_dpi, param->max_dpi); } } } else { diff --git a/pdf2htmlEX/src/CoveredTextDetector.h b/pdf2htmlEX/src/CoveredTextDetector.h index 0f0506f3..2e664a7f 100644 --- a/pdf2htmlEX/src/CoveredTextDetector.h +++ b/pdf2htmlEX/src/CoveredTextDetector.h @@ -60,7 +60,7 @@ class CoveredTextDetector // x00, y00, x01, y01; x10, y10, x11, y11;... std::vector<double> char_bboxes; std::vector<int> char_pts_visible; - Param & param; + Param * param; }; } diff --git a/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h b/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h index 984b1d7d..983962d1 100644 --- a/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h +++ b/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h @@ -381,6 +381,11 @@ struct HTMLRenderer : OutputDev CoveredTextDetector covered_text_detector; DrawingTracer tracer; + + struct PageCache { /* per-page state stored by startPage when param.delay_background is set */ + CoveredTextDetector covered_text_detector; + }; + std::unordered_map<int, PageCache> page_cache; /* keyed by page number */ }; } //namespace pdf2htmlEX diff --git a/pdf2htmlEX/src/HTMLRenderer/general.cc b/pdf2htmlEX/src/HTMLRenderer/general.cc index 07c22fd0..c2811238 100644 --- a/pdf2htmlEX/src/HTMLRenderer/general.cc +++ b/pdf2htmlEX/src/HTMLRenderer/general.cc @@ -183,12 +183,31 @@ void HTMLRenderer::process(PDFDoc *doc) post_process(); + if (param.delay_background == 0) /* backgrounds are not delayed, so the renderers are dropped here */ + { + bg_renderer = nullptr; + fallback_bg_renderer = nullptr; + } + if(param.quiet == 0) cerr << endl; } bool HTMLRenderer::renderPage(PDFDoc *doc, int pageno) { + if (param.delay_background == 0) /* process() has already released the background renderers in this mode */ + { + return false; + } + + if (page_cache.find(pageno) == page_cache.end()) /* report and bail out only when the page is missing from the cache */ + { + cerr << "Page number " << pageno << " not found in page cache" << endl; + return false; 
+ } + + covered_text_detector = page_cache[pageno].covered_text_detector; /* load the detector state that startPage stored under this page number */ + if (bg_renderer->render_page(cur_doc, pageno)) { return true; @@ -209,6 +228,13 @@ void HTMLRenderer::setDefaultCTM(const double *ctm) void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) { + if (param.delay_background && this->pageNum > 0) /* snapshot the detector under this->pageNum before the reset() call below; NOTE(review): assuming this->pageNum still names the previous page here, the final page is only stored if another startPage follows - confirm it gets cached elsewhere */ + { + page_cache[this->pageNum] = { + .covered_text_detector = covered_text_detector, + }; + } + covered_text_detector.reset(); tracer.reset(state);