From 9528d0e0d0e32e35a7c40008e23576e93600ed22 Mon Sep 17 00:00:00 2001 From: andiwand Date: Fri, 22 Dec 2023 19:51:06 +0100 Subject: [PATCH] more progress on xlsx and pptx --- .../ooxml_presentation_document.cpp | 7 ++ .../ooxml_presentation_document.hpp | 3 + .../ooxml_presentation_parser.cpp | 2 +- .../ooxml_spreadsheet_document.cpp | 15 +++- .../ooxml_spreadsheet_document.hpp | 2 + .../spreadsheet/ooxml_spreadsheet_element.cpp | 90 ++++++++++++++++--- .../spreadsheet/ooxml_spreadsheet_element.hpp | 40 +++++++++ .../spreadsheet/ooxml_spreadsheet_parser.cpp | 80 ++++++++++++++--- test/data/reference-output/odr-public | 2 +- 9 files changed, 211 insertions(+), 30 deletions(-) diff --git a/src/odr/internal/ooxml/presentation/ooxml_presentation_document.cpp b/src/odr/internal/ooxml/presentation/ooxml_presentation_document.cpp index 82c151b91..c2d481aad 100644 --- a/src/odr/internal/ooxml/presentation/ooxml_presentation_document.cpp +++ b/src/odr/internal/ooxml/presentation/ooxml_presentation_document.cpp @@ -40,4 +40,11 @@ void Document::save(const common::Path & /*path*/, throw UnsupportedOperation(); } +pugi::xml_node Document::get_slide_root(const std::string &ref) const { + if (auto it = m_slides_xml.find(ref); it != std::end(m_slides_xml)) { + return it->second.document_element(); + } + return {}; +} + } // namespace odr::internal::ooxml::presentation diff --git a/src/odr/internal/ooxml/presentation/ooxml_presentation_document.hpp b/src/odr/internal/ooxml/presentation/ooxml_presentation_document.hpp index 5af9a5c2c..214663655 100644 --- a/src/odr/internal/ooxml/presentation/ooxml_presentation_document.hpp +++ b/src/odr/internal/ooxml/presentation/ooxml_presentation_document.hpp @@ -21,6 +21,9 @@ class Document final : public common::TemplateDocument { void save(const common::Path &path) const final; void save(const common::Path &path, const char *password) const final; + pugi::xml_node get_slide_root(const std::string &ref) const; + +private: pugi::xml_document m_document_xml; std::unordered_map m_slides_xml; diff --git a/src/odr/internal/ooxml/presentation/ooxml_presentation_parser.cpp b/src/odr/internal/ooxml/presentation/ooxml_presentation_parser.cpp index f8a805069..ae13ee336 100644 --- a/src/odr/internal/ooxml/presentation/ooxml_presentation_parser.cpp +++ b/src/odr/internal/ooxml/presentation/ooxml_presentation_parser.cpp @@ -33,7 +33,7 @@ void parse_element_children(Document &document, Root *element, pugi::xml_node node) { for (auto child_node : node.child("p:sldIdLst").children("p:sldId")) { const char *id = child_node.attribute("r:id").value(); - auto slide_node = document.m_slides_xml.at(id).document_element(); + auto slide_node = document.get_slide_root(id); auto [slide, _] = parse_element_tree(document, slide_node); element->append_child_(slide); } diff --git a/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_document.cpp b/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_document.cpp index 493cb2147..296b399c4 100644 --- a/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_document.cpp +++ b/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_document.cpp @@ -18,10 +18,6 @@ Document::Document(std::shared_ptr filesystem) m_workbook_xml = util::xml::parse(*m_filesystem, "xl/workbook.xml"); m_styles_xml = util::xml::parse(*m_filesystem, "xl/styles.xml"); - m_root_element = parse_tree(*this, m_workbook_xml.document_element()); - - m_style_registry = StyleRegistry(m_styles_xml.document_element()); - for (const auto &relationships : parse_relationships(*m_filesystem, "xl/workbook.xml")) { auto sheet_path = common::Path("xl").join(relationships.second); @@ -42,6 +38,10 @@ Document::Document(std::shared_ptr filesystem) m_sheets[relationships.first].sheet_xml = std::move(sheet_xml); } + m_root_element = parse_tree(*this, m_workbook_xml.document_element()); + + m_style_registry = StyleRegistry(m_styles_xml.document_element()); + if (m_filesystem->exists("xl/sharedStrings.xml")) { m_shared_strings_xml = util::xml::parse(*m_filesystem, "xl/sharedStrings.xml"); @@ -67,4 +67,11 @@ void Document::save(const common::Path & /*path*/, throw UnsupportedOperation(); } +pugi::xml_node Document::get_sheet_root(const std::string &ref) const { + if (auto it = m_sheets.find(ref); it != std::end(m_sheets)) { + return it->second.sheet_xml.document_element(); + } + return {}; +} + } // namespace odr::internal::ooxml::spreadsheet diff --git a/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_document.hpp b/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_document.hpp index 063c5db3a..2b42253b0 100644 --- a/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_document.hpp +++ b/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_document.hpp @@ -26,6 +26,8 @@ class Document final : public common::TemplateDocument { void save(const common::Path &path) const final; void save(const common::Path &path, const char *password) const final; + pugi::xml_node get_sheet_root(const std::string &ref) const; + private: struct Sheet final { common::Path sheet_path; diff --git a/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_element.cpp b/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_element.cpp index 74a8122ea..2f3388a78 100644 --- a/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_element.cpp +++ b/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_element.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -65,35 +66,74 @@ Element::shared_strings_(const abstract::Document *document) { return document_(document)->m_shared_strings; } -std::string Sheet::name(const abstract::Document *) const { - return m_node.attribute("name").value(); +void SheetIndex::init_column(std::uint32_t /*min*/, std::uint32_t max, + pugi::xml_node element) { + columns[max] = element; } -TableDimensions Sheet::dimensions(const abstract::Document *document) const { - if (auto dimension = - sheet_node_(document).child("dimension").attribute("ref")) { - try { - auto range = common::TableRange(dimension.value()); - return {range.to().row() + 1, range.to().column() + 1}; - } catch (...) { +void SheetIndex::init_row(std::uint32_t row, pugi::xml_node element) { + rows[row].row = element; +} + +void SheetIndex::init_cell(std::uint32_t column, std::uint32_t row, + pugi::xml_node element) { + rows[row].cells[column] = element; +} + +pugi::xml_node SheetIndex::column(std::uint32_t column) const { + if (auto it = util::map::lookup_greater_than(columns, column); + it != std::end(columns)) { + return it->second; + } + return {}; +} + +pugi::xml_node SheetIndex::row(std::uint32_t row) const { + if (auto it = util::map::lookup_greater_than(rows, row); + it != std::end(rows)) { + return it->second.row; + } + return {}; +} + +pugi::xml_node SheetIndex::cell(std::uint32_t column, std::uint32_t row) const { + if (auto row_it = util::map::lookup_greater_than(rows, row); + row_it != std::end(rows)) { + const auto &cells = row_it->second.cells; + if (auto cell_it = util::map::lookup_greater_than(cells, column); + cell_it != std::end(cells)) { + return cell_it->second; } } return {}; } +std::string Sheet::name(const abstract::Document *) const { + return m_node.attribute("name").value(); +} + +TableDimensions +Sheet::dimensions(const abstract::Document * /*document*/) const { + return m_index.dimensions; +} + TableDimensions Sheet::content(const abstract::Document *document, std::optional) const { return dimensions(document); // TODO } abstract::SheetCell *Sheet::cell(const abstract::Document *, - std::uint32_t /*column*/, - std::uint32_t /*row*/) const { - return nullptr; // TODO + std::uint32_t column, + std::uint32_t row) const { + if (auto cell_it = m_cells.find({column, row}); + cell_it != std::end(m_cells)) { + return cell_it->second; + } + return nullptr; } abstract::Element *Sheet::first_shape(const abstract::Document *) const { - return nullptr; // TODO + return m_first_shape; } TableStyle Sheet::style(const abstract::Document *) const { @@ -116,6 +156,30 @@ TableCellStyle Sheet::cell_style(const abstract::Document *, return TableCellStyle(); // TODO } +void Sheet::init_column_(std::uint32_t min, std::uint32_t max, + pugi::xml_node element) { + m_index.init_column(min, max, element); +} + +void Sheet::init_row_(std::uint32_t row, pugi::xml_node element) { + m_index.init_row(row, element); +} + +void Sheet::init_cell_(std::uint32_t column, std::uint32_t row, + pugi::xml_node element) { + m_index.init_cell(column, row, element); +} + +void Sheet::init_cell_element_(std::uint32_t column, std::uint32_t row, + SheetCell *element) { + m_cells[{column, row}] = element; + element->m_parent = this; +} + +void Sheet::init_dimensions_(TableDimensions dimensions) { + m_index.dimensions = dimensions; +} + pugi::xml_node Sheet::sheet_node_(const abstract::Document *document) const { return sheet_(document, m_node.attribute("r:id").value()); } diff --git a/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_element.hpp b/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_element.hpp index d236e67d3..458766414 100644 --- a/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_element.hpp +++ b/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_element.hpp @@ -10,12 +10,16 @@ #include #include +#include "odr/internal/common/table_position.hpp" +#include #include namespace odr::internal::ooxml::spreadsheet { class Document; class StyleRegistry; +class SheetCell; + class Element : public common::Element { public: explicit Element(pugi::xml_node node); @@ -54,6 +58,28 @@ class Root final : public DefaultElement { using DefaultElement::DefaultElement; }; +struct SheetIndex final { + struct Row { + pugi::xml_node row; + std::map cells; + }; + + TableDimensions dimensions; + + std::map columns; + std::map rows; + + void init_column(std::uint32_t min, std::uint32_t max, + pugi::xml_node element); + void init_row(std::uint32_t row, pugi::xml_node element); + void init_cell(std::uint32_t column, std::uint32_t row, + pugi::xml_node element); + + pugi::xml_node column(std::uint32_t) const; + pugi::xml_node row(std::uint32_t) const; + pugi::xml_node cell(std::uint32_t column, std::uint32_t row) const; +}; + class Sheet final : public Element, public abstract::Sheet { public: using Element::Element; @@ -82,7 +108,21 @@ class Sheet final : public Element, public abstract::Sheet { std::uint32_t column, std::uint32_t row) const final; + void init_column_(std::uint32_t min, std::uint32_t max, + pugi::xml_node element); + void init_row_(std::uint32_t row, pugi::xml_node element); + void init_cell_(std::uint32_t column, std::uint32_t row, + pugi::xml_node element); + void init_cell_element_(std::uint32_t column, std::uint32_t row, + SheetCell *element); + void init_dimensions_(TableDimensions dimensions); + private: + SheetIndex m_index; + + std::unordered_map m_cells; + Element *m_first_shape{nullptr}; + pugi::xml_node sheet_node_(const abstract::Document *) const; pugi::xml_node drawing_node_(const abstract::Document *) const; }; diff --git a/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_parser.cpp b/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_parser.cpp index 91d5f817b..c3d1e5edf 100644 --- a/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_parser.cpp +++ b/src/odr/internal/ooxml/spreadsheet/ooxml_spreadsheet_parser.cpp @@ -2,6 +2,7 @@ #include +#include "odr/internal/common/table_range.hpp" #include #include @@ -9,9 +10,12 @@ namespace odr::internal::ooxml::spreadsheet { namespace { -template -std::tuple parse_element_tree(Document &document, - pugi::xml_node node); +template +std::tuple +parse_element_tree(Document &document, pugi::xml_node node, args_t &&...args); +template <> +std::tuple +parse_element_tree(Document &document, pugi::xml_node node); std::tuple parse_any_element_tree(Document &document, pugi::xml_node node); @@ -29,14 +33,25 @@ void parse_element_children(Document &document, Element *element, } } -template -std::tuple parse_element_tree(Document &document, - pugi::xml_node node) { +void parse_element_children(Document &document, Root *element, + pugi::xml_node node) { + for (auto child_node : node.child("sheets").children("sheet")) { + const char *id = child_node.attribute("r:id").value(); + auto sheet_node = document.get_sheet_root(id); + auto [sheet, _] = parse_element_tree(document, sheet_node); + element->append_child_(sheet); + } +} + +template +std::tuple +parse_element_tree(Document &document, pugi::xml_node node, args_t &&...args) { if (!node) { return std::make_tuple(nullptr, pugi::xml_node()); } - auto element_unique = std::make_unique(node); + auto element_unique = + std::make_unique(node, std::forward(args)...); auto element = element_unique.get(); document.register_element_(std::move(element_unique)); @@ -45,6 +60,52 @@ std::tuple parse_element_tree(Document &document, return std::make_tuple(element, node.next_sibling()); } +template <> +std::tuple +parse_element_tree(Document &document, pugi::xml_node node) { + if (!node) { + return std::make_tuple(nullptr, pugi::xml_node()); + } + + auto element_unique = std::make_unique(node); + auto element = element_unique.get(); + document.register_element_(std::move(element_unique)); + + for (auto col_node : node.child("cols").children("col")) { + std::uint32_t min = col_node.attribute("min").as_uint() - 1; + std::uint32_t max = col_node.attribute("max").as_uint() - 1; + element->init_column_(min, max, col_node); + } + + for (auto row_node : node.child("sheetData").children("row")) { + std::uint32_t row = row_node.attribute("r").as_uint() - 1; + element->init_row_(row, row_node); + + for (auto cell_node : row_node.children("c")) { + auto position = common::TablePosition(cell_node.attribute("r").value()); + element->init_cell_(position.column(), position.row(), cell_node); + + auto [cell, _] = parse_element_tree(document, cell_node); + element->init_cell_element_(position.column(), position.row(), cell); + } + } + + { + std::string dimension_ref = + node.child("dimension").attribute("ref").value(); + common::TablePosition position_to; + if (dimension_ref.find(":") == std::string::npos) { + position_to = common::TablePosition(dimension_ref); + } else { + position_to = common::TableRange(dimension_ref).to(); + } + element->init_dimensions_( + TableDimensions(position_to.row() + 1, position_to.column() + 1)); + } + + return std::make_tuple(element, node.next_sibling()); +} + bool is_text_node(const pugi::xml_node node) { if (!node) { return false; @@ -63,7 +124,7 @@ bool is_text_node(const pugi::xml_node node) { } template <> -std::tuple +std::tuple parse_element_tree(Document &document, pugi::xml_node first) { if (!first) { return std::make_tuple(nullptr, pugi::xml_node()); @@ -88,9 +149,6 @@ parse_any_element_tree(Document &document, pugi::xml_node node) { static std::unordered_map parser_table{ {"workbook", parse_element_tree}, {"worksheet", parse_element_tree}, - // TODO - //{"col", parse_element_tree}, - //{"row", parse_element_tree}, {"r", parse_element_tree}, {"t", parse_element_tree}, {"v", parse_element_tree}, diff --git a/test/data/reference-output/odr-public b/test/data/reference-output/odr-public index 4429328a5..e1cc43f7b 160000 --- a/test/data/reference-output/odr-public +++ b/test/data/reference-output/odr-public @@ -1 +1 @@ -Subproject commit 4429328a514c1adc1f47343247c458f37bf06f9f +Subproject commit e1cc43f7b68ede5a05b56582cef3644dbb965edb