diff --git a/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/ListHelper.java b/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/ListHelper.java index 2c1caf6..1f4ccf3 100644 --- a/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/ListHelper.java +++ b/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/ListHelper.java @@ -124,6 +124,7 @@ protected BlockBox popListStack() { return box; } protected BlockBox peekListStack() { + if(listStack.peek() == null) return new BlockBox(); return listStack.peek(); } @@ -184,6 +185,7 @@ void init() { } protected ListItemContentState peekListItemStateStack() { + if(listItemStateStack.peek() == null) return new ListItemContentState(); return listItemStateStack.peek(); } private void pushListItemStateStack() { diff --git a/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImageHandlerDefault.java b/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImageHandlerDefault.java index 4f58e21..402c579 100644 --- a/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImageHandlerDefault.java +++ b/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImageHandlerDefault.java @@ -173,7 +173,7 @@ public void addImage(Docx4jUserAgent docx4jUserAgent, WordprocessingMLPackage wo drawing.getAnchorOrInline().add(inline); } } catch (Exception e1) { - log.error(MessageFormat.format("Error during image processing: ''{0}'', insert default text.", new Object[] {e.getAttribute("alt")}), e1); + log.error(MessageFormat.format("Error during image processing: ''{0}'', insert default text.", e.getAttribute("alt")), e1); isError = true; } diff --git a/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImageHandlerDifferentTarget.java b/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImageHandlerDifferentTarget.java new file mode 100644 index 0000000..22b43c6 --- /dev/null +++ 
b/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImageHandlerDifferentTarget.java @@ -0,0 +1,258 @@ +package org.docx4j.convert.in.xhtml; + +import org.apache.commons.codec.binary.Base64; +import org.docx4j.convert.in.xhtml.renderer.Docx4JFSImage; +import org.docx4j.convert.in.xhtml.renderer.Docx4jUserAgent; +import org.docx4j.dml.wordprocessingDrawing.Inline; +import org.docx4j.jaxb.Context; +import org.docx4j.openpackaging.packages.WordprocessingMLPackage; +import org.docx4j.openpackaging.parts.Part; +import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage; +import org.docx4j.wml.CTTblCellMar; +import org.docx4j.wml.CTTblPrBase; +import org.docx4j.wml.P; +import org.docx4j.wml.Style; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Element; + +import java.text.MessageFormat; +import java.util.HashMap; + +public class XHTMLImageHandlerDifferentTarget implements XHTMLImageHandler{ + + + + + public static Logger log = LoggerFactory.getLogger(org.docx4j.convert.in.xhtml.XHTMLImageHandlerDifferentTarget.class); + + private int maxWidth = -1; + private String tableStyle; + public int getMaxWidth() { + return maxWidth; + } + @Override + public void setMaxWidth(int maxWidth, String tableStyle) { + this.maxWidth = maxWidth; + this.tableStyle = tableStyle; + } + + public void setMaxWidth(int maxWidth) { + this.maxWidth = maxWidth; + } + + protected HashMap imagePartCache = new HashMap(); + + private XHTMLImporterImpl importer; + private Part targetPart; + + public XHTMLImageHandlerDifferentTarget(XHTMLImporterImpl importer, Part targetPart) { + this.importer = importer; + this.targetPart = targetPart; + } + + /** + * @param docx4jUserAgent + * @param wordMLPackage + * @param p + * @param e + * @param cx width of image itself (ie excluding CSS margin, padding) in EMU + * @param cy + */ + public void addImage(Docx4jUserAgent docx4jUserAgent, WordprocessingMLPackage wordMLPackage, + P p, Element e, 
Long cx, Long cy) { + + BinaryPartAbstractImage imagePart = null; + + boolean isError = false; + try { + byte[] imageBytes = null; + + if (e.getAttribute("src").startsWith("data:image")) { + // Supports + // data:[][;charset=][;base64], + // eg data:image/png;base64,iVBORw0KGgo... + // http://www.greywyvern.com/code/php/binary2base64 is a convenient online encoder + String base64String = e.getAttribute("src"); + int commaPos = base64String.indexOf(","); + if (commaPos < 6) { // or so ... + // .. its broken + org.docx4j.wml.R run = Context.getWmlObjectFactory().createR(); + p.getContent().add(run); + + org.docx4j.wml.Text text = Context.getWmlObjectFactory().createText(); + text.setValue("[INVALID DATA URI: " + e.getAttribute("src")); + + run.getContent().add(text); + + return; + } + base64String = base64String.substring(commaPos + 1); + log.debug(base64String); + imageBytes = Base64.decodeBase64(base64String.getBytes("UTF8")); + } else { + + imagePart = imagePartCache.get(e.getAttribute("src")); + + if (imagePart==null) { + + String url = e.getAttribute("src"); + // Workaround for cannot resolve the URL C:\... with base URL file:/C:/... 
+                    // where @src points to a raw file path (second character is the drive-letter ':')
+                    if (url.length() > 1 && url.charAt(1) == ':') {
+                        url = "file:/" + url;
+                    }
+
+                    Docx4JFSImage docx4JFSImage = docx4jUserAgent.getDocx4JImageResource(url);
+                    if (docx4JFSImage == null) {
+                        // in case of wrong URL - docx4JFSImage will be null
+                        log.error("Couldn't fetch " + url);
+                    } else {
+                        imageBytes = docx4JFSImage.getBytes();
+                    }
+                }
+            }
+            if (imageBytes == null
+                    && imagePart==null) {
+                isError = true;
+            } else {
+
+                if (imagePart==null) {
+                    // It's not cached
+                    imagePart = BinaryPartAbstractImage.createImagePart(wordMLPackage, imageBytes);
+                    if (e.getAttribute("src").startsWith("data:image")) {
+                        // don't bother caching
+                    } else {
+                        // cache it
+                        imagePartCache.put(e.getAttribute("src"), imagePart);
+                    }
+                }
+
+
+                long docPrId = wordMLPackage.getDrawingPropsIdTracker().generateId();
+
+                Inline inline=null;
+                if (cx == null && cy == null) {
+                    // no explicit size requested: let the image part size itself, optionally capped by maxWidth
+                    if (maxWidth>0) {
+                        log.debug("image maxWidth:" + maxWidth + ", table style: " + tableStyle);
+                        long excessWidth = getTblCellMargins(tableStyle);
+                        if(excessWidth > 0) {
+                            log.debug("table style margins subtracted (twips): " + excessWidth);
+                        }
+                        inline = imagePart.createImageInline(null, e.getAttribute("alt"), docPrId, 1, false, maxWidth - (int)excessWidth);
+                    } else {
+                        inline = imagePart.createImageInline(null, e.getAttribute("alt"), docPrId, 1, false);
+                    }
+                } else {
+
+                    if (cx == null) {
+                        // only cy given: scale the pixel width by cy/heightPx; multiply first, long division truncates
+                        cx = imagePart.getImageInfo().getSize().getWidthPx() * cy
+                                / imagePart.getImageInfo().getSize().getHeightPx();
+
+                    } else if (cy == null) {
+                        // only cx given: scale the pixel height by cx/widthPx; multiply first, long division truncates
+                        cy = imagePart.getImageInfo().getSize().getHeightPx() * cx
+                                / imagePart.getImageInfo().getSize().getWidthPx();
+
+                    }
+                    inline = imagePart.createImageInline(null, e.getAttribute("alt"),
+                            docPrId, 1, cx, cy, false);
+
+                    /*
+                     * That sets text wrapping distance from text to 0.
+                     *
+                     * Parameter tableStyle can be null - 0 will be returned.
+     * @return left margin plus right margin (twips)
+     */
+    private long getTblCellMargins(String tableStyle) {
+        Style s = null;
+        if(tableStyle != null && !tableStyle.isEmpty()) {
+            s = importer.getStyleByIdOrName(tableStyle);
+        }
+        if(s == null || !importer.getTableHelper().isTableStyle(s)) {
+            return 0;
+        }
+        CTTblCellMar cellMar = getTblCellMar(s);
+        if(cellMar == null) {
+            //try "based on" style
+            cellMar = getBasedOnTblCellMar(s);
+        }
+        return cellMar == null ? 0 : getLeftPlusRightMarginsValue(cellMar);
+    }
+
+    private long getLeftPlusRightMarginsValue(CTTblCellMar cellMar) {
+        // a style may define only some sides (or omit the width value); a missing side counts as 0
+        long total = 0;
+        if(cellMar.getLeft() != null && cellMar.getLeft().getW() != null) { total += cellMar.getLeft().getW().longValue(); }
+        if(cellMar.getRight() != null && cellMar.getRight().getW() != null) { total += cellMar.getRight().getW().longValue(); }
+        return total;
+    }
+
+    /**
+     * Get cell margins from "based on" style.
+     *
Search recursively while possible.
+     */
+    private CTTblCellMar getBasedOnTblCellMar(Style s) {
+        Style.BasedOn bo = s.getBasedOn();
+        if(bo != null) {
+            String basedOn = bo.getVal();
+            if(basedOn != null && !basedOn.isEmpty()) {
+                Style bs = importer.getStyleByIdOrName(basedOn);
+                if(bs != null) {
+                    CTTblCellMar bsCellMar = getTblCellMar(bs);
+                    if(bsCellMar != null) {
+                        return bsCellMar;
+                    } else {
+                        return getBasedOnTblCellMar(bs);
+                    }
+                }
+            }
+        }
+        return null;
+    }
+
+    private CTTblCellMar getTblCellMar(Style s) {
+        CTTblPrBase tpb = s.getTblPr();
+        if(tpb != null) {
+            return tpb.getTblCellMar();
+        }
+        return null;
+    }
+}
+
+
diff --git a/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImporterImpl.java b/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImporterImpl.java
index 0efe4f0..efec7c1 100644
--- a/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImporterImpl.java
+++ b/docx4j-ImportXHTML-core/src/main/java/org/docx4j/convert/in/xhtml/XHTMLImporterImpl.java
@@ -65,6 +65,7 @@ import org.docx4j.openpackaging.exceptions.Docx4JException;
 import org.docx4j.openpackaging.exceptions.InvalidFormatException;
 import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
+import org.docx4j.openpackaging.parts.Part;
 import org.docx4j.openpackaging.parts.WordprocessingML.NumberingDefinitionsPart;
 import org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart;
 import org.docx4j.openpackaging.parts.relationships.Namespaces;
@@ -232,8 +233,13 @@ public void setHyperlinkStyle (
     public void setXHTMLImageHandler(XHTMLImageHandler xHTMLImageHandler) {
         this.xHTMLImageHandler = xHTMLImageHandler;
     }
+    public void setDefaultHandler(){
+        xHTMLImageHandler = defaultHandler;
+    }
-    private XHTMLImageHandler xHTMLImageHandler = new XHTMLImageHandlerDefault(this);
+    private XHTMLImageHandlerDefault defaultHandler = new XHTMLImageHandlerDefault(this); // declared first: the initializer below reads it
+
+    private XHTMLImageHandler xHTMLImageHandler = defaultHandler; // as before this patch, never null ahead of the first convert()
@Override public void setMaxWidth(int maxWidth, String tableStyle) { @@ -492,7 +498,7 @@ private String stylesToCSS() { * @throws IOException */ public List convert(File file, String baseUrl) throws Docx4JException { - + setDefaultHandler(); renderer = getRenderer(); File parent = file.getAbsoluteFile().getParentFile(); @@ -523,7 +529,7 @@ public List convert(File file, String baseUrl) throws Docx4JException { * @throws IOException */ public List convert(InputSource is, String baseUrl) throws Docx4JException { - + setDefaultHandler(); renderer = getRenderer(); Document dom = XMLResource.load(is).getDocument(); @@ -539,12 +545,11 @@ public List convert(InputSource is, String baseUrl) throws Docx4JExcept /** * @param is * @param baseUrl - * @param wordMLPackage * @return * @throws IOException */ public List convert(InputStream is, String baseUrl) throws Docx4JException { - + setDefaultHandler(); renderer = getRenderer(); Document dom = XMLResource.load(is).getDocument(); @@ -560,12 +565,11 @@ public List convert(InputStream is, String baseUrl) throws Docx4JExcepti /** * @param node * @param baseUrl - * @param wordMLPackage * @return * @throws IOException */ public List convert(Node node, String baseUrl) throws Docx4JException { - + setDefaultHandler(); renderer = getRenderer(); if (node instanceof Document) { renderer.setDocument( (Document)node, baseUrl ); @@ -584,12 +588,11 @@ public List convert(Node node, String baseUrl) throws Docx4JException { /** * @param reader * @param baseUrl - * @param wordMLPackage * @return * @throws IOException */ public List convert(Reader reader, String baseUrl) throws Docx4JException { - + setDefaultHandler(); renderer = getRenderer(); Document dom = XMLResource.load(reader).getDocument(); @@ -601,40 +604,16 @@ public List convert(Reader reader, String baseUrl) throws Docx4JExcepti return imports.getContent(); } - -// /** -// * @param source -// * @param baseUrl -// * @param wordMLPackage -// * @return -// * @throws IOException -// 
*/ -// public List convert(Source source, String baseUrl) throws Docx4JException { -// -// renderer = getRenderer(); -// -// Document dom = XMLResource.load(source).getDocument(); -// renderer.setDocument(dom, baseUrl); -// -// renderer.layout(); -// -// traverse(renderer.getRootBox(), null); -// -// return imports.getContent(); -// } - - //public List convert(XMLEventReader reader) throws IOException { - //public List convert(XMLStreamReader reader) throws IOException { + /** * Convert the well formed XHTML found at the specified URI to a list of WML objects. * * @param url - * @param wordMLPackage * @return */ public List convert(URL url) throws Docx4JException { - + setDefaultHandler(); renderer = getRenderer(); String urlString = url.toString(); @@ -653,7 +632,6 @@ public List convert(URL url) throws Docx4JException { * * @param content * @param baseUrl - * @param wordMLPackage * @return */ public List convert(String content, String baseUrl) throws Docx4JException { @@ -663,7 +641,7 @@ public List convert(String content, String baseUrl) throws Docx4JExcept * http://stackoverflow.com/questions/4897876/reading-utf-8-bom-marker * http://www.unicode.org/faq/utf_bom.html#BOM */ - + setDefaultHandler(); int firstChar = content.codePointAt(0); if (firstChar==0xFEFF) { log.info("Removing BOM.."); @@ -708,7 +686,71 @@ public List convert(String content, String baseUrl) throws Docx4JExcept traverse(renderer.getRootBox(), null); return imports.getContent(); - } + } + /** + * + * Convert the well formed XHTML contained in the string to a list of WML objects. + * + * @param content - the content of the XHTML in String + * @param baseUrl - the base URL of the XHTML + * @param targetPart - the part to which the content will be added. + * @return - the list of objects added to the targetPart. 
+     */
+    public List convert(String content, String baseUrl, Part targetPart) throws Docx4JException {
+
+        /* Test for and if present remove BOM, which causes "SAXParseException: Content is not allowed in prolog"
+         * See further:
+         * http://stackoverflow.com/questions/4897876/reading-utf-8-bom-marker
+         * http://www.unicode.org/faq/utf_bom.html#BOM
+         */
+        // empty content has no first code point; skip the BOM test and let the XML loader reject it
+        int firstChar = content.isEmpty() ? -1 : content.codePointAt(0);
+        if (firstChar==0xFEFF) {
+            log.info("Removing BOM..");
+            content = content.substring(1);
+        }
+        setXHTMLImageHandler(new
+                XHTMLImageHandlerDifferentTarget(this,targetPart));
+
+        renderer = getRenderer();
+
+        InputSource is = new InputSource(new BufferedReader(new StringReader(content)));
+
+        Document dom;
+        try {
+            dom = XMLResource.load(is).getDocument();
+        } catch ( com.openhtmltopdf.util.XRRuntimeException xre) {
+            // javax.xml.transform.TransformerException te
+            Throwable t = xre.getCause();
+            log.error(t == null ? xre.getMessage() : t.getMessage(), t == null ? xre : t); // the cause may be absent
+            if (t instanceof javax.xml.transform.TransformerException) {
+                // eg content of elements must consist of well-formed character data or markup.
+
+
+                Throwable t2 = ((javax.xml.transform.TransformerException)t).getCause();
+                if (t2 instanceof org.xml.sax.SAXParseException) {
+                    throw new Docx4JException(
+                            "issues at Line " + ((org.xml.sax.SAXParseException)t2).getLineNumber() +
+                            ", Col " + ((org.xml.sax.SAXParseException)t2).getColumnNumber(), t);
+
+                }
+
+                throw new Docx4JException(
+                        ((javax.xml.transform.TransformerException)t).getLocationAsString(), t);
+
+            } else {
+                throw xre;
+            }
+        }
+
+
+        renderer.setDocument(dom, baseUrl);
+        renderer.layout();
+
+        traverse(renderer.getRootBox(), null);
+
+        return imports.getContent();
+    }
     public Map getCascadedProperties(CalculatedStyle cs) {