Takeover from fawkez 1.0.

jCoderZ · Jul 21, 2012 · ee8a0a3 · ee8a0a3
1 parent 823c1d8
commit ee8a0a3
Show file tree

Hide file tree

Showing 5 changed files with 1,451 additions and 0 deletions.
diff --git a/pom.xml b/pom.xml
@@ -0,0 +1,90 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>org.jcoderz.fawkez</groupId>
+  <artifactId>doctools</artifactId>
+  <version>0.0.1-SNAPSHOT</version>
+  <packaging>jar</packaging>
+
+  <name>fawkez-doctools</name>
+  <description>This is the Maven base project for the FawkeZ javadoc utilities.</description>
+  <url>http://jcoderz.github.com</url>
+  <organization>
+    <name>jCoderZ</name>
+    <url>http://www.jcoderz.org/</url>
+  </organization>
+  <licenses>
+    <license>
+      <name>The BSD 3-Clause License</name>
+      <url>http://www.opensource.org/licenses/BSD-3-Clause</url>
+      <distribution>repo</distribution>
+    </license>
+  </licenses>
+  <scm> <!-- connection values are scm:git: followed by the repository URL -->
+    <url>https://github.com/jCoderZ/fawkez-doctools</url>
+    <connection>scm:git:https://github.com/jCoderZ/fawkez-doctools.git</connection>
+    <developerConnection>scm:git:https://github.com/jCoderZ/fawkez-doctools.git</developerConnection>
+  </scm>
+
+  <issueManagement>
+    <system>github</system>
+    <url>https://github.com/jCoderZ/fawkez-doctools/issues</url>
+  </issueManagement>
+
+  <developers>
+    <developer>
+      <id>amandel</id>
+      <name>Andreas Mandel</name>
+      <email>[email protected]</email>
+      <organization>jCoderZ</organization>
+      <organizationUrl>https://github.com/jCoderZ</organizationUrl>
+    </developer>
+    <developer>
+      <id>mrumpf</id>
+      <name>Michael Rumpf</name>
+      <email>[email protected]</email>
+      <organization>jCoderZ</organization>
+      <organizationUrl>https://github.com/jCoderZ</organizationUrl>
+    </developer>
+    <developer>
+      <id>mgriffel</id>
+      <name>Michael Griffel</name>
+      <email>[email protected]</email>
+      <organization>jCoderZ</organization>
+      <organizationUrl>https://github.com/jCoderZ</organizationUrl>
+    </developer>
+  </developers>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.jcoderz.fawkez</groupId>
+      <artifactId>fawkez-commons</artifactId>
+      <version>2.0.0</version>
+    </dependency>
+    <dependency> <!-- provides the org.w3c.tidy classes used by HtmlCleaner -->
+      <groupId>jtidy</groupId>
+      <artifactId>jtidy</artifactId>
+      <version>4aug2000r7-dev</version>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.10</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency> <!-- system scope: resolved from the local JDK via systemPath -->
+      <groupId>com.sun</groupId>
+      <artifactId>tools</artifactId>
+      <version>1.7.0</version>
+      <scope>system</scope>
+      <systemPath>${java.home}/../lib/tools.jar</systemPath>
+    </dependency>
+  </dependencies>
+
+</project>
diff --git a/src/main/java/org/jcoderz/fawkez/doctools/doclet/HtmlCleaner.java b/src/main/java/org/jcoderz/fawkez/doctools/doclet/HtmlCleaner.java
@@ -0,0 +1,161 @@
+/*
+ * $Id: HtmlCleaner.java 1011 2008-06-16 17:57:36Z amandel $
+ *
+ * Copyright 2006, The jCoderZ.org Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *    * Neither the name of the jCoderZ.org Project nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.jcoderz.fawkez.doctools.doclet;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.w3c.tidy.Configuration;
+import org.w3c.tidy.Tidy;
+
+/**
+ * This class provides an easy interface to jTidy to clean up
+ * html fragments as used within javadoc.
+ *
+ * @author Andreas Mandel
+ */
+public class HtmlCleaner
+{
+   /** The full qualified name of this class. */
+   private static final String CLASSNAME = HtmlCleaner.class.getName();
+
+   /** The logger to use. */
+   private static final Logger logger = Logger.getLogger(CLASSNAME);
+
+   private static final String FIX_HEADER
+         = "<html><head><title>clean</title></head><body>";
+
+   private static final String FIX_FOOTER
+         = "</body></html>";
+
+   private String mWarnings = "";
+   private boolean mHasErrors = false;
+
+   /**
+    * Converts the given HTML fragment string into wellformed xhtml.
+    * @param in the html fragment to be cleaned up.
+    * @return a cleaned up wellformed xhtml version of the in string.
+    */
+   public String clean (CharSequence in)
+   {
+      if (logger.isLoggable(Level.FINER))
+      {
+         logger.entering(CLASSNAME, "clean(CharSequence)", in);
+      }
+      mHasErrors = false;
+      final Tidy tidy = new Tidy();
+      final String inData = FIX_HEADER + in + FIX_FOOTER;
+      final StringWriter err = new StringWriter();
+      String result = null;
+      try
+      {
+         tidy.setCharEncoding(Configuration.UTF8);
+         tidy.setMakeClean(true);
+         tidy.setXmlOut(true);
+         tidy.setRawOut(true);
+         tidy.setNumEntities(true);
+         tidy.setWraplen(0); // do not care about line length
+         // tidy.setOnlyErrors(true);
+         tidy.setErrout(new PrintWriter(err));
+
+         final InputStream inStream = new ByteArrayInputStream(
+               inData.getBytes("utf-8"));
+
+         final ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+         tidy.parse(inStream, out);
+
+         final String resultString = new String(out.toByteArray(), "utf-8");
+
+         final int start = resultString.indexOf("<body>");
+         final int end = resultString.lastIndexOf("</body>");
+
+         if (start != -1 && end != -1)
+         {
+            result = resultString.substring(
+                  start + "<body>\n".length(), end).trim();
+         }
+         else
+         {
+            result = "Invalid HTML could not be parsed.";
+         }
+
+         if (tidy.getParseWarnings() == 0 && tidy.getParseErrors() == 0)
+         {
+            mWarnings = "";
+         }
+         else
+         {
+            mWarnings = err.toString();
+         }
+         mHasErrors = (tidy.getParseErrors() == 0);
+      }
+      catch (Exception ex)
+      {
+         result = "Invalid HTML could not be parsed.";
+         err.write(result);
+         err.write("Got exception:");
+         err.write(ex.toString());
+         ex.printStackTrace(new PrintWriter(err));
+         mWarnings = err.toString();
+         logger.log(Level.FINER,
+               "Could not handle html fragment. '" + in + "'." , ex);
+         mHasErrors = true;
+      }
+      if (logger.isLoggable(Level.FINER))
+      {
+         logger.exiting(CLASSNAME, "clean(CharSequence)", result);
+      }
+      return result;
+   }
+
+   /**
+    * Returns the warnings encountered during last clean.
+    * @return the warnings encountered during last clean.
+    */
+   public String getWarnings ()
+   {
+      return mWarnings;
+   }
+
+   public boolean hasErrors ()
+   {
+      return mHasErrors;
+   }
+}
+