Skip to content

Commit

Permalink
Takeover from fawkez 1.0.
Browse files Browse the repository at this point in the history
  • Loading branch information
amandel committed Jul 21, 2012
1 parent 823c1d8 commit ee8a0a3
Show file tree
Hide file tree
Showing 5 changed files with 1,451 additions and 0 deletions.
90 changes: 90 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Maven coordinates of this artifact. -->
  <groupId>org.jcoderz.fawkez</groupId>
  <artifactId>doctools</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>fawkez-doctools</name>
  <description>This is the Maven base project for the FawkeZ javadoc utilities.</description>
  <url>http://jcoderz.github.com</url>
  <organization>
    <name>jCoderZ</name>
    <url>http://www.jcoderz.org/</url>
  </organization>
  <licenses>
    <license>
      <name>The BSD 3-Clause License</name>
      <url>http://www.opensource.org/licenses/BSD-3-Clause</url>
      <distribution>repo</distribution>
    </license>
  </licenses>
  <scm>
    <url>https://github.com/jCoderZ/fawkez-doctools</url>
    <!--
      Maven SCM URLs have the form scm:<provider>:<provider-specific-url>.
      The previous value "scm:git:://github.com/..." had no transport scheme
      after the provider prefix and could not be resolved.
    -->
    <connection>scm:git:https://github.com/jCoderZ/fawkez-doctools.git</connection>
    <developerConnection>scm:git:https://github.com/jCoderZ/fawkez-doctools.git</developerConnection>
  </scm>

  <issueManagement>
    <system>github</system>
    <url>https://github.com/jCoderZ/fawkez-doctools/issues</url>
  </issueManagement>

  <developers>
    <developer>
      <id>amandel</id>
      <name>Andreas Mandel</name>
      <email>[email protected]</email>
      <organization>jCoderZ</organization>
      <organizationUrl>https://github.com/jCoderZ</organizationUrl>
    </developer>
    <developer>
      <id>mrumpf</id>
      <name>Michael Rumpf</name>
      <email>[email protected]</email>
      <organization>jCoderZ</organization>
      <organizationUrl>https://github.com/jCoderZ</organizationUrl>
    </developer>
    <developer>
      <id>mgriffel</id>
      <name>Michael Griffel</name>
      <email>[email protected]</email>
      <organization>jCoderZ</organization>
      <organizationUrl>https://github.com/jCoderZ</organizationUrl>
    </developer>
  </developers>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.jcoderz.fawkez</groupId>
      <artifactId>fawkez-commons</artifactId>
      <version>2.0.0</version>
    </dependency>
    <!-- HTML clean-up library used by HtmlCleaner (org.w3c.tidy). -->
    <dependency>
      <groupId>jtidy</groupId>
      <artifactId>jtidy</artifactId>
      <version>4aug2000r7-dev</version>
    </dependency>

    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.10</version>
      <scope>test</scope>
    </dependency>
    <!--
      Resolved from the local JDK installation rather than a repository.
      NOTE(review): presumably required for the javadoc doclet API; the path
      assumes java.home points at the JRE directory inside a JDK - confirm
      for the JDK versions this build has to support.
    -->
    <dependency>
      <groupId>com.sun</groupId>
      <artifactId>tools</artifactId>
      <version>1.7.0</version>
      <scope>system</scope>
      <systemPath>${java.home}/../lib/tools.jar</systemPath>
    </dependency>
  </dependencies>

</project>
161 changes: 161 additions & 0 deletions src/main/java/org/jcoderz/fawkez/doctools/doclet/HtmlCleaner.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
* $Id: HtmlCleaner.java 1011 2008-06-16 17:57:36Z amandel $
*
* Copyright 2006, The jCoderZ.org Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
* * Neither the name of the jCoderZ.org Project nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.jcoderz.fawkez.doctools.doclet;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.w3c.tidy.Configuration;
import org.w3c.tidy.Tidy;

/**
 * This class provides an easy interface to jTidy to clean up
 * html fragments as used within javadoc.
 *
 * <p>The warnings and the error flag of the most recent
 * {@link #clean(CharSequence)} call are kept in instance fields and are
 * overwritten by every further call.</p>
 *
 * @author Andreas Mandel
 */
public class HtmlCleaner
{
    /** The full qualified name of this class. */
    private static final String CLASSNAME = HtmlCleaner.class.getName();

    /** The logger to use. */
    private static final Logger logger = Logger.getLogger(CLASSNAME);

    /** Document prefix put in front of the fragment before parsing. */
    private static final String FIX_HEADER
        = "<html><head><title>clean</title></head><body>";

    /** Document suffix appended to the fragment before parsing. */
    private static final String FIX_FOOTER
        = "</body></html>";

    /** Opening tag that marks the start of the cleaned fragment. */
    private static final String BODY_OPEN = "<body>";

    /** Closing tag that marks the end of the cleaned fragment. */
    private static final String BODY_CLOSE = "</body>";

    /** Encoding used for the byte streams exchanged with jTidy. */
    private static final String ENCODING = "utf-8";

    /** Placeholder returned when no cleaned fragment could be produced. */
    private static final String PARSE_FAILURE_MESSAGE
        = "Invalid HTML could not be parsed.";

    private String mWarnings = "";
    private boolean mHasErrors = false;

    /**
     * Converts the given HTML fragment string into wellformed xhtml.
     *
     * <p>The fragment is wrapped into a minimal html document, run
     * through jTidy and the content of the resulting body element is
     * returned. If no body could be extracted or an exception occurs,
     * a fixed placeholder message is returned instead and
     * {@link #hasErrors()} reports <code>true</code>.</p>
     *
     * @param in the html fragment to be cleaned up.
     * @return a cleaned up wellformed xhtml version of the in string.
     */
    public String clean (CharSequence in)
    {
        if (logger.isLoggable(Level.FINER))
        {
            logger.entering(CLASSNAME, "clean(CharSequence)", in);
        }
        mHasErrors = false;
        final Tidy tidy = new Tidy();
        final String inData = FIX_HEADER + in + FIX_FOOTER;
        final StringWriter err = new StringWriter();
        String result = null;
        try
        {
            final PrintWriter errOut = new PrintWriter(err);
            tidy.setCharEncoding(Configuration.UTF8);
            tidy.setMakeClean(true);
            tidy.setXmlOut(true);
            tidy.setRawOut(true);
            tidy.setNumEntities(true);
            tidy.setWraplen(0); // do not care about line length
            tidy.setErrout(errOut);

            final InputStream inStream = new ByteArrayInputStream(
                inData.getBytes(ENCODING));
            final ByteArrayOutputStream out = new ByteArrayOutputStream();

            tidy.parse(inStream, out);
            // Make sure all diagnostics reached the StringWriter.
            errOut.flush();

            final String body
                = extractBody(new String(out.toByteArray(), ENCODING));
            final boolean bodyFound = (body != null);
            result = bodyFound ? body : PARSE_FAILURE_MESSAGE;

            final int parseErrors = tidy.getParseErrors();
            if (tidy.getParseWarnings() == 0 && parseErrors == 0)
            {
                mWarnings = "";
            }
            else
            {
                mWarnings = err.toString();
            }
            // Errors are present if jTidy counted any, or if the result
            // is only the failure placeholder.
            mHasErrors = (parseErrors != 0) || !bodyFound;
        }
        catch (Exception ex)
        {
            // Best effort: callers get the placeholder plus the details
            // via getWarnings() instead of an exception.
            result = PARSE_FAILURE_MESSAGE;
            err.write(result);
            err.write("Got exception:");
            err.write(ex.toString());
            final PrintWriter traceOut = new PrintWriter(err);
            ex.printStackTrace(traceOut);
            traceOut.flush();
            mWarnings = err.toString();
            logger.log(Level.FINER,
                "Could not handle html fragment. '" + in + "'." , ex);
            mHasErrors = true;
        }
        if (logger.isLoggable(Level.FINER))
        {
            logger.exiting(CLASSNAME, "clean(CharSequence)", result);
        }
        return result;
    }

    /**
     * Returns the warnings encountered during last clean.
     * @return the warnings encountered during last clean.
     */
    public String getWarnings ()
    {
        return mWarnings;
    }

    /**
     * Returns whether the last clean run encountered errors.
     * @return <code>true</code> if the last call to
     *      {@link #clean(CharSequence)} reported parse errors, could
     *      not extract a result or failed with an exception.
     */
    public boolean hasErrors ()
    {
        return mHasErrors;
    }

    /**
     * Extracts the trimmed content between the first opening and the
     * last closing body tag of the given document.
     *
     * @param document the complete document as written by jTidy.
     * @return the trimmed body content, or <code>null</code> if the
     *      document holds no matching pair of body tags.
     */
    private static String extractBody (String document)
    {
        final int start = document.indexOf(BODY_OPEN);
        final int end = document.lastIndexOf(BODY_CLOSE);
        if (start == -1 || end == -1)
        {
            return null;
        }
        // Skip only the tag itself; trim() removes a following line
        // break, so no content character is lost if there is none.
        final int contentStart = start + BODY_OPEN.length();
        if (end < contentStart)
        {
            return null;
        }
        return document.substring(contentStart, end).trim();
    }
}

Loading

0 comments on commit ee8a0a3

Please sign in to comment.