Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Quote in parameters patch #28

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
v1.8.0
* Pulled changes from "bitter" for RequireJS support

v1.7.3
* Renamed node-htmlparser.* to htmlparser.* and created shims for people still expecting node-htmlparser.*
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ A forgiving HTML/XML/RSS parser written in JS for both the browser and NodeJS (y
###Run tests under node:
node runtests.js

###Run tests under node using RequireJS:
node r.js runtests.js

###Run tests in browser:
View runtests.html in any browser

Expand Down
65 changes: 57 additions & 8 deletions lib/htmlparser.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***********************************************
Copyright 2010, Chris Winberry <[email protected]>. All rights reserved.
Copyright 2010, 2011, Chris Winberry <[email protected]>. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
Expand All @@ -18,25 +18,21 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
***********************************************/
/* v1.7.2 */
/* v1.8.0 */

(function () {

function runningInNode () {
function runAsModule () {
return(
(typeof require) == "function"
&&
(typeof exports) == "object"
&&
(typeof module) == "object"
&&
(typeof __filename) == "string"
&&
(typeof __dirname) == "string"
);
}

if (!runningInNode()) {
if (!runAsModule()) {
if (!this.Tautologistics)
this.Tautologistics = {};
else if (this.Tautologistics.NodeHtmlParser)
Expand Down Expand Up @@ -304,6 +300,59 @@ function Parser (handler, options) {

//Processing of non-special tags
if (element.type == ElementType.Tag) {

// Hack: make the parser correctly evaluate tags such as <input value="<">.
// It supports both ' and " quotes, and it also knows about the escape character (backslash).
// I haven't actually seen browsers support that, but it's done. Zibx 14 aug 2011


if( (rawData.length - rawData.replace(/\"/g,'').length) % 2 !== 0 || (rawData.length - rawData.replace(/\'/g,'').length) % 2 !== 0 ){
// we only get here if the number of double or single quotes in the supposed tag is odd
var nest = 1;
var findCloseTagPos = this._current;
var currentChar = '';
var quote = false;
var quoteType = false; //false == " : true == '
var previousChar = '';
var unlimLoop = 0; //this var indicates that something goes wrong, but we don't want to suspend forever
do{
unlimLoop++;
currentChar = this._buffer.charAt(findCloseTagPos);

//comment out everything about previousChar to disable escape-slash support
if( previousChar !== '\\' && ( ( currentChar === '"' && quoteType ) || ( currentChar === "'" && !quoteType ) ) ){
if(!quote){
quoteType = currentChar === '"';
}

quote = !quote;
}
if( !quote && currentChar == '<' ){
nest--;
}

previousChar = currentChar;
// really, the best way would be to search only for the needed characters instead of looking at each one
findCloseTagPos++;
if( unlimLoop > 5000 )break;
}while( nest > 0 && findCloseTagPos < this._buffer.length);

if( unlimLoop <= 5000 ){
//if nothing went wrong, let's reinit the element and all dependent vars
this._next = findCloseTagPos - 2;
var tagSep = this._buffer.charAt(this._next); //The currently found tag marker
var rawData = this._buffer.substring(this._current, this._next); //The next chunk of data to parse

//A new element to eventually be appended to the element list
var element = {
raw: rawData
, data: (this._parseState == ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
, type: this._parseState
};

var elementName = this.parseTagName(element.data);
}
}
element.name = elementName;

if (element.raw.indexOf("!--") == 0) { //This tag is really comment
Expand Down
6 changes: 3 additions & 3 deletions lib/htmlparser.min.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions lib/node-htmlparser.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ exports.DefaultHandler = htmlparser.DefaultHandler;
exports.RssHandler = htmlparser.RssHandler;
exports.ElementType = htmlparser.ElementType;
exports.DomUtils = htmlparser.DomUtils;

117 changes: 117 additions & 0 deletions r.js

Large diffs are not rendered by default.

74 changes: 74 additions & 0 deletions runtests.rjs.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/***********************************************
Copyright 2010, Chris Winberry <[email protected]>. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
***********************************************/
// Test runner loaded through an AMD-style require(): it loads every module found
// in the "tests" folder, parses each test's HTML both in one pass and in small
// chunks, and compares the resulting DOM against the test's expected structure.
require(['sys', 'fs', 'lib/htmlparser'], function(sys, fs, htmlparser) {

	var testFolder = "tests";
	var chunkSize = 5; //Number of characters handed to parseChunk() per call

	var testFiles = fs.readdirSync(testFolder);
	var testCount = 0;
	var failedCount = 0;

	//Reports errors passed to the handler's completion callback
	function handlerCallback (error) {
		if (error)
			sys.puts("Handler error: " + error);
	}

	//Runs a single loaded test module and prints its outcome.
	//Reads test.type, test.options, test.html, test.expected and test.name.
	function runTest (test) {
		var handler = (test.type == "rss") ?
			new htmlparser.RssHandler(handlerCallback, test.options)
			:
			new htmlparser.DefaultHandler(handlerCallback, test.options)
			;
		var parser = new htmlparser.Parser(handler);

		//Pass 1: the whole document at once
		parser.parseComplete(test.html);
		var resultComplete = handler.dom;

		//Pass 2: the same document fed in chunkSize-character slices
		parser.reset();
		for (var chunkPos = 0; chunkPos < test.html.length; chunkPos += chunkSize) {
			parser.parseChunk(test.html.substring(chunkPos, chunkPos + chunkSize));
		}
		parser.done();
		var resultChunk = handler.dom;

		//Results are compared through their inspect() text; each is rendered once
		var expectedText = sys.inspect(test.expected, false, null);
		var completeText = sys.inspect(resultComplete, false, null);
		var chunkText = sys.inspect(resultChunk, false, null);
		var testResult = (completeText === expectedText) && (chunkText === expectedText);

		sys.puts("[" + test.name + "]: " + (testResult ? "passed" : "FAILED"));
		if (!testResult) {
			failedCount++;
			sys.puts("== Complete ==");
			sys.puts(completeText);
			sys.puts("== Chunked ==");
			sys.puts(chunkText);
			sys.puts("== Expected ==");
			sys.puts(expectedText);
		}
	}

	//Indexed loop instead of for...in: for...in on an array would also visit any
	//enumerable properties added to the array or its prototype
	for (var i = 0; i < testFiles.length; i++) {
		testCount++;
		//Strip the last extension to obtain the module id
		var fileParts = testFiles[i].split(".");
		fileParts.pop();
		var moduleName = fileParts.join(".");
		require([testFolder + "/" + moduleName], runTest);
	}

	//NOTE(review): the totals are printed right after the loop; if the loader calls
	//runTest asynchronously, failedCount may not be final here - confirm against the loader
	sys.puts("Total tests: " + testCount);
	sys.puts("Failed tests: " + failedCount);
});
74 changes: 74 additions & 0 deletions runtests.rjs.min.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/***********************************************
Copyright 2010, Chris Winberry <[email protected]>. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
***********************************************/
// Test runner for the minified build, loaded through an AMD-style require(): it
// loads every module found in the "tests" folder, parses each test's HTML both in
// one pass and in small chunks, and compares the resulting DOM against the test's
// expected structure.
require(['sys', 'fs', 'lib/htmlparser.min'], function(sys, fs, htmlparser) {

	var testFolder = "tests";
	var chunkSize = 5; //Number of characters handed to parseChunk() per call

	var testFiles = fs.readdirSync(testFolder);
	var testCount = 0;
	var failedCount = 0;

	//Reports errors passed to the handler's completion callback
	function handlerCallback (error) {
		if (error)
			sys.puts("Handler error: " + error);
	}

	//Runs a single loaded test module and prints its outcome.
	//Reads test.type, test.options, test.html, test.expected and test.name.
	function runTest (test) {
		var handler = (test.type == "rss") ?
			new htmlparser.RssHandler(handlerCallback, test.options)
			:
			new htmlparser.DefaultHandler(handlerCallback, test.options)
			;
		var parser = new htmlparser.Parser(handler);

		//Pass 1: the whole document at once
		parser.parseComplete(test.html);
		var resultComplete = handler.dom;

		//Pass 2: the same document fed in chunkSize-character slices
		parser.reset();
		for (var chunkPos = 0; chunkPos < test.html.length; chunkPos += chunkSize) {
			parser.parseChunk(test.html.substring(chunkPos, chunkPos + chunkSize));
		}
		parser.done();
		var resultChunk = handler.dom;

		//Results are compared through their inspect() text; each is rendered once
		var expectedText = sys.inspect(test.expected, false, null);
		var completeText = sys.inspect(resultComplete, false, null);
		var chunkText = sys.inspect(resultChunk, false, null);
		var testResult = (completeText === expectedText) && (chunkText === expectedText);

		sys.puts("[" + test.name + "]: " + (testResult ? "passed" : "FAILED"));
		if (!testResult) {
			failedCount++;
			sys.puts("== Complete ==");
			sys.puts(completeText);
			sys.puts("== Chunked ==");
			sys.puts(chunkText);
			sys.puts("== Expected ==");
			sys.puts(expectedText);
		}
	}

	//Indexed loop instead of for...in: for...in on an array would also visit any
	//enumerable properties added to the array or its prototype
	for (var i = 0; i < testFiles.length; i++) {
		testCount++;
		//Strip the last extension to obtain the module id
		var fileParts = testFiles[i].split(".");
		fileParts.pop();
		var moduleName = fileParts.join(".");
		require([testFolder + "/" + moduleName], runTest);
	}

	//NOTE(review): the totals are printed right after the loop; if the loader calls
	//runTest asynchronously, failedCount may not be final here - confirm against the loader
	sys.puts("Total tests: " + testCount);
	sys.puts("Failed tests: " + failedCount);
});
8 changes: 2 additions & 6 deletions tests/01-basic.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
(function () {

function RunningInNode () {
function runAsModule () {
return(
(typeof require) == "function"
&&
(typeof exports) == "object"
&&
(typeof module) == "object"
&&
(typeof __filename) == "string"
&&
(typeof __dirname) == "string"
);
}

if (!RunningInNode()) {
if (!runAsModule()) {
if (!this.Tautologistics)
this.Tautologistics = {};
if (!this.Tautologistics.NodeHtmlParser)
Expand Down
8 changes: 2 additions & 6 deletions tests/02-single_tag_1.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
(function () {

function RunningInNode () {
function runAsModule () {
return(
(typeof require) == "function"
&&
(typeof exports) == "object"
&&
(typeof module) == "object"
&&
(typeof __filename) == "string"
&&
(typeof __dirname) == "string"
);
}

if (!RunningInNode()) {
if (!runAsModule()) {
if (!this.Tautologistics)
this.Tautologistics = {};
if (!this.Tautologistics.NodeHtmlParser)
Expand Down
8 changes: 2 additions & 6 deletions tests/03-single_tag_2.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
(function () {

function RunningInNode () {
function runAsModule () {
return(
(typeof require) == "function"
&&
(typeof exports) == "object"
&&
(typeof module) == "object"
&&
(typeof __filename) == "string"
&&
(typeof __dirname) == "string"
);
}

if (!RunningInNode()) {
if (!runAsModule()) {
if (!this.Tautologistics)
this.Tautologistics = {};
if (!this.Tautologistics.NodeHtmlParser)
Expand Down
8 changes: 2 additions & 6 deletions tests/04-unescaped_in_script.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
(function () {

function RunningInNode () {
function runAsModule () {
return(
(typeof require) == "function"
&&
(typeof exports) == "object"
&&
(typeof module) == "object"
&&
(typeof __filename) == "string"
&&
(typeof __dirname) == "string"
);
}

if (!RunningInNode()) {
if (!runAsModule()) {
if (!this.Tautologistics)
this.Tautologistics = {};
if (!this.Tautologistics.NodeHtmlParser)
Expand Down
8 changes: 2 additions & 6 deletions tests/05-tags_in_comment.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
(function () {

function RunningInNode () {
function runAsModule () {
return(
(typeof require) == "function"
&&
(typeof exports) == "object"
&&
(typeof module) == "object"
&&
(typeof __filename) == "string"
&&
(typeof __dirname) == "string"
);
}

if (!RunningInNode()) {
if (!runAsModule()) {
if (!this.Tautologistics)
this.Tautologistics = {};
if (!this.Tautologistics.NodeHtmlParser)
Expand Down
Loading