// index.js (forked from wvbe/slimdom-sax-parser)
const sax = require('sax');
const slimdom = require('slimdom');
// Prefix-to-namespace-URI bindings that are in scope before any element has declared its own:
// the empty prefix (no default namespace) maps to null, and "xml" maps to the XML namespace.
const defaultNamespaceMapping = {
  '': null,
  xml: 'http://www.w3.org/XML/1998/namespace'
};
/*
 * Split the text of a DOCTYPE declaration into the three values needed to create a document type
 * node: the qualified name, the public identifier and the system identifier.
 *
 * Identifiers are only read when the token after the name is the PUBLIC or SYSTEM keyword, so that
 * a SYSTEM identifier is not mistaken for a public identifier and the contents of an internal
 * subset ("[ ... ]") are not mistaken for identifiers. Both "double" and 'single' quoted literals
 * are recognised. Missing identifiers are returned as empty strings.
 *
 * Throws an Error if the declaration contains no name at all.
 */
function parseDocType (data) {
  // A token is a run of non-whitespace characters and/or quoted literals (which may contain spaces)
  const tokens = data.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g);
  if (!tokens) {
    throw new Error(`Could not parse DOCTYPE declaration "${data}"`);
  }

  // Strip the surrounding quotes of a literal; anything that is not a quoted literal is ignored
  const readLiteral = (token) => {
    if (token === undefined || token.length < 2) {
      return '';
    }
    const quote = token[0];
    const isQuoted = (quote === '"' || quote === '\'') && token[token.length - 1] === quote;
    return isQuoted ? token.slice(1, -1) : '';
  };

  const [qualifiedName, keyword, firstLiteral, secondLiteral] = tokens;
  let publicId = '';
  let systemId = '';
  if (keyword === 'PUBLIC') {
    publicId = readLiteral(firstLiteral);
    systemId = readLiteral(secondLiteral);
  } else if (keyword === 'SYSTEM') {
    systemId = readLiteral(firstLiteral);
  }

  return { qualifiedName, publicId, systemId };
}

/*
 * Create the required callbacks for populating a new document from sax event handlers.
 *
 * Returns an object with one callback per sax parser event (onText, onOpenTag, onCloseTag,
 * onProcessingInstruction, onComment, onDocType, onOpenCdata, onCdata, onCloseCdata) plus
 * getDocument(), which returns the slimdom document that the callbacks have been filling.
 *
 * onOpenTag throws an Error when an element or attribute uses a namespace prefix that is not
 * declared in scope; onDocType throws an Error when the declaration has no name.
 */
function createHandler () {
  const doc = new slimdom.Document();

  // Is rewritten as the handler traverses in and out of nodes
  let dom = doc;

  // Rewritten to accumulate a text stream
  let cdata = null;

  // Stack with the namespace info of every element that is currently open, outermost first
  const namespaces = [defaultNamespaceMapping];

  // The prefix lookups have no prototype, so that a prefix like "constructor" or "toString" is
  // not accidentally considered declared because Object.prototype happens to have that property.
  const mergeNamespaces = (accum, ns) => Object.assign(accum, ns);
  let currentNamespaces = mergeNamespaces(Object.create(null), defaultNamespaceMapping);

  return {
    onText: (text) => {
      if (dom.nodeType === slimdom.Node.DOCUMENT_NODE) {
        // Do not add text directly to document node (aka. outside document element)
        return;
      }

      dom.appendChild(doc.createTextNode(text));
    },

    onOpenTag: (node) => {
      namespaces.push(node.ns);
      currentNamespaces = mergeNamespaces(currentNamespaces, node.ns);

      if (currentNamespaces[node.prefix] === undefined) {
        throw new Error(`Namespace prefix "${node.prefix}" not known for element "${node.name}"`);
      }

      // The new element becomes the current node until its close tag is seen
      dom = dom.appendChild(doc.createElementNS(currentNamespaces[node.prefix], node.name));

      // Set attributes, taking the accumulated namespace information into account. Namespace
      // declarations themselves (xmlns="..." and xmlns:prefix="...") are not set as attributes.
      Object.keys(node.attributes)
        .map(name => node.attributes[name])
        .filter(attr => attr.prefix !== 'xmlns' && !(attr.prefix === '' && attr.name === 'xmlns'))
        .forEach(attr => {
          if (currentNamespaces[attr.prefix] === undefined) {
            throw new Error(`Namespace prefix "${attr.prefix}" not known for attribute "${attr.name}"`);
          }

          // An attribute without prefix is in no namespace, also when a default namespace is set
          dom.setAttributeNS(attr.prefix === '' ? null : currentNamespaces[attr.prefix], attr.name, attr.value);
        });
    },

    onCloseTag: () => {
      dom = dom.parentNode;

      if (!namespaces.pop()) {
        // The namespace info for the level that is popped was empty, so exit early
        return;
      }

      // Recalculate the (subset) portion of known namespace information
      currentNamespaces = namespaces.reduce(mergeNamespaces, Object.create(null));
    },

    onProcessingInstruction: (pi) => {
      // A processing instruction named "xml" directly in the document is not added to the DOM.
      // The node type constant is read from slimdom.Node, the same way onText does it.
      if (pi.name === 'xml' && dom.nodeType === slimdom.Node.DOCUMENT_NODE) {
        return;
      }

      dom.appendChild(doc.createProcessingInstruction(pi.name, pi.body));
    },

    onComment: (comment) => {
      dom.appendChild(doc.createComment(comment));
    },

    onDocType: (data) => {
      const { qualifiedName, publicId, systemId } = parseDocType(data);

      dom.appendChild(doc.implementation.createDocumentType(qualifiedName, publicId, systemId));
    },

    onOpenCdata: () => {
      cdata = '';
    },

    onCdata: (string) => {
      // The contents of one CDATA section may arrive in several pieces
      cdata += string;
    },

    onCloseCdata: () => {
      dom.appendChild(doc.createCDATASection(cdata));
      cdata = null;
    },

    getDocument: () => {
      return doc;
    }
  };
}
/*
 * Export the API of slimdom-sax-parser
 */
// Expose the same slimdom module that createHandler builds its documents with.
exports.slimdom = slimdom;
/**
* Synchronously parse a string of XML to a Slimdom document
* @param {string} xml
* @returns {slimdom.Document}
*/
exports.sync = function synchronousSlimdomSaxParser (xml) {
const handler = createHandler();
// Set up the sax parser
const parser = sax.parser(true, {
xmlns: true
});
parser.ontext = handler.onText;
parser.onopentag = handler.onOpenTag;
parser.onclosetag = handler.onCloseTag;
parser.onprocessinginstruction = handler.onProcessingInstruction;
parser.oncomment = handler.onComment;
parser.ondoctype = handler.onDocType;
parser.onopencdata = handler.onOpenCdata;
parser.oncdata = handler.onCdata;
parser.onclosecdata = handler.onCloseCdata;
parser.write(xml).close();
return handler.getDocument();
};