-
Notifications
You must be signed in to change notification settings - Fork 2
/
scriptformatter.js
388 lines (368 loc) · 16.9 KB
/
scriptformatter.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
(function(opera){
/* options */
// rewriteEval: when true, prettyPrintJavaScript() replaces window.eval and window.Function with
// wrappers that pass the code through the formatter before running it.
var rewriteEval=false; // override window.eval() - might fail with some scripts, for example Google maps.
// this option is entirely ignored on maps.google.* (hosts whose name starts with "maps.google"
// get a callback(req.responseText) rewrite instead of the eval/Function wrappers)
//opera.postError('scriptformatter active');
/**
 * Re-formats JavaScript source text: adds a line break plus tab indentation after "{" and ";"
 * and around "}" wherever those characters occur in plain code.
 * A character-by-character state machine tracks strings, regexp literals, regexp character
 * classes, backslash escapes and comments so that braces and semicolons inside them are
 * copied through untouched.
 *
 * @param {string} theCode  source text to format
 * @param {boolean} [evalMode]  flag passed by the eval/Function hooks; only referenced by disabled code
 * @param {string} [src]  script URL, used in the error message only
 * @returns {string} the formatted source; for empty/missing input an error is posted and a
 *                   placeholder comment containing the word "nothing" is returned
 */
function prettyPrintJavaScript( theCode, evalMode, src ){
	// if(src)opera.postError('formatting '+(src?src:evalMode?'eval script':'inline script')+'...');
	if( !theCode ){
		opera.postError('prettyPrintJavaScript failed for '+theCode+' '+src);
		return '/*nothing*/';
	}
	/* defining the various scopes we care about for this exercise */
	var CODE = 0; /* normal JS code */
	var STRING_DBL = 1; /* double quoted string */
	var STRING_SGL = 2; /* single quoted string */
	var REGEXP = 3; /* regexp literal */
	var ESCAPE = 4; /* some escape char (backslash) */
	var MULTI_LINE_COMMENT = 5;
	var SINGLE_LINE_COMMENT = 6;
	var REGEXP_CHAR_CLASS = 7; /* inside a [ ... ] clause in a regular expression. Requires its own scope because /[/]/ is a valid regexp */
	var $output = '';
	var $num_indents = 0;
	var current_index = 0;
	var current_letter = '';
	var theScope = CODE;
	var $before_escape_scope = 0;
	var $at_start_of_statement_or_expression = true; /* used to distinguish divisor from regexp literal */
	var $last_complete_word = ''; /* some rudimentary tokenisation is required for the divisor-or-regexp problem */
	var $statement_words = ['return', 'typeof', 'instanceof', 'break', 'continue', 'delete', 'in', 'new', 'throw'];
	var newLine = "\n";
	/* declared locally: these used to be implicit globals, which leaked into the page and throw in strict mode */
	var $pre, $post, $tmp;
	var $comment_start = -1; /* index of the "/" that opened the current multi-line comment */
	while( current_index < theCode.length ){
		current_letter = theCode.charAt( current_index );
		$pre = ''; /* add this string *before* this character when constructing output */
		$post = ''; /* add this string *after* this character when constructing output */
		switch( current_letter ){
			case '"': /* double quote */
				switch( theScope ){
					case STRING_DBL:
						theScope = CODE; break; /* a non-escaped quote inside string terminates string */
					case ESCAPE:
						theScope = $before_escape_scope; break; /* the quote was escaped, return to previous scope */
					case CODE:
						theScope = STRING_DBL; /* start-of-string double quote */
						$at_start_of_statement_or_expression = false;
				}
				break;
			case '\'': /* single quote */
				switch( theScope ){
					case STRING_SGL:
						theScope = CODE; break; /* a non-escaped quote inside string terminates string */
					case ESCAPE:
						theScope = $before_escape_scope; break; /* the quote was escaped, return to previous scope */
					case CODE:
						theScope = STRING_SGL; /* start-of-string single quote */
						$at_start_of_statement_or_expression = false;
				}
				break;
			case '\\':
				if( theScope == STRING_DBL || theScope == STRING_SGL || theScope == REGEXP || theScope == REGEXP_CHAR_CLASS ){
					$before_escape_scope = theScope;
					theScope = ESCAPE; /* next character not to be taken seriously (well..) */
				}else if( theScope == ESCAPE ){ /* handle escaped backslashes "\\" */
					theScope = $before_escape_scope;
				}
				break;
			case '/':
				if( theScope == CODE ){ /* lookahead: start of comment or something else? */
					$tmp = theCode.charAt( current_index+1 );
					if( $tmp == '*' ){ /* start of multi-line comment */
						theScope = MULTI_LINE_COMMENT;
						$comment_start = current_index;
					}else if( $tmp == '/' ){ /* start of single-line comment */
						theScope = SINGLE_LINE_COMMENT;
					}else if( $at_start_of_statement_or_expression || in_array( $last_complete_word, $statement_words ) ){ /* start of regexp */
						theScope = REGEXP;
					}
				}else if( theScope == ESCAPE ){
					theScope = $before_escape_scope;
				}else if( theScope == REGEXP ){
					theScope = CODE;
				}else if( theScope == MULTI_LINE_COMMENT ){ /* time to leave the comment?? */
					// The star in front of this slash must not be the star that opened the comment,
					// otherwise slash-star-slash would count as a complete comment.
					if( current_index - $comment_start > 2 && theCode.charAt( current_index-1 ) == '*' ){
						theScope = CODE; /* we only enter multi-line-comment mode from CODE scope AFAIK */
					}
				}
				break;
			case '{':
				if( theScope == CODE ){ /* start-of-block curly brace */
					/* do not wrap and indent empty blocks (object literal) */
					if( lookahead( theCode, current_index, true ) == '}' ){ /* we have an empty literal. We'll simply add a closing brace and jump ahead */
						current_index = theCode.indexOf( '}', current_index );
						$post = '}';
						break;
					}
					$num_indents++;
					if( theCode.charAt( current_index+1 ) != '\n' ){
						$post = newLine;
						$post += str_repeat( "\t", $num_indents );
					}
					$at_start_of_statement_or_expression = true;
				}else if( theScope == ESCAPE ){
					theScope = $before_escape_scope;
				}
				break;
			case '}':
				if( theScope == CODE ){ /* end-of-block curly brace */
					if( $num_indents > 0 ) $num_indents--;
					$pre = newLine;
					$pre += str_repeat( "\t", $num_indents );
					$post = ( theCode.charAt( current_index+1 ) != '\n' ? newLine : '' ) + str_repeat( "\t", $num_indents );
				}else if( theScope == ESCAPE ){
					theScope = $before_escape_scope;
				}
				break;
			case ';':
				if( theScope == CODE ){ /* end-of-statement semicolon */
					$post = ( theCode.charAt( current_index+1 ) != '\n' ? newLine : '' );
					$post += str_repeat( "\t", $num_indents );
					$at_start_of_statement_or_expression = true;
				}else if( theScope == ESCAPE ){
					theScope = $before_escape_scope;
				}
				break;
			case "\n":
				if( theScope == SINGLE_LINE_COMMENT ){
					theScope = CODE; /* we only enter SINGLE_LINE_COMMENT mode from CODE, right? */
				}else if( theScope == ESCAPE ){
					theScope = $before_escape_scope;
				}
				/* no break, we want to get to the $at_start_of_statement_or_expression bit below */
			case '(':
			case '!':
			case '=':
			case '-':
			case '+':
			case '?':
			case '*':
			case '&':
			case ':':
			case ',':
			case '|':
				if( theScope == CODE ){
					$at_start_of_statement_or_expression = true; /* at start of parens, after equal sign etc.. if the next char is a forward slash it will be a start-of-regexp, not a divisor */
				}else if( theScope == ESCAPE ){
					theScope = $before_escape_scope;
				}
				break;
			case '[':
				if( theScope == REGEXP ){
					theScope = REGEXP_CHAR_CLASS;
					$at_start_of_statement_or_expression = false;
				}else if( theScope == ESCAPE ){
					theScope = $before_escape_scope;
				}
				break;
			case ']':
				if( theScope == REGEXP_CHAR_CLASS ){
					theScope = REGEXP;
					$at_start_of_statement_or_expression = false;
				}else if( theScope == ESCAPE ){
					theScope = $before_escape_scope;
				}
				break;
			default:
				if( theScope == ESCAPE ){
					theScope = $before_escape_scope; /* always drop out of escape mode on next character.. yes, multi-char escapes exist but it's OK to treat the rest of it as part of the string or regexp */
				}
				if( theScope == CODE ){
					if( !( current_letter == ' ' || current_letter == '\t' ) ) $at_start_of_statement_or_expression = false;
				}
				break;
		}
		if( current_letter.match(/[a-zA-Z0-9]/) ){
			/* if the previous character was whitespace or punctuation, this starts a new word.. */
			if( ! theCode.charAt( current_index-1 ).match(/[a-zA-Z0-9]/) ){
				$last_complete_word = '';
			}
			$last_complete_word += current_letter;
		}
		$output += $pre + current_letter + $post;
		current_index++;
	}
	// trying to reformat eval()ed code - experimental!
	// using string replace rather than overriding eval() because there are some subtle scoping issues
	// where eval()ed code only runs in the expected scope if called as window.eval() - or something like that
	// $output = $output.replace( /eval\s*\((\w+)/g, 'eval(window.opera.prettyPrintJavaScript($1,true))' );
	// Google Maps hack:
	if( location.hostname.indexOf('maps.google') == 0 ){
		$output = $output.replace( /callback\(req.responseText\)/g, 'callback(window.opera.prettyPrintJavaScript(req.responseText,true))' );
	}else if( rewriteEval ){
		// NOTE(review): this branch runs on every call, so each call wraps the current
		// window.eval / window.Function once more - confirm whether a one-time install is wanted.
		var myEval = window.eval;
		window.eval = function(){
			arguments[0] = window.opera.prettyPrintJavaScript( arguments[0], true );
			return myEval( arguments[0] );
		};
		var myFunction = window.Function;
		window.Function = function(){
			/* the last argument of Function() is the function body */
			arguments[arguments.length-1] = window.opera.prettyPrintJavaScript( arguments[arguments.length-1], true );
			return myFunction.apply( null, arguments );
		};
	}
	return $output;
}
/**
 * Returns str repeated count times (same contract as PHP's str_repeat()).
 *
 * @param {string} str  text to repeat
 * @param {number} count  number of copies; zero, negative or non-numeric counts give ''
 * @returns {string}
 */
function str_repeat( str, count ){
	var copies = Math.floor( Number( count ) );
	if( !( copies > 0 ) ) return '';
	/* joining N+1 empty slots produces N separators, i.e. N copies of str */
	return new Array( copies + 1 ).join( str );
}
/**
 * Tells whether needle occurs in haystack.
 * Comparison is loose (==), as before. Every element is inspected: the loop is bounded by
 * the array length, so falsy entries such as '' or 0 no longer end the search early.
 *
 * @param {*} needle  value to look for
 * @param {Array} haystack  list to search
 * @returns {boolean}
 */
function in_array( needle, haystack ){
	for( var i = 0; i < haystack.length; i++ ){
		if( haystack[i] == needle ) return true;
	}
	return false;
}
/**
 * Returns the character that follows position index in str.
 * With ignore_whitespace set, whitespace characters are skipped and the first
 * non-whitespace character is returned instead. Yields '' when the end of the
 * string is reached.
 */
function lookahead(str, index, ignore_whitespace){
	var whitespaceOnly = /^\s+$/;
	var cursor = index;
	var candidate = str.substr( cursor+1, 1 );
	if( ignore_whitespace ){
		for( ; cursor < str.length && whitespaceOnly.test( candidate ); ){
			cursor++;
			candidate = str.substr( cursor+1, 1 );
		}
	}
	/* past the end of the string substr() gives an empty string; normalise any falsy result to '' */
	return candidate || '';
}
/* Publish the formatter on the opera object; the rewritten eval / Google Maps code calls it as window.opera.prettyPrintJavaScript */
window.opera.prettyPrintJavaScript = prettyPrintJavaScript;
/* On every BeforeScript event, replace the script element's text with its formatted version */
opera.addEventListener( 'BeforeScript', function( beforeScriptEvent ){
	var scriptElement = beforeScriptEvent.element;
	//opera.postError('will format '+scriptElement.src+' '+scriptElement.defer);
	scriptElement.text = prettyPrintJavaScript( scriptElement.text, false, scriptElement.src );
}, false );
/* var originalEval = window.eval;
window.eval = function(str){
var newstr=prettyPrintJavaScript(str, true);
try{
return originalEval(newstr);
}catch(e){opera.postError('trying to work around '+e.message);}
return originalEval(str);
} */
/* opera.addEventListener('BeforeExternalScript', function(e){
if(e.element.hasAttribute('defer'))e.element.removeAttribute('defer');
}, false); */
/* Frameworks like Dojo use XHR to load JS */
/* luckily this only works for same-origin scripts, so we can build a list and XHR them too during inlining */
var xhrScriptsList=[];
/* Wrap XMLHttpRequest.prototype.open so that every requested URL ending in ".js" is remembered in xhrScriptsList */
(function(open){
	XMLHttpRequest.prototype.open=function( method, url ){
		/* open() is called as open(method, url, ...): the URL is the second argument, the first is the HTTP method */
		if( /\.js$/i.test(url) )xhrScriptsList.push( String(url) );
		return open.apply(this, arguments);
	};
})(XMLHttpRequest.prototype.open);
/*
Ctrl + double-click handler: re-fetches the current page, copies the text of external scripts and
the rules of linked style sheets into a detached copy of the markup, and opens the result as a data: URL.
*/
opera.addEventListener('BeforeEvent.dblclick', function(e){
// only react when the Ctrl key was held during the double-click
if(e.event.ctrlKey){
/*
GOAL: *raw* markup (from server) with all scripts that can be easily inlined, inlined.
*/
// synchronous request (third argument false) for the markup of the current URL
var x = new XMLHttpRequest();
x.open('GET', location.href, false);
x.send(null);
// thesrc is only referenced by the disabled string-slicing code further down
var thesrc=x.responseText;
// parse the fetched markup into a detached <html> element
var tmpdom=document.createElement('html');
tmpdom.innerHTML=x.responseText;
if(tmpdom.getElementsByTagName('html')[0]){ // avoid nested <html> tags, keep the inner one
tmpdom=tmpdom.getElementsByTagName('html')[0];
tmpdom=tmpdom.parentNode.removeChild(tmpdom);
}
if(xhrScriptsList.length){ // think some scripts were loaded with XHR here.. let's add them for inlining..
// each recorded URL becomes a <script declare="declare" src=...> appended to the copy
for(var i=0,src;src=xhrScriptsList[i];i++){
var script=tmpdom.appendChild(document.createElement('script'));
script.setAttribute('declare', 'declare');
script.src=src;
}
}
// walk the scripts of the live document; only those with a src attribute are inlined
for(var srcScriptList=document.getElementsByTagName('script'),i=0,srcScript,src;srcScript=srcScriptList[i];i++){
if(src=srcScript.getAttribute('src')){
var found=false;
for(var targetScriptList=tmpdom.getElementsByTagName('script'),j=0,targetScript; targetScript=targetScriptList[j]; j++){ // replace code of all script elements with this source URL
// match on the literal src attribute value; "</script>" inside the code is escaped so it cannot end the element early
if(targetScript.getAttribute('src')===src){
targetScript.appendChild(document.createTextNode('/* .. inlined from '+srcScript.src+' .. */\n\n'+srcScript.text.replace(/<\/script>/gi, '<\\/script>')));
targetScript.removeAttribute('src');
found=true;
}
}
if( ! found ){ /* there is a script in the DOM that wasn't in the original markup..?
EXPERIMENTAL: We add it to HEAD of the document. This can cause errors, it may be better to add after the previously seen script?
*/
// NOTE(review): assumes the copy contains a <head> element; getElementsByTagName('head')[0] is not checked - confirm
var script=document.createElement('script');
script.appendChild(document.createTextNode('/* .. inlined from '+srcScript.src+' .. (originally added through DOM) */\n\n'+srcScript.text.replace(/<\/script>/gi, '<\\/script>')));
tmpdom.getElementsByTagName('head')[0].appendChild(script);
}
/*if( thesrc.indexOf(src)>-1 ){
var urloffset=thesrc.indexOf(src);
var tagContentsStart=thesrc.indexOf( '>', urloffset )+1;
// this will fail under *many* conditions! mixed case being one
var tagContentsEnd=thesrc.indexOf('</script>', tagContentsStart);
if(tagContentsEnd==-1)tagContentsEnd=thesrc.indexOf('</SCRIPT>', tagContentsStart);
var srcAttrStart=thesrc.lastIndexOf(' src', urloffset);
if(srcAttrStart==-1)srcAttrStart=thesrc.lastIndexOf(' SRC', urloffset); // crude - will fail on sRc= or SRc= but those are rare
if(srcAttrStart==-1||tagContentsEnd==1)continue; // haven't found such a src attribute or end tag after all.. would mangle source
thesrc=thesrc.substr(0, srcAttrStart)+'>\n/* inlined from '+src+' *'++'/\n\n'+s.text+thesrc.substr(tagContentsEnd); // remove (presumably) SRC attribute - maybe other attributes too, don't care much about those - and insert script inline
}else{
opera.postError( 'did not find script tag for '+src+' in unparsed markup' );
}
// s.appendChild(document.createTextNode('/* .. inlined from '+s.src+' .. *'++'/\n\n'+s.text.replace(/<\/script>/gi, '<\\/script>')));
// s.removeAttribute('src');
*/
}
}
/* styles too... */
// only <link> elements that have an href attribute and an attached sheet are processed
for(var srcLinksList=document.getElementsByTagName('link'), srcLink, i=0; srcLink=srcLinksList[i]; i++){
if( (src=srcLink.getAttribute('href')) && srcLink.sheet ){ // opera.postError(src+' '+thesrc);
// collect the text of every rule of the sheet, one rule per line
var usedCSS='';
try{
for(var j=0,rule; rule=srcLink.sheet.cssRules[j]; j++){
usedCSS+=rule.cssText+'\n';
}
}catch(e){
// reading the rules threw: report it and move on to the next link
opera.postError('Warning: could not inline '+srcLink.href);
continue;
}
// NOTE(review): targetLinkList is presumably a live collection; replacing targetLink below would then
// shift later entries so the element following a replaced link is skipped by j++ - confirm
for(var targetLinkList=tmpdom.getElementsByTagName('link'),j=0,targetLink; targetLink=targetLinkList[j]; j++){ // replace code of all LINK elements with this href
if(targetLink.getAttribute('href')===src){
// build a <style> carrying over type and media, filled with the collected rules
var style=document.createElement('style');
if(targetLink.type)style.type=targetLink.type;
if(targetLink.media)style.media=targetLink.media;
style.appendChild(document.createTextNode('/* .. inlined from '+targetLink.href+' .. */\n\n'+prettyPrintJavaScript(usedCSS))); // experimental pretty-printing - is syntax similar enough? should be..
targetLink.parentNode.replaceChild(style, targetLink);
}
}
/*if( thesrc.indexOf(src)>-1 ){
urloffset=thesrc.indexOf(src);
tagContentsEnd=thesrc.indexOf( '>', urloffset )+1;
tagContentsStart=thesrc.lastIndexOf('<', urloffset);
if(tagContentsEnd==-1 || tagContentsStart==-1)continue;
var usedCSS='';
for(var j=0,rule; rule=link.sheet.cssRules[j]; j++){
usedCSS+=rule.cssText+'\n';
}
opera.postError('will now slice and dice - '+tagContentsStart+' '+tagContentsEnd);
thesrc=thesrc.substr(0, tagContentsStart)+'<style type="text/css">\n/* inlined from '+src+' *'++'/\n\n'+prettyPrintJavaScript(usedCSS)+'\n\n</style>'+thesrc.substr(tagContentsEnd); // remove (presumably) SRC attribute - maybe other attributes too, don't care much about those - and insert script inline
}else{
opera.postError( 'did not find link tag for '+src+' in unparsed markup' );
}*/
}
}
//alert('all scripts inlined!');
// open the serialised copy (doctype + outerHTML) in a new window as a data: URL
window.open('data:,'+encodeURIComponent('<!DOCTYPE html>'+tmpdom.outerHTML));
}
}, false);
})(window.opera);
/*
if(location.hostname.indexOf('orkut.com')>-1){
var docqsa=document.querySelectorAll;
document.querySelectorAll=function(){
var output=[];
var tmp=docqsa.apply(this,arguments);
for(var tmpi=0;tmpi<tmp.length;tmpi++){
output.push(tmp[tmpi]);
}
return output;
}
//=document.getElementsByClassName=null;
window.opera=null;
navigator.userAgent='Mozilla/5.0 (Windows; U; Windows NT 5.1; nn-NO; rv:1.9.0.9) Gecko/2009040821 Firefox/3.0.9';
navigator.appName='Netscape';
navigator.product='Gecko';
}
//for(var c=this.R("span","ownPresenceText",this.Fc),d=c[if_Ea](this.R("span","ownPresenceText")),e=0; e<d[if_m]; e++
*/