From dd5c09f6f0a49aef17ae2ccc900557bb7a1b76ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E1=83=95=E1=83=90=E1=83=A0=E1=83=A1=E1=83=A5=E1=83=94?= =?UTF-8?q?=E1=83=9C=20=E1=83=9E=E1=83=98=E1=83=A2=E1=83=98=E1=83=90?= =?UTF-8?q?=E1=83=AE=E1=83=A8=E1=83=98=20=28VARSKEN=20PITIAKHSH=29?= Date: Tue, 22 Oct 2019 16:40:45 +0400 Subject: [PATCH] Revert "Use jsformatter "space_after_named_function" feature" --- .travis.yml | 2 - README.md | 6 +- codeformatter/jsformatter.py | 8 - .../lib/cssbeautifier/css/beautifier.py | 2 +- codeformatter/lib/jsbeautifier/__init__.py | 100 +- codeformatter/lib/jsbeautifier/__version__.py | 2 +- codeformatter/lib/jsbeautifier/core/acorn.py | 70 + .../lib/jsbeautifier/core/directives.py | 53 - .../lib/jsbeautifier/core/inputscanner.py | 88 +- .../lib/jsbeautifier/core/options.py | 174 +-- codeformatter/lib/jsbeautifier/core/output.py | 172 +-- codeformatter/lib/jsbeautifier/core/token.py | 16 +- .../lib/jsbeautifier/core/tokenizer.py | 141 -- .../lib/jsbeautifier/core/tokenstream.py | 74 - .../lib/jsbeautifier/javascript/acorn.py | 68 - .../lib/jsbeautifier/javascript/beautifier.py | 1256 ++++++++--------- .../lib/jsbeautifier/javascript/options.py | 117 +- .../lib/jsbeautifier/javascript/tokenizer.py | 792 +++++------ .../lib/jsbeautifier/unpackers/__init__.py | 7 +- .../lib/jsbeautifier/unpackers/evalbased.py | 6 +- .../unpackers/javascriptobfuscator.py | 7 +- .../lib/jsbeautifier/unpackers/myobfuscate.py | 20 +- .../lib/jsbeautifier/unpackers/packer.py | 65 +- .../jsbeautifier/unpackers/tests/__init__.py | 2 + .../unpackers/tests/test-myobfuscate-input.js | 1 + .../tests/test-myobfuscate-output.js | 65 + .../unpackers/tests/test-packer-62-input.js | 1 + .../tests/test-packer-non62-input.js | 1 + .../tests/testjavascriptobfuscator.py | 46 + .../unpackers/tests/testmyobfuscate.py | 40 + .../unpackers/tests/testpacker.py | 34 + .../unpackers/tests/testurlencode.py | 36 + .../lib/jsbeautifier/unpackers/urlencode.py | 4 +- 33 files changed, 1485 insertions(+), 1991 deletions(-) create mode 100644 codeformatter/lib/jsbeautifier/core/acorn.py delete mode 100644 codeformatter/lib/jsbeautifier/core/directives.py delete mode 100644 codeformatter/lib/jsbeautifier/core/tokenizer.py delete mode 100644 codeformatter/lib/jsbeautifier/core/tokenstream.py delete mode 100644 codeformatter/lib/jsbeautifier/javascript/acorn.py create mode 100644 codeformatter/lib/jsbeautifier/unpackers/tests/__init__.py create mode 100644 codeformatter/lib/jsbeautifier/unpackers/tests/test-myobfuscate-input.js create mode 100644 codeformatter/lib/jsbeautifier/unpackers/tests/test-myobfuscate-output.js create mode 100644 codeformatter/lib/jsbeautifier/unpackers/tests/test-packer-62-input.js create mode 100644 codeformatter/lib/jsbeautifier/unpackers/tests/test-packer-non62-input.js create mode 100644 codeformatter/lib/jsbeautifier/unpackers/tests/testjavascriptobfuscator.py create mode 100644 codeformatter/lib/jsbeautifier/unpackers/tests/testmyobfuscate.py create mode 100644 codeformatter/lib/jsbeautifier/unpackers/tests/testpacker.py create mode 100644 codeformatter/lib/jsbeautifier/unpackers/tests/testurlencode.py diff --git a/.travis.yml b/.travis.yml index 27a122e..b94b4af 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,5 @@ language: python -dist: trusty - python: - 3.3 diff --git a/README.md b/README.md index 4a8272d..d3291b3 100644 --- a/README.md +++ b/README.md @@ -140,11 +140,7 @@ Language specific options: "wrap_line_length": 0, // Wrap lines at next opportunity after N characters "break_chained_methods": false, // Break chained method calls across subsequent lines "end_with_newline": false, // Add new line at end of file - "comma_first": false, // Add comma first - "space_after_anon_function": false, // Add a space before an anonymous function's parens, ie. function () - "space_after_named_function": false, // Add a space before a named function's parens, i.e. function example () - "unindent_chained_methods": false, // Don't indent chained method calls - "operator_position": "before-newline" // Set operator position (before-newline|after-newline|preserve-newline) [before-newline] + "comma_first": false // Add comma first } ``` diff --git a/codeformatter/jsformatter.py b/codeformatter/jsformatter.py index 9b35b84..033f887 100644 --- a/codeformatter/jsformatter.py +++ b/codeformatter/jsformatter.py @@ -141,14 +141,6 @@ def format(self, text): else: options.space_after_anon_function = False - if ( - 'space_after_named_function' in self.opts and - self.opts['space_after_named_function'] - ): - options.space_after_named_function = True - else: - options.space_after_named_function = False - if ( 'unindent_chained_methods' in self.opts and self.opts['unindent_chained_methods'] diff --git a/codeformatter/lib/cssbeautifier/css/beautifier.py b/codeformatter/lib/cssbeautifier/css/beautifier.py index 221570e..1c35a2c 100644 --- a/codeformatter/lib/cssbeautifier/css/beautifier.py +++ b/codeformatter/lib/cssbeautifier/css/beautifier.py @@ -3,7 +3,7 @@ import re import copy from .options import BeautifierOptions -from jsbeautifier.core.options import _mergeOpts as mergeOpts +from jsbeautifier.core.options import mergeOpts from jsbeautifier.core.output import Output from jsbeautifier.__version__ import __version__ diff --git a/codeformatter/lib/jsbeautifier/__init__.py b/codeformatter/lib/jsbeautifier/__init__.py index 4a92e0f..444895b 100644 --- a/codeformatter/lib/jsbeautifier/__init__.py +++ b/codeformatter/lib/jsbeautifier/__init__.py @@ -14,7 +14,7 @@ # # The MIT License (MIT) -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. +# Copyright (c) 2007-2017 Einar Lielmanis, Liam Newman, and contributors. # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation files @@ -37,9 +37,9 @@ # SOFTWARE. # # Originally written by Einar Lielmanis et al., -# Conversion to python by Einar Lielmanis, einar@beautifier.io, +# Conversion to python by Einar Lielmanis, einar@jsbeautifier.org, # Parsing improvement for brace-less and semicolon-less statements -# by Liam Newman +# by Liam Newman # Python is not my native language, feel free to push things around. # # Use either from command line (script displays its usage when run @@ -62,18 +62,14 @@ # Here are the available options: (read source) -class MissingInputStreamError(Exception): - pass - def default_options(): return BeautifierOptions() -def beautify(string, opts=default_options()): +def beautify(string, opts = default_options() ): b = Beautifier() return b.beautify(string, opts) - def set_file_editorconfig_opts(filename, js_options): from editorconfig import get_properties, EditorConfigError try: @@ -91,8 +87,7 @@ def set_file_editorconfig_opts(filename, js_options): if _ecoptions.get("max_line_length") == "off": js_options.wrap_line_length = 0 else: - js_options.wrap_line_length = int( - _ecoptions["max_line_length"]) + js_options.wrap_line_length = int(_ecoptions["max_line_length"]) if _ecoptions.get("insert_final_newline") == 'true': js_options.end_with_newline = True @@ -107,18 +102,24 @@ def set_file_editorconfig_opts(filename, js_options): elif _ecoptions["end_of_line"] == "crlf": js_options.eol = '\r\n' - except EditorConfigError: + except EditorConfigError as ex: # do not error on bad editor config print("Error loading EditorConfig. Ignoring.", file=sys.stderr) -def beautify_file(file_name, opts=default_options()): - input_string = '' - if file_name == '-': # stdin - if sys.stdin.isatty(): - raise MissingInputStreamError() - stream = sys.stdin - input_string = ''.join(stream.readlines()) +def beautify_file(file_name, opts = default_options() ): + input_string = '' + if file_name == '-': # stdin + try: + if sys.stdin.isatty(): + raise Exception() + + stream = sys.stdin + input_string = ''.join(stream.readlines()) + except Exception as ex: + print("Must pipe input or define at least one file.", file=sys.stderr) + usage(sys.stderr) + raise Exception() else: stream = io.open(file_name, 'rt', newline='') input_string = ''.join(stream.readlines()) @@ -130,11 +131,12 @@ def usage(stream=sys.stdout): print("jsbeautifier.py@" + __version__ + """ -Javascript beautifier (https://beautifier.io/) +Javascript beautifier (http://jsbeautifier.org/) Usage: jsbeautifier.py [options] can be "-", which means stdin. + defaults to stdout Input options: @@ -151,8 +153,7 @@ def usage(stream=sys.stdout): -P, --space-in-paren Add padding spaces within paren, ie. f( a, b ) -E, --space-in-empty-paren Add a single space inside empty paren, ie. f( ) -j, --jslint-happy More jslint-compatible output - -a, --space-after-anon-function Add a space before an anonymous function's parens, ie. function () - --space-after-named-function Add a space before a named function's parens, i.e. function example () + -a, --space_after_anon_function Add a space before an anonymous function's parens, ie. function () -b, --brace-style=collapse Brace style (collapse, expand, end-expand, none)(,preserve-inline) -k, --keep-array-indentation Keep array indentation. -r, --replace Write output in-place, replacing input @@ -160,11 +161,9 @@ def usage(stream=sys.stdout): -f, --keep-function-indentation Do not re-indent function bodies defined in var lines. -x, --unescape-strings Decode printable chars encoded in \\xNN notation. -X, --e4x Pass E4X xml literals through untouched - -C, --comma-first Put commas at the beginning of new line instead of end. - -O, --operator-position=STRING Set operator position (before-newline, after-newline, preserve-newline) - -w, --wrap-line-length Attempt to wrap line when it exceeds this length. + -w, --wrap-line-length Attempt to wrap line when it exceeds this length. NOTE: Line continues until next wrap point is found. - -n, --end-with-newline End output with newline + -n, --end_with_newline End output with newline --editorconfig Enable setting configuration from EditorConfig Rarely needed options: @@ -176,7 +175,7 @@ def usage(stream=sys.stdout): -l, --indent-level=NUMBER Initial indentation level. (default 0). -h, --help, --usage Prints this help statement. - -v, --version Show the version + -v, --version Show the version """, file=stream) if stream == sys.stderr: @@ -189,22 +188,19 @@ def mkdir_p(path): try: if path: os.makedirs(path) - except OSError as exc: # Python >2.5 + except OSError as exc: # Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(path): pass else: raise Exception() + + def isFileDifferent(filepath, expected): try: - return ( - ''.join( - io.open( - filepath, - 'rt', - newline='').readlines()) != expected) - except BaseException: + return (''.join(io.open(filepath, 'rt', newline='').readlines()) != expected) + except: return True @@ -214,11 +210,11 @@ def main(): try: opts, args = getopt.getopt(argv, "s:c:e:o:rdEPjabkil:xhtfvXnCO:w:", - ['indent-size=', 'indent-char=', 'eol=', 'outfile=', 'replace', 'disable-preserve-newlines', - 'space-in-paren', 'space-in-empty-paren', 'jslint-happy', 'space-after-anon-function', - 'brace-style=', 'keep-array-indentation', 'indent-level=', 'unescape-strings', - 'help', 'usage', 'stdin', 'eval-code', 'indent-with-tabs', 'keep-function-indentation', 'version', - 'e4x', 'end-with-newline', 'comma-first', 'operator-position=', 'wrap-line-length', 'editorconfig', 'space-after-named-function']) + ['indent-size=','indent-char=','eol=''outfile=', 'replace', 'disable-preserve-newlines', + 'space-in-paren', 'space-in-empty-paren', 'jslint-happy', 'space-after-anon-function', + 'brace-style=', 'keep-array-indentation', 'indent-level=', 'unescape-strings', + 'help', 'usage', 'stdin', 'eval-code', 'indent-with-tabs', 'keep-function-indentation', 'version', + 'e4x', 'end-with-newline','comma-first','operator-position=','wrap-line-length','editorconfig']) except getopt.GetoptError as ex: print(ex, file=sys.stderr) return usage(sys.stderr) @@ -234,7 +230,7 @@ def main(): for opt, arg in opts: if opt in ('--keep-array-indentation', '-k'): js_options.keep_array_indentation = True - if opt in ('--keep-function-indentation', '-f'): + if opt in ('--keep-function-indentation','-f'): js_options.keep_function_indentation = True elif opt in ('--outfile', '-o'): outfile = arg @@ -258,8 +254,6 @@ def main(): js_options.jslint_happy = True elif opt in ('--space_after_anon_function', '-a'): js_options.space_after_anon_function = True - elif opt in ('--space_after_named_function'): - js_options.space_after_named_function = True elif opt in ('--eval-code'): js_options.eval_code = True elif opt in ('--brace-style', '-b'): @@ -273,7 +267,7 @@ def main(): elif opt in ('--comma-first', '-C'): js_options.comma_first = True elif opt in ('--operator-position', '-O'): - js_options.operator_position = arg + js_options.operator_position = sanitizeOperatorPosition(arg) elif opt in ('--wrap-line-length ', '-w'): js_options.wrap_line_length = int(arg) elif opt in ('--stdin', '-i'): @@ -285,6 +279,7 @@ def main(): elif opt in ('--help', '--usage', '-h'): return usage() + if not file: file = '-' @@ -329,23 +324,10 @@ def main(): f.write(pretty) except TypeError: # This is not pretty, but given how we did the version import - # it is the only way to do this without having setup.py - # fail on a missing six dependency. + # it is the only way to do this without having setup.py fail on a missing six dependency. six = __import__("six") f.write(six.u(pretty)) - except MissingInputStreamError: - print( - "Must pipe input or define at least one file.\n", - file=sys.stderr) - usage(sys.stderr) - return 1 - - except UnicodeError as ex: - print("Error while decoding input or encoding output:", - file=sys.stderr) - print(ex, file=sys.stderr) - return 1 except Exception as ex: print(ex, file=sys.stderr) @@ -353,7 +335,3 @@ def main(): # Success return 0 - - -if __name__ == "__main__": - main() diff --git a/codeformatter/lib/jsbeautifier/__version__.py b/codeformatter/lib/jsbeautifier/__version__.py index 655be52..043606c 100644 --- a/codeformatter/lib/jsbeautifier/__version__.py +++ b/codeformatter/lib/jsbeautifier/__version__.py @@ -1 +1 @@ -__version__ = '1.8.7' +__version__ = '1.7.4' diff --git a/codeformatter/lib/jsbeautifier/core/acorn.py b/codeformatter/lib/jsbeautifier/core/acorn.py new file mode 100644 index 0000000..fbe5ada --- /dev/null +++ b/codeformatter/lib/jsbeautifier/core/acorn.py @@ -0,0 +1,70 @@ +import re + +# This section of code was translated to python from acorn (javascript). +# +# Acorn was written by Marijn Haverbeke and released under an MIT +# license. The Unicode regexps (for identifiers and whitespace) were +# taken from [Esprima](http://esprima.org) by Ariya Hidayat. +# +# Git repositories for Acorn are available at +# +# http://marijnhaverbeke.nl/git/acorn +# https://github.com/marijnh/acorn.git + +# This is not pretty, but given how we did the version import +# it is the only way to do this without having setup.py fail on a missing six dependency. +six = __import__("six") + +# ## Character categories + +# Big ugly regular expressions that match characters in the +# whitespace, identifier, and identifier-start categories. These +# are only applied when a character is found to actually have a +# code point above 128. + +_nonASCIIwhitespace = re.compile(six.u("[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]")) +_nonASCIIidentifierStartChars = six.u("\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc") +_nonASCIIidentifierChars = six.u("\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u0620-\u0649\u0672-\u06d3\u06e7-\u06e8\u06fb-\u06fc\u0730-\u074a\u0800-\u0814\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0840-\u0857\u08e4-\u08fe\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09d7\u09df-\u09e0\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a81-\u0a83\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2-\u0ae3\u0ae6-\u0aef\u0b01-\u0b03\u0b3c\u0b3e-\u0b44\u0b47\u0b48\u0b4b-\u0b4d\u0b56\u0b57\u0b5f-\u0b60\u0b66-\u0b6f\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7\u0be6-\u0bef\u0c01-\u0c03\u0c46-\u0c48\u0c4a-\u0c4d\u0c55\u0c56\u0c62-\u0c63\u0c66-\u0c6f\u0c82\u0c83\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5\u0cd6\u0ce2-\u0ce3\u0ce6-\u0cef\u0d02\u0d03\u0d46-\u0d48\u0d57\u0d62-\u0d63\u0d66-\u0d6f\u0d82\u0d83\u0dca\u0dcf-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2\u0df3\u0e34-\u0e3a\u0e40-\u0e45\u0e50-\u0e59\u0eb4-\u0eb9\u0ec8-\u0ecd\u0ed0-\u0ed9\u0f18\u0f19\u0f20-\u0f29\u0f35\u0f37\u0f39\u0f41-\u0f47\u0f71-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u1000-\u1029\u1040-\u1049\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u109d\u135d-\u135f\u170e-\u1710\u1720-\u1730\u1740-\u1750\u1772\u1773\u1780-\u17b2\u17dd\u17e0-\u17e9\u180b-\u180d\u1810-\u1819\u1920-\u192b\u1930-\u193b\u1951-\u196d\u19b0-\u19c0\u19c8-\u19c9\u19d0-\u19d9\u1a00-\u1a15\u1a20-\u1a53\u1a60-\u1a7c\u1a7f-\u1a89\u1a90-\u1a99\u1b46-\u1b4b\u1b50-\u1b59\u1b6b-\u1b73\u1bb0-\u1bb9\u1be6-\u1bf3\u1c00-\u1c22\u1c40-\u1c49\u1c5b-\u1c7d\u1cd0-\u1cd2\u1d00-\u1dbe\u1e01-\u1f15\u200c\u200d\u203f\u2040\u2054\u20d0-\u20dc\u20e1\u20e5-\u20f0\u2d81-\u2d96\u2de0-\u2dff\u3021-\u3028\u3099\u309a\ua640-\ua66d\ua674-\ua67d\ua69f\ua6f0-\ua6f1\ua7f8-\ua800\ua806\ua80b\ua823-\ua827\ua880-\ua881\ua8b4-\ua8c4\ua8d0-\ua8d9\ua8f3-\ua8f7\ua900-\ua909\ua926-\ua92d\ua930-\ua945\ua980-\ua983\ua9b3-\ua9c0\uaa00-\uaa27\uaa40-\uaa41\uaa4c-\uaa4d\uaa50-\uaa59\uaa7b\uaae0-\uaae9\uaaf2-\uaaf3\uabc0-\uabe1\uabec\uabed\uabf0-\uabf9\ufb20-\ufb28\ufe00-\ufe0f\ufe20-\ufe26\ufe33\ufe34\ufe4d-\ufe4f\uff10-\uff19\uff3f") +_nonASCIIidentifierStart = re.compile("[" + _nonASCIIidentifierStartChars + "]") +_nonASCIIidentifier = re.compile("[" + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]") + +# Whether a single character denotes a newline. + +newline = re.compile(six.u("[\n\r\u2028\u2029]")) + +# Matches a whole line break (where CRLF is considered a single +# line break). Used to count lines. + +# in javascript, these two differ +# in python they are the same, different methods are called on them +lineBreak = re.compile(six.u("\r\n|[\n\r\u2028\u2029]")) +allLineBreaks = lineBreak + + +# Test whether a given character code starts an identifier. +def isIdentifierStart(code): + if code < 65: + return code in [36, 64] # permit $ (36) and @ (64). @ is used in ES7 decorators. + if code < 91: + return True # 65 through 91 are uppercase letters + if code < 97: + return code == 95 # permit _ (95) + if code < 123: + return True # 97 through 123 are lowercase letters + return code >= 0xaa and _nonASCIIidentifierStart.match(six.unichr(code)) != None + +# Test whether a given character is part of an identifier. +def isIdentifierChar(code): + if code < 48: + return code == 36 + if code < 58: + return True + if code < 65: + return False + if code < 91: + return True + if code < 97: + return code == 95 + if code < 123: + return True + return code >= 0xaa and _nonASCIIidentifier.match(six.unichr(code)) != None diff --git a/codeformatter/lib/jsbeautifier/core/directives.py b/codeformatter/lib/jsbeautifier/core/directives.py deleted file mode 100644 index f4c73b9..0000000 --- a/codeformatter/lib/jsbeautifier/core/directives.py +++ /dev/null @@ -1,53 +0,0 @@ -# The MIT License (MIT) -# -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, -# subject to the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import re - - -class Directives: - - def __init__(self, start_block_pattern, end_block_pattern): - - self.__directives_block_pattern = re.compile(start_block_pattern + r' beautify( \w+[:]\w+)+ ' + end_block_pattern) - self.__directive_pattern = re.compile(r' (\w+)[:](\w+)') - - self.__directives_end_ignore_pattern = re.compile(r'(?:[\s\S]*?)((?:' + start_block_pattern + r'\sbeautify\signore:end\s' + end_block_pattern + r')|$)') - - def get_directives(self, text): - if not self.__directives_block_pattern.match(text): - return None - - directives = {} - directive_match = self.__directive_pattern.search(text) - - while directive_match: - directives[directive_match.group(1)] = directive_match.group(2) - directive_match = self.__directive_pattern.search( - text, directive_match.end()) - - - return directives - - def readIgnored(self, input): - return input.read(self.__directives_end_ignore_pattern) diff --git a/codeformatter/lib/jsbeautifier/core/inputscanner.py b/codeformatter/lib/jsbeautifier/core/inputscanner.py index 436749f..0d8cd6f 100644 --- a/codeformatter/lib/jsbeautifier/core/inputscanner.py +++ b/codeformatter/lib/jsbeautifier/core/inputscanner.py @@ -1,6 +1,6 @@ # The MIT License (MIT) # -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. +# Copyright (c) 2007-2017 Einar Lielmanis, Liam Newman, and contributors. # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation files @@ -24,19 +24,13 @@ class InputScanner: - def __init__(self, input_string): - if input_string is None: - input_string = '' - self.__input = input_string + def __init__(self, input): + self.__input = input self.__input_length = len(self.__input) self.__position = 0 - def restart(self): - self.__position = 0 - def back(self): - if self.__position > 0: - self.__position -= 1 + self.__position -= 1 def hasNext(self): return self.__position < self.__input_length @@ -47,71 +41,37 @@ def next(self): val = self.__input[self.__position] self.__position += 1 - return val + return val; - def peek(self, index=0): + def peek(self, index = 0): val = None - index += self.__position + index += self.__position; + if index >= 0 and index < self.__input_length: + val = self.__input[index]; + + return val; + + def peekCharCode(self, index = 0): + val = 0 + index += self.__position; if index >= 0 and index < self.__input_length: - val = self.__input[index] + val = ord(self.__input[index]) return val - def test(self, pattern, index=0): - index += self.__position - return index >= 0 and index < self.__input_length and bool( - pattern.match(self.__input, index)) + def test(self, pattern, index = 0): + index += self.__position; + return index >= 0 and index < self.__input_length and pattern.match(self.__input, index) - def testChar(self, pattern, index=0): - # test one character regex match + def testChar(self, pattern, index = 0): val = self.peek(index) - return val is not None and bool(pattern.match(val)) + return val != None and pattern.match(val) def match(self, pattern): pattern_match = None if self.hasNext(): pattern_match = pattern.match(self.__input, self.__position) - if bool(pattern_match): - self.__position = pattern_match.end(0) - return pattern_match + if pattern_match: + self.__position += len(pattern_match.group(0)); - def read(self, pattern): - val = '' - pattern_match = self.match(pattern) - if bool(pattern_match): - val = pattern_match.group(0) - return val - - def readUntil(self, pattern, include_match=False): - val = '' - pattern_match = None - match_index = self.__position - if self.hasNext(): - pattern_match = pattern.search(self.__input, self.__position) - if bool(pattern_match): - if include_match: - match_index = pattern_match.end(0) - else: - match_index = pattern_match.start(0) - else: - match_index = self.__input_length - - val = self.__input[self.__position:match_index] - self.__position = match_index - - return val - - def readUntilAfter(self, pattern): - return self.readUntil(pattern, include_match=True) - - # css beautifier legacy helpers - def peekUntilAfter(self, pattern): - start = self.__position - val = self.readUntilAfter(pattern) - self.__position = start - return val - - def lookBack(self, testVal): - start = self.__position - 1 - return start >= len(testVal) and \ - self.__input[start - len(testVal):start].lower() == testVal + return pattern_match diff --git a/codeformatter/lib/jsbeautifier/core/options.py b/codeformatter/lib/jsbeautifier/core/options.py index eb813a5..f567754 100644 --- a/codeformatter/lib/jsbeautifier/core/options.py +++ b/codeformatter/lib/jsbeautifier/core/options.py @@ -1,6 +1,6 @@ # The MIT License (MIT) # -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. +# Copyright (c) 2007-2017 Einar Lielmanis, Liam Newman, and contributors. # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation files @@ -23,174 +23,14 @@ # SOFTWARE. import copy -import re -from collections import namedtuple - -class Options: - def __init__(self, options=None, merge_child_field=None): - self.css = None - self.js = None - self.html = None - - self.raw_options = _mergeOpts(options, merge_child_field) - - # Support passing the source text back with no change - self.disabled = self._get_boolean('disabled') - - self.eol = self._get_characters('eol', 'auto') - self.end_with_newline = self._get_boolean('end_with_newline') - self.indent_size = self._get_number('indent_size', 4) - self.indent_char = self._get_characters('indent_char', ' ') - self.indent_level = self._get_number('indent_level') - - self.preserve_newlines = self._get_boolean('preserve_newlines', True) - # TODO: fix difference in js and python - self.max_preserve_newlines = self._get_number( - 'max_preserve_newlines', 10) - if not self.preserve_newlines: - self.max_preserve_newlines = 0 - - self.indent_with_tabs = self._get_boolean('indent_with_tabs') - if self.indent_with_tabs: - self.indent_char = '\t' - self.indent_size = 1 - - # Backwards compat with 1.3.x - self.wrap_line_length = self._get_number( - 'wrap_line_length', self._get_number('max_char')) - - def _get_array(self, name, default_value=[]): - option_value = getattr(self.raw_options, name, default_value) - result = [] - if isinstance(option_value, list): - result = copy.copy(option_value) - elif isinstance(option_value, str): - result = re.compile(r"[^a-zA-Z0-9_/\-]+").split(option_value) - - return result - - def _get_boolean(self, name, default_value=False): - option_value = getattr(self.raw_options, name, default_value) - result = False - try: - result = bool(option_value) - except ValueError: - pass - - return result - - def _get_characters(self, name, default_value=''): - option_value = getattr(self.raw_options, name, default_value) - result = '' - if isinstance(option_value, str): - result = option_value.replace('\\r', '\r').replace( - '\\n', '\n').replace('\\t', '\t') - - return result - - def _get_number(self, name, default_value=0): - option_value = getattr(self.raw_options, name, default_value) - result = 0 - try: - result = int(option_value) - except ValueError: - pass - - return result - - def _get_selection(self, name, selection_list, default_value=None): - result = self._get_selection_list(name, selection_list, default_value) - if len(result) != 1: - raise ValueError( - "Invalid Option Value: The option '" + name + "' can only be one of the following values:\n" + - str(selection_list) + - "\nYou passed in: '" + - str(getattr(self.raw_options, name, None)) + - "'") - - return result[0] - - def _get_selection_list(self, name, selection_list, default_value=None): - if not selection_list: - raise ValueError("Selection list cannot be empty.") - - default_value = default_value or [selection_list[0]] - - if not self._is_valid_selection(default_value, selection_list): - raise ValueError("Invalid Default Value!") - - result = self._get_array(name, default_value) - if not self._is_valid_selection(result, selection_list): - raise ValueError( - "Invalid Option Value: The option '" + name + "' can contain only the following values:\n" + - str(selection_list) + - "\nYou passed in: '" + - str(getattr(self.raw_options, name, None)) + - "'") - - return result - - def _is_valid_selection(self, result, selection_list): - if len(result) == 0 or len(selection_list) == 0: - return False - - for item in result: - if item not in selection_list: - return False - - return True - - -# merges child options up with the parent options object -# Example: obj = {a: 1, b: {a: 2}} -# mergeOpts(obj, 'b') -# -# Returns: {a: 2} - - -def _mergeOpts(options, childFieldName): - if options is None: - options = {} - - if isinstance(options, tuple): - options = dict(options) - - options = _normalizeOpts(options) +def mergeOpts(options, targetType): finalOpts = copy.copy(options) - if isinstance(options, dict): - local = finalOpts.get(childFieldName, None) - if local: - del(finalOpts[childFieldName]) - for key in local: - finalOpts[key] = local[key] - finalOpts = namedtuple("CustomOptions", finalOpts.keys())( - *finalOpts.values()) - if isinstance(options, Options): - local = getattr(finalOpts, childFieldName, None) - if local: - delattr(finalOpts, childFieldName) - for key in local: - setattr(finalOpts, key, local[key]) + local = getattr(finalOpts, targetType) + if (local): + delattr(finalOpts, targetType) + for key in local: + setattr(finalOpts, key, local[key]) return finalOpts - - -def _normalizeOpts(options): - convertedOpts = copy.copy(options) - if isinstance(convertedOpts, dict): - option_keys = list(convertedOpts.keys()) - for key in option_keys: - if '-' in key: - del convertedOpts[key] - convertedOpts[key.replace('-', '_')] = options[key] - else: - option_keys = list(getattr(convertedOpts, '__dict__', {})) - for key in option_keys: - if '-' in key: - delattr(convertedOpts, key) - setattr(convertedOpts, key.replace( - '-', '_'), getattr(options, key, None)) - - return convertedOpts diff --git a/codeformatter/lib/jsbeautifier/core/output.py b/codeformatter/lib/jsbeautifier/core/output.py index bce347e..3505128 100644 --- a/codeformatter/lib/jsbeautifier/core/output.py +++ b/codeformatter/lib/jsbeautifier/core/output.py @@ -1,6 +1,6 @@ # The MIT License (MIT) # -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. +# Copyright (c) 2007-2017 Einar Lielmanis, Liam Newman, and contributors. # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation files @@ -24,52 +24,44 @@ import re -# Using object instead of string to allow for later expansion of info -# about each line - -__all__ = ["Output"] - - +# Using object instead of string to allow for later expansion of info about each line class OutputLine: def __init__(self, parent): self.__parent = parent self.__character_count = 0 self.__indent_count = -1 - self.__alignment_count = 0 self.__items = [] - - def item(self, index): - return self.__items[index] + self.__empty = True def get_character_count(self): return self.__character_count def is_empty(self): - return len(self.__items) == 0 + return self.__empty - def set_indent(self, indent=0, alignment=0): - self.__indent_count = indent - self.__alignment_count = alignment - self.__character_count = self.__parent.baseIndentLength + \ - self.__alignment_count + \ - self.__indent_count * self.__parent.indent_length + def set_indent(self, level): + self.__character_count = self.__parent.baseIndentLength + level * self.__parent.indent_length + self.__indent_count = level; def last(self): if not self.is_empty(): return self.__items[-1] + else: + return None - return None + def push(self, input): + self.__items.append(input) + self.__character_count += len(input) + self.__empty = False - def push(self, item): - self.__items.append(item) - self.__character_count += len(item) def pop(self): item = None if not self.is_empty(): item = self.__items.pop() self.__character_count -= len(item) + self.__empty = len(self.__items) == 0 return item def remove_indent(self): @@ -79,95 +71,58 @@ def remove_indent(self): def trim(self): while self.last() == ' ': - self.__items.pop() + item = self._items.pop() self.__character_count -= 1 + self.__empty = len(self.__items) == 0 def toString(self): result = '' if not self.is_empty(): if self.__indent_count >= 0: - result = self.__parent.get_indent_string(self.__indent_count) - if self.__alignment_count >= 0: - result += self.__parent.get_alignment_string( - self.__alignment_count) + result = self.__parent.indent_cache[self.__indent_count] result += ''.join(self.__items) return result -class IndentCache: - def __init__(self, base_string, level_string): - self.__cache = [base_string] - self.__level_string = level_string - - def __ensure_cache(self, level): - while level >= len(self.__cache): - self.__cache.append( - self.__cache[-1] + self.__level_string) - - def get_level_string(self, level): - self.__ensure_cache(level) - return self.__cache[level] - - class Output: - def __init__(self, options, baseIndentString=''): + def __init__(self, indent_string, baseIndentString = ''): - indent_string = options.indent_char - if options.indent_size > 0: - indent_string = options.indent_char * options.indent_size - - # Set to null to continue support for auto detection of base levelself. - if options.indent_level > 0: - baseIndentString = options.indent_level * indent_string - - self.__indent_cache = IndentCache(baseIndentString, indent_string) - self.__alignment_cache = IndentCache('', ' ') + self.indent_string = indent_string + self.baseIndentString = baseIndentString + self.indent_cache = [ baseIndentString ] self.baseIndentLength = len(baseIndentString) self.indent_length = len(indent_string) self.raw = False - self._end_with_newline = options.end_with_newline - self.__lines = [] + self.lines = [] self.previous_line = None self.current_line = None self.space_before_token = False + self.add_outputline() - self.__add_outputline() - - def __add_outputline(self): + def add_outputline(self): self.previous_line = self.current_line self.current_line = OutputLine(self) - self.__lines.append(self.current_line) + self.lines.append(self.current_line) def get_line_number(self): - return len(self.__lines) - - def get_indent_string(self, level): - return self.__indent_cache.get_level_string(level) - - def get_alignment_string(self, level): - return self.__alignment_cache.get_level_string(level) - - def is_empty(self): - return self.previous_line is None and self.current_line.is_empty() + return len(self.lines) def add_new_line(self, force_newline=False): - # never newline at the start of file - # otherwise, newline only if we didn't just add one or we're forced - if self.is_empty() or \ - (not force_newline and self.just_added_newline()): + if len(self.lines) == 1 and self.just_added_newline(): + # no newline on start of file return False - # if raw output is enabled, don't print additional newlines, - # but still return True as though you had - if not self.raw: - self.__add_outputline() - return True + if force_newline or not self.just_added_newline(): + if not self.raw: + self.add_outputline() + return True + return False - def get_code(self, eol): - sweet_code = "\n".join(line.toString() for line in self.__lines) + def get_code(self, end_with_newline, eol): + sweet_code = "\n".join(line.toString() for line in self.lines) sweet_code = re.sub('[\r\n\t ]+$', '', sweet_code) - if self._end_with_newline: + if end_with_newline: sweet_code += '\n' if not eol == '\n': @@ -175,17 +130,21 @@ def get_code(self, eol): return sweet_code - def set_indent(self, indent=0, alignment=0): + def set_indent(self, level): # Never indent your first output indent at the start of the file - if len(self.__lines) > 1: - self.current_line.set_indent(indent, alignment) + if len(self.lines) > 1: + while level >= len(self.indent_cache): + self.indent_cache.append(self.indent_cache[-1] + self.indent_string) + + + self.current_line.set_indent(level) return True - self.current_line.set_indent() + self.current_line.set_indent(0) return False def add_raw_token(self, token): for _ in range(token.newlines): - self.__add_outputline() + self.add_outputline() self.current_line.push(token.whitespace_before) self.current_line.push(token.text) @@ -200,22 +159,16 @@ def add_space_before_token(self): self.current_line.push(' ') self.space_before_token = False - def remove_indent(self, index): - while index < len(self.__lines): - self.__lines[index].remove_indent() - index += 1 - - def trim(self, eat_newlines=False): + def trim(self, eat_newlines = False): self.current_line.trim() - while eat_newlines and len( - self.__lines) > 1 and self.current_line.is_empty(): - self.__lines.pop() - self.current_line = self.__lines[-1] + while eat_newlines and len(self.lines) > 1 and self.current_line.is_empty(): + self.lines.pop() + self.current_line = self.lines[-1] self.current_line.trim() - if len(self.__lines) > 1: - self.previous_line = self.__lines[-2] + if len(self.lines) > 1: + self.previous_line = self.lines[-2] else: self.previous_line = None @@ -223,18 +176,11 @@ def just_added_newline(self): return self.current_line.is_empty() def just_added_blankline(self): - return self.is_empty() or \ - (self.current_line.is_empty() and self.previous_line.is_empty()) - - def ensure_empty_line_above(self, starts_with, ends_with): - index = len(self.__lines) - 2 - while index >= 0: - potentialEmptyLine = self.__lines[index] - if potentialEmptyLine.is_empty(): - break - elif not potentialEmptyLine.item(0).startswith(starts_with) and \ - potentialEmptyLine.item(-1) != ends_with: - self.__lines.insert(index + 1, OutputLine(self)) - self.previous_line = self.__lines[-2] - break - index -= 1 + if self.just_added_newline(): + if len(self.lines) == 1: + return True + + line = self.lines[-2] + return line.is_empty() + + return False diff --git a/codeformatter/lib/jsbeautifier/core/token.py b/codeformatter/lib/jsbeautifier/core/token.py index afe999d..06884d0 100644 --- a/codeformatter/lib/jsbeautifier/core/token.py +++ b/codeformatter/lib/jsbeautifier/core/token.py @@ -1,6 +1,6 @@ # The MIT License (MIT) # -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. +# Copyright (c) 2007-2017 Einar Lielmanis, Liam Newman, and contributors. # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation files @@ -22,22 +22,14 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - class Token: - def __init__( - self, - type, - text, - newlines=0, - whitespace_before=''): + def __init__(self, type, text, newlines = 0, whitespace_before = '', mode = None, parent = None): self.type = type self.text = text - self.comments_before = None + self.comments_before = [] self.newlines = newlines + self.wanted_newline = newlines > 0 self.whitespace_before = whitespace_before self.parent = None - self.next = None - self.previous = None self.opened = None - self.closed = None self.directives = None diff --git a/codeformatter/lib/jsbeautifier/core/tokenizer.py b/codeformatter/lib/jsbeautifier/core/tokenizer.py deleted file mode 100644 index 6a489f3..0000000 --- a/codeformatter/lib/jsbeautifier/core/tokenizer.py +++ /dev/null @@ -1,141 +0,0 @@ -# The MIT License (MIT) -# -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, -# subject to the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import re -from ..core.inputscanner import InputScanner -from ..core.token import Token -from ..core.tokenstream import TokenStream - - -__all__ = ["TOKEN", "Tokenizer", "TokenTypes"] - -class TokenTypes: - START = 'TK_START' - RAW = 'TK_RAW' - EOF = 'TK_EOF' - - def __init__(self): - pass - -TOKEN = TokenTypes() - -class Tokenizer: - - def __init__(self, input_string, options): - self._input = InputScanner(input_string) - self._options = options - self.__tokens = None - self.__newline_count = 0 - self.__whitespace_before_token = '' - - self._whitespace_pattern = re.compile(r'[\n\r\t ]+') - self._newline_pattern = re.compile(r'([^\n\r]*)(\r\n|[\n\r])?') - - def tokenize(self): - self._input.restart() - self.__tokens = TokenStream() - - current = None - previous = Token(TOKEN.START,'') - open_token = None - open_stack = [] - comments = TokenStream() - - while previous.type != TOKEN.EOF: - current = self.__get_next_token_with_comments(previous, open_token) - - if self._is_opening(current): - open_stack.append(open_token) - open_token = current - elif open_token is not None and \ - self._is_closing(current, open_token): - current.opened = open_token - open_token.closed = current - open_token = open_stack.pop() - current.parent = open_token - - self.__tokens.add(current) - previous = current - return self.__tokens - - def __get_next_token_with_comments(self, previous, open_token): - current = self._get_next_token(previous, open_token) - - if self._is_comment(current): - comments = TokenStream() - while self._is_comment(current): - comments.add(current) - current = self._get_next_token(previous, open_token) - - if not comments.isEmpty(): - current.comments_before = comments - comments = TokenStream() - - current.parent = open_token - current.previous = previous - previous.next = current - - return current - - def _is_first_token(self): - return self.__tokens.isEmpty() - - def _reset(self): - pass - - def _get_next_token(self, previous_token, open_token): - self._readWhitespace() - resulting_string = self._input.read(re.compile(r'.+')) - if resulting_string: - return self._create_token(TOKEN.RAW, resulting_string) - else: - return self._create_token(TOKEN.EOF, '') - - def _is_comment(self, current_token): - return False - - def _is_opening(self, current_token): - return False - - def _is_closing(self, current_token, open_token): - return False - - def _create_token(self, token_type, text): - token = Token(token_type, text, - self.__newline_count, self.__whitespace_before_token) - self.__newline_count = 0 - self.__whitespace_before_token = '' - return token - - def _readWhitespace(self): - resulting_string = self._input.read(self._whitespace_pattern) - if resulting_string == ' ': - self.__whitespace_before_token = resulting_string - elif resulting_string != '': - for nextMatch in self._newline_pattern.findall(resulting_string): - if nextMatch[1] == '': - self.__whitespace_before_token = nextMatch[0] - break - - self.__newline_count += 1 diff --git a/codeformatter/lib/jsbeautifier/core/tokenstream.py b/codeformatter/lib/jsbeautifier/core/tokenstream.py deleted file mode 100644 index c7fad45..0000000 --- a/codeformatter/lib/jsbeautifier/core/tokenstream.py +++ /dev/null @@ -1,74 +0,0 @@ -# The MIT License (MIT) -# -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, -# subject to the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import re -from ..core.inputscanner import InputScanner -from ..core.token import Token - -class TokenStream: - - def __init__(self, parent_token=None): - self.__tokens = [] - self.__tokens_length = len(self.__tokens) - self.__position = 0 - self.__parent_token = parent_token - - def restart(self): - self.__position = 0 - - def isEmpty(self): - return self.__tokens_length == 0 - - def hasNext(self): - return self.__position < self.__tokens_length - - def next(self): - if self.hasNext(): - val = self.__tokens[self.__position] - self.__position += 1 - return val - else: - raise StopIteration - - def peek(self, index=0): - val = None - index += self.__position - if index >= 0 and index < self.__tokens_length: - val = self.__tokens[index] - - return val - - def add(self, token): - if self.__parent_token: - token.parent = self.__parent_token - - self.__tokens.append(token) - self.__tokens_length += 1 - - def __iter__(self): - self.restart() - return self - - def __next__(self): - return self.next() diff --git a/codeformatter/lib/jsbeautifier/javascript/acorn.py b/codeformatter/lib/jsbeautifier/javascript/acorn.py deleted file mode 100644 index a8f833a..0000000 --- a/codeformatter/lib/jsbeautifier/javascript/acorn.py +++ /dev/null @@ -1,68 +0,0 @@ -import re - -# This section of code was translated to python from acorn (javascript). -# -# Acorn was written by Marijn Haverbeke and released under an MIT -# license. The Unicode regexps (for identifiers and whitespace) were -# taken from [Esprima](http://esprima.org) by Ariya Hidayat. -# -# Git repositories for Acorn are available at -# -# http://marijnhaverbeke.nl/git/acorn -# https://github.com/marijnh/acorn.git - -# This is not pretty, but given how we did the version import -# it is the only way to do this without having setup.py fail on a missing -# six dependency. -six = __import__("six") - -# ## Character categories - -# acorn used char codes to squeeze the last bit of performance out -# Beautifier is okay without that, so we're using regex -# permit $ (36) and @ (64). @ is used in ES7 decorators. -# 65 through 91 are uppercase letters. -# permit _ (95). -# 97 through 123 are lowercase letters. -_baseASCIIidentifierStartChars = six.u(r"\x24\x40\x41-\x5a\x5f\x61-\x7a") - -# inside an identifier @ is not allowed but 0-9 are. -_baseASCIIidentifierChars = six.u(r"\x24\x30-\x39\x41-\x5a\x5f\x61-\x7a") - -# Big ugly regular expressions that match characters in the -# whitespace, identifier, and identifier-start categories. These -# are only applied when a character is found to actually have a -# code point above 128. -# IMPORTANT: These strings must be run through six to handle \u chars -_nonASCIIidentifierStartChars = six.u(r"\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc") -_nonASCIIidentifierChars = six.u(r"\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u0620-\u0649\u0672-\u06d3\u06e7-\u06e8\u06fb-\u06fc\u0730-\u074a\u0800-\u0814\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0840-\u0857\u08e4-\u08fe\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09d7\u09df-\u09e0\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a81-\u0a83\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2-\u0ae3\u0ae6-\u0aef\u0b01-\u0b03\u0b3c\u0b3e-\u0b44\u0b47\u0b48\u0b4b-\u0b4d\u0b56\u0b57\u0b5f-\u0b60\u0b66-\u0b6f\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7\u0be6-\u0bef\u0c01-\u0c03\u0c46-\u0c48\u0c4a-\u0c4d\u0c55\u0c56\u0c62-\u0c63\u0c66-\u0c6f\u0c82\u0c83\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5\u0cd6\u0ce2-\u0ce3\u0ce6-\u0cef\u0d02\u0d03\u0d46-\u0d48\u0d57\u0d62-\u0d63\u0d66-\u0d6f\u0d82\u0d83\u0dca\u0dcf-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2\u0df3\u0e34-\u0e3a\u0e40-\u0e45\u0e50-\u0e59\u0eb4-\u0eb9\u0ec8-\u0ecd\u0ed0-\u0ed9\u0f18\u0f19\u0f20-\u0f29\u0f35\u0f37\u0f39\u0f41-\u0f47\u0f71-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u1000-\u1029\u1040-\u1049\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u109d\u135d-\u135f\u170e-\u1710\u1720-\u1730\u1740-\u1750\u1772\u1773\u1780-\u17b2\u17dd\u17e0-\u17e9\u180b-\u180d\u1810-\u1819\u1920-\u192b\u1930-\u193b\u1951-\u196d\u19b0-\u19c0\u19c8-\u19c9\u19d0-\u19d9\u1a00-\u1a15\u1a20-\u1a53\u1a60-\u1a7c\u1a7f-\u1a89\u1a90-\u1a99\u1b46-\u1b4b\u1b50-\u1b59\u1b6b-\u1b73\u1bb0-\u1bb9\u1be6-\u1bf3\u1c00-\u1c22\u1c40-\u1c49\u1c5b-\u1c7d\u1cd0-\u1cd2\u1d00-\u1dbe\u1e01-\u1f15\u200c\u200d\u203f\u2040\u2054\u20d0-\u20dc\u20e1\u20e5-\u20f0\u2d81-\u2d96\u2de0-\u2dff\u3021-\u3028\u3099\u309a\ua640-\ua66d\ua674-\ua67d\ua69f\ua6f0-\ua6f1\ua7f8-\ua800\ua806\ua80b\ua823-\ua827\ua880-\ua881\ua8b4-\ua8c4\ua8d0-\ua8d9\ua8f3-\ua8f7\ua900-\ua909\ua926-\ua92d\ua930-\ua945\ua980-\ua983\ua9b3-\ua9c0\uaa00-\uaa27\uaa40-\uaa41\uaa4c-\uaa4d\uaa50-\uaa59\uaa7b\uaae0-\uaae9\uaaf2-\uaaf3\uabc0-\uabe1\uabec\uabed\uabf0-\uabf9\ufb20-\ufb28\ufe00-\ufe0f\ufe20-\ufe26\ufe33\ufe34\ufe4d-\ufe4f\uff10-\uff19\uff3f") -#_nonASCIIidentifierStart = re.compile("[" + _nonASCIIidentifierStartChars + "]") -#_nonASCIIidentifier = re.compile("[" + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]") - -_identifierStart = six.u("[") + \ - _baseASCIIidentifierStartChars + \ - _nonASCIIidentifierStartChars + \ - six.u("]") -_identifierChars = six.u("[") + \ - _baseASCIIidentifierChars + \ - _nonASCIIidentifierStartChars + \ - _nonASCIIidentifierChars + \ - six.u("]*") - -identifier = re.compile(_identifierStart + _identifierChars) - -_nonASCIIwhitespace = re.compile( - six.u(r"[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]")) - -# Whether a single character denotes a newline. -# IMPORTANT: This string must be run through six to handle \u chars -newline = re.compile(six.u(r"[\n\r\u2028\u2029]")) - -# Matches a whole line break (where CRLF is considered a single -# line break). Used to count lines. - -# in javascript, these two differ -# in python they are the same, different methods are called on them -# IMPORTANT: This string must be run through six to handle \u chars -lineBreak = re.compile(six.u(r"\r\n|[\n\r\u2028\u2029]")) -allLineBreaks = lineBreak \ No newline at end of file diff --git a/codeformatter/lib/jsbeautifier/javascript/beautifier.py b/codeformatter/lib/jsbeautifier/javascript/beautifier.py index 4813793..19d9848 100644 --- a/codeformatter/lib/jsbeautifier/javascript/beautifier.py +++ b/codeformatter/lib/jsbeautifier/javascript/beautifier.py @@ -1,6 +1,6 @@ # The MIT License (MIT) # -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. +# Copyright (c) 2007-2017 Einar Lielmanis, Liam Newman, and contributors. # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation files @@ -25,22 +25,19 @@ import re import string import copy -from ..core.token import Token from .tokenizer import Tokenizer -from .tokenizer import TOKEN from .options import BeautifierOptions +from ..core.options import mergeOpts from ..core.output import Output - def default_options(): return BeautifierOptions() - class BeautifierFlags: def __init__(self, mode): self.mode = mode self.parent = None - self.last_token = Token(TOKEN.START_BLOCK, '') + self.last_text = '' self.last_word = '' self.declaration_statement = False self.declaration_assignment = False @@ -62,11 +59,11 @@ def __init__(self, mode): def apply_base(self, flags_base, added_newline): next_indent_level = flags_base.indentation_level if not added_newline and \ - flags_base.line_indent_level > next_indent_level: + flags_base.line_indent_level > next_indent_level: next_indent_level = flags_base.line_indent_level self.parent = flags_base - self.last_token = flags_base.last_token + self.last_text = flags_base.last_text self.last_word = flags_base.last_word self.indentation_level = next_indent_level @@ -76,14 +73,21 @@ def apply_base(self, flags_base, added_newline): 'after_newline': 'after-newline', 'preserve_newline': 'preserve-newline' } -OPERATOR_POSITION_BEFORE_OR_PRESERVE = [ - OPERATOR_POSITION['before_newline'], - OPERATOR_POSITION['preserve_newline']] +OPERATOR_POSITION_BEFORE_OR_PRESERVE = [OPERATOR_POSITION['before_newline'], OPERATOR_POSITION['preserve_newline']]; + +def sanitizeOperatorPosition(opPosition): + if not opPosition: + return OPERATOR_POSITION['before_newline'] + elif opPosition not in OPERATOR_POSITION.values(): + raise ValueError("Invalid Option Value: The option 'operator_position' must be one of the following values\n" + + str(OPERATOR_POSITION.values()) + + "\nYou passed in: '" + opPosition + "'") + return opPosition class MODE: - BlockStatement, Statement, ObjectLiteral, ArrayLiteral, \ - ForInitializer, Conditional, Expression = range(7) + BlockStatement, Statement, ObjectLiteral, ArrayLiteral, \ + ForInitializer, Conditional, Expression = range(7) def remove_redundant_indentation(output, frame): @@ -92,346 +96,338 @@ def remove_redundant_indentation(output, frame): # after wrap points are calculated # These issues are minor compared to ugly indentation. - if frame.multiline_frame or \ - frame.mode == MODE.ForInitializer or \ - frame.mode == MODE.Conditional: + if frame.multiline_frame or frame.mode == MODE.ForInitializer or frame.mode == MODE.Conditional: return # remove one indent from each line inside this section - output.remove_indent(frame.start_line_index) - - -def reserved_word(token, word): - return token and token.type == TOKEN.RESERVED and token.text == word - - -def reserved_array(token, words): - return token and token.type == TOKEN.RESERVED and token.text in words - - -_special_word_set = frozenset([ - 'case', - 'return', - 'do', - 'if', - 'throw', - 'else', - 'await', - 'break', - 'continue', - 'async']) + index = frame.start_line_index + while index < len(output.lines): + output.lines[index].remove_indent() + index += 1 class Beautifier: - def __init__(self, opts=None): - import jsbeautifier.javascript.acorn as acorn + def __init__(self, opts = default_options() ): + import jsbeautifier.core.acorn as acorn self.acorn = acorn - self._options = BeautifierOptions(opts) + self.opts = copy.copy(opts) + self.blank_state() - self._blank_state() - - def _blank_state(self, js_source_text=None): - if js_source_text is None: - js_source_text = '' + def blank_state(self, js_source_text = None): # internal flags - self._flags = None - self._previous_flags = None - self._flag_store = [] - self._tokens = None + self.flags = None + self.previous_flags = None + self.flag_store = [] + self.tokens = [] + self.token_pos = 0 + + + # force opts.space_after_anon_function to true if opts.jslint_happy + if self.opts.jslint_happy: + self.opts.space_after_anon_function = True - if self._options.eol == 'auto': - self._options.eol = '\n' + if self.opts.indent_with_tabs: + self.opts.indent_char = "\t" + self.opts.indent_size = 1 + + if self.opts.eol == 'auto': + self.opts.eol = '\n' if self.acorn.lineBreak.search(js_source_text or ''): - self._options.eol = self.acorn.lineBreak.search( - js_source_text).group() + self.opts.eol = self.acorn.lineBreak.search(js_source_text).group() + + self.opts.eol = self.opts.eol.replace('\\r', '\r').replace('\\n', '\n') + self.indent_string = self.opts.indent_char * self.opts.indent_size - baseIndentString = re.search("^[\t ]*", js_source_text).group(0) - self._last_last_text = '' # pre-last token text + self.baseIndentString = '' + self.last_type = 'TK_START_BLOCK' # last token type + self.last_last_text = '' # pre-last token text + preindent_index = 0; + if not js_source_text == None and len(js_source_text) > 0: + while preindent_index < len(js_source_text) and \ + js_source_text[preindent_index] in [' ', '\t'] : + self.baseIndentString += js_source_text[preindent_index] + preindent_index += 1 + js_source_text = js_source_text[preindent_index:] - self._output = Output(self._options, baseIndentString) - # If testing the ignore directive, start with output disable set to - # true - self._output.raw = self._options.test_output_raw + self.output = Output(self.indent_string, self.baseIndentString) + # If testing the ignore directive, start with output disable set to true + self.output.raw = self.opts.test_output_raw; self.set_mode(MODE.BlockStatement) return js_source_text - def beautify(self, source_text='', opts=None): - if opts is not None: - self._options = BeautifierOptions(opts) - source_text = source_text or '' - if self._options.disabled: - return source_text + def beautify(self, s, opts = None ): - source_text = self._blank_state(source_text) + if opts != None: + opts = mergeOpts(opts, 'js') + self.opts = copy.copy(opts) - source_text = self.unpack(source_text, self._options.eval_code) - self._tokens = Tokenizer(source_text, self._options).tokenize() + #Compat with old form + if self.opts.brace_style == 'collapse-preserve-inline': + self.opts.brace_style = 'collapse,preserve-inline' - for current_token in self._tokens: - self.handle_token(current_token) + #split always returns at least one value + split = re.compile("[^a-zA-Z0-9_\-]+").split(self.opts.brace_style) + #preserve-inline in delimited string will trigger brace_preserve_inline + #Everything else is considered a brace_style and the last one only will + #have an effect + #specify defaults in case one half of meta-option is missing + self.opts.brace_style = "collapse" + self.opts.brace_preserve_inline = False + for bs in split: + if bs == "preserve-inline": + self.opts.brace_preserve_inline = True + else: + #validate each brace_style that's not a preserve-inline + #(results in very similar validation as js version) + if bs not in ['expand', 'collapse', 'end-expand', 'none']: + raise(Exception('opts.brace_style must be "expand", "collapse", "end-expand", or "none".')) + self.opts.brace_style = bs + + s = self.blank_state(s) + + input = self.unpack(s, self.opts.eval_code) + + self.handlers = { + 'TK_START_EXPR': self.handle_start_expr, + 'TK_END_EXPR': self.handle_end_expr, + 'TK_START_BLOCK': self.handle_start_block, + 'TK_END_BLOCK': self.handle_end_block, + 'TK_WORD': self.handle_word, + 'TK_RESERVED': self.handle_word, + 'TK_SEMICOLON': self.handle_semicolon, + 'TK_STRING': self.handle_string, + 'TK_EQUALS': self.handle_equals, + 'TK_OPERATOR': self.handle_operator, + 'TK_COMMA': self.handle_comma, + 'TK_BLOCK_COMMENT': self.handle_block_comment, + 'TK_COMMENT': self.handle_comment, + 'TK_DOT': self.handle_dot, + 'TK_UNKNOWN': self.handle_unknown, + 'TK_EOF': self.handle_eof + } + + self.tokens = Tokenizer(input, self.opts, self.indent_string).tokenize() + self.token_pos = 0 + + current_token = self.get_token() + while current_token != None: + self.handlers[current_token.type](current_token) + + self.last_last_text = self.flags.last_text + self.last_type = current_token.type + self.flags.last_text = current_token.text + self.token_pos += 1 + current_token = self.get_token() + + + sweet_code = self.output.get_code(self.opts.end_with_newline, self.opts.eol) - self._last_last_text = self._flags.last_token.text - self._flags.last_token = current_token + return sweet_code - sweet_code = self._output.get_code(self._options.eol) - return sweet_code + def handle_whitespace_and_comments(self, local_token, preserve_statement_flags = False): + newlines = local_token.newlines + keep_whitespace = self.opts.keep_array_indentation and self.is_array(self.flags.mode) + + for comment_token in local_token.comments_before: + # The cleanest handling of inline comments is to treat them as though they aren't there. + # Just continue formatting and the behavior should be logical. + # Also ignore unknown tokens. Again, this should result in better behavior. + self.handle_whitespace_and_comments(comment_token, preserve_statement_flags) + self.handlers[comment_token.type](comment_token, preserve_statement_flags) - def handle_token(self, current_token, preserve_statement_flags=False): - if current_token.type == TOKEN.START_EXPR: - self.handle_start_expr(current_token) - elif current_token.type == TOKEN.END_EXPR: - self.handle_end_expr(current_token) - elif current_token.type == TOKEN.START_BLOCK: - self.handle_start_block(current_token) - elif current_token.type == TOKEN.END_BLOCK: - self.handle_end_block(current_token) - elif current_token.type == TOKEN.WORD: - self.handle_word(current_token) - elif current_token.type == TOKEN.RESERVED: - self.handle_word(current_token) - elif current_token.type == TOKEN.SEMICOLON: - self.handle_semicolon(current_token) - elif current_token.type == TOKEN.STRING: - self.handle_string(current_token) - elif current_token.type == TOKEN.EQUALS: - self.handle_equals(current_token) - elif current_token.type == TOKEN.OPERATOR: - self.handle_operator(current_token) - elif current_token.type == TOKEN.COMMA: - self.handle_comma(current_token) - elif current_token.type == TOKEN.BLOCK_COMMENT: - self.handle_block_comment(current_token, preserve_statement_flags) - elif current_token.type == TOKEN.COMMENT: - self.handle_comment(current_token, preserve_statement_flags) - elif current_token.type == TOKEN.DOT: - self.handle_dot(current_token) - elif current_token.type == TOKEN.EOF: - self.handle_eof(current_token) - elif current_token.type == TOKEN.UNKNOWN: - self.handle_unknown(current_token, preserve_statement_flags) - else: - self.handle_unknown(current_token, preserve_statement_flags) - - def handle_whitespace_and_comments( - self, current_token, preserve_statement_flags=False): - newlines = current_token.newlines - keep_whitespace = self._options.keep_array_indentation and self.is_array( - self._flags.mode) - - if current_token.comments_before is not None: - for comment_token in current_token.comments_before: - # The cleanest handling of inline comments is to treat them - # as though they aren't there. - # Just continue formatting and the behavior should be logical. - # Also ignore unknown tokens. Again, this should result in better - # behavior. - self.handle_whitespace_and_comments( - comment_token, preserve_statement_flags) - self.handle_token(comment_token, preserve_statement_flags) if keep_whitespace: - for i in range(newlines): - self.print_newline(i > 0, preserve_statement_flags) - else: # not keep_whitespace - if self._options.max_preserve_newlines != 0 and newlines > self._options.max_preserve_newlines: - newlines = self._options.max_preserve_newlines + for i in range(newlines): + self.print_newline(i > 0, preserve_statement_flags) + else: # not keep_whitespace + if self.opts.max_preserve_newlines != 0 and newlines > self.opts.max_preserve_newlines: + newlines = self.opts.max_preserve_newlines - if self._options.preserve_newlines and newlines > 1: + if self.opts.preserve_newlines and newlines > 1: self.print_newline(False, preserve_statement_flags) for i in range(1, newlines): self.print_newline(True, preserve_statement_flags) + def unpack(self, source, evalcode=False): import jsbeautifier.unpackers as unpackers try: return unpackers.run(source, evalcode) - except unpackers.UnpackingError: + except unpackers.UnpackingError as error: return source + def is_special_word(self, s): + return s in ['case', 'return', 'do', 'if', 'throw', 'else'] + def is_array(self, mode): return mode == MODE.ArrayLiteral + def is_expression(self, mode): - return mode == MODE.Expression or mode == MODE.ForInitializer or mode == MODE.Conditional - - _newline_restricted_tokens = frozenset([ - 'async', - 'break', - 'continue', - 'return', - 'throw', - 'yield']) - - def allow_wrap_or_preserved_newline( - self, current_token, force_linewrap=False): + return mode in [MODE.Expression, MODE.ForInitializer, MODE.Conditional] + + + _newline_restricted_tokens = ['break','continue','return', 'throw', 'yield'] + def allow_wrap_or_preserved_newline(self, current_token, force_linewrap = False): # never wrap the first token of a line. - if self._output.just_added_newline(): + if self.output.just_added_newline(): return - shouldPreserveOrForce = ( - self._options.preserve_newlines and current_token.newlines) or force_linewrap - operatorLogicApplies = self._flags.last_token.text in Tokenizer.positionable_operators or current_token.text in Tokenizer.positionable_operators + shouldPreserveOrForce = (self.opts.preserve_newlines and current_token.wanted_newline) or force_linewrap + operatorLogicApplies = self.flags.last_text in Tokenizer.positionable_operators or current_token.text in Tokenizer.positionable_operators if operatorLogicApplies: - shouldPrintOperatorNewline = (self._flags.last_token.text in Tokenizer.positionable_operators and self._options.operator_position in OPERATOR_POSITION_BEFORE_OR_PRESERVE) \ + shouldPrintOperatorNewline = (self.flags.last_text in Tokenizer.positionable_operators and self.opts.operator_position in OPERATOR_POSITION_BEFORE_OR_PRESERVE) \ or current_token.text in Tokenizer.positionable_operators shouldPreserveOrForce = shouldPreserveOrForce and shouldPrintOperatorNewline if shouldPreserveOrForce: - self.print_newline(preserve_statement_flags=True) - elif self._options.wrap_line_length > 0: - if reserved_array(self._flags.last_token, self._newline_restricted_tokens): + self.print_newline(preserve_statement_flags = True) + elif self.opts.wrap_line_length > 0: + if self.last_type == 'TK_RESERVED' and self.flags.last_text in self._newline_restricted_tokens: # These tokens should never have a newline inserted between # them and the following expression. return - proposed_line_length = self._output.current_line.get_character_count() + \ - len(current_token.text) - if self._output.space_before_token: + proposed_line_length = self.output.current_line.get_character_count() + len(current_token.text) + if self.output.space_before_token: proposed_line_length += 1 - if proposed_line_length >= self._options.wrap_line_length: - self.print_newline(preserve_statement_flags=True) + if proposed_line_length >= self.opts.wrap_line_length: + self.print_newline(preserve_statement_flags = True) + - def print_newline( - self, - force_newline=False, - preserve_statement_flags=False): + def print_newline(self, force_newline = False, preserve_statement_flags = False): if not preserve_statement_flags: - if self._flags.last_token.text != ';' and self._flags.last_token.text != ',' and self._flags.last_token.text != '=' and ( - self._flags.last_token.type != TOKEN.OPERATOR or self._flags.last_token.text == '--' or self._flags.last_token.text == '++'): - next_token = self._tokens.peek() - while (self._flags.mode == MODE.Statement and \ - not (self._flags.if_block and reserved_word(next_token, 'else')) and \ - not self._flags.do_block): + if self.flags.last_text != ';' and self.flags.last_text != ',' and self.flags.last_text != '=' and self.last_type != 'TK_OPERATOR': + next_token = self.get_token(1) + while (self.flags.mode == MODE.Statement and + not (self.flags.if_block and next_token and next_token.type == 'TK_RESERVED' and next_token.text == 'else') and + not self.flags.do_block): self.restore_mode() - if self._output.add_new_line(force_newline): - self._flags.multiline_frame = True + if self.output.add_new_line(force_newline): + self.flags.multiline_frame = True def print_token_line_indentation(self, current_token): - if self._output.just_added_newline(): - line = self._output.current_line - if self._options.keep_array_indentation and self.is_array( - self._flags.mode) and current_token.newlines: + if self.output.just_added_newline(): + line = self.output.current_line + if self.opts.keep_array_indentation and self.is_array(self.flags.mode) and current_token.wanted_newline: line.push(current_token.whitespace_before) - self._output.space_before_token = False - elif self._output.set_indent(self._flags.indentation_level): - self._flags.line_indent_level = self._flags.indentation_level + self.output.space_before_token = False + elif self.output.set_indent(self.flags.indentation_level): + self.flags.line_indent_level = self.flags.indentation_level + def print_token(self, current_token, s=None): - if self._output.raw: - self._output.add_raw_token(current_token) + if self.output.raw: + self.output.add_raw_token(current_token) return - if self._options.comma_first and current_token.previous and \ - current_token.previous.type == TOKEN.COMMA and \ - self._output.just_added_newline(): - if self._output.previous_line.last() == ',': + if self.opts.comma_first and self.last_type == 'TK_COMMA' and self.output.just_added_newline(): + if self.output.previous_line.last() == ',': # if the comma was already at the start of the line, # pull back onto that line and reprint the indentation - popped = self._output.previous_line.pop() - if self._output.previous_line.is_empty(): - self._output.previous_line.push(popped) - self._output.trim(True) - self._output.current_line.pop() - self._output.trim() + popped = self.output.previous_line.pop() + if self.output.previous_line.is_empty(): + self.output.previous_line.push(popped) + self.output.trim(True) + self.output.current_line.pop() + self.output.trim() # add the comma in front of the next token self.print_token_line_indentation(current_token) - self._output.add_token(',') - self._output.space_before_token = True + self.output.add_token(',') + self.output.space_before_token = True - if s is None: + if s == None: s = current_token.text self.print_token_line_indentation(current_token) - self._output.add_token(s) + self.output.add_token(s); + def indent(self): - self._flags.indentation_level += 1 + self.flags.indentation_level += 1 def deindent(self): - allow_deindent = self._flags.indentation_level > 0 and ( - (self._flags.parent is None) or self._flags.indentation_level > self._flags.parent.indentation_level) + allow_deindent = self.flags.indentation_level > 0 and ((self.flags.parent == None) or self.flags.indentation_level > self.flags.parent.indentation_level) if allow_deindent: - self._flags.indentation_level -= 1 + self.flags.indentation_level -= 1 def set_mode(self, mode): - if self._flags: - self._flag_store.append(self._flags) - self._previous_flags = self._flags + if self.flags: + self.flag_store.append(self.flags) + self.previous_flags = self.flags else: - self._previous_flags = BeautifierFlags(mode) + self.previous_flags = BeautifierFlags(mode) - self._flags = BeautifierFlags(mode) - self._flags.apply_base( - self._previous_flags, - self._output.just_added_newline()) - self._flags.start_line_index = self._output.get_line_number() + self.flags = BeautifierFlags(mode) + self.flags.apply_base(self.previous_flags, self.output.just_added_newline()) + self.flags.start_line_index = self.output.get_line_number(); def restore_mode(self): - if len(self._flag_store) > 0: - self._previous_flags = self._flags - self._flags = self._flag_store.pop() - if self._previous_flags.mode == MODE.Statement: - remove_redundant_indentation(self._output, self._previous_flags) + if len(self.flag_store) > 0: + self.previous_flags = self.flags + self.flags = self.flag_store.pop() + if self.previous_flags.mode == MODE.Statement and not self.opts.unindent_chained_methods: + remove_redundant_indentation(self.output, self.previous_flags) + def start_of_object_property(self): - return self._flags.parent.mode == MODE.ObjectLiteral and self._flags.mode == MODE.Statement and ( - (self._flags.last_token.text == ':' and self._flags.ternary_depth == 0) or ( - reserved_array(self._flags.last_token, ['get', 'set']))) + return self.flags.parent.mode == MODE.ObjectLiteral and self.flags.mode == MODE.Statement and \ + ((self.flags.last_text == ':' and self.flags.ternary_depth == 0) or (self.last_type == 'TK_RESERVED' and self.flags.last_text in ['get', 'set'])) def start_of_statement(self, current_token): - start = False - start = start or ( - reserved_array(self._flags.last_token, ['var', 'let', 'const']) and - current_token.type == TOKEN.WORD) - start = start or reserved_word(self._flags.last_token, 'do') - start = start or ( - reserved_array(self._flags.last_token, self._newline_restricted_tokens) and - not current_token.newlines) - start = start or ( - reserved_word(self._flags.last_token, 'else') and not ( - reserved_word(current_token, 'if') and \ - current_token.comments_before is None)) - start = start or (self._flags.last_token.type == TOKEN.END_EXPR and ( - self._previous_flags.mode == MODE.ForInitializer or self._previous_flags.mode == MODE.Conditional)) - start = start or (self._flags.last_token.type == TOKEN.WORD and self._flags.mode == MODE.BlockStatement - and not self._flags.in_case - and not (current_token.text == '--' or current_token.text == '++') - and self._last_last_text != 'function' - and current_token.type != TOKEN.WORD and current_token.type != TOKEN.RESERVED) - start = start or ( - self._flags.mode == MODE.ObjectLiteral and ( - (self._flags.last_token.text == ':' and self._flags.ternary_depth == 0) or ( - reserved_array(self._flags.last_token, ['get', 'set'])))) - - if (start): + if ( + (self.last_type == 'TK_RESERVED' and self.flags.last_text in ['var', 'let', 'const'] and current_token.type == 'TK_WORD') \ + or (self.last_type == 'TK_RESERVED' and self.flags.last_text== 'do') \ + or (self.last_type == 'TK_RESERVED' and self.flags.last_text in self._newline_restricted_tokens and not current_token.wanted_newline) \ + or (self.last_type == 'TK_RESERVED' and self.flags.last_text == 'else' \ + and not (current_token.type == 'TK_RESERVED' and current_token.text == 'if' and not len(current_token.comments_before))) \ + or (self.last_type == 'TK_END_EXPR' and (self.previous_flags.mode == MODE.ForInitializer or self.previous_flags.mode == MODE.Conditional)) \ + or (self.last_type == 'TK_WORD' and self.flags.mode == MODE.BlockStatement \ + and not self.flags.in_case + and not (current_token.text == '--' or current_token.text == '++') + and self.last_last_text != 'function' + and current_token.type != 'TK_WORD' and current_token.type != 'TK_RESERVED') \ + or (self.flags.mode == MODE.ObjectLiteral and \ + ((self.flags.last_text == ':' and self.flags.ternary_depth == 0) or (self.last_type == 'TK_RESERVED' and self.flags.last_text in ['get', 'set']))) + ): + self.set_mode(MODE.Statement) - self.indent() + if not self.opts.unindent_chained_methods: + self.indent() - self.handle_whitespace_and_comments(current_token, True) + self.handle_whitespace_and_comments(current_token, True); # Issue #276: # If starting a new statement with [if, for, while, do], push to a new line. # if (a) if (b) if(c) d(); else e(); else f(); if not self.start_of_object_property(): - self.allow_wrap_or_preserved_newline( - current_token, reserved_array(current_token, ['do', 'for', 'if', 'while'])) + self.allow_wrap_or_preserved_newline(current_token, current_token.type == 'TK_RESERVED' and current_token.text in ['do', 'for', 'if', 'while']) + return True else: return False + def get_token(self, offset = 0): + index = self.token_pos + offset + if index < 0 or index >= len(self.tokens): + return None + else: + return self.tokens[index] + + def handle_start_expr(self, current_token): if self.start_of_statement(current_token): # The conditional starts the statement if appropriate. @@ -442,511 +438,449 @@ def handle_start_expr(self, current_token): next_mode = MODE.Expression if current_token.text == '[': - if self._flags.last_token.type == TOKEN.WORD or self._flags.last_token.text == ')': - if reserved_array(self._flags.last_token, Tokenizer.line_starters): - self._output.space_before_token = True + if self.last_type == 'TK_WORD' or self.flags.last_text == ')': + if self.last_type == 'TK_RESERVED' and self.flags.last_text in Tokenizer.line_starters: + self.output.space_before_token = True self.set_mode(next_mode) self.print_token(current_token) self.indent() - if self._options.space_in_paren: - self._output.space_before_token = True + if self.opts.space_in_paren: + self.output.space_before_token = True return next_mode = MODE.ArrayLiteral - if self.is_array(self._flags.mode): - if self._flags.last_token.text == '[' or ( - self._flags.last_token.text == ',' and ( - self._last_last_text == ']' or self._last_last_text == '}')): + if self.is_array(self.flags.mode): + if self.flags.last_text == '[' or ( + self.flags.last_text == ',' and (self.last_last_text == ']' or self.last_last_text == '}')): # ], [ goes to a new line # }, [ goes to a new line - if not self._options.keep_array_indentation: + if not self.opts.keep_array_indentation: self.print_newline() - if self._flags.last_token.type not in [ - TOKEN.START_EXPR, - TOKEN.END_EXPR, - TOKEN.WORD, - TOKEN.OPERATOR]: - self._output.space_before_token = True - else: - if self._flags.last_token.type == TOKEN.RESERVED: - if self._flags.last_token.text == 'for': - self._output.space_before_token = self._options.space_before_conditional - next_mode = MODE.ForInitializer - elif self._flags.last_token.text in ['if', 'while']: - self._output.space_before_token = self._options.space_before_conditional - next_mode = MODE.Conditional - elif self._flags.last_word in ['await', 'async']: - # Should be a space between await and an IIFE, or async and - # an arrow function - self._output.space_before_token = True - elif self._flags.last_token.text == 'import' and current_token.whitespace_before == '': - self._output.space_before_token = False - elif self._flags.last_token.text in Tokenizer.line_starters or self._flags.last_token.text == 'catch': - self._output.space_before_token = True - - elif self._flags.last_token.type in [TOKEN.EQUALS, TOKEN.OPERATOR]: - # Support of this kind of newline preservation: - # a = (b && - # (c || d)); - if not self.start_of_object_property(): - self.allow_wrap_or_preserved_newline(current_token) - elif self._flags.last_token.type == TOKEN.WORD: - self._output.space_before_token = False - # function name() vs function name () - # function* name() vs function* name () - # async name() vs async name () - if self._options.space_after_named_function: - # peek starts at next character so -1 is current token - peek_back_three = self._tokens.peek(-4) - peek_back_two = self._tokens.peek(-3) - if reserved_array(peek_back_two, ['async', 'function']) or ( - reserved_array(peek_back_three, ['async', 'function']) and - peek_back_two.text == '*'): - self._output.space_before_token = True + if self.last_type == 'TK_RESERVED' and self.flags.last_text == 'for': + next_mode = MODE.ForInitializer + elif self.last_type == 'TK_RESERVED' and self.flags.last_text in ['if', 'while']: + next_mode = MODE.Conditional else: - # Support preserving wrapped arrow function expressions - # a.b('c', - # () => d.e - # ) - self.allow_wrap_or_preserved_newline(current_token) + next_mode = MODE.Expression - # function() vs function (), typeof() vs typeof () - # function*() vs function* (), yield*() vs yield* () - if ( - self._flags.last_token.type == TOKEN.RESERVED and ( - self._flags.last_word == 'function' or self._flags.last_word == 'typeof')) or ( - self._flags.last_token.text == '*' and ( - self._last_last_text in [ - 'function', 'yield'] or ( - self._flags.mode == MODE.ObjectLiteral and self._last_last_text in [ - '{', ',']))): - self._output.space_before_token = self._options.space_after_anon_function - - if self._flags.last_token.text == ';' or self._flags.last_token.type == TOKEN.START_BLOCK: + + if self.flags.last_text == ';' or self.last_type == 'TK_START_BLOCK': self.print_newline() - elif self._flags.last_token.type in [TOKEN.END_EXPR, TOKEN.START_EXPR, TOKEN.END_BLOCK, TOKEN.COMMA] or self._flags.last_token.text == '.': + elif self.last_type in ['TK_END_EXPR', 'TK_START_EXPR', 'TK_END_BLOCK'] or self.flags.last_text == '.': # do nothing on (( and )( and ][ and ]( and .( - # TODO: Consider whether forcing this is required. Review failing - # tests when removed. - self.allow_wrap_or_preserved_newline( - current_token, current_token.newlines) + # TODO: Consider whether forcing this is required. Review failing tests when removed. + self.allow_wrap_or_preserved_newline(current_token, current_token.wanted_newline) + + elif not (self.last_type == 'TK_RESERVED' and current_token.text == '(') and self.last_type not in ['TK_WORD', 'TK_OPERATOR']: + self.output.space_before_token = True + elif (self.last_type == 'TK_RESERVED' and (self.flags.last_word == 'function' or self.flags.last_word == 'typeof')) or \ + (self.flags.last_text == '*' and ( + self.last_last_text in ['function', 'yield'] or + (self.flags.mode == MODE.ObjectLiteral and self.last_last_text in ['{', ',']))): + # function() vs function (), typeof() vs typeof () + # function*() vs function* (), yield*() vs yield* () + if self.opts.space_after_anon_function: + self.output.space_before_token = True + elif self.last_type == 'TK_RESERVED' and (self.flags.last_text in Tokenizer.line_starters or self.flags.last_text == 'catch'): + # TODO: option space_before_conditional + self.output.space_before_token = True + + elif current_token.text == '(' and self.last_type == 'TK_RESERVED' and self.flags.last_word in ['await', 'async']: + self.output.space_before_token = True + + + # Support of this kind of newline preservation: + # a = (b && + # (c || d)); + if self.last_type in ['TK_EQUALS', 'TK_OPERATOR']: + if not self.start_of_object_property(): + self.allow_wrap_or_preserved_newline(current_token) + + + # Support preserving wrapped arrow function expressions + # a.b('c', + # () => d.e + # ) + if current_token.text == '(' and self.last_type not in ['TK_WORD', 'TK_RESERVED']: + self.allow_wrap_or_preserved_newline(current_token) + self.set_mode(next_mode) self.print_token(current_token) - if self._options.space_in_paren: - self._output.space_before_token = True + if self.opts.space_in_paren: + self.output.space_before_token = True - # In all cases, if we newline while inside an expression it should be - # indented. + # In all cases, if we newline while inside an expression it should be indented. self.indent() + + def handle_end_expr(self, current_token): # statements inside expressions are not valid syntax, but... # statements must all be closed when their container closes - while self._flags.mode == MODE.Statement: + while self.flags.mode == MODE.Statement: self.restore_mode() self.handle_whitespace_and_comments(current_token) - if self._flags.multiline_frame: - self.allow_wrap_or_preserved_newline( - current_token, current_token.text == ']' and self.is_array( - self._flags.mode) and not self._options.keep_array_indentation) + if self.flags.multiline_frame: + self.allow_wrap_or_preserved_newline(current_token, current_token.text == ']' and self.is_array(self.flags.mode) and not self.opts.keep_array_indentation) - if self._options.space_in_paren: - if self._flags.last_token.type == TOKEN.START_EXPR and not self._options.space_in_empty_paren: + if self.opts.space_in_paren: + if self.last_type == 'TK_START_EXPR' and not self.opts.space_in_empty_paren: # empty parens are always "()" and "[]", not "( )" or "[ ]" - self._output.space_before_token = False - self._output.trim() + self.output.space_before_token = False + self.output.trim() else: - self._output.space_before_token = True + self.output.space_before_token = True - if current_token.text == ']' and self._options.keep_array_indentation: + if current_token.text == ']' and self.opts.keep_array_indentation: self.print_token(current_token) self.restore_mode() else: self.restore_mode() self.print_token(current_token) - remove_redundant_indentation(self._output, self._previous_flags) + remove_redundant_indentation(self.output, self.previous_flags) # do {} while () // no statement required after - if self._flags.do_while and self._previous_flags.mode == MODE.Conditional: - self._previous_flags.mode = MODE.Expression - self._flags.do_block = False - self._flags.do_while = False + if self.flags.do_while and self.previous_flags.mode == MODE.Conditional: + self.previous_flags.mode = MODE.Expression + self.flags.do_block = False + self.flags.do_while = False def handle_start_block(self, current_token): self.handle_whitespace_and_comments(current_token) - # Check if this is a BlockStatement that should be treated as a - # ObjectLiteral - next_token = self._tokens.peek() - second_token = self._tokens.peek(1) - if self._flags.last_word == 'switch' and \ - self._flags.last_token.type == TOKEN.END_EXPR: - self.set_mode(MODE.BlockStatement) - self._flags.in_case_statement = True - elif second_token is not None and ( - (second_token.text in [ - ':', - ','] and next_token.type in [ - TOKEN.STRING, - TOKEN.WORD, - TOKEN.RESERVED]) or ( - next_token.text in [ - 'get', - 'set', - '...'] and second_token.type in [ - TOKEN.WORD, - TOKEN.RESERVED])): + # Check if this is a BlockStatement that should be treated as a ObjectLiteral + next_token = self.get_token(1) + second_token = self.get_token(2) + if second_token != None and \ + ((second_token.text in [':', ','] and next_token.type in ['TK_STRING', 'TK_WORD', 'TK_RESERVED']) \ + or (next_token.text in ['get', 'set', '...'] and second_token.type in ['TK_WORD', 'TK_RESERVED'])): # We don't support TypeScript,but we didn't break it for a very long time. # We'll try to keep not breaking it. - if self._last_last_text not in ['class', 'interface']: + if not self.last_last_text in ['class','interface']: self.set_mode(MODE.ObjectLiteral) else: self.set_mode(MODE.BlockStatement) - elif self._flags.last_token.type == TOKEN.OPERATOR and self._flags.last_token.text == '=>': + elif self.last_type == 'TK_OPERATOR' and self.flags.last_text == '=>': # arrow function: (param1, paramN) => { statements } self.set_mode(MODE.BlockStatement) - elif self._flags.last_token.type in [TOKEN.EQUALS, TOKEN.START_EXPR, TOKEN.COMMA, TOKEN.OPERATOR] or \ - reserved_array(self._flags.last_token, ['return', 'throw', 'import', 'default']): + elif self.last_type in ['TK_EQUALS', 'TK_START_EXPR', 'TK_COMMA', 'TK_OPERATOR'] or \ + (self.last_type == 'TK_RESERVED' and self.flags.last_text in ['return', 'throw', 'import', 'default']): # Detecting shorthand function syntax is difficult by scanning forward, # so check the surrounding context. # If the block is being returned, imported, export default, passed as arg, - # assigned with = or assigned in a nested object, treat as an - # ObjectLiteral. + # assigned with = or assigned in a nested object, treat as an ObjectLiteral. self.set_mode(MODE.ObjectLiteral) else: self.set_mode(MODE.BlockStatement) - empty_braces = (next_token is not None) and \ - next_token.comments_before is None and next_token.text == '}' - empty_anonymous_function = empty_braces and self._flags.last_word == 'function' and \ - self._flags.last_token.type == TOKEN.END_EXPR + empty_braces = (not next_token == None) and len(next_token.comments_before) == 0 and next_token.text == '}' + empty_anonymous_function = empty_braces and self.flags.last_word == 'function' and \ + self.last_type == 'TK_END_EXPR' - if self._options.brace_preserve_inline: # check for inline, set inline_frame if so + if self.opts.brace_preserve_inline: # check for inline, set inline_frame if so # search forward for newline wanted inside this block index = 0 check_token = None - self._flags.inline_frame = True + self.flags.inline_frame = True do_loop = True while (do_loop): index += 1 - check_token = self._tokens.peek(index - 1) - if check_token.newlines: - self._flags.inline_frame = False - - do_loop = ( - check_token.type != TOKEN.EOF and not ( - check_token.type == TOKEN.END_BLOCK and check_token.opened == current_token)) - - if (self._options.brace_style == 'expand' or (self._options.brace_style == - 'none' and current_token.newlines)) and not self._flags.inline_frame: - if self._flags.last_token.type != TOKEN.OPERATOR and ( - empty_anonymous_function or self._flags.last_token.type == TOKEN.EQUALS or ( - reserved_array(self._flags.last_token, _special_word_set) and self._flags.last_token.text != 'else')): - self._output.space_before_token = True + check_token = self.get_token(index) + if check_token.wanted_newline: + self.flags.inline_frame = False + + do_loop = (check_token.type != 'TK_EOF' and + not (check_token.type == 'TK_END_BLOCK' and check_token.opened == current_token)) + + if (self.opts.brace_style == 'expand' or \ + (self.opts.brace_style == 'none' and current_token.wanted_newline)) and \ + not self.flags.inline_frame: + if self.last_type != 'TK_OPERATOR' and \ + (empty_anonymous_function or + self.last_type == 'TK_EQUALS' or + (self.last_type == 'TK_RESERVED' and self.is_special_word(self.flags.last_text) and self.flags.last_text != 'else')): + self.output.space_before_token = True else: - self.print_newline(preserve_statement_flags=True) - else: # collapse || inline_frame - if self.is_array( - self._previous_flags.mode) and ( - self._flags.last_token.type == TOKEN.START_EXPR or self._flags.last_token.type == TOKEN.COMMA): + self.print_newline(preserve_statement_flags = True) + else: # collapse || inline_frame + if self.is_array(self.previous_flags.mode) and (self.last_type == 'TK_START_EXPR' or self.last_type == 'TK_COMMA'): # if we're preserving inline, # allow newline between comma and next brace. - if self._flags.inline_frame: + if self.flags.inline_frame: self.allow_wrap_or_preserved_newline(current_token) - self._flags.inline_frame = True - self._previous_flags.multiline_frame = self._previous_flags.multiline_frame or self._flags.multiline_frame - self._flags.multiline_frame = False - elif self._flags.last_token.type == TOKEN.COMMA: - self._output.space_before_token = True - - elif self._flags.last_token.type not in [TOKEN.OPERATOR, TOKEN.START_EXPR]: - if self._flags.last_token.type == TOKEN.START_BLOCK and not self._flags.inline_frame: + self.flags.inline_frame = True + self.previous_flags.multiline_frame = self.previous_flags.multiline_frame or self.flags.multiline_frame + self.flags.multiline_frame = False + elif self.last_type == 'TK_COMMA': + self.output.space_before_token = True + + elif self.last_type not in ['TK_OPERATOR', 'TK_START_EXPR']: + if self.last_type == 'TK_START_BLOCK' and not self.flags.inline_frame: self.print_newline() else: - self._output.space_before_token = True + self.output.space_before_token = True self.print_token(current_token) self.indent() + def handle_end_block(self, current_token): # statements must all be closed when their container closes self.handle_whitespace_and_comments(current_token) - while self._flags.mode == MODE.Statement: + while self.flags.mode == MODE.Statement: self.restore_mode() - empty_braces = self._flags.last_token.type == TOKEN.START_BLOCK + empty_braces = self.last_type == 'TK_START_BLOCK' - # try inline_frame (only set if opt.braces-preserve-inline) first - if self._flags.inline_frame and not empty_braces: - self._output.space_before_token = True - elif self._options.brace_style == 'expand': + if self.flags.inline_frame and not empty_braces: # try inline_frame (only set if opt.braces-preserve-inline) first + self.output.space_before_token = True; + elif self.opts.brace_style == 'expand': if not empty_braces: self.print_newline() else: # skip {} if not empty_braces: - if self.is_array( - self._flags.mode) and self._options.keep_array_indentation: - self._options.keep_array_indentation = False + if self.is_array(self.flags.mode) and self.opts.keep_array_indentation: + self.opts.keep_array_indentation = False self.print_newline() - self._options.keep_array_indentation = True + self.opts.keep_array_indentation = True else: self.print_newline() self.restore_mode() self.print_token(current_token) + def handle_word(self, current_token): - if current_token.type == TOKEN.RESERVED: - if current_token.text in [ - 'set', 'get'] and self._flags.mode != MODE.ObjectLiteral: - current_token.type = TOKEN.WORD - elif current_token.text == 'import' and self._tokens.peek().text == '(': - current_token.type = TOKEN.WORD - elif current_token.text in ['as', 'from'] and not self._flags.import_block: - current_token.type = TOKEN.WORD - elif self._flags.mode == MODE.ObjectLiteral: - next_token = self._tokens.peek() + if current_token.type == 'TK_RESERVED': + if current_token.text in ['set', 'get'] and self.flags.mode != MODE.ObjectLiteral: + current_token.type = 'TK_WORD' + elif current_token.text in ['as', 'from'] and not self.flags.import_block: + current_token.type = 'TK_WORD' + elif self.flags.mode == MODE.ObjectLiteral: + next_token = self.get_token(1) if next_token.text == ':': - current_token.type = TOKEN.WORD + current_token.type = 'TK_WORD' if self.start_of_statement(current_token): # The conditional starts the statement if appropriate. - if reserved_array(self._flags.last_token, ['var', 'let', 'const']) and \ - current_token.type == TOKEN.WORD: - self._flags.declaration_statement = True - - elif current_token.newlines and \ - not self.is_expression(self._flags.mode) and \ - (self._flags.last_token.type != TOKEN.OPERATOR or (self._flags.last_token.text == '--' or self._flags.last_token.text == '++')) and \ - self._flags.last_token.type != TOKEN.EQUALS and \ - (self._options.preserve_newlines or not reserved_array(self._flags.last_token, ['var', 'let', 'const', 'set', 'get'])): + if self.last_type == 'TK_RESERVED' and self.flags.last_text in ['var', 'let', 'const'] and current_token.type == 'TK_WORD': + self.flags.declaration_statement = True + + elif current_token.wanted_newline and \ + not self.is_expression(self.flags.mode) and \ + (self.last_type != 'TK_OPERATOR' or (self.flags.last_text == '--' or self.flags.last_text == '++')) and \ + self.last_type != 'TK_EQUALS' and \ + (self.opts.preserve_newlines or not (self.last_type == 'TK_RESERVED' and self.flags.last_text in ['var', 'let', 'const', 'set', 'get'])): self.handle_whitespace_and_comments(current_token) self.print_newline() else: self.handle_whitespace_and_comments(current_token) - if self._flags.do_block and not self._flags.do_while: - if reserved_word(current_token, 'while'): + + if self.flags.do_block and not self.flags.do_while: + if current_token.type == 'TK_RESERVED' and current_token.text == 'while': # do {} ## while () - self._output.space_before_token = True + self.output.space_before_token = True self.print_token(current_token) - self._output.space_before_token = True - self._flags.do_while = True + self.output.space_before_token = True + self.flags.do_while = True return else: # do {} should always have while as the next word. # if we don't see the expected while, recover self.print_newline() - self._flags.do_block = False + self.flags.do_block = False # if may be followed by else, or not # Bare/inline ifs are tricky - # Need to unwind the modes correctly: if (a) if (b) c(); else d(); else - # e(); - if self._flags.if_block: - if (not self._flags.else_block) and reserved_word(current_token, 'else'): - self._flags.else_block = True + # Need to unwind the modes correctly: if (a) if (b) c(); else d(); else e(); + if self.flags.if_block: + if (not self.flags.else_block) and (current_token.type == 'TK_RESERVED' and current_token.text == 'else'): + self.flags.else_block = True else: - while self._flags.mode == MODE.Statement: + while self.flags.mode == MODE.Statement: self.restore_mode() - self._flags.if_block = False + self.flags.if_block = False - if self._flags.in_case_statement and reserved_array(current_token, ['case', 'default']): + if current_token.type == 'TK_RESERVED' and (current_token.text == 'case' or (current_token.text == 'default' and self.flags.in_case_statement)): self.print_newline() - if self._flags.case_body or self._options.jslint_happy: - self._flags.case_body = False + if self.flags.case_body or self.opts.jslint_happy: + self.flags.case_body = False self.deindent() self.print_token(current_token) - self._flags.in_case = True + self.flags.in_case = True + self.flags.in_case_statement = True return - if self._flags.last_token.type in [ - TOKEN.COMMA, - TOKEN.START_EXPR, - TOKEN.EQUALS, - TOKEN.OPERATOR]: + if self.last_type in ['TK_COMMA', 'TK_START_EXPR', 'TK_EQUALS', 'TK_OPERATOR']: if not self.start_of_object_property(): self.allow_wrap_or_preserved_newline(current_token) - if reserved_word(current_token, 'function'): - if (self._flags.last_token.text in ['}', ';'] or (self._output.just_added_newline() and not ( - self._flags.last_token.text in ['(', '[', '{', ':', '=', ','] or self._flags.last_token.type == TOKEN.OPERATOR))): + if current_token.type == 'TK_RESERVED' and current_token.text == 'function': + if (self.flags.last_text in ['}', ';'] or + (self.output.just_added_newline() and not (self.flags.last_text in ['(', '[', '{', ':', '=', ','] or self.last_type == 'TK_OPERATOR'))): # make sure there is a nice clean space of at least one blank line # before a new function definition, except in arrays - if not self._output.just_added_blankline() and \ - current_token.comments_before is None: + if not self.output.just_added_blankline() and len(current_token.comments_before) == 0: self.print_newline() self.print_newline(True) - if self._flags.last_token.type == TOKEN.RESERVED or self._flags.last_token.type == TOKEN.WORD: - if reserved_array(self._flags.last_token, ['get', 'set', 'new', 'export']) or \ - reserved_array(self._flags.last_token, self._newline_restricted_tokens): - self._output.space_before_token = True - elif reserved_word(self._flags.last_token, 'default') and self._last_last_text == 'export': - self._output.space_before_token = True - elif self._flags.last_token.text == 'declare': - # accomodates Typescript declare function formatting - self._output.space_before_token = True + if self.last_type == 'TK_RESERVED' or self.last_type == 'TK_WORD': + if self.last_type == 'TK_RESERVED' and ( + self.flags.last_text in ['get', 'set', 'new', 'export', 'async'] or + self.flags.last_text in self._newline_restricted_tokens + ): + self.output.space_before_token = True + elif self.last_type == 'TK_RESERVED' and self.flags.last_text == 'default' and self.last_last_text == 'export': + self.output.space_before_token = True else: self.print_newline() - elif self._flags.last_token.type == TOKEN.OPERATOR or self._flags.last_token.text == '=': + elif self.last_type == 'TK_OPERATOR' or self.flags.last_text == '=': # foo = function - self._output.space_before_token = True - elif not self._flags.multiline_frame and (self.is_expression(self._flags.mode) or self.is_array(self._flags.mode)): + self.output.space_before_token = True + elif not self.flags.multiline_frame and (self.is_expression(self.flags.mode) or self.is_array(self.flags.mode)): # (function pass else: self.print_newline() self.print_token(current_token) - self._flags.last_word = current_token.text + self.flags.last_word = current_token.text return prefix = 'NONE' - if self._flags.last_token.type == TOKEN.END_BLOCK: - if self._previous_flags.inline_frame: + if self.last_type == 'TK_END_BLOCK': + if self.previous_flags.inline_frame: prefix = 'SPACE' - elif not reserved_array(current_token, ['else', 'catch', 'finally', 'from']): + elif not (current_token.type == 'TK_RESERVED' and current_token.text in ['else', 'catch', 'finally', 'from']): prefix = 'NEWLINE' else: - if self._options.brace_style in ['expand', 'end-expand'] or ( - self._options.brace_style == 'none' and current_token.newlines): + if self.opts.brace_style in ['expand', 'end-expand'] or \ + (self.opts.brace_style == 'none' and current_token.wanted_newline): prefix = 'NEWLINE' else: prefix = 'SPACE' - self._output.space_before_token = True - elif self._flags.last_token.type == TOKEN.SEMICOLON and self._flags.mode == MODE.BlockStatement: + self.output.space_before_token = True + elif self.last_type == 'TK_SEMICOLON' and self.flags.mode == MODE.BlockStatement: # TODO: Should this be for STATEMENT as well? prefix = 'NEWLINE' - elif self._flags.last_token.type == TOKEN.SEMICOLON and self.is_expression(self._flags.mode): + elif self.last_type == 'TK_SEMICOLON' and self.is_expression(self.flags.mode): prefix = 'SPACE' - elif self._flags.last_token.type == TOKEN.STRING: + elif self.last_type == 'TK_STRING': prefix = 'NEWLINE' - elif self._flags.last_token.type == TOKEN.RESERVED or self._flags.last_token.type == TOKEN.WORD or \ - (self._flags.last_token.text == '*' and ( - self._last_last_text in ['function', 'yield'] or - (self._flags.mode == MODE.ObjectLiteral and self._last_last_text in ['{', ',']))): + elif self.last_type == 'TK_RESERVED' or self.last_type == 'TK_WORD' or \ + (self.flags.last_text == '*' and ( + self.last_last_text in ['function', 'yield'] or + (self.flags.mode == MODE.ObjectLiteral and self.last_last_text in ['{', ',']))): prefix = 'SPACE' - elif self._flags.last_token.type == TOKEN.START_BLOCK: - if self._flags.inline_frame: + elif self.last_type == 'TK_START_BLOCK': + if self.flags.inline_frame: prefix = 'SPACE' else: prefix = 'NEWLINE' - elif self._flags.last_token.type == TOKEN.END_EXPR: - self._output.space_before_token = True + elif self.last_type == 'TK_END_EXPR': + self.output.space_before_token = True prefix = 'NEWLINE' - if reserved_array(current_token, Tokenizer.line_starters) and self._flags.last_token.text != ')': - if self._flags.inline_frame or self._flags.last_token.text == 'else ' or self._flags.last_token.text == 'export': + if current_token.type == 'TK_RESERVED' and current_token.text in Tokenizer.line_starters and self.flags.last_text != ')': + if self.flags.inline_frame or self.flags.last_text == 'else ' or self.flags.last_text == 'export': prefix = 'SPACE' else: prefix = 'NEWLINE' - if reserved_array(current_token, ['else', 'catch', 'finally']): - if ((not (self._flags.last_token.type == TOKEN.END_BLOCK and self._previous_flags.mode == MODE.BlockStatement)) - or self._options.brace_style == 'expand' - or self._options.brace_style == 'end-expand' - or (self._options.brace_style == 'none' and current_token.newlines)) \ - and not self._flags.inline_frame: + if current_token.type == 'TK_RESERVED' and current_token.text in ['else', 'catch', 'finally']: + if ((not (self.last_type == 'TK_END_BLOCK' and self.previous_flags.mode == MODE.BlockStatement)) \ + or self.opts.brace_style == 'expand' \ + or self.opts.brace_style == 'end-expand' \ + or (self.opts.brace_style == 'none' and current_token.wanted_newline)) \ + and not self.flags.inline_frame: self.print_newline() else: - self._output.trim(True) + self.output.trim(True) # If we trimmed and there's something other than a close block before us # put a newline back in. Handles '} // comment' scenario. - if self._output.current_line.last() != '}': + if self.output.current_line.last() != '}': self.print_newline() - self._output.space_before_token = True + self.output.space_before_token = True elif prefix == 'NEWLINE': - if reserved_array(self._flags.last_token, _special_word_set): + if self.last_type == 'TK_RESERVED' and self.is_special_word(self.flags.last_text): # no newline between return nnn - self._output.space_before_token = True - elif self._flags.last_token.text == 'declare' and reserved_array(current_token, [ - 'var', - 'let', - 'const']): - # accomodates Typescript declare formatting - self._output.space_before_token = True - elif self._flags.last_token.type != TOKEN.END_EXPR: - if ( - self._flags.last_token.type != TOKEN.START_EXPR or not ( - reserved_array(current_token, [ - 'var', - 'let', - 'const']))) and self._flags.last_token.text != ':': + self.output.space_before_token = True + elif self.last_type != 'TK_END_EXPR': + if (self.last_type != 'TK_START_EXPR' or not (current_token.type == 'TK_RESERVED' and current_token.text in ['var', 'let', 'const'])) and self.flags.last_text != ':': # no need to force newline on VAR - # for (var x = 0... - if reserved_word(current_token, 'if') and self._flags.last_token.text == 'else': - self._output.space_before_token = True + if current_token.type == 'TK_RESERVED' and current_token.text == 'if' and self.flags.last_text == 'else': + self.output.space_before_token = True else: self.print_newline() - elif reserved_array(current_token, Tokenizer.line_starters) and self._flags.last_token.text != ')': + elif current_token.type == 'TK_RESERVED' and current_token.text in Tokenizer.line_starters and self.flags.last_text != ')': self.print_newline() - elif self._flags.multiline_frame and self.is_array(self._flags.mode) and self._flags.last_token.text == ',' and self._last_last_text == '}': - self.print_newline() # }, in lists get a newline + elif self.flags.multiline_frame and self.is_array(self.flags.mode) and self.flags.last_text == ',' and self.last_last_text == '}': + self.print_newline() # }, in lists get a newline elif prefix == 'SPACE': - self._output.space_before_token = True + self.output.space_before_token = True - if current_token.previous and (current_token.previous.type == TOKEN.WORD or - current_token.previous.type == TOKEN.RESERVED): - self._output.space_before_token = True self.print_token(current_token) - self._flags.last_word = current_token.text + self.flags.last_word = current_token.text - if current_token.type == TOKEN.RESERVED: + if current_token.type == 'TK_RESERVED': if current_token.text == 'do': - self._flags.do_block = True + self.flags.do_block = True elif current_token.text == 'if': - self._flags.if_block = True + self.flags.if_block = True elif current_token.text == 'import': - self._flags.import_block = True - elif current_token.text == 'from' and self._flags.import_block: - self._flags.import_block = False + self.flags.import_block = True + elif current_token.text == 'from' and self.flags.import_block: + self.flags.import_block = False + def handle_semicolon(self, current_token): if self.start_of_statement(current_token): # The conditional starts the statement if appropriate. # Semicolon can be the start (and end) of a statement - self._output.space_before_token = False + self.output.space_before_token = False else: self.handle_whitespace_and_comments(current_token) - next_token = self._tokens.peek() - while (self._flags.mode == MODE.Statement and - not (self._flags.if_block and reserved_word(next_token, 'else')) and - not self._flags.do_block): + next_token = self.get_token(1) + while (self.flags.mode == MODE.Statement and + not (self.flags.if_block and next_token and next_token.type == 'TK_RESERVED' and next_token.text == 'else') and + not self.flags.do_block): self.restore_mode() - if self._flags.import_block: - self._flags.import_block = False + if self.flags.import_block: + self.flags.import_block = False self.print_token(current_token) + def handle_string(self, current_token): if self.start_of_statement(current_token): # The conditional starts the statement if appropriate. # One difference - strings want at least a space before - self._output.space_before_token = True + self.output.space_before_token = True else: self.handle_whitespace_and_comments(current_token) - if self._flags.last_token.type == TOKEN.RESERVED or self._flags.last_token.type == TOKEN.WORD or self._flags.inline_frame: - self._output.space_before_token = True - elif self._flags.last_token.type in [TOKEN.COMMA, TOKEN.START_EXPR, TOKEN.EQUALS, TOKEN.OPERATOR]: + if self.last_type == 'TK_RESERVED' or self.last_type == 'TK_WORD' or self.flags.inline_frame: + self.output.space_before_token = True + elif self.last_type in ['TK_COMMA', 'TK_START_EXPR', 'TK_EQUALS', 'TK_OPERATOR']: if not self.start_of_object_property(): self.allow_wrap_or_preserved_newline(current_token) else: @@ -954,6 +888,7 @@ def handle_string(self, current_token): self.print_token(current_token) + def handle_equals(self, current_token): if self.start_of_statement(current_token): # The conditional starts the statement if appropriate. @@ -961,72 +896,72 @@ def handle_equals(self, current_token): else: self.handle_whitespace_and_comments(current_token) - if self._flags.declaration_statement: - # just got an '=' in a var-line, different line breaking rules will - # apply - self._flags.declaration_assignment = True - self._output.space_before_token = True + if self.flags.declaration_statement: + # just got an '=' in a var-line, different line breaking rules will apply + self.flags.declaration_assignment = True + + self.output.space_before_token = True self.print_token(current_token) - self._output.space_before_token = True + self.output.space_before_token = True + def handle_comma(self, current_token): self.handle_whitespace_and_comments(current_token, True) self.print_token(current_token) - self._output.space_before_token = True + self.output.space_before_token = True - if self._flags.declaration_statement: - if self.is_expression(self._flags.parent.mode): + if self.flags.declaration_statement: + if self.is_expression(self.flags.parent.mode): # do not break on comma, for ( var a = 1, b = 2 - self._flags.declaration_assignment = False + self.flags.declaration_assignment = False - if self._flags.declaration_assignment: - self._flags.declaration_assignment = False - self.print_newline(preserve_statement_flags=True) - elif self._options.comma_first: + if self.flags.declaration_assignment: + self.flags.declaration_assignment = False + self.print_newline(preserve_statement_flags = True) + elif self.opts.comma_first: # for comma-first, we want to allow a newline before the comma - # to turn into a newline after the comma, which we will fixup - # later + # to turn into a newline after the comma, which we will fixup later self.allow_wrap_or_preserved_newline(current_token) - elif self._flags.mode == MODE.ObjectLiteral \ - or (self._flags.mode == MODE.Statement and self._flags.parent.mode == MODE.ObjectLiteral): - if self._flags.mode == MODE.Statement: + elif self.flags.mode == MODE.ObjectLiteral \ + or (self.flags.mode == MODE.Statement and self.flags.parent.mode == MODE.ObjectLiteral): + if self.flags.mode == MODE.Statement: self.restore_mode() - if not self._flags.inline_frame: + if not self.flags.inline_frame: self.print_newline() - elif self._options.comma_first: + elif self.opts.comma_first: # EXPR or DO_BLOCK # for comma-first, we want to allow a newline before the comma # to turn into a newline after the comma, which we will fixup later self.allow_wrap_or_preserved_newline(current_token) + def handle_operator(self, current_token): isGeneratorAsterisk = current_token.text == '*' and \ - (reserved_array(self._flags.last_token, ['function', 'yield']) or - (self._flags.last_token.type in [TOKEN.START_BLOCK, TOKEN.COMMA, TOKEN.END_BLOCK, TOKEN.SEMICOLON])) + ((self.last_type == 'TK_RESERVED' and self.flags.last_text in ['function', 'yield']) or + (self.last_type in ['TK_START_BLOCK', 'TK_COMMA', 'TK_END_BLOCK', 'TK_SEMICOLON'])) isUnary = current_token.text in ['+', '-'] \ - and (self._flags.last_token.type in [TOKEN.START_BLOCK, TOKEN.START_EXPR, TOKEN.EQUALS, TOKEN.OPERATOR] - or self._flags.last_token.text in Tokenizer.line_starters or self._flags.last_token.text == ',') + and (self.last_type in ['TK_START_BLOCK', 'TK_START_EXPR', 'TK_EQUALS', 'TK_OPERATOR'] \ + or self.flags.last_text in Tokenizer.line_starters or self.flags.last_text == ',') if self.start_of_statement(current_token): # The conditional starts the statement if appropriate. pass else: preserve_statement_flags = not isGeneratorAsterisk - self.handle_whitespace_and_comments( - current_token, preserve_statement_flags) + self.handle_whitespace_and_comments(current_token, preserve_statement_flags) - if reserved_array(self._flags.last_token, _special_word_set): + if self.last_type == 'TK_RESERVED' and self.is_special_word(self.flags.last_text): # return had a special handling in TK_WORD - self._output.space_before_token = True + self.output.space_before_token = True self.print_token(current_token) return # hack for actionscript's import .*; - if current_token.text == '*' and self._flags.last_token.type == TOKEN.DOT: + if current_token.text == '*' and self.last_type == 'TK_DOT': self.print_token(current_token) return @@ -1037,15 +972,15 @@ def handle_operator(self, current_token): # Allow line wrapping between operators when operator_position is # set to before or preserve - if self._flags.last_token.type == TOKEN.OPERATOR and self._options.operator_position in OPERATOR_POSITION_BEFORE_OR_PRESERVE: + if self.last_type == 'TK_OPERATOR' and self.opts.operator_position in OPERATOR_POSITION_BEFORE_OR_PRESERVE: self.allow_wrap_or_preserved_newline(current_token) - if current_token.text == ':' and self._flags.in_case: - self._flags.case_body = True + if current_token.text == ':' and self.flags.in_case: + self.flags.case_body = True self.indent() self.print_token(current_token) self.print_newline() - self._flags.in_case = False + self.flags.in_case = False return space_before = True @@ -1053,160 +988,143 @@ def handle_operator(self, current_token): in_ternary = False if current_token.text == ':': - if self._flags.ternary_depth == 0: - # Colon is invalid javascript outside of ternary and object, - # but do our best to guess what was meant. + if self.flags.ternary_depth == 0: + # Colon is invalid javascript outside of ternary and object, but do our best to guess what was meant. space_before = False else: - self._flags.ternary_depth -= 1 + self.flags.ternary_depth -= 1 in_ternary = True elif current_token.text == '?': - self._flags.ternary_depth += 1 + self.flags.ternary_depth += 1 - # let's handle the operator_position option prior to any conflicting - # logic + # let's handle the operator_position option prior to any conflicting logic if (not isUnary) and (not isGeneratorAsterisk) and \ - self._options.preserve_newlines and current_token.text in Tokenizer.positionable_operators: + self.opts.preserve_newlines and current_token.text in Tokenizer.positionable_operators: isColon = current_token.text == ':' isTernaryColon = isColon and in_ternary isOtherColon = isColon and not in_ternary - if self._options.operator_position == OPERATOR_POSITION['before_newline']: - # if the current token is : and it's not a ternary statement - # then we set space_before to false - self._output.space_before_token = not isOtherColon + if self.opts.operator_position == OPERATOR_POSITION['before_newline']: + # if the current token is : and it's not a ternary statement then we set space_before to false + self.output.space_before_token = not isOtherColon self.print_token(current_token) if (not isColon) or isTernaryColon: self.allow_wrap_or_preserved_newline(current_token) - self._output.space_before_token = True + self.output.space_before_token = True return - elif self._options.operator_position == OPERATOR_POSITION['after_newline']: + elif self.opts.operator_position == OPERATOR_POSITION['after_newline']: # if the current token is anything but colon, or (via deduction) it's a colon and in a ternary statement, # then print a newline. - self._output.space_before_token = True + self.output.space_before_token = True if (not isColon) or isTernaryColon: - if self._tokens.peek().newlines: - self.print_newline(preserve_statement_flags=True) + if self.get_token(1).wanted_newline: + self.print_newline(preserve_statement_flags = True) else: self.allow_wrap_or_preserved_newline(current_token) else: - self._output.space_before_token = False + self.output.space_before_token = False self.print_token(current_token) - self._output.space_before_token = True + self.output.space_before_token = True return - elif self._options.operator_position == OPERATOR_POSITION['preserve_newline']: + elif self.opts.operator_position == OPERATOR_POSITION['preserve_newline']: if not isOtherColon: self.allow_wrap_or_preserved_newline(current_token) # if we just added a newline, or the current token is : and it's not a ternary statement, # then we set space_before to false - self._output.space_before_token = not ( - self._output.just_added_newline() or isOtherColon) + self.output.space_before_token = not (self.output.just_added_newline() or isOtherColon) self.print_token(current_token) - self._output.space_before_token = True + self.output.space_before_token = True return if isGeneratorAsterisk: self.allow_wrap_or_preserved_newline(current_token) space_before = False - next_token = self._tokens.peek() - space_after = next_token and next_token.type in [ - TOKEN.WORD, TOKEN.RESERVED] + next_token = self.get_token(1) + space_after = next_token and next_token.type in ['TK_WORD','TK_RESERVED'] elif current_token.text == '...': self.allow_wrap_or_preserved_newline(current_token) - space_before = self._flags.last_token.type == TOKEN.START_BLOCK + space_before = self.last_type == 'TK_START_BLOCK' space_after = False elif current_token.text in ['--', '++', '!', '~'] or isUnary: - if self._flags.last_token.type == TOKEN.COMMA or self._flags.last_token.type == TOKEN.START_EXPR: - self.allow_wrap_or_preserved_newline(current_token) - space_before = False space_after = False # http://www.ecma-international.org/ecma-262/5.1/#sec-7.9.1 - # if there is a newline between -- or ++ and anything else we - # should preserve it. - if current_token.newlines and ( - current_token.text == '--' or current_token.text == '++'): - self.print_newline(preserve_statement_flags=True) - - if self._flags.last_token.text == ';' and self.is_expression( - self._flags.mode): + # if there is a newline between -- or ++ and anything else we should preserve it. + if current_token.wanted_newline and (current_token.text == '--' or current_token.text == '++'): + self.print_newline(preserve_statement_flags = True) + + if self.flags.last_text == ';' and self.is_expression(self.flags.mode): # for (;; ++i) # ^^ space_before = True - if self._flags.last_token.type == TOKEN.RESERVED: + if self.last_type == 'TK_RESERVED': space_before = True - elif self._flags.last_token.type == TOKEN.END_EXPR: - space_before = not ( - self._flags.last_token.text == ']' and current_token.text in [ - '--', '++']) - elif self._flags.last_token.type == TOKEN.OPERATOR: + elif self.last_type == 'TK_END_EXPR': + space_before = not (self.flags.last_text == ']' and current_token.text in ['--', '++']) + elif self.last_type == 'TK_OPERATOR': # a++ + ++b # a - -b - space_before = current_token.text in [ - '--', '-', '++', '+'] and self._flags.last_token.text in ['--', '-', '++', '+'] + space_before = current_token.text in ['--', '-','++', '+'] and self.flags.last_text in ['--', '-','++', '+'] # + and - are not unary when preceeded by -- or ++ operator # a-- + b # a * +b # a - -b - if current_token.text in [ - '-', '+'] and self._flags.last_token.text in ['--', '++']: + if current_token.text in ['-', '+'] and self.flags.last_text in ['--', '++']: space_after = True - if (((self._flags.mode == MODE.BlockStatement and not self._flags.inline_frame) - or self._flags.mode == MODE.Statement) and self._flags.last_token.text in ['{', ';']): + if (((self.flags.mode == MODE.BlockStatement and not self.flags.inline_frame) or self.flags.mode == MODE.Statement) + and self.flags.last_text in ['{', ';']): # { foo: --i } # foo(): --bar self.print_newline() if space_before: - self._output.space_before_token = True + self.output.space_before_token = True self.print_token(current_token) if space_after: - self._output.space_before_token = True + self.output.space_before_token = True + + def handle_block_comment(self, current_token, preserve_statement_flags): - if self._output.raw: - self._output.add_raw_token(current_token) - if current_token.directives and current_token.directives.get( - 'preserve') == 'end': - # If we're testing the raw output behavior, do not allow a - # directive to turn it off. - self._output.raw = self._options.test_output_raw + if self.output.raw: + self.output.add_raw_token(current_token) + if current_token.directives and current_token.directives.get('preserve') == 'end': + # If we're testing the raw output behavior, do not allow a directive to turn it off. + self.output.raw = self.opts.test_output_raw return if current_token.directives: - self.print_newline( - preserve_statement_flags=preserve_statement_flags) + self.print_newline(preserve_statement_flags = preserve_statement_flags) self.print_token(current_token) if current_token.directives.get('preserve') == 'start': - self._output.raw = True + self.output.raw = True - self.print_newline(preserve_statement_flags=True) + self.print_newline(preserve_statement_flags = True) return # inline block - if not self.acorn.newline.search( - current_token.text) and not current_token.newlines: - self._output.space_before_token = True + if not self.acorn.newline.search(current_token.text) and not current_token.wanted_newline: + self.output.space_before_token = True self.print_token(current_token) - self._output.space_before_token = True + self.output.space_before_token = True return lines = self.acorn.allLineBreaks.split(current_token.text) @@ -1216,17 +1134,15 @@ def handle_block_comment(self, current_token, preserve_statement_flags): last_indent_length = len(last_indent) # block comment starts with a new line - self.print_newline(preserve_statement_flags=preserve_statement_flags) - if len(lines) > 1: - javadoc = not any(l for l in lines[1:] if ( - l.strip() == '' or (l.lstrip())[0] != '*')) - starless = all(l.startswith(last_indent) - or l.strip() == '' for l in lines[1:]) + self.print_newline(preserve_statement_flags = preserve_statement_flags) + if len(lines) > 1: + javadoc = not any(l for l in lines[1:] if ( l.strip() == '' or (l.lstrip())[0] != '*')) + starless = all(l.startswith(last_indent) or l.strip() == '' for l in lines[1:]) # first line always indented self.print_token(current_token, lines[0]) for line in lines[1:]: - self.print_newline(preserve_statement_flags=True) + self.print_newline(preserve_statement_flags = True) if javadoc: # javadoc: reformat and re-indent self.print_token(current_token, ' ' + line.lstrip()) @@ -1235,21 +1151,21 @@ def handle_block_comment(self, current_token, preserve_statement_flags): self.print_token(current_token, line[last_indent_length:]) else: # normal comments output raw - self._output.add_token(line) + self.output.add_token(line) - self.print_newline(preserve_statement_flags=preserve_statement_flags) + self.print_newline(preserve_statement_flags = preserve_statement_flags) def handle_comment(self, current_token, preserve_statement_flags): - if current_token.newlines: - self.print_newline( - preserve_statement_flags=preserve_statement_flags) + if current_token.wanted_newline: + self.print_newline(preserve_statement_flags = preserve_statement_flags) - if not current_token.newlines: - self._output.trim(True) + if not current_token.wanted_newline: + self.output.trim(True) - self._output.space_before_token = True + self.output.space_before_token = True self.print_token(current_token) - self.print_newline(preserve_statement_flags=preserve_statement_flags) + self.print_newline(preserve_statement_flags = preserve_statement_flags) + def handle_dot(self, current_token): if self.start_of_statement(current_token): @@ -1258,34 +1174,24 @@ def handle_dot(self, current_token): else: self.handle_whitespace_and_comments(current_token, True) - if reserved_array(self._flags.last_token, _special_word_set): - self._output.space_before_token = False + if self.last_type == 'TK_RESERVED' and self.is_special_word(self.flags.last_text): + self.output.space_before_token = True else: # allow preserved newlines before dots in general - # force newlines on dots after close paren when break_chained - for - # bar().baz() - self.allow_wrap_or_preserved_newline( - current_token, self._flags.last_token.text == ')' and - self._options.break_chained_methods) - - # Only unindent chained method dot if this dot starts a new line. - # Otherwise the automatic extra indentation removal - # will handle any over indent - if self._options.unindent_chained_methods and \ - self._output.just_added_newline(): - self.deindent() + # force newlines on dots after close paren when break_chained - for bar().baz() + self.allow_wrap_or_preserved_newline(current_token, + self.flags.last_text == ')' and self.opts.break_chained_methods) self.print_token(current_token) def handle_unknown(self, current_token, preserve_statement_flags): self.print_token(current_token) if current_token.text[-1] == '\n': - self.print_newline( - preserve_statement_flags=preserve_statement_flags) + self.print_newline(preserve_statement_flags = preserve_statement_flags) def handle_eof(self, current_token): # Unwind any open statements - while self._flags.mode == MODE.Statement: + while self.flags.mode == MODE.Statement: self.restore_mode() self.handle_whitespace_and_comments(current_token) diff --git a/codeformatter/lib/jsbeautifier/javascript/options.py b/codeformatter/lib/jsbeautifier/javascript/options.py index dc08f21..51d52db 100644 --- a/codeformatter/lib/jsbeautifier/javascript/options.py +++ b/codeformatter/lib/jsbeautifier/javascript/options.py @@ -1,6 +1,6 @@ # The MIT License (MIT) # -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. +# Copyright (c) 2007-2017 Einar Lielmanis, Liam Newman, and contributors. # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation files @@ -22,73 +22,66 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - -from ..core.options import Options as BaseOptions - -OPERATOR_POSITION = [ - 'before-newline', - 'after-newline', - 'preserve-newline' -] - -class BeautifierOptions(BaseOptions): - def __init__(self, options=None): - BaseOptions.__init__(self, options, 'js') +class BeautifierOptions: + def __init__(self): + self.indent_size = 4 + self.indent_char = ' ' + self.indent_with_tabs = False + self.eol = 'auto' + self.preserve_newlines = True + self.max_preserve_newlines = 10 + self.space_in_paren = False + self.space_in_empty_paren = False + self.e4x = False + self.jslint_happy = False + self.space_after_anon_function = False + self.brace_style = 'collapse' + self.keep_array_indentation = False + self.keep_function_indentation = False + self.eval_code = False + self.unescape_strings = False + self.wrap_line_length = 0 + self.unindent_chained_methods = False + self.break_chained_methods = False + self.end_with_newline = False + self.comma_first = False + self.operator_position = 'before-newline' self.css = None self.js = None self.html = None - # compatibility, re - - raw_brace_style = getattr(self.raw_options, 'brace_style', None) - if raw_brace_style == "expand-strict": # graceful handling of deprecated option - setattr(self.raw_options, 'brace_style', "expand") - elif raw_brace_style == "collapse-preserve-inline": # graceful handling of deprecated option - setattr(self.raw_options, 'brace_style', "collapse,preserve-inline") - # elif bool(self.raw_options.braces_on_own_line): # graceful handling of deprecated option - # raw_brace_style = "expand": "collapse" - # elif raw_brace_style is None: # Nothing exists to set it - # setattr(self.raw_options, 'brace_style', "collapse") - - # preserve-inline in delimited string will trigger brace_preserve_inline, everything - # else is considered a brace_style and the last one only will have an effect - - brace_style_split = self._get_selection_list('brace_style', ['collapse', 'expand', 'end-expand', 'none', 'preserve-inline']) - - # preserve-inline in delimited string will trigger brace_preserve_inline - # Everything else is considered a brace_style and the last one only will - # have an effect - # specify defaults in case one half of meta-option is missing - self.brace_preserve_inline = False - self.brace_style = "collapse" - - for bs in brace_style_split: - if bs == "preserve-inline": - self.brace_preserve_inline = True - else: - self.brace_style = bs - - self.unindent_chained_methods = self._get_boolean('unindent_chained_methods') - self.break_chained_methods = self._get_boolean('break_chained_methods') - self.space_in_paren = self._get_boolean('space_in_paren') - self.space_in_empty_paren = self._get_boolean('space_in_empty_paren') - self.jslint_happy = self._get_boolean('jslint_happy') - self.space_after_anon_function = self._get_boolean('space_after_anon_function') - self.space_after_named_function = self._get_boolean('space_after_named_function') - self.keep_array_indentation = self._get_boolean('keep_array_indentation') - self.space_before_conditional = self._get_boolean('space_before_conditional', True) - self.unescape_strings = self._get_boolean('unescape_strings') - self.e4x = self._get_boolean('e4x') - self.comma_first = self._get_boolean('comma_first') - self.operator_position = self._get_selection('operator_position', OPERATOR_POSITION) - - # For testing of beautify preserve:start directive + # For testing of beautify ignore:start directive self.test_output_raw = False self.editorconfig = False - # force opts.space_after_anon_function to true if opts.jslint_happy - if self.jslint_happy: - self.space_after_anon_function = True - self.eval_code = False + def __repr__(self): + return \ +"""indent_size = %d +indent_char = [%s] +preserve_newlines = %s +max_preserve_newlines = %d +space_in_paren = %s +jslint_happy = %s +space_after_anon_function = %s +indent_with_tabs = %s +brace_style = %s +keep_array_indentation = %s +eval_code = %s +wrap_line_length = %s +unescape_strings = %s +""" % ( self.indent_size, + self.indent_char, + self.preserve_newlines, + self.max_preserve_newlines, + self.space_in_paren, + self.jslint_happy, + self.space_after_anon_function, + self.indent_with_tabs, + self.brace_style, + self.keep_array_indentation, + self.eval_code, + self.wrap_line_length, + self.unescape_strings, + ) diff --git a/codeformatter/lib/jsbeautifier/javascript/tokenizer.py b/codeformatter/lib/jsbeautifier/javascript/tokenizer.py index 5f8f616..f0f9c51 100644 --- a/codeformatter/lib/jsbeautifier/javascript/tokenizer.py +++ b/codeformatter/lib/jsbeautifier/javascript/tokenizer.py @@ -1,6 +1,6 @@ # The MIT License (MIT) # -# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. +# Copyright (c) 2007-2017 Einar Lielmanis, Liam Newman, and contributors. # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation files @@ -24,460 +24,443 @@ import re from ..core.inputscanner import InputScanner -from ..core.tokenizer import TokenTypes as BaseTokenTypes -from ..core.tokenizer import Tokenizer as BaseTokenizer -from ..core.directives import Directives - -__all__ = ["TOKEN", "Tokenizer", "TokenTypes"] - -class TokenTypes(BaseTokenTypes): - START_EXPR = 'TK_START_EXPR' - END_EXPR = 'TK_END_EXPR' - START_BLOCK = 'TK_START_BLOCK' - END_BLOCK = 'TK_END_BLOCK' - WORD = 'TK_WORD' - RESERVED = 'TK_RESERVED' - SEMICOLON = 'TK_SEMICOLON' - STRING = 'TK_STRING' - EQUALS = 'TK_EQUALS' - OPERATOR = 'TK_OPERATOR' - COMMA = 'TK_COMMA' - BLOCK_COMMENT = 'TK_BLOCK_COMMENT' - COMMENT = 'TK_COMMENT' - DOT = 'TK_DOT' - UNKNOWN = 'TK_UNKNOWN' - - def __init__(self): - pass - - -TOKEN = TokenTypes() - -dot_pattern = re.compile(r'[^\d\.]') - -number_pattern = re.compile( - r'0[xX][0123456789abcdefABCDEF]*|0[oO][01234567]*|0[bB][01]*|\d+n|(?:\.\d+|\d+\.?\d*)(?:[eE][+-]?\d+)?') -digit = re.compile(r'[0-9]') - -startXmlRegExp = re.compile( - r'<()([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>') -xmlRegExp = re.compile( - r'[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>') - -positionable_operators = frozenset( - (">>> === !== " + - "<< && >= ** != == <= >> || " + - "< / - + > : & % ? ^ | *").split(' ')) - -punct = (">>>= " + - "... >>= <<= === >>> !== **= " + - "=> ^= :: /= << <= == && -= >= >> != -- += ** || ++ %= &= *= |= " + - "= ! ? > < : / ^ - + * & % ~ |") - -punct = re.compile(r'([-[\]{}()*+?.,\\^$|#])').sub(r'\\\1', punct) -punct = punct.replace(' ', '|') - -punct_pattern = re.compile(punct) -shebang_pattern = re.compile(r'#![^\n]*(?:\r\n|[\n\r\u2028\u2029])?') -include_pattern = re.compile(r'#include[^\n\r\u2028\u2029]*(?:\r\n|[\n\r\u2028\u2029])?') - -# Words which always should start on a new line -line_starters = frozenset( - ('continue,try,throw,return,var,let,const,if,switch,case,default,for,' + - 'while,break,function,import,export').split(',')) -reserved_words = line_starters | frozenset(['do', - 'in', - 'of', - 'else', - 'get', - 'set', - 'new', - 'catch', - 'finally', - 'typeof', - 'yield', - 'async', - 'await', - 'from', - 'as']) - -reserved_word_pattern = re.compile(r'^(?:' + '|'.join(reserved_words) + r')$') - -# /* ... */ comment ends with nearest */ or end of file -block_comment_pattern = re.compile(r'/\*([\s\S]*?)((?:\*\/)|$)') - -directives_core = Directives(r'/\*', r'\*/') - -template_pattern = re.compile( - r'(?:(?:<\?php|<\?=)[\s\S]*?\?>)|(?:<%[\s\S]*?%>)') - - -class Tokenizer(BaseTokenizer): - positionable_operators = positionable_operators - line_starters = line_starters - - def __init__(self, input_string, opts): - BaseTokenizer.__init__(self, input_string, opts) - # This is not pretty, but given how we did the version import - # it is the only way to do this without having setup.py fail on a missing - # six dependency. - self._six = __import__("six") - - import jsbeautifier.javascript.acorn as acorn +from ..core.token import Token + +class Tokenizer: + + whitespace = ["\n", "\r", "\t", " "] + digit = re.compile('[0-9]') + digit_bin = re.compile('[01]') + digit_oct = re.compile('[01234567]') + digit_hex = re.compile('[0123456789abcdefABCDEF]') + + positionable_operators = '!= !== % & && * ** + - / : < << <= == === > >= >> >>> ? ^ | ||'.split(' ') + punct = (positionable_operators + + # non-positionable operators - these do not follow operator position settings + '! %= &= *= **= ++ += , -- -= /= :: <<= = => >>= >>>= ^= |= ~ ...'.split(' ')) + + # Words which always should start on a new line + line_starters = 'continue,try,throw,return,var,let,const,if,switch,case,default,for,while,break,function,import,export'.split(',') + reserved_words = line_starters + ['do', 'in', 'of', 'else', 'get', 'set', 'new', 'catch', 'finally', 'typeof', 'yield', 'async', 'await', 'from', 'as'] + + def __init__ (self, input_string, opts, indent_string): + import jsbeautifier.core.acorn as acorn self.acorn = acorn + self.input = InputScanner(input_string) + self.opts = opts + self.indent_string = indent_string + # /* ... */ comment ends with nearest */ or end of file + self.block_comment_pattern = re.compile('([\s\S]*?)((?:\*\/)|$)') + + # comment ends just before nearest linefeed or end of file + self.comment_pattern = re.compile(self.acorn.six.u('([^\n\r\u2028\u2029]*)')) + + self.directives_block_pattern = re.compile('\/\* beautify( \w+[:]\w+)+ \*\/') + self.directive_pattern = re.compile(' (\w+)[:](\w+)') + self.directives_end_ignore_pattern = re.compile('([\s\S]*?)((?:\/\*\sbeautify\signore:end\s\*\/)|$)') + self.template_pattern = re.compile('((<\?php|<\?=)[\s\S]*?\?>)|(<%[\s\S]*?%>)') + + def tokenize(self): self.in_html_comment = False - self.has_char_escapes = False + self.tokens = [] + + next = None + last = None + open = None + open_stack = [] + comments = [] + + while not (not last == None and last.type == 'TK_EOF'): + token_values = self.__tokenize_next() + next = Token(token_values[1], token_values[0], self.n_newlines, self.whitespace_before_token) + + while next.type == 'TK_COMMENT' or next.type == 'TK_BLOCK_COMMENT' or next.type == 'TK_UNKNOWN': + if next.type == 'TK_BLOCK_COMMENT': + next.directives = token_values[2] + + comments.append(next) + token_values = self.__tokenize_next() + next = Token(token_values[1], token_values[0], self.n_newlines, self.whitespace_before_token) + + if len(comments) > 0: + next.comments_before = comments + comments = [] + + if next.type == 'TK_START_BLOCK' or next.type == 'TK_START_EXPR': + next.parent = last + open_stack.append(open) + open = next + elif (next.type == 'TK_END_BLOCK' or next.type == 'TK_END_EXPR') and \ + (not open == None and ( \ + (next.text == ']' and open.text == '[') or \ + (next.text == ')' and open.text == '(') or \ + (next.text == '}' and open.text == '{'))): + next.parent = open.parent + next.opened = open + open = open_stack.pop() + + self.tokens.append(next) + last = next + return self.tokens + + def get_directives (self, text): + if not self.directives_block_pattern.match(text): + return None + + directives = {} + directive_match = self.directive_pattern.search(text) + while directive_match: + directives[directive_match.group(1)] = directive_match.group(2) + directive_match = self.directive_pattern.search(text, directive_match.end()) + + return directives + + + def __tokenize_next(self): + + whitespace_on_this_line = [] + self.n_newlines = 0 + self.whitespace_before_token = '' + + c = self.input.next() + + if c == None: + return '', 'TK_EOF' + + if len(self.tokens) > 0: + last_token = self.tokens[-1] + else: + # For the sake of tokenizing we can pretend that there was on open brace to start + last_token = Token('TK_START_BLOCK', '{') + + while c in self.whitespace: + if self.acorn.newline.match(c): + # treat \r\n as one newline + if not (c == '\n' and self.input.peek(-2) == '\r'): + self.n_newlines += 1 + whitespace_on_this_line = [] + else: + whitespace_on_this_line.append(c) - # comment ends just before nearest linefeed or end of file - # IMPORTANT: This string must be run through six to handle \u chars - self._whitespace_pattern = re.compile( - self._six.u(r'[\n\r\u2028\u2029\t\u000B\u00A0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff ]+')) - self._newline_pattern = re.compile( - self._six.u(r'([^\n\r\u2028\u2029]*)(\r\n|[\n\r\u2028\u2029])?')) - # // comment ends just before nearest linefeed or end of file + c = self.input.next() - self.comment_pattern = re.compile( - self._six.u(r'//([^\n\r\u2028\u2029]*)')) + if c == None: + return '', 'TK_EOF' + if len(whitespace_on_this_line) != 0: + self.whitespace_before_token = ''.join(whitespace_on_this_line) + if self.digit.match(c) or (c == '.' and self.input.testChar(self.digit)): + allow_decimal = True + allow_e = True + local_digit = self.digit - def _reset(self): - self.in_html_comment = False + if c == '0' and self.input.testChar(re.compile('[XxOoBb]')): + # switch to hex/oct/bin number, no decimal or e, just hex/oct/bin digits + allow_decimal = False + allow_e = False + if self.input.testChar(re.compile('[Bb]')): + local_digit = self.digit_bin + elif self.input.testChar(re.compile('[Oo]')): + local_digit = self.digit_oct + else: + local_digit = self.digit_hex + c += self.input.next() + elif c == '.': + # Already have a decimal for this literal, don't allow another + allow_decimal = False + else: + # we know this first loop will run. It keeps the logic simpler. + c = '' + self.input.back() + + # Add the digits + while self.input.testChar(local_digit): + c += self.input.next() + + if allow_decimal and self.input.peek() == '.': + c += self.input.next() + allow_decimal = False + + # a = 1.e-7 is valid, so we test for . then e in one loop + if allow_e and self.input.testChar(re.compile('[Ee]')): + c += self.input.next() + + if self.input.testChar(re.compile('[+-]')): + c += self.input.next() + + allow_e = False + allow_decimal = False + + return c, 'TK_WORD' + + if self.acorn.isIdentifierStart(self.input.peekCharCode(-1)): + if self.input.hasNext(): + while self.acorn.isIdentifierChar(self.input.peekCharCode()): + c += self.input.next() + if not self.input.hasNext(): + break + + if not (last_token.type == 'TK_DOT' \ + or (last_token.type == 'TK_RESERVED' and last_token.text in ['set', 'get'])) \ + and c in self.reserved_words: + if c == 'in' or c == 'of': # in and of are operators, need to hack + return c, 'TK_OPERATOR' + + return c, 'TK_RESERVED' + + return c, 'TK_WORD' + + if c in '([': + return c, 'TK_START_EXPR' + + if c in ')]': + return c, 'TK_END_EXPR' + + if c == '{': + return c, 'TK_START_BLOCK' + + if c == '}': + return c, 'TK_END_BLOCK' + + if c == ';': + return c, 'TK_SEMICOLON' - def _is_comment(self, current_token): - return current_token.type == TOKEN.COMMENT or \ - current_token.type == TOKEN.BLOCK_COMMENT or \ - current_token.type == TOKEN.UNKNOWN - - - def _is_opening(self, current_token): - return current_token.type == TOKEN.START_BLOCK or current_token.type == TOKEN.START_EXPR - - def _is_closing(self, current_token, open_token): - return (current_token.type == TOKEN.END_BLOCK or current_token.type == TOKEN.END_EXPR) and \ - (open_token is not None and ( - (current_token.text == ']' and open_token.text == '[') or - (current_token.text == ')' and open_token.text == '(') or - (current_token.text == '}' and open_token.text == '{'))) - - def _get_next_token(self, previous_token, open_token): - self._readWhitespace() - token = None - c = self._input.peek() - - token = token or self._read_singles(c) - token = token or self._read_word(previous_token) - token = token or self._read_comment(c) - token = token or self._read_string(c) - token = token or self._read_regexp(c, previous_token) - token = token or self._read_xml(c, previous_token) - token = token or self._read_non_javascript(c) - token = token or self._read_punctuation() - token = token or self._create_token(TOKEN.UNKNOWN, self._input.next()) - - return token - - def _read_singles(self, c): - token = None - - if c is None: - token = self._create_token(TOKEN.EOF, '') - elif c == '(' or c == '[': - token = self._create_token(TOKEN.START_EXPR, c) - elif c == ')' or c == ']': - token = self._create_token(TOKEN.END_EXPR, c) - elif c == '{': - token = self._create_token(TOKEN.START_BLOCK, c) - elif c == '}': - token = self._create_token(TOKEN.END_BLOCK, c) - elif c == ';': - token = self._create_token(TOKEN.SEMICOLON, c) - elif c == '.' and bool(dot_pattern.match(self._input.peek(1))): - token = self._create_token(TOKEN.DOT, c) - elif c == ',': - token = self._create_token(TOKEN.COMMA, c) - - if token is not None: - self._input.next() - - return token - - def _read_word(self, previous_token): - resulting_string = self._input.read(self.acorn.identifier) - if resulting_string != '': - if not (previous_token.type == TOKEN.DOT or ( - previous_token.type == TOKEN.RESERVED and ( - previous_token.text == 'set' or previous_token.text == 'get') - )) and reserved_word_pattern.match(resulting_string): - if resulting_string == 'in' or resulting_string == 'of': - # in and of are operators, need to hack - return self._create_token(TOKEN.OPERATOR, resulting_string) - - return self._create_token(TOKEN.RESERVED, resulting_string) - - return self._create_token(TOKEN.WORD, resulting_string) - - resulting_string = self._input.read(number_pattern) - if resulting_string != '': - return self._create_token(TOKEN.WORD, resulting_string) - - def _read_comment(self, c): - token = None if c == '/': comment = '' - if self._input.peek(1) == '*': # peek /* .. */ comment - comment = self._input.read(block_comment_pattern) + inline_comment = True + if self.input.peek() == '*': # peek /* .. */ comment + self.input.next() + comment_match = self.input.match(self.block_comment_pattern) + comment = '/*' + comment_match.group(0) - directives = directives_core.get_directives(comment) + directives = self.get_directives(comment) if directives and directives.get('ignore') == 'start': - comment += directives_core.readIgnored(self._input) + comment_match = self.input.match(self.directives_end_ignore_pattern) + comment += comment_match.group(0) comment = re.sub(self.acorn.allLineBreaks, '\n', comment) - token = self._create_token(TOKEN.BLOCK_COMMENT, comment) - token.directives = directives + return comment, 'TK_BLOCK_COMMENT', directives - elif self._input.peek(1) == '/': # peek // comment - comment = self._input.read(self.comment_pattern) - token = self._create_token(TOKEN.COMMENT, comment) + if self.input.peek() == '/': # peek // comment + self.input.next() + comment_match = self.input.match(self.comment_pattern) + comment = '//' + comment_match.group(0) + return comment, 'TK_COMMENT' - return token + startXmlRegExp = re.compile('<()([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>') + + self.has_char_escapes = False + if c == '`' or c == "'" or c == '"' or \ + ( \ + (c == '/') or \ + (self.opts.e4x and c == "<" and self.input.test(startXmlRegExp, -1)) \ + ) and ( \ + (last_token.type == 'TK_RESERVED' and last_token.text in ['return', 'case', 'throw', 'else', 'do', 'typeof', 'yield']) or \ + (last_token.type == 'TK_END_EXPR' and last_token.text == ')' and \ + last_token.parent and last_token.parent.type == 'TK_RESERVED' and last_token.parent.text in ['if', 'while', 'for']) or \ + (last_token.type in ['TK_COMMENT', 'TK_START_EXPR', 'TK_START_BLOCK', 'TK_END_BLOCK', 'TK_OPERATOR', \ + 'TK_EQUALS', 'TK_EOF', 'TK_SEMICOLON', 'TK_COMMA'])): + sep = c + esc = False + esc1 = 0 + esc2 = 0 + resulting_string = c + in_char_class = False - def _read_string(self, c): - if c == '`' or c == "'" or c == '"': - resulting_string = self._input.next() - self.has_char_escapes = False + if sep == '/': + # handle regexp + in_char_class = False + while self.input.hasNext() and \ + (esc or in_char_class or self.input.peek()!= sep) and \ + not self.input.testChar(self.acorn.newline): + resulting_string += self.input.peek() + if not esc: + esc = self.input.peek() == '\\' + if self.input.peek() == '[': + in_char_class = True + elif self.input.peek() == ']': + in_char_class = False + else: + esc = False + self.input.next() + + elif self.opts.e4x and sep == '<': + # handle e4x xml literals + xmlRegExp = re.compile('[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>') + self.input.back() + xmlStr = "" + match = self.input.match(xmlRegExp) + if match: + rootTag = match.group(2) + rootTag = re.sub(r'^{\s+', '{', re.sub(r'\s+}$', '}', rootTag)) + isCurlyRoot = rootTag.startswith('{') + depth = 0 + while (match): + isEndTag = match.group(1) + tagName = match.group(2) + isSingletonTag = (match.groups()[-1] != "") or (match.group(2)[0:8] == "![CDATA[") + if not isSingletonTag and ( + tagName == rootTag or (isCurlyRoot and re.sub(r'^{\s+', '{', re.sub(r'\s+}$', '}', tagName)))): + if isEndTag: + depth -= 1 + else: + depth += 1 + + xmlStr += match.group(0) + if depth <= 0: + break + + match = self.input.match(xmlRegExp) + + + # if we didn't close correctly, keep unformatted. + if not match: + xmlStr += self.input.match(re.compile('[\s\S]*')).group(0) + + xmlStr = re.sub(self.acorn.allLineBreaks, '\n', xmlStr) + return xmlStr, 'TK_STRING' - if c == '`': - resulting_string = self.parse_string( - resulting_string, '`', True, '${') else: - resulting_string = self.parse_string(resulting_string, c) - if self.has_char_escapes and self._options.unescape_strings: - resulting_string = self.unescape_string(resulting_string) + # handle string + def parse_string(self, resulting_string, delimiter, allow_unescaped_newlines = False, start_sub = None): + esc = False + while self.input.hasNext(): + current_char = self.input.peek() + if not (esc or (current_char != delimiter and + (allow_unescaped_newlines or not self.acorn.newline.match(current_char)))): + break + + # Handle \r\n linebreaks after escapes or in template strings + if (esc or allow_unescaped_newlines) and self.acorn.newline.match(current_char): + if current_char == '\r' and self.input.peek(1) == '\n': + self.input.next() + current_char = self.input.peek() + + resulting_string += '\n' + else: + resulting_string += current_char - if self._input.peek() == c : - resulting_string += self._input.next() + if esc: + if current_char == 'x' or current_char == 'u': + self.has_char_escapes = True - resulting_string = re.sub( - self.acorn.allLineBreaks, '\n', resulting_string) + esc = False + else: + esc = current_char == '\\' - return self._create_token(TOKEN.STRING, resulting_string) + self.input.next() - return None + if start_sub and resulting_string.endswith(start_sub): + if delimiter == '`': + resulting_string = parse_string(self, resulting_string, '}', allow_unescaped_newlines, '`') + else: + resulting_string = parse_string(self, resulting_string, '`', allow_unescaped_newlines, '${') - def _read_regexp(self, c, previous_token): + if self.input.hasNext(): + resulting_string += self.input.next() - if c == '/' and self.allowRegExOrXML(previous_token): - # handle regexp - resulting_string = self._input.next() - esc = False + return resulting_string - in_char_class = False - while self._input.hasNext() and \ - (esc or in_char_class or self._input.peek() != c) and \ - not self._input.testChar(self.acorn.newline): - resulting_string += self._input.peek() - if not esc: - esc = self._input.peek() == '\\' - if self._input.peek() == '[': - in_char_class = True - elif self._input.peek() == ']': - in_char_class = False + if sep == '`': + resulting_string = parse_string(self, resulting_string, '`', True, '${') else: - esc = False - self._input.next() + resulting_string = parse_string(self, resulting_string, sep) - if self._input.peek() == c: - resulting_string += self._input.next() - if c == '/': - # regexps may have modifiers /regexp/MOD, so fetch those too - # Only [gim] are valid, but if the user puts in garbage, do - # what we can to take it. - resulting_string += self._input.read( - self.acorn.identifier) - - return self._create_token(TOKEN.STRING, resulting_string) - - return None - - - def _read_xml(self, c, previous_token): - if self._options.e4x and c == "<" and self._input.test( - startXmlRegExp) and self.allowRegExOrXML(previous_token): - # handle e4x xml literals - xmlStr = "" - match = self._input.match(xmlRegExp) - if match: - rootTag = match.group(2) - rootTag = re.sub(r'^{\s+', '{', re.sub(r'\s+}$', '}', rootTag)) - isCurlyRoot = rootTag.startswith('{') - depth = 0 - while bool(match): - isEndTag = match.group(1) - tagName = match.group(2) - isSingletonTag = ( - match.groups()[-1] != "") or (match.group(2)[0:8] == "![CDATA[") - if not isSingletonTag and (tagName == rootTag or ( - isCurlyRoot and re.sub(r'^{\s+', '{', re.sub(r'\s+}$', '}', tagName)))): - if isEndTag: - depth -= 1 - else: - depth += 1 - - xmlStr += match.group(0) - if depth <= 0: - break - - match = self._input.match(xmlRegExp) + if self.has_char_escapes and self.opts.unescape_strings: + resulting_string = self.unescape_string(resulting_string) - # if we didn't close correctly, keep unformatted. - if not match: - xmlStr += self._input.match(re.compile(r'[\s\S]*')).group(0) + if self.input.peek() == sep: + resulting_string += self.input.next() - xmlStr = re.sub(self.acorn.allLineBreaks, '\n', xmlStr) - return self._create_token(TOKEN.STRING, xmlStr) + if sep == '/': + # regexps may have modifiers /regexp/MOD, so fetch those too + # Only [gim] are valid, but if the user puts in garbage, do what we can to take it. + while self.input.hasNext() and self.acorn.isIdentifierStart(self.input.peekCharCode()): + resulting_string += self.input.next() - return None + resulting_string = re.sub(self.acorn.allLineBreaks, '\n', resulting_string) - def _read_non_javascript(self, c): - resulting_string = '' + return resulting_string, 'TK_STRING' if c == '#': # she-bang - if self._is_first_token(): - resulting_string = self._input.read(shebang_pattern) - if resulting_string: - return self._create_token(TOKEN.UNKNOWN, resulting_string.strip() + '\n') - - # handles extendscript #includes - resulting_string = self._input.read(include_pattern) - - if resulting_string: - return self._create_token(TOKEN.UNKNOWN, resulting_string.strip() + '\n') + if len(self.tokens) == 0 and self.input.peek() == '!': + resulting_string = c + while self.input.hasNext() and c != '\n': + c = self.input.next() + resulting_string += c + return resulting_string.strip() + '\n', 'TK_UNKNOWN' - c = self._input.next() # Spidermonkey-specific sharp variables for circular references # https://developer.mozilla.org/En/Sharp_variables_in_JavaScript - # http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp - # around line 1935 + # http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp around line 1935 sharp = '#' - if self._input.hasNext() and self._input.testChar(digit): + if self.input.hasNext() and self.input.testChar(self.digit): while True: - c = self._input.next() + c = self.input.next() sharp += c - if (not self._input.hasNext()) or c == '#' or c == '=': + if (not self.input.hasNext()) or c == '#' or c == '=': break - if c == '#': - pass - elif self._input.peek() == '[' and self._input.peek(1) == ']': - sharp += '[]' - self._input.next() - self._input.next() - elif self._input.peek() == '{' and self._input.peek(1) == '}': - sharp += '{}' - self._input.next() - self._input.next() - - return self._create_token(TOKEN.WORD, sharp) - - self._input.back() - - elif c == '<': - if self._input.peek(1) == '?' or self._input.peek(1) == '%': - resulting_string = self._input.read(template_pattern) - if resulting_string: - resulting_string = re.sub(self.acorn.allLineBreaks, '\n', resulting_string) - return self._create_token(TOKEN.STRING, resulting_string) - - elif self._input.match(re.compile(r'<\!--')): - c = '')): + if c == '#': + pass + elif self.input.peek() == '[' and self.input.peek(1) == ']': + sharp += '[]' + self.input.next() + self.input.next() + elif self.input.peek() == '{' and self.input.peek(1) == '}': + sharp += '{}' + self.input.next() + self.input.next() + return sharp, 'TK_WORD' + + if c == '<' and self.input.peek() in ['?', '%']: + self.input.back() + template_match = self.input.match(self.template_pattern) + if template_match: + c = template_match.group(0) + c = re.sub(self.acorn.allLineBreaks, '\n', c) + return c, 'TK_STRING' + + + if c == '<' and self.input.match(re.compile('\!--')): + c = '') + return '-->', 'TK_COMMENT' - return None + if c == '.': + if self.input.peek() == '.' and self.input.peek(1) == '.': + c += self.input.next() + self.input.next() + return c, 'TK_OPERATOR' - def _read_punctuation(self): - token = None - resulting_string = self._input.read(punct_pattern) - if resulting_string != '': - if resulting_string == '=': - token = self._create_token(TOKEN.EQUALS, resulting_string) - else: - token = self._create_token(TOKEN.OPERATOR, resulting_string) - - return token - - __regexTokens = { TOKEN.COMMENT, TOKEN.START_EXPR, TOKEN.START_BLOCK, - TOKEN.START, TOKEN.END_BLOCK, TOKEN.OPERATOR, - TOKEN.EQUALS, TOKEN.EOF, TOKEN.SEMICOLON, TOKEN.COMMA } - def allowRegExOrXML(self, previous_token): - return (previous_token.type == TOKEN.RESERVED and previous_token.text in {'return', 'case', 'throw', 'else', 'do', 'typeof', 'yield'}) or \ - (previous_token.type == TOKEN.END_EXPR and previous_token.text == ')' and - previous_token.opened.previous.type == TOKEN.RESERVED and previous_token.opened.previous.text in {'if', 'while', 'for'}) or \ - (previous_token.type in self.__regexTokens ) - - def parse_string( - self, - resulting_string, - delimiter, - allow_unescaped_newlines=False, - start_sub=None): - esc = False - while self._input.hasNext(): - current_char = self._input.peek() - if not (esc or (current_char != delimiter and ( - allow_unescaped_newlines or not bool( - self.acorn.newline.match(current_char))))): - break - - # Handle \r\n linebreaks after escapes or in template - # strings - if (esc or allow_unescaped_newlines) and bool( - self.acorn.newline.match(current_char)): - if current_char == '\r' and self._input.peek(1) == '\n': - self._input.next() - current_char = self._input.peek() - - resulting_string += '\n' - else: - resulting_string += current_char + return c, 'TK_DOT' - if esc: - if current_char == 'x' or current_char == 'u': - self.has_char_escapes = True - - esc = False - else: - esc = current_char == '\\' - - self._input.next() - - if start_sub and resulting_string.endswith(start_sub): - if delimiter == '`': - resulting_string = self.parse_string( - resulting_string, '}', allow_unescaped_newlines, '`') - else: - resulting_string = self.parse_string( - resulting_string, '`', allow_unescaped_newlines, '${') + if c in self.punct: + while self.input.hasNext() and c + self.input.peek() in self.punct: + c += self.input.next() + if not self.input.hasNext(): + break - if self._input.hasNext(): - resulting_string += self._input.next() + if c == ',': + return c, 'TK_COMMA' + if c == '=': + return c, 'TK_EQUALS' - return resulting_string + return c, 'TK_OPERATOR' + return c, 'TK_UNKNOWN' def unescape_string(self, s): # You think that a regex would work for this @@ -504,9 +487,9 @@ def unescape_string(self, s): input_scan.next() if input_scan.peek() == 'x': - matched = input_scan.match(re.compile(r'x([0-9A-Fa-f]{2})')) + matched = input_scan.match(re.compile('x([0-9A-Fa-f]{2})')) elif input_scan.peek() == 'u': - matched = input_scan.match(re.compile(r'u([0-9A-Fa-f]{4})')) + matched = input_scan.match(re.compile('u([0-9A-Fa-f]{4})')); else: out += '\\' if input_scan.hasNext(): @@ -519,8 +502,7 @@ def unescape_string(self, s): escaped = int(matched.group(1), 16) - if escaped > 0x7e and escaped <= 0xff and matched.group( - 0).startswith('x'): + if escaped > 0x7e and escaped <= 0xff and matched.group(0).startswith('x'): # we bail out on \x7f..\xff, # leaving whole string escaped, # as it's probably completely binary diff --git a/codeformatter/lib/jsbeautifier/unpackers/__init__.py b/codeformatter/lib/jsbeautifier/unpackers/__init__.py index 15c1f0c..6d13653 100644 --- a/codeformatter/lib/jsbeautifier/unpackers/__init__.py +++ b/codeformatter/lib/jsbeautifier/unpackers/__init__.py @@ -12,13 +12,11 @@ # NOTE: AT THE MOMENT, IT IS DEACTIVATED FOR YOUR SECURITY: it runs js! BLACKLIST = ['jsbeautifier.unpackers.evalbased'] - class UnpackingError(Exception): """Badly packed source or general error. Argument is a meaningful description.""" pass - def getunpackers(): """Scans the unpackers dir, finds unpackers and add them to UNPACKERS list. An unpacker will be loaded only if it is a valid python module (name must @@ -37,12 +35,10 @@ def getunpackers(): else: unpackers.append(module) - return sorted(unpackers, key=lambda mod: mod.PRIORITY) - + return sorted(unpackers, key = lambda mod: mod.PRIORITY) UNPACKERS = getunpackers() - def run(source, evalcode=False): """Runs the applicable unpackers and return unpacked source as a string.""" for unpacker in [mod for mod in UNPACKERS if mod.detect(source)]: @@ -51,7 +47,6 @@ def run(source, evalcode=False): source = evalbased.unpack(source) return source - def filtercomments(source): """NOT USED: strips trailing comments and put them at the top.""" trailing_comments = [] diff --git a/codeformatter/lib/jsbeautifier/unpackers/evalbased.py b/codeformatter/lib/jsbeautifier/unpackers/evalbased.py index 62995ca..b17d926 100644 --- a/codeformatter/lib/jsbeautifier/unpackers/evalbased.py +++ b/codeformatter/lib/jsbeautifier/unpackers/evalbased.py @@ -1,6 +1,6 @@ # # Unpacker for eval() based packers, a part of javascript beautifier -# by Einar Lielmanis +# by Einar Lielmanis # # written by Stefano Sanfilippo # @@ -18,19 +18,15 @@ PRIORITY = 3 - def detect(source): """Detects if source is likely to be eval() packed.""" return source.strip().lower().startswith('eval(function(') - def unpack(source): """Runs source and return resulting code.""" return jseval('print %s;' % source[4:]) if detect(source) else source # In case of failure, we'll just return the original, without crashing on user. - - def jseval(script): """Run code in the JS interpreter and return output.""" try: diff --git a/codeformatter/lib/jsbeautifier/unpackers/javascriptobfuscator.py b/codeformatter/lib/jsbeautifier/unpackers/javascriptobfuscator.py index 0a9edb1..aa4344a 100644 --- a/codeformatter/lib/jsbeautifier/unpackers/javascriptobfuscator.py +++ b/codeformatter/lib/jsbeautifier/unpackers/javascriptobfuscator.py @@ -2,7 +2,7 @@ # simple unpacker/deobfuscator for scripts messed up with # javascriptobfuscator.com # -# written by Einar Lielmanis +# written by Einar Lielmanis # rewritten in Python by Stefano Sanfilippo # # Will always return valid javascript: if `detect()` is false, `code` is @@ -20,14 +20,13 @@ PRIORITY = 1 - def smartsplit(code): """Split `code` at " symbol, only if it is not escaped.""" strings = [] pos = 0 while pos < len(code): if code[pos] == '"': - word = '' # new word + word = '' # new word pos += 1 while pos < len(code): if code[pos] == '"': @@ -41,13 +40,11 @@ def smartsplit(code): pos += 1 return strings - def detect(code): """Detects if `code` is JavascriptObfuscator.com packed.""" # prefer `is not` idiom, so that a true boolean is returned return (re.search(r'^var _0x[a-f0-9]+ ?\= ?\[', code) is not None) - def unpack(code): """Unpacks JavascriptObfuscator.com packed code.""" if detect(code): diff --git a/codeformatter/lib/jsbeautifier/unpackers/myobfuscate.py b/codeformatter/lib/jsbeautifier/unpackers/myobfuscate.py index d8bc8fd..52e1003 100644 --- a/codeformatter/lib/jsbeautifier/unpackers/myobfuscate.py +++ b/codeformatter/lib/jsbeautifier/unpackers/myobfuscate.py @@ -1,6 +1,6 @@ # # deobfuscator for scripts messed up with myobfuscate.com -# by Einar Lielmanis +# by Einar Lielmanis # # written by Stefano Sanfilippo # @@ -51,21 +51,18 @@ """ -SIGNATURE = ( - r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F' - r'\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65' - r'\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75' - r'\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B' - r'\x2F\x3D","","\x63\x68\x61\x72\x41\x74","\x69\x6E\x64\x65\x78' - r'\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","' - r'\x6C\x65\x6E\x67\x74\x68"]') - +SIGNATURE = (r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F' + r'\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65' + r'\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75' + r'\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B' + r'\x2F\x3D","","\x63\x68\x61\x72\x41\x74","\x69\x6E\x64\x65\x78' + r'\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","' + r'\x6C\x65\x6E\x67\x74\x68"]') def detect(source): """Detects MyObfuscate.com packer.""" return SIGNATURE in source - def unpack(source): """Unpacks js code packed with MyObfuscate.com""" if not detect(source): @@ -76,7 +73,6 @@ def unpack(source): polished = match.group(1) if match else source return CAVEAT + polished - def _filter(source): """Extracts and decode payload (original file) from `source`""" try: diff --git a/codeformatter/lib/jsbeautifier/unpackers/packer.py b/codeformatter/lib/jsbeautifier/unpackers/packer.py index c8e4ede..b4d860b 100644 --- a/codeformatter/lib/jsbeautifier/unpackers/packer.py +++ b/codeformatter/lib/jsbeautifier/unpackers/packer.py @@ -1,6 +1,6 @@ # # Unpacker for Dean Edward's p.a.c.k.e.r, a part of javascript beautifier -# by Einar Lielmanis +# by Einar Lielmanis # # written by Stefano Sanfilippo # @@ -18,27 +18,9 @@ PRIORITY = 1 - def detect(source): - global beginstr - global endstr - beginstr = '' - endstr = '' """Detects whether `source` is P.A.C.K.E.R. coded.""" - mystr = source.replace(' ', '').find('eval(function(p,a,c,k,e,') - if(mystr > 0): - beginstr = source[:mystr] - if(mystr != -1): - """ Find endstr""" - if(source.split("')))", 1)[0] == source): - try: - endstr = source.split("}))", 1)[1] - except IndexError: - endstr = '' - else: - endstr = source.split("')))", 1)[1] - return (mystr != -1) - + return source.replace(' ', '').startswith('eval(function(p,a,c,k,e,') def unpack(source): """Unpacks P.A.C.K.E.R. packed js code.""" @@ -54,40 +36,32 @@ def unpack(source): def lookup(match): """Look up symbols in the synthetic symtab.""" - word = match.group(0) + word = match.group(0) return symtab[unbase(word)] or word source = re.sub(r'\b\w+\b', lookup, payload) return _replacestrings(source) - def _filterargs(source): """Juice from a source file the four args needed by decoder.""" - juicers = [ - (r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\), *(\d+), *(.*)\)\)"), - (r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\)"), - ] + juicers = [ (r"}\('(.*)', *(\d+), *(\d+), *'(.*)'\.split\('\|'\), *(\d+), *(.*)\)\)"), + (r"}\('(.*)', *(\d+), *(\d+), *'(.*)'\.split\('\|'\)"), + ] for juicer in juicers: args = re.search(juicer, source, re.DOTALL) if args: a = args.groups() - if a[1] == "[]": - a = list(a) - a[1] = 62 - a = tuple(a) try: return a[0], a[3].split('|'), int(a[1]), int(a[2]) except ValueError: raise UnpackingError('Corrupted p.a.c.k.e.r. data.') # could not find a satisfying regex - raise UnpackingError( - 'Could not make sense of p.a.c.k.e.r data (unexpected code structure)') + raise UnpackingError('Could not make sense of p.a.c.k.e.r data (unexpected code structure)') + def _replacestrings(source): - global beginstr - global endstr """Strip string lookup table (list) and replace values in source.""" match = re.search(r'var *(_\w+)\=\["(.*?)"\];', source, re.DOTALL) @@ -99,36 +73,31 @@ def _replacestrings(source): for index, value in enumerate(lookup): source = source.replace(variable % index, '"%s"' % value) return source[startpoint:] - return beginstr + source + endstr + return source class Unbaser(object): """Functor for a given base. Will efficiently convert strings to natural numbers.""" - ALPHABET = { - 62: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', - 95: (' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ' - '[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~') + ALPHABET = { + 53 : '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ', + 59 : '0123456789abcdefghijklmnopqrstuvwABCDEFGHIJKLMNOPQRSTUVWXYZ', + 62 : '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', + 95 : (' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '[\]^_`abcdefghijklmnopqrstuvwxyz{|}~') } def __init__(self, base): self.base = base - # fill elements 37...61, if necessary - if 36 < base < 62: - if not hasattr(self.ALPHABET, self.ALPHABET[62][:base]): - self.ALPHABET[base] = self.ALPHABET[62][:base] - # attrs = self.ALPHABET - # print ', '.join("%s: %s" % item for item in attrs.items()) # If base can be handled by int() builtin, let it do it for us if 2 <= base <= 36: self.unbase = lambda string: int(string, base) else: # Build conversion dictionary cache try: - self.dictionary = dict( - (cipher, index) for index, cipher in enumerate( - self.ALPHABET[base])) + self.dictionary = dict((cipher, index) for + index, cipher in enumerate(self.ALPHABET[base])) except KeyError: raise TypeError('Unsupported base encoding.') diff --git a/codeformatter/lib/jsbeautifier/unpackers/tests/__init__.py b/codeformatter/lib/jsbeautifier/unpackers/tests/__init__.py new file mode 100644 index 0000000..dfe67ba --- /dev/null +++ b/codeformatter/lib/jsbeautifier/unpackers/tests/__init__.py @@ -0,0 +1,2 @@ +# Empty file :) +# pylint: disable=C0111 diff --git a/codeformatter/lib/jsbeautifier/unpackers/tests/test-myobfuscate-input.js b/codeformatter/lib/jsbeautifier/unpackers/tests/test-myobfuscate-input.js new file mode 100644 index 0000000..da16917 --- /dev/null +++ b/codeformatter/lib/jsbeautifier/unpackers/tests/test-myobfuscate-input.js @@ -0,0 +1 @@ +var OO0=["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B\x2F\x3D","","\x63\x68\x61\x72\x41\x74","\x69\x6E\x64\x65\x78\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","\x6C\x65\x6E\x67\x74\x68"];function _1OO(_0IO){var _011=OO0[0];var lOO,O10,_0ll,OlO,_01O,IOO,I01,_001,_11O=0,Ol0=OO0[1];do{OlO=_011[OO0[3]](_0IO[OO0[2]](_11O++));_01O=_011[OO0[3]](_0IO[OO0[2]](_11O++));IOO=_011[OO0[3]](_0IO[OO0[2]](_11O++));I01=_011[OO0[3]](_0IO[OO0[2]](_11O++));_001=OlO<<18|_01O<<12|IOO<<6|I01;lOO=_001>>16&0xff;O10=_001>>8&0xff;_0ll=_001&0xff;if(IOO==64){Ol0+=String[OO0[4]](lOO);}else{if(I01==64){Ol0+=String[OO0[4]](lOO,O10);}else{Ol0+=String[OO0[4]](lOO,O10,_0ll);};};} while(_11O<_0IO[OO0[5]]);return Ol0;};function O0I(O11){var OO1=OO0[1],_11O=0;for(_11O=O11[OO0[5]]-1;_11O>=0;_11O--){OO1+=O11[OO0[2]](_11O);};return OO1;};var _0OO='=sTKpUGchN2cl9FKlBXYjNXZuVHKlRXaydnL05WZtV3YvR2Op80TPhCZslGaDRmblBHch5SMwwmC70FMblyJkFWZodCKl1WYOdWYUlnQzRnbl1WZsVEdldmL05WZtV3YvRGI9ASMwwGIyFmdKsTKMJVVuQnbl1Wdj9GZoQnbl52bw12bDlkUVVGZvNmbltyJ9wmc1ZyJrkiclJnclZWZy5CduVWb1N2bkhCduVmbvBXbvNUSSVVZk92YuV2Kn0jZlJnJnsyJr9WPjJ3c0V2Z/8SbvNmLlRXYjNXdmJ2b51mLpBXYv8iOwRHdodCI9AyYyNnLP90TKsTKnQHcpJ3YzdCK05WZtVGbFVGdhVmcj5CduVWb1N2bkBSPg80TPBichZ3OnU0MlQHcpJ3Yz9yQzUSQwUCR3USQwUCR3UCMyUCMyUCMyUCMyUSQwUiQzUCdwITJuJXd0VmcwITJwITJwITJwITJwITJwITJwITJwITJBBTJCNTJ5ITJlNHbhZGMyUyQyUyNyUyNyUCOyUCdjVGc4VmL0BjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlEEMlI0MlkjMlUWdyRHMyUyQyUyNyUSY3ITJ4ITJ0NWZwhXZuQHMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUSQwUiQzUSOyUSZzxWYmBjMlMkMlcjMlQ2YidjMlgjMlQ3YlBHel5CdwITJwITJwITJwITJwITJwITJwITJwITJBBTJCNTJ5ITJlVnc0BjMlMkMlcjMlMmYhdjMlgjMlQ3YlBHel5CdwITJwITJwITJwITJwITJwITJwITJwITJBBTJCNTJ5ITJyITJ5ITJhBjMlMkMlY0MlgjMlgGdpd3XzRnchR3cf5iUfV0XL91QfF0XQJjMlAjMlMkMlE2XoRXa391c0JXY0NHOyUibvlGdj5Wdm9FdzVGduQHMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUSQwUCR3UCMyUiQzUSOyUyNyUSY3ITJwITJDJTJ0FGa3hjMlgGdpd3XzRnchR3cf5iUfV0XL91QfF0XQBjMl4mc1RXZyBjMlI0NlAjMlkjMlQXYodHOyUibvlGdj5WdmBjMlQ0MlAjMlE2XoRXa391c0JXY0NHMyUichZHMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUSQwUSQwUiQzUSOyUyNyUSMENTJhBjMlIXY2djMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlEEMlMkMlIjMlkjMlkjMlQ0NlI0NlMkMlAzQyUSOyUyNyUyQ3UyNyUCOyUCdpxGcz5yNyUSYDdTJDdTJyFmd3ITJDJTJzMkMlMzQyUyNyUSMENTJyAjMlAzNyUCOyUCR3UCcwITJuJXd0VmcCNTJ5ITJEVTJjJUNls2QyUSOyUyNyUyZ3ITJDJTJ3ITJiNUNlMUNlMUNlMUNlcjMlsSOyUyY4ITJltyNyUiYDVTJDVTJDVTJDVTJ3ITJ4ITJwhXRnVmUwITJ3Vmb4ITJlNWYsBXZy5CcENTJwljMlQUNlMmQ1Uya4ITJmlWOyUSLtMGOyUSZslGa3J0MlQ0NlEDRzUyYCNTJEdTJ3ITJrc3Q1UyQ1UyQ1UyQ1UyNyUibyVHdlJnQ3USOyUCOyUibvlGdj5WdmR0MlUmQzUCR1UCR3UCR1USZCVTJyBjMl4mc1RXZyJ0NlkjMlUGOyUibvlGdj5WdmJUNlQ0MlsmQzUyYDdTJDdTJEVTJjJUNlsGRzUCR1UyYCVTJyljMl0SLjhjMlUGbph2dCdTJ5ITJ5ITJn5WayR3UDJTJvUUNl8COyUSZjFGbwVmcucjMlcjMlEjMlgjMlYWaCNTJn5WayR3UENTJlJ0NlkjMlI3QyUSZDJTJrNkMlM2QyUSYDJTJwhjMl42bpR3YuVnZ4ITJsFmdlJjMlgjMlQ3YlBHel5CdwITJwITJwITJwITJwITJwITJwITJwITJBBTJCNTJ5ITJ3ITJrNWYw5WduI1XF91SfN0XB9FU3ITJwITJDJTJrNWYw5WduI1XF91SfN0XB9FU4ITJu9Wa0Nmb1Z2X0NXZ05CdwITJwITJwITJwITJwITJwITJwITJwITJBBTJBBTJCNTJ5ITJlVnc0BjMlMkMlcjMlIHMyUyQyUSZwITJDJTJrBjMlMkMlMGMyUyQyUSYwITJDJTJwhjMl42bpR3YuVnZwITJ4ITJwITJsFmdldjMlgjMlQ3YlBHel5CdwITJwITJwITJwITJwITJwITJwITJwITJBBTJCNTJ5ITJlVnc0BjMlMkMlcjMlI3QyUSZDJTJrNkMlM2QyUSYDJTJwhjMl42bpR3YuVnZ4ITJsFmdldjMlgjMlQ3YlBHel5CdwITJwITJwITJwITJwITJwITJwITJwITJBBTJCNTJ5ITJlNHbhZGMyUyQyUyNyUiYwITJENTJwITJhBjMlIXY2djMlgjMlQ3YlBHel5CdwITJwITJwITJwITJwITJwITJwITJwITJBBTJCNTJ5ITJlNHbhZGMyUyQyUyNyUyNyUCOyUCdjVGc4VmL0BjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlEEMlI0MlkjMlIjMlQ3YlRXZk5iUfV0XL91QfF0XQJjMlAjMlMkMlQ3YlRXZk5iUfV0XL91QfF0XQhjMl42bpR3YuVnZfR3clRnL0BjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlEEMlI0MlkjMlgjMlQ3clRVe0lmbhNFMyUydl5GMyUyQ3UyQ3UCMyUCdzVGdflHdp5WYzBjMlQ0MlAjMlQHMyUichZHMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUSQwUiQ3UCMyUSOyUCdzVGdflHdp5WYzhjMlAjMl42bpR3YuVnZwITJBNTJzR3clR3XuVncwITJwITJwITJwITJBBTJBBTJDJTJEdTJwITJwITJwITJwITJBBTJCNTJ0FGa3BjMlQ0MlQ0MlQ0MlAjMlkjMlgGdn5WZs5Cdhh2dwITJDJTJwgjMlIHdzJWdz5ic0NHMyUibyVHdlJHMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUSQwUiQ3UCMyUSOyUCdhh2dwITJDJTJyR3c4ITJwITJu9Wa0Nmb1ZGMyUSQzUCa0l2dfNHdyFGdz9FMyUCMyUCMyUCMyUSQwUSQwUyQyUCR3UCMyUCMyUCMyUCMyUSQwUiQzUic0NHMyUibyVHdlJHMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUSQwUCR3UCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUSQwUCR3UCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUSQwUiLyV2c1BjMl42bwITJn5WaoNXYyNGMyUiZvBjMlQWYlR3culGMyUyQyUCbh5WanlmcvBjMlUGa0BjMl4mc1RXZyBjMlQ3c1pGMyUCbsdjMlU2dwITJuQWZslWYmBjMlQXawITJDJTJsxWZ3BjMl8yLwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJBBTJCdTJwITJ5ITJy9mcyVGOyUCMyUCajRXYjBjMlQ0NlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlEEMlQ0NlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlEEMlI0MlU2YyV3bz9FZlt2YhBnb1BjMlQ0MlAjMlIHdzBjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlEEMlI0NlAjMlkjMlU2YyV3bz9FZlt2YhBnb1BjMlYjMlYjMlAjMlcjMlcmbpJHdzdjMlAjMlQ0MlQ0MlAjMlU2YyV3bz9FZlt2YhBnb1BjMlY2blBXe0hjMlAjMlYWawITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJBBTJ5ITJ3ITJCNTJ3ITJwITJrAjMlkjMlQDOyUyZulmc0NnY1NnLyR3cwITJrAjMlcjMlAjMlQ0MlAjMlU2YyV3bz9FZlt2YhBnb1djMlgjMlwWY2VGMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUSQwUiQ3UCMyUSeyRHMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUCMyUSQwUiQ3UCMyUSOyUSOyUic0NHOyUCdjVGdlRmLS9VRft0XD9VQfBFOyUCMyUiZpBjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlEEMlI0MlcjMlcjMlAjMlQ0MlAjMlU2YyV3bz9FZlt2YhBnb1BjMlIXY2BjMlAjMlAjMlAjMlAjMlAjMlAjMlAjMlEEMlI0NlAjMlkjMlIHdzhjMlAjMl42bpR3YuVnZwITJBNTJrNWYw5WdwITJwITJwITJwITJBBTJBBTJDJTJEdTJwITJwITJwITJwITJBBTJCNTJwITJ5ITJ3ITJ4ITJu9Wa0Nmb1ZGOyUCOyUCbhZXZ3ITJwITJDJTJ5ITJ3ITJ3ITJwITJDJTJn9yKwITJvgjMlU2YhxGclJnL5ITJ4ITJlNXYDJXZ39GTvRnLyR3c4ITJoRXa391c0JXY0N3XuI1XF91SfN0XB9FUwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJwITJBBTJDdTJDdTJwITJ5ITJ3ITJ4ITJu9Wa0Nmb1ZGOyUCbhZXZ3ITJwITJDJTJ5ITJ3ITJ3ITJwITJDJTJn9yKwITJvgjMlU2YhxGclJnL5ITJ4ITJlNXYDJXZ39GTvRnLyR3c4ITJoRXa391c0JXY0N3XuI1XF91SfN0XB9FUwITJuJXd0VmcwITJwITJwITJwITJwITJwITJwITJwITJBBTJCdTJwITJ5ITJyR3c4ITJwITJu9Wa0Nmb1ZGMyUSQzUCdjVGdlRGMyUCMyUCMyUCMyUSQwUiQ3UCMyUCRzUCMyUiUfV0XL91QfF0XQBjMlIXY2FEMlEEMl8yLBBTJv8SQwUCR3UCMyUyLvEEMlI0MlkjMlcmbpJHdz9VZt92c4ITJrNWYw5WduI1XF91SfN0XB9FUwITJENTJwITJkV2ajFGcuVHMyUichZHMyUCMyUCMyUCMyUCMyUyLvEEMlI0NlAjMlkjMlkjMlcmbpJHdz9VZt92c4ITJ0NWZ0VGZuI1XF91SfN0XB9FU4ITJwITJmlGMyUyLvEEMl8yLBBTJBNTJldWYzVHMyUyLvEEMl8yLBBTJuMncvN3clJHct92YwITJkV2chJWLsFmdlBjMlIXZoR3bwITJm9GMyUSZsBXdvNGMyUSYwITJ0FWZmVGZwITJuF2YwITJ0lGMyUyQyUSesxWY05WZkl2Yul2bDBjMl8yLBBTJv8SQwUSRzUyZy9mLyVWamlGd1FWZiNnaAJXYulWZDNTJwITJzlmbh1GbllGTwITJyFmbpVEMyUSeiBjMl4WZ0RXaydHMyUyLvEEMlIXZpZWa0VXYlJGMyUCdwlmcjNXY2FmawITJm9GMyUCdyFGcwITJhBjMlMkMlInLl5yauMmLh5CcwITJzdjMlQmchdHZFBjMl4WYlREMyUicvZGMyUiclt2YhBnbVBjMl8yLBBTJv8SRzUCdwlmcjN3QzUyJ9UGchN2cl9FIyFmd';eval(_1OO(O0I(_0OO))); \ No newline at end of file diff --git a/codeformatter/lib/jsbeautifier/unpackers/tests/test-myobfuscate-output.js b/codeformatter/lib/jsbeautifier/unpackers/tests/test-myobfuscate-output.js new file mode 100644 index 0000000..5ec9457 --- /dev/null +++ b/codeformatter/lib/jsbeautifier/unpackers/tests/test-myobfuscate-output.js @@ -0,0 +1,65 @@ +// +// Unpacker warning: be careful when using myobfuscate.com for your projects: +// scripts obfuscated by the free online version call back home. +// + +// +// Unpacker for Dean Edward's p.a.c.k.e.r, a part of javascript beautifier +// written by Einar Lielmanis +// +// Coincidentally, it can defeat a couple of other eval-based compressors. +// +// usage: +// +// if (P_A_C_K_E_R.detect(some_string)) { +// var unpacked = P_A_C_K_E_R.unpack(some_string); +// } +// +// + +var P_A_C_K_E_R = { + detect: function (str) { + return P_A_C_K_E_R._starts_with(str.toLowerCase().replace(/ +/g, ''), 'eval(function(') || + P_A_C_K_E_R._starts_with(str.toLowerCase().replace(/ +/g, ''), 'eval((function(') ; + }, + + unpack: function (str) { + var unpacked_source = ''; + if (P_A_C_K_E_R.detect(str)) { + try { + eval('unpacked_source = ' + str.substring(4) + ';') + if (typeof unpacked_source == 'string' && unpacked_source) { + str = unpacked_source; + } + } catch (error) { + // well, it failed. we'll just return the original, instead of crashing on user. + } + } + return str; + }, + + _starts_with: function (str, what) { + return str.substr(0, what.length) === what; + }, + + run_tests: function (sanity_test) { + var t = sanity_test || new SanityTest(); + t.test_function(P_A_C_K_E_R.detect, "P_A_C_K_E_R.detect"); + t.expect('', false); + t.expect('var a = b', false); + t.expect('eval(function(p,a,c,k,e,r', true); + t.expect('eval ( function(p, a, c, k, e, r', true); + + t.test_function(P_A_C_K_E_R.unpack, 'P_A_C_K_E_R.unpack'); + t.expect("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e=function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',3,3,'var||a'.split('|'),0,{}))", + 'var a=1'); + + var starts_with_a = function(what) { return P_A_C_K_E_R._starts_with(what, 'a'); } + t.test_function(starts_with_a, "P_A_C_K_E_R._starts_with(?, a)"); + t.expect('abc', true); + t.expect('bcd', false); + t.expect('a', true); + t.expect('', false); + return t; + } +} diff --git a/codeformatter/lib/jsbeautifier/unpackers/tests/test-packer-62-input.js b/codeformatter/lib/jsbeautifier/unpackers/tests/test-packer-62-input.js new file mode 100644 index 0000000..20a43f9 --- /dev/null +++ b/codeformatter/lib/jsbeautifier/unpackers/tests/test-packer-62-input.js @@ -0,0 +1 @@ +eval(function(p,a,c,k,e,r){e=function(c){return(c35?String.fromCharCode(c+29):c.toString(36))};if(!''.replace(/^/,String)){while(c--)r[e(c)]=k[c]||e(c);k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('9 H=(j["Y"]["n"]("1h")!=-1)?s:v;9 I=(j["Y"]["w"]()["n"]("1i")!=-1)?s:v;9 J=(j["K"]["n"]("1j")!=-1)?s:v;p Z(){9 a;9 b;9 c;y{b=o z("l.l.7");a=b.P("$Q")}A(c){};i(!a){y{b=o z("l.l.6");a="R 6,0,21,0";b["1k"]="1l";a=b.P("$Q")}A(c){}};i(!a){y{b=o z("l.l.3");a=b.P("$Q")}A(c){}};i(!a){y{b=o z("l.l.3");a="R 3,0,18,0"}A(c){}};i(!a){y{b=o z("l.l");a="R 2,0,0,11"}A(c){a=-1}};m a};p 10(){9 a=-1;i(j["t"]!=12&&j["t"]["13"]>0){i(j["t"]["L M 2.0"]||j["t"]["L M"]){9 b=j["t"]["L M 2.0"]?" 2.0":"";9 c=j["t"]["L M"+b]["1m"];9 d=c["B"](" ");9 e=d[2]["B"](".");9 f=e[0];9 g=e[1];9 h=d[3];i(h==""){h=d[4]};i(h[0]=="d"){h=h["S"](1)}k{i(h[0]=="r"){h=h["S"](1);i(h["n"]("d")>0){h=h["S"](0,h["n"]("d"))}}};9 a=f+"."+g+"."+h}}k{i(j["K"]["w"]()["n"]("T/2.6")!=-1){a=4}k{i(j["K"]["w"]()["n"]("T/2.5")!=-1){a=3}k{i(j["K"]["w"]()["n"]("T")!=-1){a=2}k{i(H&&I&&!J){a=Z()}}}}};m a};p 1n(a,b,c){C=10();i(C==-1){m v}k{i(C!=0){i(H&&I&&!J){14=C["B"](" ");15=14[1];D=15["B"](",")}k{D=C["B"](".")};9 d=D[0];9 e=D[1];9 f=D[2];i(d>E(a)){m s}k{i(d==E(a)){i(e>E(b)){m s}k{i(e==E(b)){i(f>=E(c)){m s}}}}};m v}}};p 16(a,b){i(a["n"]("?")!=-1){m a["1o"](/\\?/,b+"?")}k{m a+b}};p U(a,b,c){9 d="";i(H&&I&&!J){d+="<17 ";N(9 e V a){d+=e+"=""+a[e]+"" "};d+=">";N(9 e V b){d+="<1p 19=""+e+"" 1q=""+b[e]+"" /> "};d+=""}k{d+="<1a ";N(9 e V c){d+=e+"=""+c[e]+"" "};d+="> "};1r["1s"](d)};p 1t(){9 a=W(1b,".1u","1c","1d:1v-1w-1x-1y-1e","1z/x-1A-1B");U(a["u"],a["F"],a["q"])};p 1C(){9 a=W(1b,".1D","X","1d:1E-1F-1G-1H-1e",12);U(a["u"],a["F"],a["q"])};p W(a,b,c,d,e){9 f=o O();f["q"]=o O();f["F"]=o O();f["u"]=o O();N(9 g=0;g0){if(navigator["plugins"]["Shockwave Flash 2.0"]||navigator["plugins"]["Shockwave Flash"]){var b=navigator["plugins"]["Shockwave Flash 2.0"]?" 2.0":"";var c=navigator["plugins"]["Shockwave Flash"+b]["description"];var d=c["split"](" ");var e=d[2]["split"](".");var f=e[0];var g=e[1];var h=d[3];if(h==""){h=d[4]};if(h[0]=="d"){h=h["substring"](1)}else{if(h[0]=="r"){h=h["substring"](1);if(h["indexOf"]("d")>0){h=h["substring"](0,h["indexOf"]("d"))}}};var a=f+"."+g+"."+h}}else{if(navigator["userAgent"]["toLowerCase"]()["indexOf"]("webtv/2.6")!=-1){a=4}else{if(navigator["userAgent"]["toLowerCase"]()["indexOf"]("webtv/2.5")!=-1){a=3}else{if(navigator["userAgent"]["toLowerCase"]()["indexOf"]("webtv")!=-1){a=2}else{if(isIE&&isWin&&!isOpera){a=ControlVersion()}}}}};return a};function DetectFlashVer(a,b,c){versionStr=GetSwfVer();if(versionStr==-1){return false}else{if(versionStr!=0){if(isIE&&isWin&&!isOpera){tempArray=versionStr["split"](" ");tempString=tempArray[1];versionArray=tempString["split"](",")}else{versionArray=versionStr["split"](".")};var d=versionArray[0];var e=versionArray[1];var f=versionArray[2];if(d>parseFloat(a)){return true}else{if(d==parseFloat(a)){if(e>parseFloat(b)){return true}else{if(e==parseFloat(b)){if(f>=parseFloat(c)){return true}}}}};return false}}};function AC_AddExtension(a,b){if(a["indexOf"]("?")!=-1){return a["replace"](/\?/,b+"?")}else{return a+b}};function AC_Generateobj(a,b,c){var d="";if(isIE&&isWin&&!isOpera){d+=" "};d+=""}else{d+=" +# + +"""Tests for JavaScriptObfuscator unpacker.""" + +import unittest +from jsbeautifier.unpackers.javascriptobfuscator import ( + unpack, detect, smartsplit) + +# pylint: disable=R0904 +class TestJavascriptObfuscator(unittest.TestCase): + """JavascriptObfuscator.com test case.""" + def test_smartsplit(self): + """Test smartsplit() function.""" + split = smartsplit + equals = lambda data, result: self.assertEqual(split(data), result) + + equals('', []) + equals('"a", "b"', ['"a"', '"b"']) + equals('"aaa","bbbb"', ['"aaa"', '"bbbb"']) + equals('"a", "b\\\""', ['"a"', '"b\\\""']) + + def test_detect(self): + """Test detect() function.""" + positive = lambda source: self.assertTrue(detect(source)) + negative = lambda source: self.assertFalse(detect(source)) + + negative('') + negative('abcd') + negative('var _0xaaaa') + positive('var _0xaaaa = ["a", "b"]') + positive('var _0xaaaa=["a", "b"]') + positive('var _0x1234=["a","b"]') + + def test_unpack(self): + """Test unpack() function.""" + decodeto = lambda ob, original: self.assertEqual(unpack(ob), original) + + decodeto('var _0x8df3=[];var a=10;', 'var a=10;') + decodeto('var _0xb2a7=["\x74\x27\x65\x73\x74"];var i;for(i=0;i<10;++i)' + '{alert(_0xb2a7[0]);} ;', 'var i;for(i=0;i<10;++i){alert' + '("t\'est");} ;') + +if __name__ == '__main__': + unittest.main() diff --git a/codeformatter/lib/jsbeautifier/unpackers/tests/testmyobfuscate.py b/codeformatter/lib/jsbeautifier/unpackers/tests/testmyobfuscate.py new file mode 100644 index 0000000..51ab077 --- /dev/null +++ b/codeformatter/lib/jsbeautifier/unpackers/tests/testmyobfuscate.py @@ -0,0 +1,40 @@ +# +# written by Stefano Sanfilippo +# + +"""Tests for MyObfuscate unpacker.""" + +import unittest +import os +from jsbeautifier.unpackers.myobfuscate import detect, unpack +from jsbeautifier.unpackers.tests import __path__ as path + +INPUT = os.path.join(path[0], 'test-myobfuscate-input.js') +OUTPUT = os.path.join(path[0], 'test-myobfuscate-output.js') + +# pylint: disable=R0904 +class TestMyObfuscate(unittest.TestCase): + # pylint: disable=C0103 + """MyObfuscate obfuscator testcase.""" + @classmethod + def setUpClass(cls): + """Load source files (encoded and decoded version) for tests.""" + with open(INPUT, 'r') as data: + cls.input = data.read() + with open(OUTPUT, 'r') as data: + cls.output = data.read() + + def test_detect(self): + """Test detect() function.""" + detected = lambda source: self.assertTrue(detect(source)) + + detected(self.input) + + def test_unpack(self): + """Test unpack() function.""" + check = lambda inp, out: self.assertEqual(unpack(inp), out) + + check(self.input, self.output) + +if __name__ == '__main__': + unittest.main() diff --git a/codeformatter/lib/jsbeautifier/unpackers/tests/testpacker.py b/codeformatter/lib/jsbeautifier/unpackers/tests/testpacker.py new file mode 100644 index 0000000..507f33b --- /dev/null +++ b/codeformatter/lib/jsbeautifier/unpackers/tests/testpacker.py @@ -0,0 +1,34 @@ +# +# written by Stefano Sanfilippo +# + +"""Tests for P.A.C.K.E.R. unpacker.""" + +import unittest +from jsbeautifier.unpackers.packer import detect, unpack + +# pylint: disable=R0904 +class TestPacker(unittest.TestCase): + """P.A.C.K.E.R. testcase.""" + def test_detect(self): + """Test detect() function.""" + positive = lambda source: self.assertTrue(detect(source)) + negative = lambda source: self.assertFalse(detect(source)) + + negative('') + negative('var a = b') + positive('eval(function(p,a,c,k,e,r') + positive('eval ( function(p, a, c, k, e, r') + + def test_unpack(self): + """Test unpack() function.""" + check = lambda inp, out: self.assertEqual(unpack(inp), out) + + check("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)" + "){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e=" + "function(){return'\\\\w+'};c=1};while(c--)if(k[c])p=p.replace(" + "new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1'," + "62,3,'var||a'.split('|'),0,{}))", 'var a=1') + +if __name__ == '__main__': + unittest.main() diff --git a/codeformatter/lib/jsbeautifier/unpackers/tests/testurlencode.py b/codeformatter/lib/jsbeautifier/unpackers/tests/testurlencode.py new file mode 100644 index 0000000..5287893 --- /dev/null +++ b/codeformatter/lib/jsbeautifier/unpackers/tests/testurlencode.py @@ -0,0 +1,36 @@ +# +# written by Stefano Sanfilippo +# + +"""Tests for urlencoded unpacker.""" + +import unittest + +from jsbeautifier.unpackers.urlencode import detect, unpack + +# pylint: disable=R0904 +class TestUrlencode(unittest.TestCase): + """urlencode test case.""" + def test_detect(self): + """Test detect() function.""" + encoded = lambda source: self.assertTrue(detect(source)) + unencoded = lambda source: self.assertFalse(detect(source)) + + unencoded('') + unencoded('var a = b') + encoded('var%20a+=+b') + encoded('var%20a=b') + encoded('var%20%21%22') + + def test_unpack(self): + """Test unpack function.""" + equals = lambda source, result: self.assertEqual(unpack(source), result) + + equals('', '') + equals('abcd', 'abcd') + equals('var a = b', 'var a = b') + equals('var%20a=b', 'var a=b') + equals('var%20a+=+b', 'var a = b') + +if __name__ == '__main__': + unittest.main() diff --git a/codeformatter/lib/jsbeautifier/unpackers/urlencode.py b/codeformatter/lib/jsbeautifier/unpackers/urlencode.py index 7b79d0d..72d2bd1 100644 --- a/codeformatter/lib/jsbeautifier/unpackers/urlencode.py +++ b/codeformatter/lib/jsbeautifier/unpackers/urlencode.py @@ -1,6 +1,6 @@ # # Trivial bookmarklet/escaped script detector for the javascript beautifier -# written by Einar Lielmanis +# written by Einar Lielmanis # rewritten in Python by Stefano Sanfilippo # # Will always return valid javascript: if `detect()` is false, `code` is @@ -23,14 +23,12 @@ PRIORITY = 0 - def detect(code): """Detects if a scriptlet is urlencoded.""" # the fact that script doesn't contain any space, but has %20 instead # should be sufficient check for now. return ' ' not in code and ('%20' in code or code.count('%') > 3) - def unpack(code): """URL decode `code` source string.""" return unquote_plus(code) if detect(code) else code