1
2
3 r"""
4 =====================
5 Javascript Minifier
6 =====================
7
8 rJSmin is a javascript minifier written in python.
9
10 The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\\.
11
12 :Copyright:
13
14 Copyright 2011 - 2015
15 Andr\xe9 Malo or his licensors, as applicable
16
17 :License:
18
19 Licensed under the Apache License, Version 2.0 (the "License");
20 you may not use this file except in compliance with the License.
21 You may obtain a copy of the License at
22
23 http://www.apache.org/licenses/LICENSE-2.0
24
25 Unless required by applicable law or agreed to in writing, software
26 distributed under the License is distributed on an "AS IS" BASIS,
27 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 See the License for the specific language governing permissions and
29 limitations under the License.
30
31 The module is a re-implementation aiming for speed, so it can be used at
32 runtime (rather than during a preprocessing step). Usually it produces the
33 same results as the original ``jsmin.c``. It differs in the following ways:
34
35 - there is no error detection: unterminated string, regex and comment
36 literals are treated as regular javascript code and minified as such.
37 - Control characters inside string and regex literals are left untouched; they
38 are not converted to spaces (nor to \\n)
39 - Newline characters are not allowed inside string and regex literals, except
40 for line continuations in string literals (ECMA-5).
41 - "return /regex/" is recognized correctly.
42 - Line terminators after regex literals are handled more sensibly
43 - "+ +" and "- -" sequences are not collapsed to '++' or '--'
44 - Newlines before ! operators are removed more sensibly
45 - Comments starting with an exclamation mark (``!``) can be kept optionally
46 - rJSmin does not handle streams, but only complete strings. (However, the
47 module provides a "streamy" interface).
48
49 Since most parts of the logic are handled by the regex engine it's way faster
50 than the original python port of ``jsmin.c`` by Baruch Even. The speed factor
51 varies between about 6 and 55 depending on input and python version (it gets
52 faster the more compressed the input already is). Compared to the
53 speed-refactored python port by Dave St.Germain the performance gain is less
54 dramatic but still between 3 and 50 (for huge inputs). See the docs/BENCHMARKS
55 file for details.
56
57 rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.
58
59 Both python 2 and python 3 are supported.
60
61 .. _jsmin.c by Douglas Crockford:
62 http://www.crockford.com/javascript/jsmin.c
63 """
if __doc__:
    # The module docstring is a raw string holding escape sequences (for
    # example ``\xe9``); decode them so ``__doc__`` carries the real text.
    # Skipped when docstrings are stripped and ``__doc__`` is None.
    __doc__ = __doc__.encode('ascii').decode('unicode_escape')
# Same escape-decoding trick keeps the source file pure ASCII.
__author__ = r"Andr\xe9 Malo".encode('ascii').decode('unicode_escape')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.12'
__all__ = ['jsmin']
72
73 import re as _re
74
75
def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, the c extension is not even
        tried to be loaded.

    :Return: Minifier
    :Rtype: ``callable``
    """
    if not python_only:
        try:
            import _rjsmin
        except ImportError:
            # No C extension available - fall through to the python variant
            pass
        else:
            return _rjsmin.jsmin

    # NOTE: the names of the following locals matter - the big patterns
    # below are assembled with ``% locals()``.
    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        # A literal dash has to go last, otherwise it would denote a range
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, result = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    result.append((first, last))
                    first = last = char
            if last is not None:
                result.append((first, last))
            # Runs of two chars are written out ("ab"), longer runs are
            # collapsed ("a-d"). ``chr(...)`` is never empty, so the
            # ``and``/``or`` selection below is safe.
            return ''.join(['%s%s%s' % (
                chr(first),
                last > first + 1 and '-' or '',
                last != first and chr(last) or ''
            ) for first, last in result])

        # Escape class meta characters first, then write control chars,
        # space and the single quote as three-digit octal escapes.
        return _re.sub(
            r'([\000-\040\047])',
            lambda m: '\\%03o' % ord(m.group(1)), (
                sequentize(result)
                .replace('\\', '\\\\')
                .replace('[', '\\[')
                .replace(']', '\\]')
            )
        )

    # ``range`` is used directly: the former ``xrange`` fallback assigned
    # the name inside this function, which made it local, so the lookup
    # always failed and ``range`` was picked on every python version anyway.
    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = r'%(not_id_literal)sreturn' % locals()

    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
    post_regex_off = id_literal_(r'[^\000-\040}\])?:|,;.&=+-]')

    dull = r'[^\047"/\000-\040]'

    # Alternatives, in order: plain code, string literal, regex literal
    # after an operator-like char, regex literal after "return", newline
    # between two tokens, space between two id chars, space between "+ +",
    # space between "- -", remaining space, remaining newlines.
    space_sub_simple = _re.compile((
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
        r'%(space)s*(?:%(newline)s%(space)s*)*'
        r'(%(regex)s)'
        r'(%(space)s*(?:%(newline)s%(space)s*)+'
        r'(?=%(post_regex_off)s))?'
        r'|(?<=%(preregex2)s)'
        r'%(space)s*(?:(%(newline)s)%(space)s*)*'
        r'(%(regex)s)'
        r'(%(space)s*(?:%(newline)s%(space)s*)+'
        r'(?=%(post_regex_off)s))?'
        r'|(?<=%(id_literal_close)s)'
        r'%(space)s*(?:(%(newline)s)%(space)s*)+'
        r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    ) % locals()).sub

    def space_subber_simple(match):
        """ Substitution callback """
        groups = match.groups()
        if groups[0]:
            # plain code
            return groups[0]
        elif groups[1]:
            # string literal (plus trailing plain code)
            return groups[1]
        elif groups[2]:
            # regex literal; groups[3] is a captured newline run after it
            if groups[3]:
                return groups[2] + '\n'
            return groups[2]
        elif groups[5]:
            # regex literal after "return", newlines on either side are
            # reduced to a single one each
            return "%s%s%s" % (
                groups[4] and '\n' or '',
                groups[5],
                groups[6] and '\n' or '',
            )
        elif groups[7]:
            # newline between two tokens
            return '\n'
        elif groups[8] or groups[9] or groups[10]:
            # space between id chars, "+ +" or "- -"
            return ' '
        else:
            return ''

    # Same alternatives as above, but every stretch of space is captured,
    # so bang comments inside of it can be recovered by the callback.
    space_sub_banged = _re.compile((
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
        r'(%(space)s*(?:%(newline)s%(space)s*)*)'
        r'(%(regex)s)'
        r'(%(space)s*(?:%(newline)s%(space)s*)+'
        r'(?=%(post_regex_off)s))?'
        r'|(?<=%(preregex2)s)'
        r'(%(space)s*(?:(%(newline)s)%(space)s*)*)'
        r'(%(regex)s)'
        r'(%(space)s*(?:%(newline)s%(space)s*)+'
        r'(?=%(post_regex_off)s))?'
        r'|(?<=%(id_literal_close)s)'
        r'(%(space)s*(?:%(newline)s%(space)s*)+)'
        r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s+)(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s+)(?=\+)'
        r'|(?<=-)(%(space)s+)(?=-)'
        r'|(%(space)s+)'
        r'|((?:%(newline)s%(space)s*)+)'
    ) % locals()).sub

    # ``keep(keeper, text)`` drops everything from a stretch of space
    # except the bang comments (the only capturing alternative).
    keep = _re.compile((
        r'%(space_chars)s+|%(space_comment_nobang)s+|%(newline)s+'
        r'|(%(bang_comment)s+)'
    ) % locals()).sub
    keeper = lambda m: m.groups()[0] or ''

    def space_subber_banged(match):
        """ Substitution callback """
        groups = match.groups()
        if groups[0]:
            # plain code
            return groups[0]
        elif groups[1]:
            # string literal (plus trailing plain code)
            return groups[1]
        elif groups[3]:
            # regex literal, surrounded by space in groups[2] / groups[4]
            return "%s%s%s%s" % (
                keep(keeper, groups[2]),
                groups[3],
                keep(keeper, groups[4] or ''),
                groups[4] and '\n' or '',
            )
        elif groups[7]:
            # regex literal after "return"
            return "%s%s%s%s%s" % (
                keep(keeper, groups[5]),
                groups[6] and '\n' or '',
                groups[7],
                keep(keeper, groups[8] or ''),
                groups[8] and '\n' or '',
            )
        elif groups[9]:
            # newline between two tokens
            return keep(keeper, groups[9]) + '\n'
        elif groups[10] or groups[11] or groups[12]:
            # required separator: a kept bang comment or a single space
            return keep(keeper, groups[10] or groups[11] or groups[12]) or ' '
        else:
            return keep(keeper, groups[13] or groups[14])

    def jsmin(script, keep_bang_comments=False):
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
           http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        # The script is wrapped into newlines, so the lookbehind / lookahead
        # based alternatives also apply at the very start and end.
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        else:
            return space_sub_simple(
                space_subber_simple, '\n%s\n' % script
            ).strip()

    return jsmin

jsmin = _make_jsmin()
346
347
def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    if not keep_bang_comments:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/))((?:[\000-\011\013\014'
            r'\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r'
            r'\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:'
            r'[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)+,.:;=?\]|}-]))?|(?<=[\00'
            r'0-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]'
            r'))(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*'
            r'\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\['
            r'[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/))(('
            r'?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)'
            r'*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)+,.:;'
            r'=?\]|}-]))?|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000-\011\01'
            r'3\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:((?:(?:'
            r'//[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]'
            r'*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,./:-@\\-^'
            r'`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./'
            r':-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]|(?:/\*['
            r'^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:['
            r'\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
            r')+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            if groups[0]:
                # plain code
                return groups[0]
            if groups[1]:
                # string literal (plus trailing plain code)
                return groups[1]
            if groups[2]:
                # regex literal; groups[3] is a captured newline run after it
                if groups[3]:
                    return groups[2] + '\n'
                return groups[2]
            if groups[5]:
                # regex literal after "return"
                return "%s%s%s" % (
                    groups[4] and '\n' or '',
                    groups[5],
                    groups[6] and '\n' or '',
                )
            if groups[7]:
                # newline between two tokens
                return '\n'
            if groups[8] or groups[9] or groups[10]:
                # space between id chars, "+ +" or "- -"
                return ' '
            return ''
    else:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/'
            r'*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*)((?:/(?!'
            r'[\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^'
            r'\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/))((?:[\000-\011\013\01'
            r'4\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^'
            r'\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+('
            r'?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)+,.:;=?\]|}-]))?|(?<=['
            r'\000-#%-,./:-@\[-^`{-~-]return)((?:[\000-\011\013\014\016-\040'
            r']|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?['
            r'\r\n]))(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*]['
            r'^*]*\*+)*/))*)*)((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|'
            r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*'
            r'/))((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\01'
            r'6-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040&)'
            r'+,.:;=?\]|}-]))?|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:'
            r'(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/'
            r'\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+)(?=[^\000-\040"#%-\047)*,./'
            r':-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\01'
            r'3\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+)(?=[^\000'
            r'-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]|'
            r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+)(?=\+)|(?<=-)((?:[\000-\0'
            r'11\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+)(?=-'
            r')|((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*'
            r'\*+)*/))+)|((?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014'
            r'\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+)'
        )

        # ``keep(keeper, text)`` drops everything from a stretch of space
        # except the bang comments (the only capturing alternative).
        keep = _re.compile(
            r'[\000-\011\013\014\016-\040]+|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*'
            r'\*+)*/)+|(?:(?://[^\r\n]*)?[\r\n])+|((?:/\*![^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)+)'
        ).sub
        keeper = lambda m: m.groups()[0] or ''

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            if groups[0]:
                # plain code
                return groups[0]
            if groups[1]:
                # string literal (plus trailing plain code)
                return groups[1]
            if groups[3]:
                # regex literal, surrounded by space in groups[2] / groups[4]
                return "%s%s%s%s" % (
                    keep(keeper, groups[2]),
                    groups[3],
                    keep(keeper, groups[4] or ''),
                    groups[4] and '\n' or '',
                )
            if groups[7]:
                # regex literal after "return"
                return "%s%s%s%s%s" % (
                    keep(keeper, groups[5]),
                    groups[6] and '\n' or '',
                    groups[7],
                    keep(keeper, groups[8] or ''),
                    groups[8] and '\n' or '',
                )
            if groups[9]:
                # The newline has to be appended *after* filtering,
                # otherwise ``keep`` strips it again.
                return keep(keeper, groups[9]) + '\n'
            if groups[10] or groups[11] or groups[12]:
                # Required separator: a kept bang comment or a single space.
                # Only these three groups may fall back to ' '.
                return keep(
                    keeper, groups[10] or groups[11] or groups[12]
                ) or ' '
            # Optional space: only bang comments survive
            return keep(keeper, groups[13] or groups[14])

    # The script is wrapped into newlines, so the lookbehind / lookahead
    # based alternatives also apply at the very start and end.
    return _re.sub(rex, subber, '\n%s\n' % script).strip()
496
497
if __name__ == '__main__':
    def main():
        """
        Minify the javascript read from stdin and write it to stdout

        Recognized command line flags:

        ``-b``
          Keep bang comments (``/*!...*/``)
        ``-p``
          Build the minifier with ``python_only=True``
        ``-bp`` / ``-pb``
          Both of the above
        """
        import sys as _sys

        argv = _sys.argv[1:]
        keep_bang_comments = '-b' in argv or '-bp' in argv or '-pb' in argv
        if '-p' in argv or '-bp' in argv or '-pb' in argv:
            xjsmin = _make_jsmin(python_only=True)
        else:
            xjsmin = jsmin

        _sys.stdout.write(xjsmin(
            _sys.stdin.read(), keep_bang_comments=keep_bang_comments
        ))

    main()
515