prettify.js 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477
  1. // Copyright (C) 2006 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. /**
  15. * @fileoverview
  16. * some functions for browser-side pretty printing of code contained in html.
  17. *
  18. * <p>
  19. * For a fairly comprehensive set of languages see the
  20. * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a>
  21. * file that came with this source. At a minimum, the lexer should work on a
  22. * number of languages including C and friends, Java, Python, Bash, SQL, HTML,
  23. * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk
  24. * and a subset of Perl, but, because of commenting conventions, doesn't work on
  25. * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class.
  26. * <p>
  27. * Usage: <ol>
  28. * <li> include this source file in an html page via
  29. * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>}
  30. * <li> define style rules. See the example page for examples.
  31. * <li> mark the {@code <pre>} and {@code <code>} tags in your source with
  32. * {@code class=prettyprint.}
  33. * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty
  34. * printer needs to do more substantial DOM manipulations to support that, so
  35. * some css styles may not be preserved.
  36. * </ol>
  37. * That's it. I wanted to keep the API as simple as possible, so there's no
  38. * need to specify which language the code is in, but if you wish, you can add
  39. * another class to the {@code <pre>} or {@code <code>} element to specify the
  40. * language, as in {@code <pre class="prettyprint lang-java">}. Any class that
  41. * starts with "lang-" followed by a file extension, specifies the file type.
  42. * See the "lang-*.js" files in this directory for code that implements
  43. * per-language file handlers.
  44. * <p>
  45. * Change log:<br>
  46. * cbeust, 2006/08/22
  47. * <blockquote>
  48. * Java annotations (start with "@") are now captured as literals ("lit")
  49. * </blockquote>
  50. * @requires console
  51. */
  52. // JSLint declarations
  53. /*global console, document, navigator, setTimeout, window */
  /**
   * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
   * UI events.
   * If set to {@code false}, {@code prettyPrint()} is synchronous.
   * <p>
   * The flag defaults to {@code true} and is published as a property of the
   * global {@code window} object under a quoted key.
   * NOTE(review): the quoted key is presumably there so that property-renaming
   * minifiers leave the public name intact -- confirm before changing it to
   * dot notation.
   */
  window['PR_SHOULD_USE_CONTINUATION'] = true;
  60. (function () {
  // Keyword lists for various languages.
  // We use things that coerce to strings to make them compact when minified
  // and to defeat aggressive optimizers that fold large string constants.
  //
  // Each list is either a comma-separated string or an array whose elements
  // are such strings or other lists.  Coercing a (nested) array to a string
  // joins its elements with commas, so every list flattens to a single
  // comma-separated keyword string when it is used as one.
  var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
  var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "auto,case,char,const,default," +
      "double,enum,extern,float,goto,int,long,register,short,signed,sizeof," +
      "static,struct,switch,typedef,union,unsigned,void,volatile"];
  var COMMON_KEYWORDS = [C_KEYWORDS, "catch,class,delete,false,import," +
      "new,operator,private,protected,public,this,throw,true,try,typeof"];
  var CPP_KEYWORDS = [COMMON_KEYWORDS, "alignof,align_union,asm,axiom,bool," +
      "concept,concept_map,const_cast,constexpr,decltype," +
      "dynamic_cast,explicit,export,friend,inline,late_check," +
      "mutable,namespace,nullptr,reinterpret_cast,static_assert,static_cast," +
      "template,typeid,typename,using,virtual,where"];
  var JAVA_KEYWORDS = [COMMON_KEYWORDS,
      "abstract,boolean,byte,extends,final,finally,implements,import," +
      "instanceof,null,native,package,strictfp,super,synchronized,throws," +
      "transient"];
  var CSHARP_KEYWORDS = [JAVA_KEYWORDS,
      "as,base,by,checked,decimal,delegate,descending,dynamic,event," +
      "fixed,foreach,from,group,implicit,in,interface,internal,into,is,lock," +
      "object,out,override,orderby,params,partial,readonly,ref,sbyte,sealed," +
      "stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort,var"];
  var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
      "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
      "true,try,unless,until,when,while,yes";
  var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
      "debugger,eval,export,function,get,null,set,undefined,var,with," +
      "Infinity,NaN"];
  var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
      "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
      "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
  var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
      "elif,except,exec,finally,from,global,import,in,is,lambda," +
      "nonlocal,not,or,pass,print,raise,try,with,yield," +
      "False,True,None"];
  var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
      "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
      "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
      "BEGIN,END"];
  var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
      "function,in,local,set,then,until"];
  // The union of the per-language lists above (COFFEE_KEYWORDS is not part of
  // it).  Every entry must be separated by a comma: joining two lists with
  // `+` instead concatenates their string forms and fuses the last keyword of
  // one with the first keyword of the next (e.g. "END" + "break").
  var ALL_KEYWORDS = [
      CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
      PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
  // Matches a leading C/C++ library type name.  Only the start is anchored.
  var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)/;
  // token style names. correspond to css classes
  /**
   * token style for a string literal
   * @const
   */
  var PR_STRING = 'str';
  /**
   * token style for a keyword
   * @const
   */
  var PR_KEYWORD = 'kwd';
  /**
   * token style for a comment
   * @const
   */
  var PR_COMMENT = 'com';
  /**
   * token style for a type
   * @const
   */
  var PR_TYPE = 'typ';
  /**
   * token style for a literal value. e.g. 1, null, true.
   * @const
   */
  var PR_LITERAL = 'lit';
  /**
   * token style for a punctuation string.
   * @const
   */
  var PR_PUNCTUATION = 'pun';
  /**
   * token style for plain text. This is the default style: the simple lexer
   * starts every decoration list with it and falls back to it for any token
   * that no pattern matches.
   * @const
   */
  var PR_PLAIN = 'pln';
  /**
   * token style for an sgml tag.
   * @const
   */
  var PR_TAG = 'tag';
  /**
   * token style for a markup declaration such as a DOCTYPE.
   * @const
   */
  var PR_DECLARATION = 'dec';
  /**
   * token style for embedded source.
   * @const
   */
  var PR_SOURCE = 'src';
  /**
   * token style for an sgml attribute name.
   * @const
   */
  var PR_ATTRIB_NAME = 'atn';
  /**
   * token style for an sgml attribute value.
   * @const
   */
  var PR_ATTRIB_VALUE = 'atv';
  /**
   * A class that indicates a section of markup that is not code, e.g. to allow
   * embedding of line numbers within code listings.
   * @const
   */
  var PR_NOCODE = 'nocode';
  /**
   * A set of tokens that can precede a regular expression literal in
   * javascript
   * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html
   * has the full list, but I've removed ones that might be problematic when
   * seen in languages that don't support regular expression literals.
   *
   * <p>Specifically, I've removed any keywords that can't precede a regexp
   * literal in a syntactically legal javascript program, and I've removed the
   * "in" keyword since it's not a keyword in many languages, and might be used
   * as a count of inches.
   *
   * <p>The link above does not accurately describe EcmaScript rules since
   * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
   * very well in practice.
   *
   * <p>The value is regular expression <em>source text</em> (a string, not a
   * RegExp): a non-capturing alternation of the preceding tokens followed by
   * optional whitespace.
   *
   * @private
   * @const
   */
  var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|\\!|\\!=|\\!==|\\#|\\%|\\%=|&|&&|&&=|&=|\\(|\\*|\\*=|\\+=|\\,|\\-=|\\->|\\/|\\/=|:|::|\\;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\@|\\[|\\^|\\^=|\\^\\^|\\^\\^=|\\{|\\||\\|=|\\|\\||\\|\\|=|\\~|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*';
  // CAVEAT: this does not properly handle the case where a regular
  // expression immediately follows another since a regular expression may
  // have flags for case-sensitivity and the like. Having regexp tokens
  // adjacent is not valid in any language I'm aware of, so I'm punting.
  // TODO: maybe style special characters inside a regexp as punctuation.
  /**
   * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
   * matches the union of the sets of strings matched by the input RegExps.
   * Since it matches globally, if the input strings have a start-of-input
   * anchor (/^.../), it is ignored for the purposes of unioning.
   *
   * <p>Each input is rewritten so that it can live inside one big alternation:
   * <ul>
   * <li>capturing groups that are never back-referenced become non-capturing,
   * <li>the remaining groups and their back-references are renumbered so they
   *     stay correct after all the inputs have been joined,
   * <li>a single leading {@code ^} is dropped ({@code ^^} is left alone), and
   * <li>when case-sensitive and case-insensitive inputs are mixed, the
   *     case-insensitive ones have their letters expanded to explicit
   *     {@code [Xx]} sets so the combined regex can be case-sensitive.
   * </ul>
   *
   * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
   * @return {RegExp} a global regex.
   * @throws {Error} if any input regex is global or multiline.
   */
  function combinePrefixPatterns(regexs) {
    // Count of capturing groups emitted so far across ALL inputs; the next
    // surviving group of any input is numbered from here.
    var capturedGroupIndex = 0;

    // The output may carry the 'i' flag only when no case-sensitive input
    // contains a letter.  Otherwise case-insensitive inputs are folded by hand.
    var needToFoldCase = false;
    var ignoreCase = false;
    for (var i = 0, n = regexs.length; i < n; ++i) {
      var regex = regexs[i];
      if (regex.ignoreCase) {
        ignoreCase = true;
      } else if (/[a-z]/i.test(regex.source.replace(
          /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
        // A case-sensitive regex with a literal letter (escapes stripped).
        needToFoldCase = true;
        ignoreCase = false;
        break;
      }
    }

    var escapeCharToCodeUnit = {
      'b': 8,
      't': 9,
      'n': 0xa,
      'v': 0xb,
      'f': 0xc,
      'r': 0xd
    };

    // Returns the code unit denoted by one element of a character set, which
    // is either a single character or an escape sequence.
    function decodeEscape(charsetPart) {
      var cc0 = charsetPart.charCodeAt(0);
      if (cc0 !== 92 /* \\ */) {
        return cc0;
      }
      var c1 = charsetPart.charAt(1);
      cc0 = escapeCharToCodeUnit[c1];
      if (cc0) {
        return cc0;
      } else if ('0' <= c1 && c1 <= '7') {
        return parseInt(charsetPart.substring(1), 8);
      } else if (c1 === 'u' || c1 === 'x') {
        return parseInt(charsetPart.substring(2), 16);
      } else {
        return charsetPart.charCodeAt(1);
      }
    }

    // Returns text that denotes the given code unit inside a character set.
    function encodeEscape(charCode) {
      if (charCode < 0x20) {
        return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
      }
      var ch = String.fromCharCode(charCode);
      if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') {
        ch = '\\' + ch;
      }
      return ch;
    }

    // Rewrites a character set such as "[a-c]" so that it matches both cases
    // of every latin letter it contains, e.g. "[A-Ca-c]".
    function caseFoldCharset(charSet) {
      // An empty set ("[]") has no parts and match() returns null for it.
      var charsetParts = charSet.substring(1, charSet.length - 1).match(
          new RegExp(
              '\\\\u[0-9A-Fa-f]{4}'
              + '|\\\\x[0-9A-Fa-f]{2}'
              + '|\\\\[0-3][0-7]{0,2}'
              + '|\\\\[0-7]{1,2}'
              + '|\\\\[\\s\\S]'
              + '|-'
              + '|[^-\\\\]',
              'g')) || [];
      var groups = [];
      var ranges = [];
      var inverse = charsetParts[0] === '^';
      for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
        var p = charsetParts[i];
        if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
          groups.push(p);
        } else {
          var start = decodeEscape(p);
          var end;
          if (i + 2 < n && '-' === charsetParts[i + 1]) {
            end = decodeEscape(charsetParts[i + 2]);
            i += 2;
          } else {
            end = start;
          }
          ranges.push([start, end]);
          // If the range might intersect letters, then expand it.
          // This case handling is too simplistic.
          // It does not deal with non-latin case folding.
          // It works for latin source code identifiers though.
          if (!(end < 65 || start > 122)) {
            if (!(end < 65 || start > 90)) {
              // Upper-case portion of the range, mapped to lower case.
              ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
            }
            if (!(end < 97 || start > 122)) {
              // Lower-case portion of the range, mapped to upper case.
              ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
            }
          }
        }
      }

      // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
      // -> [[1, 12], [14, 14], [16, 17]]
      ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
      var consolidatedRanges = [];
      var lastRange = [NaN, NaN];
      for (var j = 0; j < ranges.length; ++j) {
        var candidate = ranges[j];
        if (candidate[0] <= lastRange[1] + 1) {
          lastRange[1] = Math.max(lastRange[1], candidate[1]);
        } else {
          consolidatedRanges.push(lastRange = candidate);
        }
      }

      var out = ['['];
      if (inverse) { out.push('^'); }
      out.push.apply(out, groups);
      for (var m = 0; m < consolidatedRanges.length; ++m) {
        var range = consolidatedRanges[m];
        out.push(encodeEscape(range[0]));
        if (range[1] > range[0]) {
          out.push('-', encodeEscape(range[1]));
        }
      }
      out.push(']');
      return out.join('');
    }

    // Returns the source of one input regex, rewritten as described in the
    // documentation of combinePrefixPatterns.
    function allowAnywhereFoldCaseAndRenumberGroups(regex) {
      // Split into character sets, escape sequences, punctuation strings
      // like ('(', '(?:', ')', '^'), and runs of characters that do not
      // include any of the above.
      var parts = regex.source.match(
          new RegExp(
              '(?:'
              + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
              + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
              + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
              + '|\\\\[0-9]+'  // a back-reference or octal escape
              + '|\\\\[^ux0-9]'  // other escape sequence
              + '|\\(\\?[:!=]'  // start of a non-capturing group
              + '|[\\(\\)\\^]'  // start/end of a group, or line start
              + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
              + ')',
              'g'));
      var n = parts.length;

      // Maps captured group numbers to the number they will occupy in
      // the output or to -1 if that has not been determined, or to
      // undefined if they need not be capturing in the output.
      var capturedGroups = [];

      // Walk over and identify back references to build the capturedGroups
      // mapping.
      for (var i = 0, groupIndex = 0; i < n; ++i) {
        var p = parts[i];
        if (p === '(') {
          // groups are 1-indexed, so max group index is count of '('
          ++groupIndex;
        } else if ('\\' === p.charAt(0)) {
          var decimalValue = +p.substring(1);
          if (decimalValue && decimalValue <= groupIndex) {
            capturedGroups[decimalValue] = -1;
          }
        }
      }

      // Renumber groups and reduce capturing groups to non-capturing groups
      // where possible.
      for (var i = 1; i < capturedGroups.length; ++i) {
        if (-1 === capturedGroups[i]) {
          capturedGroups[i] = ++capturedGroupIndex;
        }
      }
      for (var i = 0, groupIndex = 0; i < n; ++i) {
        var p = parts[i];
        if (p === '(') {
          ++groupIndex;
          if (capturedGroups[groupIndex] === undefined) {
            parts[i] = '(?:';
          }
        } else if ('\\' === p.charAt(0)) {
          var decimalValue = +p.substring(1);
          if (decimalValue && decimalValue <= groupIndex) {
            // Look up the group the back-reference NAMES (decimalValue), not
            // the number of groups opened so far (groupIndex); the two only
            // coincide when the reference targets the most recent group.
            parts[i] = '\\' + capturedGroups[decimalValue];
          }
        }
      }

      // Remove any prefix anchors so that the output will match anywhere.
      // ^^ really does mean an anchored match though.
      for (var i = 0; i < n; ++i) {
        if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
      }

      // Expand letters to groups to handle mixing of case-sensitive and
      // case-insensitive patterns if necessary.
      if (regex.ignoreCase && needToFoldCase) {
        for (var i = 0; i < n; ++i) {
          var p = parts[i];
          var ch0 = p.charAt(0);
          if (p.length >= 2 && ch0 === '[') {
            parts[i] = caseFoldCharset(p);
          } else if (ch0 !== '\\') {
            // TODO: handle letters in numeric escapes.
            parts[i] = p.replace(
                /[a-zA-Z]/g,
                function (ch) {
                  var cc = ch.charCodeAt(0);
                  return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
                });
          }
        }
      }

      return parts.join('');
    }

    var rewritten = [];
    for (var i = 0, n = regexs.length; i < n; ++i) {
      var regex = regexs[i];
      // Global/multiline inputs cannot be unioned; report the offending regex.
      if (regex.global || regex.multiline) { throw new Error('' + regex); }
      rewritten.push(
          '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
    }

    return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
  }
  /**
   * Split markup into a string of source code and an array mapping ranges in
   * that string to the text nodes in which they appear.
   *
   * <p>
   * The HTML DOM structure:</p>
   * <pre>
   * (Element   "p"
   *   (Element "b"
   *     (Text  "print "))       ; #1
   *   (Text    "'Hello '")      ; #2
   *   (Element "br")            ; #3
   *   (Text    "  + 'World';")) ; #4
   * </pre>
   * <p>
   * corresponds to the HTML
   * {@code <p><b>print </b>'Hello '<br>  + 'World';</p>}.</p>
   *
   * <p>
   * It will produce the output:</p>
   * <pre>
   * {
   *   sourceCode: "print 'Hello '\n + 'World';",
   *   //                     1          2
   *   //           012345678901234 5678901234567
   *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
   * }
   * </pre>
   * <p>
   * where #1 is a reference to the {@code "print "} text node above, and so
   * on for the other text nodes.
   * </p>
   *
   * <p>
   * The {@code spans} array is an array of pairs.  Even elements are the start
   * indices of substrings, and odd elements are the text nodes (or BR elements)
   * that contain the text for those substrings.
   * Substrings continue until the next index or the end of the source.
   * </p>
   *
   * <p>
   * Elements whose class list contains {@code nocode} are skipped together
   * with everything inside them.  BR and LI elements contribute a single
   * newline.  Unless the computed {@code white-space} of {@code node} starts
   * with "pre", runs of spaces, tabs and newlines in text are collapsed to one
   * space; otherwise only line endings are normalized to "\n".  One trailing
   * newline is removed from the returned source.
   * </p>
   *
   * @param {Node} node an HTML DOM subtree containing source-code.
   * @return {Object} source code and the text nodes in which they occur.
   */
  function extractSourceSpans(node) {
    var nocode = /(?:^|\s)nocode(?:\s|$)/;

    var chunks = [];
    var length = 0;  // Total length of the text collected so far.
    var spans = [];
    var k = 0;       // Index of the next chunk / span pair.

    var whitespace;
    if (node.currentStyle) {
      whitespace = node.currentStyle.whiteSpace;
    } else if (window.getComputedStyle) {
      // Call the same object that was feature-tested, and tolerate a null
      // style object: getComputedStyle is not guaranteed to return one (some
      // browsers return null for elements in undisplayed frames).
      var computedStyle = window.getComputedStyle(node, null);
      if (computedStyle) {
        whitespace = computedStyle.getPropertyValue('white-space');
      }
    }
    // With no style information the content is treated as not preformatted.
    var isPreformatted = whitespace && 'pre' === whitespace.substring(0, 3);

    function walk(node) {
      switch (node.nodeType) {
        case 1:  // Element
          if (nocode.test(node.className)) { return; }
          for (var child = node.firstChild; child; child = child.nextSibling) {
            walk(child);
          }
          var nodeName = node.nodeName;
          if ('BR' === nodeName || 'LI' === nodeName) {
            // The line break is recorded after the element's own content and
            // is attributed to the element itself.
            chunks[k] = '\n';
            spans[k << 1] = length++;
            spans[(k++ << 1) | 1] = node;
          }
          break;
        case 3: case 4:  // Text or CDATA section
          var text = node.nodeValue;
          if (text.length) {
            if (!isPreformatted) {
              text = text.replace(/[ \t\r\n]+/g, ' ');
            } else {
              text = text.replace(/\r\n?/g, '\n');  // Normalize newlines.
            }
            // TODO: handle tabs here?
            chunks[k] = text;
            spans[k << 1] = length;
            length += text.length;
            spans[(k++ << 1) | 1] = node;
          }
          break;
      }
    }

    walk(node);

    return {
      sourceCode: chunks.join('').replace(/\n$/, ''),
      spans: spans
    };
  }
  /**
   * Runs a language handler over a piece of source code and appends the
   * decorations it produces to an existing decoration list.
   * Does nothing when there is no source code to decorate.
   *
   * @param {number} basePos the index of sourceCode within the chunk of source
   *     whose decorations are already present on out.
   * @param {string} sourceCode the text to decorate.
   * @param {function (Object)} langHandler called with a job object carrying
   *     {@code sourceCode} and {@code basePos}; it is expected to store its
   *     result on the job as {@code decorations}.
   * @param {Array} out the decoration list that receives the new entries.
   */
  function appendDecorations(basePos, sourceCode, langHandler, out) {
    if (sourceCode) {
      var job = {};
      job.sourceCode = sourceCode;
      job.basePos = basePos;
      langHandler(job);
      // Append every produced entry in one call.
      out.push.apply(out, job.decorations);
    }
  }
  // Matches any string that contains at least one non-whitespace character.
  var notWs = /\S/;
  /**
   * Finds the single element that wraps all the content of {@code element}.
   * <p>
   * If {@code element} has exactly one child element and none of its direct
   * text-node children contains a non-space character, that child element is
   * returned.  In every other case (no child element, several child elements,
   * or real text next to them) the result is undefined.
   * <p>
   * This is meant to return the CODE element in {@code <pre><code ...>} when
   * there is a single child element that contains all the non-space textual
   * content, but not to return anything where there are multiple child elements
   * as in {@code <pre><code>...</code><code>...</code></pre>} or when there
   * is textual content.
   *
   * @param {Node} element the node whose direct children are inspected.
   * @return {Node|undefined} the sole child element, or undefined.
   */
  function childContentWrapper(element) {
    var soleChild;
    for (var child = element.firstChild; child; child = child.nextSibling) {
      var kind = child.nodeType;
      if (kind === 1) {  // Element Node
        // A second element means no single wrapper exists.
        if (soleChild) { return undefined; }
        soleChild = child;
      } else if (kind === 3 && notWs.test(child.nodeValue)) {  // Text Node
        // Real text beside the elements disqualifies the container.
        return undefined;
      }
    }
    return soleChild;
  }
  /**
   * Builds a lexing function from lists of [style, pattern, context] entries.
   * The lexing function uses the patterns to find token boundaries and
   * produces a decoration list of the form
   * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
   * where index_n is an index into the sourceCode, and style_n is a style
   * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
   * all characters in sourceCode[index_n-1:index_n].
   *
   * Every element of a pattern list has the form
   * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
   *
   * Style is a style constant like PR_PLAIN, or a string of the form
   * 'lang-FOO', where FOO is a language extension naming the language of the
   * part of the token captured in $1 once pattern has executed.
   * E.g., if style is 'lang-lisp', and group 1 contains the text
   * '(hello (world))', then that portion of the token will be passed to the
   * registered lisp handler for formatting.
   * The text before and after group 1 is restyled with this same decorator,
   * so patterns must be written so that this cannot recurse forever.  For
   * example, the HTML lexer rule for SCRIPT elements looks something like
   * ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
   * '<script>foo()<\/script>', which causes the current decorator to be
   * called with '<script>'.  That does not match the same rule, because
   * group 1 must not be empty, so it is styled as PR_TAG by the generic tag
   * rule.  The handler registered for the 'js' extension is then called with
   * 'foo()', and finally the current decorator is called with '<\/script>',
   * which does not match the original rule either and is again identified as
   * a tag by the generic tag rule.
   *
   * Pattern must only match prefixes, and if it matches a prefix, then that
   * match is considered a token with the same style.
   *
   * Context is applied to the last non-whitespace, non-comment token
   * recognized.
   *
   * Shortcut is an optional string of characters, any of which, if the first
   * character, guarantee that this pattern and only this pattern matches.
   *
   * @param {Array} shortcutStylePatterns patterns that always start with
   *   a known character.  Must have a shortcut string.
   * @param {Array} fallthroughStylePatterns patterns that will be tried in
   *   order if the shortcut ones fail.  May have shortcuts.
   *
   * @return {function (Object)} a
   *   function that takes source code and returns a list of decorations.
   */
  function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
    // Maps a token's first character to the one pattern entry it selects.
    var shortcuts = {};

    // One global regex that splits source text into tokens.  Building it also
    // fills in the shortcuts table above.
    var tokenizer = (function () {
      var everyPattern = shortcutStylePatterns.concat(fallthroughStylePatterns);
      var uniqueRegexs = [];
      var seenRegexKeys = {};
      for (var pi = 0; pi < everyPattern.length; ++pi) {
        var entry = everyPattern[pi];

        var shortcutChars = entry[3];
        if (shortcutChars) {
          for (var ci = shortcutChars.length - 1; ci >= 0; --ci) {
            shortcuts[shortcutChars.charAt(ci)] = entry;
          }
        }

        // Each distinct regex takes part in the tokenizer only once.
        var regex = entry[1];
        var regexKey = '' + regex;
        if (!seenRegexKeys.hasOwnProperty(regexKey)) {
          uniqueRegexs.push(regex);
          seenRegexKeys[regexKey] = null;
        }
      }
      // Catch-all so that any single character forms a token of its own.
      uniqueRegexs.push(/[\0-\uffff]/);
      return combinePrefixPatterns(uniqueRegexs);
    })();

    var nPatterns = fallthroughStylePatterns.length;

    /**
     * Lexes job.sourceCode and produces an output array job.decorations of
     * style classes preceded by the position at which they start in
     * job.sourceCode in order.
     *
     * @param {Object} job an object like <pre>{
     *    sourceCode: {string} sourceText plain text,
     *    basePos: {int} position of job.sourceCode in the larger chunk of
     *        sourceCode.
     * }</pre>
     */
    function decorate(job) {
      var sourceCode = job.sourceCode;
      var basePos = job.basePos;
      /** Even entries are positions in source in ascending order.  Odd entries
       * are style markers (e.g., PR_COMMENT) that run from that position until
       * the end.
       * @type {Array.<number|string>}
       */
      var decorations = [basePos, PR_PLAIN];
      var tokens = sourceCode.match(tokenizer) || [];
      // Remembers the style of tokens that were already classified.
      var styleCache = {};
      var pos = 0;  // index into sourceCode

      for (var ti = 0; ti < tokens.length; ++ti) {
        var token = tokens[ti];
        var tokenStart = pos;
        pos += token.length;

        var style = styleCache[token];
        var match = void 0;
        var isEmbedded = false;

        // Only a string counts as a cache hit; anything else means the token
        // still has to be classified.
        if (typeof style !== 'string') {
          var patternParts = shortcuts[token.charAt(0)];
          if (patternParts) {
            match = token.match(patternParts[1]);
            style = patternParts[0];
          } else {
            style = PR_PLAIN;  // make sure that we make progress
            for (var fi = 0; fi < nPatterns; ++fi) {
              patternParts = fallthroughStylePatterns[fi];
              match = token.match(patternParts[1]);
              if (match) {
                style = patternParts[0];
                break;
              }
            }
          }

          isEmbedded = style.length >= 5 && style.substring(0, 5) === 'lang-';
          if (isEmbedded && !(match && typeof match[1] === 'string')) {
            // A 'lang-' style without a usable group 1 has nothing to hand
            // over to another language handler.
            isEmbedded = false;
            style = PR_SOURCE;
          }

          if (!isEmbedded) { styleCache[token] = style; }
        }

        if (!isEmbedded) {
          decorations.push(basePos + tokenStart, style);
          continue;
        }

        // Treat group 1 as an embedded block of source code.
        var embeddedSource = match[1];
        var embeddedSourceStart = token.indexOf(embeddedSource);
        var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
        if (match[2]) {
          // If embeddedSource can be blank, then it would match at the
          // beginning which would cause us to infinitely recurse on the
          // entire token, so we catch the right context in match[2].
          embeddedSourceEnd = token.length - match[2].length;
          embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
        }
        var lang = style.substring(5);
        var tokenBase = basePos + tokenStart;
        // Decorate the left of the embedded source
        appendDecorations(
            tokenBase,
            token.substring(0, embeddedSourceStart),
            decorate, decorations);
        // Decorate the embedded source
        appendDecorations(
            tokenBase + embeddedSourceStart,
            embeddedSource,
            langHandlerForExtension(lang, embeddedSource),
            decorations);
        // Decorate the right of the embedded section
        appendDecorations(
            tokenBase + embeddedSourceEnd,
            token.substring(embeddedSourceEnd),
            decorate, decorations);
      }
      job.decorations = decorations;
    }

    return decorate;
  }
  /**
   * Returns a function that produces a list of decorations from source text.
   *
   * This code treats ", ', and ` as string delimiters, and \ as a string
   * escape.  It does not recognize perl's qq() style strings.
   * It has no special handling for double delimiter escapes as in basic, or
   * the tripled delimiters used in python, but should work on those regardless
   * although in those cases a single string literal may be broken up into
   * multiple adjacent string literals.
   *
   * It recognizes C, C++, and shell style comments.
   *
   * Options read by this function (all optional):
   *   'tripleQuotedStrings', 'multiLineStrings' - select the string pattern;
   *       the first one that is truthy wins, otherwise single-line strings.
   *   'verbatimStrings' - adds C# style @"..." strings.
   *   'hashComments' - truthy enables # comments; a value greater than 1
   *       (together with 'cStyleComments') enables ### block comments.
   *   'cStyleComments' - enables // and block comments.
   *   'regexLiterals' - enables /.../ regular expression literals.
   *   'types' - a RegExp matched as PR_TYPE.
   *   'keywords' - a space or comma separated list of keywords.  When it is
   *       absent no keyword pattern is registered.
   *
   * @param {Object} options a set of optional parameters.
   * @return {function (Object)} a function that examines the source code
   *     in the input job and builds the decoration list.
   */
  function sourceDecorator(options) {
    var shortcutStylePatterns = [], fallthroughStylePatterns = [];
    if (options['tripleQuotedStrings']) {
      // '''multi-line-string''', 'single-line-string', and double-quoted
      shortcutStylePatterns.push(
          [PR_STRING, /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
           null, '\'"']);
    } else if (options['multiLineStrings']) {
      // 'multi-line-string', "multi-line-string"
      shortcutStylePatterns.push(
          [PR_STRING, /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
           null, '\'"`']);
    } else {
      // 'single-line-string', "single-line-string"
      shortcutStylePatterns.push(
          [PR_STRING,
           /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
           null, '"\'']);
    }
    if (options['verbatimStrings']) {
      // verbatim-string-literal production from the C# grammar.  See issue 93.
      fallthroughStylePatterns.push(
          [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
    }
    var hc = options['hashComments'];
    if (hc) {
      if (options['cStyleComments']) {
        if (hc > 1) {  // multiline hash comments
          shortcutStylePatterns.push(
              [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
        } else {
          // Stop C preprocessor declarations at an unclosed open comment
          shortcutStylePatterns.push(
              [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
               null, '#']);
        }
        fallthroughStylePatterns.push(
            [PR_STRING,
             /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
             null]);
      } else {
        shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
      }
    }
    if (options['cStyleComments']) {
      fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
      fallthroughStylePatterns.push(
          [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
    }
    if (options['regexLiterals']) {
      /**
       * @const
       */
      var REGEX_LITERAL = (
          // A regular expression literal starts with a slash that is
          // not followed by * or / so that it is not confused with
          // comments.
          '/(?=[^/*])'
          // and then contains any number of raw characters,
          + '(?:[^/\\x5B\\x5C]'
          // escape sequences (\x5C),
          +    '|\\x5C[\\s\\S]'
          // or non-nesting character sets (\x5B\x5D);
          +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
          // finally closed by a /.
          + '/');
      fallthroughStylePatterns.push(
          ['lang-regex',
           new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
           ]);
    }

    var types = options['types'];
    if (types) {
      fallthroughStylePatterns.push([PR_TYPE, types]);
    }

    // A missing 'keywords' option must not be stringified: '' + undefined
    // would register the word "undefined" as the only keyword.
    var rawKeywords = options['keywords'];
    var keywords = rawKeywords == null
        ? ''
        : ('' + rawKeywords).replace(/^ | $/g, '');
    if (keywords.length) {
      fallthroughStylePatterns.push(
          [PR_KEYWORD,
           new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
           null]);
    }

    shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']);
    fallthroughStylePatterns.push(
        // TODO(mikesamuel): recognize non-latin letters and numerals in idents
        [PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*/i, null],
        [PR_TYPE, /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
        [PR_PLAIN, /^[a-z_$][a-z_$@0-9]*/i, null],
        [PR_LITERAL,
         new RegExp(
             '^(?:'
             // A hex number
             + '0x[a-f0-9]+'
             // or an octal or decimal number; the second alternative covers
             // numbers that start with the decimal point, such as .5
             + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
             // possibly in scientific notation
             + '(?:e[+\\-]?\\d+)?'
             + ')'
             // with an optional modifier like UL for unsigned long
             + '[a-z]*', 'i'),
         null, '0123456789'],
        // Don't treat escaped quotes in bash as starting strings.  See issue 144.
        [PR_PLAIN, /^\\[\s\S]?/, null],
        [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);

    return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
  }
  // Decorator for source in an unspecified language: every known keyword,
  // hash and C style comments, multi-line strings and regex literals.
  var decorateSource = sourceDecorator({
    'regexLiterals': true,
    'multiLineStrings': true,
    'cStyleComments': true,
    'hashComments': true,
    'keywords': ALL_KEYWORDS
  });
  /**
   * Moves the content of a DOM subtree into an ordered list with one list
   * item per line of text.
   *
   * Lines are split at <BR> elements and, when the node's computed
   * white-space style starts with "pre", at line breaks inside text nodes.
   * Elements whose class includes "nocode" are not descended into.
   *
   * @param {Node} node modified in place.  Its content is pulled into an
   *     HTMLOListElement, and each line is moved into a separate list item.
   *     This requires cloning elements, so the input might not have unique
   *     IDs after numbering.
   * @param {number|boolean} opt_startLineNum when this is an integer it is
   *     set as the "value" attribute of the first list item and shifts the
   *     L0..L9 class cycle accordingly.
   */
  function numberLines(node, opt_startLineNum) {
    var nocodeClassRe = /(?:^|\s)nocode(?:\s|$)/;
    var lineBreakRe = /\r\n?|\n/;
    var ownerDoc = node.ownerDocument;

    var whiteSpaceStyle;
    if (node.currentStyle) {
      whiteSpaceStyle = node.currentStyle.whiteSpace;
    } else if (window.getComputedStyle) {
      whiteSpaceStyle = ownerDoc.defaultView.getComputedStyle(node, null)
          .getPropertyValue('white-space');
    }
    // Preformatted content must be split on line breaks as well as on <BR>s.
    var isPreformatted =
        whiteSpaceStyle && 'pre' === whiteSpaceStyle.substring(0, 3);

    // Start with a single list item that holds all of the content.
    var firstItem = ownerDoc.createElement('LI');
    while (node.firstChild) {
      firstItem.appendChild(node.firstChild);
    }
    // The lines found so far; grows as splitAfter peels lines off.
    var lineItems = [firstItem];

    // Moves limit and everything to its right onto a shallow clone of the
    // ancestor chain, so that styling elements which cross a line break
    // survive on both sides, e.g. <i>Foo<br>Bar</i> becomes
    // <li><i>Foo</i></li><li><i>Bar</i></li>.
    // When copy is truthy limit itself is cloned shallowly instead of moved.
    function moveRightSide(limit, copy) {
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var container = limit.parentNode;
      if (!container) { return rightSide; }
      var containerClone = moveRightSide(container, 1);
      // Capture the following sibling before rightSide is re-parented.
      var following = limit.nextSibling;
      containerClone.appendChild(rightSide);
      while (following) {
        var moving = following;
        following = moving.nextSibling;
        containerClone.appendChild(moving);
      }
      return rightSide;
    }

    // Ends the current line after the given node.
    function splitAfter(lineEndNode) {
      // With nothing to the right there is no need to end the line here;
      // move root-wards instead, since splitting just before an end tag
      // would only create empty copies.
      while (!lineEndNode.nextSibling) {
        lineEndNode = lineEndNode.parentNode;
        if (!lineEndNode) { return; }
      }
      var newLineRoot = moveRightSide(lineEndNode.nextSibling, 0);
      // Climb to the unattached LI.  The nodeType check is needed because
      // IE invents document fragments.
      var ancestor = newLineRoot.parentNode;
      while (ancestor && ancestor.nodeType === 1) {
        newLineRoot = ancestor;
        ancestor = newLineRoot.parentNode;
      }
      // Queue the new line for later processing.
      lineItems.push(newLineRoot);
    }

    // Looks for the first line break in or below current and splits there.
    function splitLines(current) {
      var type = current.nodeType;
      if (type === 1) {  // Element
        if (nocodeClassRe.test(current.className)) { return; }
        if ('BR' === current.nodeName) {
          splitAfter(current);
          // Discard the <BR> since it is now flush against a </LI>.
          if (current.parentNode) {
            current.parentNode.removeChild(current);
          }
          return;
        }
        for (var kid = current.firstChild; kid; kid = kid.nextSibling) {
          splitLines(kid);
        }
        return;
      }
      if ((type === 3 || type === 4) && isPreformatted) {  // Text
        var text = current.nodeValue;
        var found = text.match(lineBreakRe);
        if (!found) { return; }
        var head = text.substring(0, found.index);
        current.nodeValue = head;
        var rest = text.substring(found.index + found[0].length);
        if (rest) {
          current.parentNode.insertBefore(
              ownerDoc.createTextNode(rest), current.nextSibling);
        }
        splitAfter(current);
        if (!head) {
          // Don't leave blank text nodes in the DOM.
          current.parentNode.removeChild(current);
        }
      }
    }

    // Split lines while there are lines left to split; lineItems grows as
    // splitAfter is called.
    for (var done = 0; done < lineItems.length; ++done) {
      splitLines(lineItems[done]);
    }

    // Make sure numeric indices show correctly.
    if (opt_startLineNum === (opt_startLineNum|0)) {
      lineItems[0].setAttribute('value', opt_startLineNum);
    }

    var list = ownerDoc.createElement('OL');
    list.className = 'linenums';
    var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
    var lineCount = lineItems.length;
    for (var index = 0; index < lineCount; ++index) {
      var item = lineItems[index];
      // Stick a class on the LIs so that stylesheets can
      // color odd/even rows, or any other row pattern that
      // is co-prime with 10.
      item.className = 'L' + ((index + offset) % 10);
      if (!item.firstChild) {
        item.appendChild(ownerDoc.createTextNode('\xA0'));
      }
      list.appendChild(item);
    }

    node.appendChild(list);
  }
  /**
   * Breaks {@code job.sourceCode} around style boundaries in
   * {@code job.decorations} and modifies the text nodes listed in
   * {@code job.spans} in place, wrapping each styled run in a SPAN whose
   * class is the decoration's style.
   *
   * {@code job.decorations} is also normalized in place: zero-length
   * decorations are dropped and adjacent decorations that share a style
   * are merged.
   *
   * @param {Object} job like <pre>{
   *    sourceCode: {string} source as plain text,
   *    spans: {Array.<number|Node>} alternating span start indices into source
   *       and the text node or element (e.g. {@code <BR>}) corresponding to that
   *       span.
   *    decorations: {Array.<number|string>} an array of style classes preceded
   *       by the position at which they start in job.sourceCode in order
   * }</pre>
   * @private
   */
  function recombineTagsAndDecorations(job) {
    var isIE = /\bMSIE\b/.test(navigator.userAgent);
    var newlineRe = /\n/g;

    var source = job.sourceCode;
    var sourceLength = source.length;
    // Index into source after the last code-unit recombined.
    var sourceIndex = 0;

    var spans = job.spans;
    var nSpans = spans.length;
    // Index into spans after the last span which ends at or before sourceIndex.
    var spanIndex = 0;

    var decorations = job.decorations;
    var nDecorations = decorations.length;
    // Index into decorations after the last decoration which ends at or before
    // sourceIndex.
    var decorationIndex = 0;

    // Remove all zero-length decorations.  The sentinel appended here lets
    // the last real decoration be compared against the end of the source.
    decorations[nDecorations] = sourceLength;
    var decPos, i;
    for (i = decPos = 0; i < nDecorations;) {
      if (decorations[i] !== decorations[i + 2]) {
        decorations[decPos++] = decorations[i++];
        decorations[decPos++] = decorations[i++];
      } else {
        i += 2;
      }
    }
    nDecorations = decPos;

    // Simplify decorations.
    for (i = decPos = 0; i < nDecorations;) {
      var startPos = decorations[i];
      // Conflate all adjacent decorations that use the same style.
      var startDec = decorations[i + 1];
      var runEnd = i + 2;
      while (runEnd + 2 <= nDecorations
             && decorations[runEnd + 1] === startDec) {
        runEnd += 2;
      }
      decorations[decPos++] = startPos;
      decorations[decPos++] = startDec;
      i = runEnd;
    }

    // Truncating also drops the sentinel written above.
    nDecorations = decorations.length = decPos;

    while (spanIndex < nSpans) {
      var spanEnd = spans[spanIndex + 2] || sourceLength;
      var decEnd = decorations[decorationIndex + 2] || sourceLength;

      var end = Math.min(spanEnd, decEnd);

      var textNode = spans[spanIndex + 1];
      var styledText;
      if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
          // Don't introduce spans around empty text nodes.
          && (styledText = source.substring(sourceIndex, end))) {
        // This may seem bizarre, and it is.  Emitting LF on IE causes the
        // code to display with spaces instead of line breaks.
        // Emitting Windows standard issue linebreaks (CRLF) causes a blank
        // space to appear at the beginning of every line but the first.
        // Emitting an old Mac OS 9 line separator makes everything spiffy.
        if (isIE) { styledText = styledText.replace(newlineRe, '\r'); }
        textNode.nodeValue = styledText;
        var document = textNode.ownerDocument;
        var span = document.createElement('SPAN');
        span.className = decorations[decorationIndex + 1];
        var parentNode = textNode.parentNode;
        parentNode.replaceChild(span, textNode);
        span.appendChild(textNode);
        // Split off a text node only when part of this span is still
        // unstyled.  Comparing against end rather than sourceIndex avoids
        // inserting an empty text node when the decoration ends exactly at
        // the span boundary.
        if (end < spanEnd) {
          spans[spanIndex + 1] = textNode
              = document.createTextNode(source.substring(end, spanEnd));
          parentNode.insertBefore(textNode, span.nextSibling);
        }
      }

      sourceIndex = end;

      if (sourceIndex >= spanEnd) {
        spanIndex += 2;
      }
      if (sourceIndex >= decEnd) {
        decorationIndex += 2;
      }
    }
  }
  /** Registry of language handlers, keyed by file extension. */
  var langHandlerRegistry = {};

  /**
   * Registers a language handler under each of the given file extensions.
   * An extension that already has a handler keeps it; in that case a warning
   * is written to the console when one is available.
   *
   * @param {function (Object)} handler a function from source code to a list
   *      of decorations.  Takes a single argument job which describes the
   *      state of the computation.   The single parameter has the form
   *      {@code {
   *        sourceCode: {string} as plain text.
   *        decorations: {Array.<number|string>} an array of style classes
   *                     preceded by the position at which they start in
   *                     job.sourceCode in order.
   *                     The language handler should assign this field.
   *        basePos: {int} the position of source in the larger source chunk.
   *                 All positions in the output decorations array are relative
   *                 to the larger source chunk.
   *      } }
   * @param {Array.<string>} fileExtensions
   */
  function registerLangHandler(handler, fileExtensions) {
    // Walk from the last extension to the first.
    for (var idx = fileExtensions.length - 1; idx >= 0; --idx) {
      var extension = fileExtensions[idx];
      if (langHandlerRegistry.hasOwnProperty(extension)) {
        if (window['console']) {
          console['warn']('cannot override language handler %s', extension);
        }
        continue;
      }
      langHandlerRegistry[extension] = handler;
    }
  }
  /**
   * Looks up the language handler registered for an extension.
   *
   * When the extension is missing or has no registered handler, the source
   * is treated as markup if its first non-whitespace character is a <, and
   * as generic code otherwise.
   *
   * @param {string} extension the language or file extension, may be empty.
   * @param {string} source the plain text used to pick a fallback handler.
   * @return {function (Object)} the handler to apply.
   */
  function langHandlerForExtension(extension, source) {
    if (extension && langHandlerRegistry.hasOwnProperty(extension)) {
      return langHandlerRegistry[extension];
    }
    var fallback = /^\s*</.test(source) ? 'default-markup' : 'default-code';
    return langHandlerRegistry[fallback];
  }
  // Built-in language handlers.  registerLangHandler never overrides an
  // existing entry, so the first registration of an extension wins.

  // Fallback for code whose language is unknown.
  registerLangHandler(decorateSource, ['default-code']);
  // Markup: delegates embedded sections to other handlers via lang-* styles.
  registerLangHandler(
      createSimpleLexer(
          [],
          [
           [PR_PLAIN,       /^[^<?]+/],
           [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
           [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
           // Unescaped content in an unknown language
           ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
           ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
           [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
           ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
           // Unescaped content in javascript.  (Or possibly vbscript).
           ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
           // Contains unescaped stylesheet content
           ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
           ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
          ]),
      ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);
  // The inside of a tag: tag name, attribute names and attribute values.
  registerLangHandler(
      createSimpleLexer(
          [
           [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
           [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
           ],
          [
           [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
           [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
           ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
           [PR_PUNCTUATION,  /^[=<>\/]+/],
           ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
           ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
           ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
           ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
           ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
           ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
           ]),
      ['in.tag']);
  // An unquoted attribute value.
  registerLangHandler(
      createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);
  registerLangHandler(sourceDecorator({
          'keywords': CPP_KEYWORDS,
          'hashComments': true,
          'cStyleComments': true,
          'types': C_TYPES
        }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
  registerLangHandler(sourceDecorator({
          'keywords': 'null,true,false'
        }), ['json']);
  registerLangHandler(sourceDecorator({
          'keywords': CSHARP_KEYWORDS,
          'hashComments': true,
          'cStyleComments': true,
          'verbatimStrings': true,
          'types': C_TYPES
        }), ['cs']);
  registerLangHandler(sourceDecorator({
          'keywords': JAVA_KEYWORDS,
          'cStyleComments': true
        }), ['java']);
  registerLangHandler(sourceDecorator({
          'keywords': SH_KEYWORDS,
          'hashComments': true,
          'multiLineStrings': true
        }), ['bsh', 'csh', 'sh']);
  registerLangHandler(sourceDecorator({
          'keywords': PYTHON_KEYWORDS,
          'hashComments': true,
          'multiLineStrings': true,
          'tripleQuotedStrings': true
        }), ['cv', 'py']);
  registerLangHandler(sourceDecorator({
          'keywords': PERL_KEYWORDS,
          'hashComments': true,
          'multiLineStrings': true,
          'regexLiterals': true
        }), ['perl', 'pl', 'pm']);
  registerLangHandler(sourceDecorator({
          'keywords': RUBY_KEYWORDS,
          'hashComments': true,
          'multiLineStrings': true,
          'regexLiterals': true
        }), ['rb']);
  registerLangHandler(sourceDecorator({
          'keywords': JSCRIPT_KEYWORDS,
          'cStyleComments': true,
          'regexLiterals': true
        }), ['js']);
  registerLangHandler(sourceDecorator({
          'keywords': COFFEE_KEYWORDS,
          'hashComments': 3,  // ### style block comments
          'cStyleComments': true,
          // Spelled like the key sourceDecorator reads; 'tripleQuotedStrings'
          // below still takes precedence when choosing the string pattern.
          'multiLineStrings': true,
          'tripleQuotedStrings': true,
          'regexLiterals': true
        }), ['coffee']);
  // Regular expression literals are styled as strings.
  registerLangHandler(createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
  /**
   * Runs one pretty-printing job: extracts plain text and spans from
   * job.sourceNode, applies the language handler chosen for
   * job.langExtension, and folds the decorations back into the DOM.
   *
   * Errors raised while doing so are not propagated; they are logged to the
   * console when one is available.
   *
   * @param {Object} job reads sourceNode and langExtension; sourceCode,
   *     spans and basePos are filled in here.
   */
  function applyDecorator(job) {
    var requestedLang = job.langExtension;

    try {
      // Extract tags, and convert the source code to plain text.
      var extracted = extractSourceSpans(job.sourceNode);
      /** Plain text. @type {string} */
      var plainText = extracted.sourceCode;
      job.sourceCode = plainText;
      job.spans = extracted.spans;
      job.basePos = 0;

      // Apply the appropriate language handler.
      var handler = langHandlerForExtension(requestedLang, plainText);
      handler(job);

      // Integrate the decorations and tags back into the source code,
      // modifying the sourceNode in place.
      recombineTagsAndDecorations(job);
    } catch (e) {
      if ('console' in window) {
        var report = e;
        if (e && e['stack']) {
          report = e['stack'];
        }
        console['log'](report);
      }
    }
  }
  /**
   * Pretty prints a chunk of code given as HTML and returns the result as
   * HTML.
   *
   * @param sourceCodeHtml {string} The HTML to pretty print.
   * @param opt_langExtension {string} The language name to use.
   *     Typically, a filename extension like 'cpp' or 'java'.
   * @param opt_numberLines {number|boolean} True to number lines,
   *     or the 1-indexed number of the first line in sourceCodeHtml.
   * @return {string} the markup of the decorated code.
   */
  function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
    var scratch = document.createElement('PRE');
    // Assigning innerHTML could cause images to load and onload listeners to
    // fire, e.g. <img onerror="alert(1337)" src="nosuchimage.png">.
    // We assume that the inner HTML is from a trusted source.
    scratch.innerHTML = sourceCodeHtml;
    if (opt_numberLines) {
      numberLines(scratch, opt_numberLines);
    }

    applyDecorator({
      langExtension: opt_langExtension,
      numberLines: opt_numberLines,
      sourceNode: scratch
    });
    return scratch.innerHTML;
  }
  /**
   * Finds all the {@code <pre>}, {@code <code>} and {@code <xmp>} elements in
   * the document whose class includes "prettyprint" and prettifies them.
   *
   * Elements nested inside another such element are skipped.  When
   * window['PR_SHOULD_USE_CONTINUATION'] is truthy the work is done in
   * slices of about 250ms that are chained with setTimeout, so that a large
   * page does not make the browser unresponsive.
   *
   * @param {Function?} opt_whenDone if specified, called when the last entry
   *     has been finished.
   */
  function prettyPrint(opt_whenDone) {
    function byTagName(tn) { return document.getElementsByTagName(tn); }
    // fetch a list of nodes to rewrite
    var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
    var elements = [];
    for (var i = 0; i < codeSegments.length; ++i) {
      for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
        elements.push(codeSegments[i][j]);
      }
    }
    codeSegments = null;

    var clock = Date;
    if (!clock['now']) {
      clock = { 'now': function () { return +(new Date); } };
    }

    // The loop is broken into a series of continuations to make sure that we
    // don't make the browser unresponsive when rewriting a large page.
    var k = 0;
    var prettyPrintingJob;

    var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
    // tagName is upper case in HTML documents and lower case in XHTML ones,
    // so the container check has to ignore case.
    var preCodeXmpRe = /^(?:pre|code|xmp)$/i;

    function doWork() {
      var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                     clock['now']() + 250 /* ms */ :
                     Infinity);
      for (; k < elements.length && clock['now']() < endTime; k++) {
        var cs = elements[k];
        var className = cs.className;
        if (className.indexOf('prettyprint') >= 0) {
          // If the classes includes a language extensions, use it.
          // Language extensions can be specified like
          //     <pre class="prettyprint lang-cpp">
          // the language extension "cpp" is used to find a language handler as
          // passed to PR.registerLangHandler.
          // HTML5 recommends that a language be specified using "language-"
          // as the prefix instead.  Google Code Prettify supports both.
          // http://dev.w3.org/html5/spec-author-view/the-code-element.html
          var langExtension = className.match(langExtensionRe);
          // Support <pre class="prettyprint"><code class="language-c">
          var wrapper;
          if (!langExtension && (wrapper = childContentWrapper(cs))
              && "CODE" === wrapper.tagName) {
            langExtension = wrapper.className.match(langExtensionRe);
          }

          if (langExtension) {
            langExtension = langExtension[1];
          }

          // make sure this is not nested in an already prettified element
          var nested = false;
          for (var p = cs.parentNode; p; p = p.parentNode) {
            if (preCodeXmpRe.test(p.tagName) &&
                p.className && p.className.indexOf('prettyprint') >= 0) {
              nested = true;
              break;
            }
          }
          if (!nested) {
            // Look for a class like linenums or linenums:<n> where <n> is the
            // 1-indexed number of the first line.
            var lineNums = cs.className.match(/\blinenums\b(?::(\d+))?/);
            lineNums = lineNums
                  ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true
                  : false;
            if (lineNums) { numberLines(cs, lineNums); }

            // do the pretty printing
            prettyPrintingJob = {
              langExtension: langExtension,
              sourceNode: cs,
              numberLines: lineNums
            };
            applyDecorator(prettyPrintingJob);
          }
        }
      }
      if (k < elements.length) {
        // finish up in a continuation
        setTimeout(doWork, 250);
      } else if (opt_whenDone) {
        opt_whenDone();
      }
    }

    doWork();
  }
  /**
   * Contains functions for creating and registering new language handlers,
   * together with the style class constants.
   * @type {Object}
   */
  var prNamespace = {};
  prNamespace['createSimpleLexer'] = createSimpleLexer;
  prNamespace['registerLangHandler'] = registerLangHandler;
  prNamespace['sourceDecorator'] = sourceDecorator;
  prNamespace['PR_ATTRIB_NAME'] = PR_ATTRIB_NAME;
  prNamespace['PR_ATTRIB_VALUE'] = PR_ATTRIB_VALUE;
  prNamespace['PR_COMMENT'] = PR_COMMENT;
  prNamespace['PR_DECLARATION'] = PR_DECLARATION;
  prNamespace['PR_KEYWORD'] = PR_KEYWORD;
  prNamespace['PR_LITERAL'] = PR_LITERAL;
  prNamespace['PR_NOCODE'] = PR_NOCODE;
  prNamespace['PR_PLAIN'] = PR_PLAIN;
  prNamespace['PR_PUNCTUATION'] = PR_PUNCTUATION;
  prNamespace['PR_SOURCE'] = PR_SOURCE;
  prNamespace['PR_STRING'] = PR_STRING;
  prNamespace['PR_TAG'] = PR_TAG;
  prNamespace['PR_TYPE'] = PR_TYPE;

  /**
   * Pretty print a chunk of code.
   *
   * @param {string} sourceCodeHtml code as html
   * @return {string} code as html, but prettier
   */
  window['prettyPrintOne'] = prettyPrintOne;
  /**
   * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
   * {@code class=prettyprint} and prettify them.
   *
   * @param {Function?} opt_whenDone if specified, called when the last entry
   *     has been finished.
   */
  window['prettyPrint'] = prettyPrint;
  // Published under the quoted name PR.
  window['PR'] = prNamespace;
  1376. })();