/*!
 * Jade - Lexer
 * Copyright(c) 2010 TJ Holowaychuk <tj@vision-media.ca>
 * MIT Licensed
 */
  6. var utils = require('./utils');
  7. /**
  8. * Initialize `Lexer` with the given `str`.
  9. *
  10. * Options:
  11. *
  12. * - `colons` allow colons for attr delimiters
  13. *
  14. * @param {String} str
  15. * @param {Object} options
  16. * @api private
  17. */
  18. var Lexer = module.exports = function Lexer(str, options) {
  19. options = options || {};
  20. this.input = str.replace(/\r\n|\r/g, '\n');
  21. this.colons = options.colons;
  22. this.deferredTokens = [];
  23. this.lastIndents = 0;
  24. this.lineno = 1;
  25. this.stash = [];
  26. this.indentStack = [];
  27. this.indentRe = null;
  28. this.pipeless = false;
  29. };
  30. /**
  31. * Lexer prototype.
  32. */
  33. Lexer.prototype = {
  34. /**
  35. * Construct a token with the given `type` and `val`.
  36. *
  37. * @param {String} type
  38. * @param {String} val
  39. * @return {Object}
  40. * @api private
  41. */
  42. tok: function(type, val){
  43. return {
  44. type: type
  45. , line: this.lineno
  46. , val: val
  47. }
  48. },
  49. /**
  50. * Consume the given `len` of input.
  51. *
  52. * @param {Number} len
  53. * @api private
  54. */
  55. consume: function(len){
  56. this.input = this.input.substr(len);
  57. },
  58. /**
  59. * Scan for `type` with the given `regexp`.
  60. *
  61. * @param {String} type
  62. * @param {RegExp} regexp
  63. * @return {Object}
  64. * @api private
  65. */
  66. scan: function(regexp, type){
  67. var captures;
  68. if (captures = regexp.exec(this.input)) {
  69. this.consume(captures[0].length);
  70. return this.tok(type, captures[1]);
  71. }
  72. },
  73. /**
  74. * Defer the given `tok`.
  75. *
  76. * @param {Object} tok
  77. * @api private
  78. */
  79. defer: function(tok){
  80. this.deferredTokens.push(tok);
  81. },
  82. /**
  83. * Lookahead `n` tokens.
  84. *
  85. * @param {Number} n
  86. * @return {Object}
  87. * @api private
  88. */
  89. lookahead: function(n){
  90. var fetch = n - this.stash.length;
  91. while (fetch-- > 0) this.stash.push(this.next());
  92. return this.stash[--n];
  93. },
  94. /**
  95. * Return the indexOf `start` / `end` delimiters.
  96. *
  97. * @param {String} start
  98. * @param {String} end
  99. * @return {Number}
  100. * @api private
  101. */
  102. indexOfDelimiters: function(start, end){
  103. var str = this.input
  104. , nstart = 0
  105. , nend = 0
  106. , pos = 0;
  107. for (var i = 0, len = str.length; i < len; ++i) {
  108. if (start == str.charAt(i)) {
  109. ++nstart;
  110. } else if (end == str.charAt(i)) {
  111. if (++nend == nstart) {
  112. pos = i;
  113. break;
  114. }
  115. }
  116. }
  117. return pos;
  118. },
  119. /**
  120. * Stashed token.
  121. */
  122. stashed: function() {
  123. return this.stash.length
  124. && this.stash.shift();
  125. },
  126. /**
  127. * Deferred token.
  128. */
  129. deferred: function() {
  130. return this.deferredTokens.length
  131. && this.deferredTokens.shift();
  132. },
  133. /**
  134. * end-of-source.
  135. */
  136. eos: function() {
  137. if (this.input.length) return;
  138. if (this.indentStack.length) {
  139. this.indentStack.shift();
  140. return this.tok('outdent');
  141. } else {
  142. return this.tok('eos');
  143. }
  144. },
  145. /**
  146. * Blank line.
  147. */
  148. blank: function() {
  149. var captures;
  150. if (captures = /^\n *\n/.exec(this.input)) {
  151. this.consume(captures[0].length - 1);
  152. ++this.lineno;
  153. if (this.pipeless) return this.tok('text', '');
  154. return this.next();
  155. }
  156. },
  157. /**
  158. * Comment.
  159. */
  160. comment: function() {
  161. var captures;
  162. if (captures = /^ *\/\/(-)?([^\n]*)/.exec(this.input)) {
  163. this.consume(captures[0].length);
  164. var tok = this.tok('comment', captures[2]);
  165. tok.buffer = '-' != captures[1];
  166. return tok;
  167. }
  168. },
  169. /**
  170. * Interpolated tag.
  171. */
  172. interpolation: function() {
  173. var captures;
  174. if (captures = /^#\{(.*?)\}/.exec(this.input)) {
  175. this.consume(captures[0].length);
  176. return this.tok('interpolation', captures[1]);
  177. }
  178. },
  179. /**
  180. * Tag.
  181. */
  182. tag: function() {
  183. var captures;
  184. if (captures = /^(\w[-:\w]*)(\/?)/.exec(this.input)) {
  185. this.consume(captures[0].length);
  186. var tok, name = captures[1];
  187. if (':' == name[name.length - 1]) {
  188. name = name.slice(0, -1);
  189. tok = this.tok('tag', name);
  190. this.defer(this.tok(':'));
  191. while (' ' == this.input[0]) this.input = this.input.substr(1);
  192. } else {
  193. tok = this.tok('tag', name);
  194. }
  195. tok.selfClosing = !! captures[2];
  196. return tok;
  197. }
  198. },
  199. /**
  200. * Filter.
  201. */
  202. filter: function() {
  203. return this.scan(/^:(\w+)/, 'filter');
  204. },
  205. /**
  206. * Doctype.
  207. */
  208. doctype: function() {
  209. return this.scan(/^(?:!!!|doctype) *([^\n]+)?/, 'doctype');
  210. },
  211. /**
  212. * Id.
  213. */
  214. id: function() {
  215. return this.scan(/^#([\w-]+)/, 'id');
  216. },
  217. /**
  218. * Class.
  219. */
  220. className: function() {
  221. return this.scan(/^\.([\w-]+)/, 'class');
  222. },
  223. /**
  224. * Text.
  225. */
  226. text: function() {
  227. return this.scan(/^(?:\| ?| ?)?([^\n]+)/, 'text');
  228. },
  229. /**
  230. * Extends.
  231. */
  232. "extends": function() {
  233. return this.scan(/^extends? +([^\n]+)/, 'extends');
  234. },
  235. /**
  236. * Block prepend.
  237. */
  238. prepend: function() {
  239. var captures;
  240. if (captures = /^prepend +([^\n]+)/.exec(this.input)) {
  241. this.consume(captures[0].length);
  242. var mode = 'prepend'
  243. , name = captures[1]
  244. , tok = this.tok('block', name);
  245. tok.mode = mode;
  246. return tok;
  247. }
  248. },
  249. /**
  250. * Block append.
  251. */
  252. append: function() {
  253. var captures;
  254. if (captures = /^append +([^\n]+)/.exec(this.input)) {
  255. this.consume(captures[0].length);
  256. var mode = 'append'
  257. , name = captures[1]
  258. , tok = this.tok('block', name);
  259. tok.mode = mode;
  260. return tok;
  261. }
  262. },
  263. /**
  264. * Block.
  265. */
  266. block: function() {
  267. var captures;
  268. if (captures = /^block\b *(?:(prepend|append) +)?([^\n]*)/.exec(this.input)) {
  269. this.consume(captures[0].length);
  270. var mode = captures[1] || 'replace'
  271. , name = captures[2]
  272. , tok = this.tok('block', name);
  273. tok.mode = mode;
  274. return tok;
  275. }
  276. },
  277. /**
  278. * Yield.
  279. */
  280. yield: function() {
  281. return this.scan(/^yield */, 'yield');
  282. },
  283. /**
  284. * Include.
  285. */
  286. include: function() {
  287. return this.scan(/^include +([^\n]+)/, 'include');
  288. },
  289. /**
  290. * Case.
  291. */
  292. "case": function() {
  293. return this.scan(/^case +([^\n]+)/, 'case');
  294. },
  295. /**
  296. * When.
  297. */
  298. when: function() {
  299. return this.scan(/^when +([^:\n]+)/, 'when');
  300. },
  301. /**
  302. * Default.
  303. */
  304. "default": function() {
  305. return this.scan(/^default */, 'default');
  306. },
  307. /**
  308. * Assignment.
  309. */
  310. assignment: function() {
  311. var captures;
  312. if (captures = /^(\w+) += *([^;\n]+)( *;? *)/.exec(this.input)) {
  313. this.consume(captures[0].length);
  314. var name = captures[1]
  315. , val = captures[2];
  316. return this.tok('code', 'var ' + name + ' = (' + val + ');');
  317. }
  318. },
  319. /**
  320. * Call mixin.
  321. */
  322. call: function(){
  323. var captures;
  324. if (captures = /^\+([-\w]+)/.exec(this.input)) {
  325. this.consume(captures[0].length);
  326. var tok = this.tok('call', captures[1]);
  327. // Check for args (not attributes)
  328. if (captures = /^ *\((.*?)\)/.exec(this.input)) {
  329. if (!/^ *[-\w]+ *=/.test(captures[1])) {
  330. this.consume(captures[0].length);
  331. tok.args = captures[1];
  332. }
  333. }
  334. return tok;
  335. }
  336. },
  337. /**
  338. * Mixin.
  339. */
  340. mixin: function(){
  341. var captures;
  342. if (captures = /^mixin +([-\w]+)(?: *\((.*)\))?/.exec(this.input)) {
  343. this.consume(captures[0].length);
  344. var tok = this.tok('mixin', captures[1]);
  345. tok.args = captures[2];
  346. return tok;
  347. }
  348. },
  349. /**
  350. * Conditional.
  351. */
  352. conditional: function() {
  353. var captures;
  354. if (captures = /^(if|unless|else if|else)\b([^\n]*)/.exec(this.input)) {
  355. this.consume(captures[0].length);
  356. var type = captures[1]
  357. , js = captures[2];
  358. switch (type) {
  359. case 'if': js = 'if (' + js + ')'; break;
  360. case 'unless': js = 'if (!(' + js + '))'; break;
  361. case 'else if': js = 'else if (' + js + ')'; break;
  362. case 'else': js = 'else'; break;
  363. }
  364. return this.tok('code', js);
  365. }
  366. },
  367. /**
  368. * While.
  369. */
  370. "while": function() {
  371. var captures;
  372. if (captures = /^while +([^\n]+)/.exec(this.input)) {
  373. this.consume(captures[0].length);
  374. return this.tok('code', 'while (' + captures[1] + ')');
  375. }
  376. },
  377. /**
  378. * Each.
  379. */
  380. each: function() {
  381. var captures;
  382. if (captures = /^(?:- *)?(?:each|for) +(\w+)(?: *, *(\w+))? * in *([^\n]+)/.exec(this.input)) {
  383. this.consume(captures[0].length);
  384. var tok = this.tok('each', captures[1]);
  385. tok.key = captures[2] || '$index';
  386. tok.code = captures[3];
  387. return tok;
  388. }
  389. },
  390. /**
  391. * Code.
  392. */
  393. code: function() {
  394. var captures;
  395. if (captures = /^(!?=|-)([^\n]+)/.exec(this.input)) {
  396. this.consume(captures[0].length);
  397. var flags = captures[1];
  398. captures[1] = captures[2];
  399. var tok = this.tok('code', captures[1]);
  400. tok.escape = flags.charAt(0) === '=';
  401. tok.buffer = flags.charAt(0) === '=' || flags.charAt(1) === '=';
  402. return tok;
  403. }
  404. },
  405. /**
  406. * Attributes.
  407. */
  408. attrs: function() {
  409. if ('(' == this.input.charAt(0)) {
  410. var index = this.indexOfDelimiters('(', ')')
  411. , str = this.input.substr(1, index-1)
  412. , tok = this.tok('attrs')
  413. , len = str.length
  414. , colons = this.colons
  415. , states = ['key']
  416. , escapedAttr
  417. , key = ''
  418. , val = ''
  419. , quote
  420. , c
  421. , p;
  422. function state(){
  423. return states[states.length - 1];
  424. }
  425. function interpolate(attr) {
  426. return attr.replace(/(\\)?#\{([^}]+)\}/g, function(_, escape, expr){
  427. return escape
  428. ? _
  429. : quote + " + (" + expr + ") + " + quote;
  430. });
  431. }
  432. this.consume(index + 1);
  433. tok.attrs = {};
  434. tok.escaped = {};
  435. function parse(c) {
  436. var real = c;
  437. // TODO: remove when people fix ":"
  438. if (colons && ':' == c) c = '=';
  439. switch (c) {
  440. case ',':
  441. case '\n':
  442. switch (state()) {
  443. case 'expr':
  444. case 'array':
  445. case 'string':
  446. case 'object':
  447. val += c;
  448. break;
  449. default:
  450. states.push('key');
  451. val = val.trim();
  452. key = key.trim();
  453. if ('' == key) return;
  454. key = key.replace(/^['"]|['"]$/g, '').replace('!', '');
  455. tok.escaped[key] = escapedAttr;
  456. tok.attrs[key] = '' == val
  457. ? true
  458. : interpolate(val);
  459. key = val = '';
  460. }
  461. break;
  462. case '=':
  463. switch (state()) {
  464. case 'key char':
  465. key += real;
  466. break;
  467. case 'val':
  468. case 'expr':
  469. case 'array':
  470. case 'string':
  471. case 'object':
  472. val += real;
  473. break;
  474. default:
  475. escapedAttr = '!' != p;
  476. states.push('val');
  477. }
  478. break;
  479. case '(':
  480. if ('val' == state()
  481. || 'expr' == state()) states.push('expr');
  482. val += c;
  483. break;
  484. case ')':
  485. if ('expr' == state()
  486. || 'val' == state()) states.pop();
  487. val += c;
  488. break;
  489. case '{':
  490. if ('val' == state()) states.push('object');
  491. val += c;
  492. break;
  493. case '}':
  494. if ('object' == state()) states.pop();
  495. val += c;
  496. break;
  497. case '[':
  498. if ('val' == state()) states.push('array');
  499. val += c;
  500. break;
  501. case ']':
  502. if ('array' == state()) states.pop();
  503. val += c;
  504. break;
  505. case '"':
  506. case "'":
  507. switch (state()) {
  508. case 'key':
  509. states.push('key char');
  510. break;
  511. case 'key char':
  512. states.pop();
  513. break;
  514. case 'string':
  515. if (c == quote) states.pop();
  516. val += c;
  517. break;
  518. default:
  519. states.push('string');
  520. val += c;
  521. quote = c;
  522. }
  523. break;
  524. case '':
  525. break;
  526. default:
  527. switch (state()) {
  528. case 'key':
  529. case 'key char':
  530. key += c;
  531. break;
  532. default:
  533. val += c;
  534. }
  535. }
  536. p = c;
  537. }
  538. for (var i = 0; i < len; ++i) {
  539. parse(str.charAt(i));
  540. }
  541. parse(',');
  542. if ('/' == this.input.charAt(0)) {
  543. this.consume(1);
  544. tok.selfClosing = true;
  545. }
  546. return tok;
  547. }
  548. },
  549. /**
  550. * Indent | Outdent | Newline.
  551. */
  552. indent: function() {
  553. var captures, re;
  554. // established regexp
  555. if (this.indentRe) {
  556. captures = this.indentRe.exec(this.input);
  557. // determine regexp
  558. } else {
  559. // tabs
  560. re = /^\n(\t*) */;
  561. captures = re.exec(this.input);
  562. // spaces
  563. if (captures && !captures[1].length) {
  564. re = /^\n( *)/;
  565. captures = re.exec(this.input);
  566. }
  567. // established
  568. if (captures && captures[1].length) this.indentRe = re;
  569. }
  570. if (captures) {
  571. var tok
  572. , indents = captures[1].length;
  573. ++this.lineno;
  574. this.consume(indents + 1);
  575. if (' ' == this.input[0] || '\t' == this.input[0]) {
  576. throw new Error('Invalid indentation, you can use tabs or spaces but not both');
  577. }
  578. // blank line
  579. if ('\n' == this.input[0]) return this.tok('newline');
  580. // outdent
  581. if (this.indentStack.length && indents < this.indentStack[0]) {
  582. while (this.indentStack.length && this.indentStack[0] > indents) {
  583. this.stash.push(this.tok('outdent'));
  584. this.indentStack.shift();
  585. }
  586. tok = this.stash.pop();
  587. // indent
  588. } else if (indents && indents != this.indentStack[0]) {
  589. this.indentStack.unshift(indents);
  590. tok = this.tok('indent', indents);
  591. // newline
  592. } else {
  593. tok = this.tok('newline');
  594. }
  595. return tok;
  596. }
  597. },
  598. /**
  599. * Pipe-less text consumed only when
  600. * pipeless is true;
  601. */
  602. pipelessText: function() {
  603. if (this.pipeless) {
  604. if ('\n' == this.input[0]) return;
  605. var i = this.input.indexOf('\n');
  606. if (-1 == i) i = this.input.length;
  607. var str = this.input.substr(0, i);
  608. this.consume(str.length);
  609. return this.tok('text', str);
  610. }
  611. },
  612. /**
  613. * ':'
  614. */
  615. colon: function() {
  616. return this.scan(/^: */, ':');
  617. },
  618. /**
  619. * Return the next token object, or those
  620. * previously stashed by lookahead.
  621. *
  622. * @return {Object}
  623. * @api private
  624. */
  625. advance: function(){
  626. return this.stashed()
  627. || this.next();
  628. },
  629. /**
  630. * Return the next token object.
  631. *
  632. * @return {Object}
  633. * @api private
  634. */
  635. next: function() {
  636. return this.deferred()
  637. || this.blank()
  638. || this.eos()
  639. || this.pipelessText()
  640. || this.yield()
  641. || this.doctype()
  642. || this.interpolation()
  643. || this["case"]()
  644. || this.when()
  645. || this["default"]()
  646. || this["extends"]()
  647. || this.append()
  648. || this.prepend()
  649. || this.block()
  650. || this.include()
  651. || this.mixin()
  652. || this.call()
  653. || this.conditional()
  654. || this.each()
  655. || this["while"]()
  656. || this.assignment()
  657. || this.tag()
  658. || this.filter()
  659. || this.code()
  660. || this.id()
  661. || this.className()
  662. || this.attrs()
  663. || this.indent()
  664. || this.comment()
  665. || this.colon()
  666. || this.text();
  667. }
  668. };