Last active
August 29, 2015 13:57
-
-
Save pborenstein/9378700 to your computer and use it in GitHub Desktop.
node-tokenizer bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Repro script for a node-tokenizer bug: a rule whose regex ends in a
// literal character (e.g. /^{\w+}$/) is found by _getMatchingRule(),
// yet _tokenize()'s shrink-and-retry loop never matches it, so the
// tokenizer emits an unhandled 'error' event ("could not tokenize").
var Tokenizer = require('tokenizer');
var t = new Tokenizer(); // was `t = ...` — an implicit global; declare it

t.addRule(/^{\w+}$/, 'curlytoken'); // rule with literal chars at the ends
t.addRule(Tokenizer.whitespace);
t.addRule(Tokenizer.word);

t.on('token', function (token, type) {
  console.log('%s %s', token, type);
});

// _getMatchingRule finds the rule OK
console.log('someword: ', t._getMatchingRule('someword'));
console.log('489 : ', t._getMatchingRule('489'));
console.log('{token} : ', t._getMatchingRule('{token}'));

// but disect() in _tokenize() means that for non-matches
// it should try shorter and shorter strings, which doesn't
// work for rules with literals at the end of the regex
// https://github.com/Floby/node-tokenizer/blob/master/lib/Tokenizer.js#L61-L64
// NOTE: the unhandled 'error' event below is intentional — it is the bug
// being demonstrated; do not add an 'error' listener here.
t.write('someword 489 {token}');
t.end();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ node tkbug.js
someword: { regex: /^\w+$/, type: 'word' }
489 : { regex: /^\w+$/, type: 'word' }
{token} : { regex: /^{\w+}$/, type: 'curlytoken' }
someword word
 whitespace
489 word
 whitespace
events.js:72
 throw er; // Unhandled 'error' event
 ^
SyntaxError: could not tokenize "{token}"
 at Tokenizer._tokenize (/Users/philip/Joyent/Source/pborenstein/project/node_modules/tokenizer/lib/Tokenizer.js:68:13)
 at Tokenizer._tokenize (/Users/philip/Joyent/Source/pborenstein/project/node_modules/tokenizer/lib/Tokenizer.js:83:12)
 at Tokenizer._tokenize (/Users/philip/Joyent/Source/pborenstein/project/node_modules/tokenizer/lib/Tokenizer.js:83:12)
 at Tokenizer._tokenize (/Users/philip/Joyent/Source/pborenstein/project/node_modules/tokenizer/lib/Tokenizer.js:83:12)
 at Tokenizer._tokenize (/Users/philip/Joyent/Source/pborenstein/project/node_modules/tokenizer/lib/Tokenizer.js:83:12)
 at /Users/philip/Joyent/Source/pborenstein/project/node_modules/tokenizer/lib/Tokenizer.js:32:14
 at process._tickCallback (node.js:415:13)
 at Function.Module.runMain (module.js:499:11)
 at startup (node.js:119:16)
 at node.js:902:3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment