Skip to content

Commit 9fd6098

Browse files
committed
Initial commit of the lexer
1 parent 80dc43e commit 9fd6098

15 files changed

Lines changed: 1124 additions & 0 deletions

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
node_modules
2+
.nyc_output
3+
coverage
4+

package.json

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"name": "sql-templates",
3+
"version": "0.0.1",
4+
"description": "A SQL template engine",
5+
"main": "src/index.js",
6+
"directories": {
7+
"test": "test"
8+
},
9+
"scripts": {
10+
"test": "ava --timeout=5s **/*.test.js",
11+
"test:coverage": "nyc --reporter=html --reporter=text-summary --lines=100 --functions=100 --branches=100 --check-coverage npm test"
12+
},
13+
"author": "Tim Oram <mitmaro@gmail.com>",
14+
"license": "ISC",
15+
"devDependencies": {
16+
"ava": "^0.15.2",
17+
"chalk": "^1.1.3",
18+
"diff": "^2.2.3",
19+
"nyc": "^6.4.4"
20+
},
21+
"nyc": {
22+
"lines": 100,
23+
"functions": 100,
24+
"branches": 100,
25+
"check-coverage": true,
26+
"report-dir": "./coverage"
27+
}
28+
}

src/Lexer.js

Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
'use strict';
2+
3+
const LexerError = require('./error/Lexer');
4+
const {
5+
TOKEN_TYPE_VALUE,
6+
TOKEN_TYPE_STRUCTURE,
7+
TOKEN_TYPE_STATEMENT,
8+
TOKEN_TYPE_BOUNDARY,
9+
TOKEN_TYPE_UNARY_OPERATOR,
10+
TOKEN_TYPE_BINARY_OPERATOR,
11+
12+
TOKEN_VALUE_VARIABLE,
13+
TOKEN_VALUE_STRING,
14+
TOKEN_VALUE_INTEGER,
15+
TOKEN_VALUE_FLOAT,
16+
17+
TOKEN_STRUCTURE_TEXT_LITERAL,
18+
TOKEN_STRUCTURE_EOF,
19+
20+
TOKEN_BOUNDARY_TAG_START,
21+
TOKEN_BOUNDARY_TAG_END,
22+
TOKEN_BOUNDARY_BRACKET_OPEN,
23+
TOKEN_BOUNDARY_BRACKET_CLOSE,
24+
TOKEN_BOUNDARY_STRING_SINGLE,
25+
TOKEN_BOUNDARY_STRING_DOUBLE,
26+
27+
TOKEN_STATEMENT_IF,
28+
TOKEN_STATEMENT_ELIF,
29+
TOKEN_STATEMENT_ELSE,
30+
TOKEN_STATEMENT_FI,
31+
TOKEN_STATEMENT_INCLUDE,
32+
33+
TOKEN_OPERATOR_EQUALS,
34+
TOKEN_OPERATOR_NOT_EQUALS,
35+
TOKEN_OPERATOR_STRICT_EQUALS,
36+
TOKEN_OPERATOR_STRICT_NOT_EQUALS,
37+
TOKEN_OPERATOR_AND,
38+
TOKEN_OPERATOR_OR,
39+
TOKEN_OPERATOR_NOT,
40+
TOKEN_OPERATOR_GREATER_THAN,
41+
TOKEN_OPERATOR_LESS_THAN,
42+
TOKEN_OPERATOR_GREATER_EQUAL_THAN,
43+
TOKEN_OPERATOR_LESS_EQUAL_THAN
44+
} = require('./constants');
45+
46+
// boundary text => boundary token sub type
const boundaryTypeLookup = {
	'{{': TOKEN_BOUNDARY_TAG_START,
	'}}': TOKEN_BOUNDARY_TAG_END,
	'(': TOKEN_BOUNDARY_BRACKET_OPEN,
	')': TOKEN_BOUNDARY_BRACKET_CLOSE,
	'"': TOKEN_BOUNDARY_STRING_DOUBLE,
	"'": TOKEN_BOUNDARY_STRING_SINGLE
};

// statement keyword => statement token sub type
//
// This table is indexed with arbitrary words read from a template, so it is
// created without a prototype; otherwise words such as `toString` or
// `constructor` would resolve to inherited Object members and be mistaken
// for statements.
const statementTypeLookup = Object.assign(Object.create(null), {
	'if': TOKEN_STATEMENT_IF,
	'elif': TOKEN_STATEMENT_ELIF,
	'else': TOKEN_STATEMENT_ELSE,
	'fi': TOKEN_STATEMENT_FI,
	'include': TOKEN_STATEMENT_INCLUDE
});

// binary operator text => operator token sub type
const binaryOperatorTypeLookup = {
	'==': TOKEN_OPERATOR_EQUALS,
	'!=': TOKEN_OPERATOR_NOT_EQUALS,
	'===': TOKEN_OPERATOR_STRICT_EQUALS,
	'!==': TOKEN_OPERATOR_STRICT_NOT_EQUALS,
	'&&': TOKEN_OPERATOR_AND,
	'||': TOKEN_OPERATOR_OR,
	'>': TOKEN_OPERATOR_GREATER_THAN,
	'<': TOKEN_OPERATOR_LESS_THAN,
	'>=': TOKEN_OPERATOR_GREATER_EQUAL_THAN,
	'<=': TOKEN_OPERATOR_LESS_EQUAL_THAN
};

// unary operator text => operator token sub type
const unaryOperatorTypeLookup = {
	'!': TOKEN_OPERATOR_NOT
};

// lexer states
const STATE_TAG = 'TAG';
const STATE_TEXT_LITERAL = 'TEXT';
const STATE_STRING = 'STRING';
const STATE_END_STRING = 'STRING_END';

// Whitespace characters that terminate a word inside a tag. '\r' is listed so
// that templates with CRLF line endings do not leave a trailing carriage
// return attached to the word that precedes the line break.
const generalDelimiters = [
	' ', '\t', '\n', '\r'
];

// Delimiters grouped by their length: delimiters[n] holds every delimiter that
// is n characters long (n = 1..3). Index 0 is unused.
const delimiters = [null, [], [], []];

// build delimiters
for (const delimiter of [].concat(
	generalDelimiters,
	Object.keys(boundaryTypeLookup),
	Object.keys(binaryOperatorTypeLookup),
	Object.keys(unaryOperatorTypeLookup)
)) {
	delimiters[delimiter.length].push(delimiter);
}
100+
101+
/**
 * Splits a template string into a stream of tokens.
 *
 * The lexer is a small state machine: it starts in text-literal mode, switches
 * to tag mode at `{{`, to string mode after a quote inside a tag, and back to
 * text-literal mode at `}}`.
 */
class Lexer {
	/**
	 * @param {string} input The complete template source to tokenize
	 */
	constructor(input) {
		this.state = STATE_TEXT_LITERAL;
		this.pointer = 0;
		this.input = input;
		// index of the last character of the input; the scanners below no
		// longer need it, it is kept so readers of this field keep working
		this._lookAheadLength = this.input.length - 1;
		// quote character that opened the string currently being scanned
		this._stringDelimiter = null;
	}

	/**
	 * Build a token that ends at the current pointer position.
	 *
	 * @param {*} type The token type
	 * @param {*} subType The token sub type
	 * @param {?string} value The text covered by the token
	 * @param {number} startIndex Index in the input where the token starts
	 * @returns {{type: *, subType: *, value: ?string, startIndex: number, endIndex: number}}
	 */
	_createToken(type, subType, value, startIndex) {
		return {
			type,
			subType,
			value,
			startIndex,
			endIndex: this.pointer
		};
	}

	/**
	 * Decide which value sub type a word found inside a tag has.
	 *
	 * @param {string} value A non-empty word
	 * @returns {*} The float, integer or variable value sub type
	 */
	_classifyValue(value) {
		const first = value[0];
		// a leading digit or minus sign marks a numerical value
		if ((first >= '0' && first <= '9') || first === '-') {
			// floats contain a decimal point
			return value.indexOf('.') !== -1 ? TOKEN_VALUE_FLOAT : TOKEN_VALUE_INTEGER;
		}
		return TOKEN_VALUE_VARIABLE;
	}

	// skip all whitespace or until EOF reached
	_skipWhitespace() {
		while (this.pointer < this.input.length && this.input[this.pointer].trim().length === 0) {
			this.pointer++;
		}
	}

	// move the pointer to the next {{ or, when there is none, to EOF
	_scanTextLiteral() {
		const tagStart = this.input.indexOf('{{', this.pointer);
		this.pointer = tagStart === -1 ? this.input.length : tagStart;
	}

	// Move the pointer to the closing string delimiter or, when the string is
	// never closed, to EOF. A backslash escapes the character that follows it,
	// so an escaped delimiter does not close the string while an escaped
	// backslash directly before the delimiter does not keep it open.
	_scanString() {
		while (this.pointer < this.input.length) {
			const character = this.input[this.pointer];
			if (character === this._stringDelimiter) {
				return;
			}
			this.pointer += character === '\\' ? 2 : 1;
		}
		// a trailing backslash may have stepped one past the end of the input
		this.pointer = this.input.length;
	}

	/**
	 * Check whether a delimiter starts at the current pointer position.
	 *
	 * @returns {string|boolean} The longest matching delimiter, else false
	 */
	_isAtDelimiter() {
		// longest first so that `===` wins over `==` and `!=` wins over `!`
		for (let length = 3; length > 0; length--) {
			for (const delimiter of delimiters[length]) {
				if (this.input.startsWith(delimiter, this.pointer)) {
					return delimiter;
				}
			}
		}
		return false;
	}

	// scan up to the next delimiter, never moving beyond EOF
	_scanToNextDelimiter() {
		while (this.pointer < this.input.length && !this._isAtDelimiter()) {
			this.pointer++;
		}
	}

	// scan past the delimiter at the pointer, if there is one
	_scanNextDelimiter() {
		const delimiter = this._isAtDelimiter() || '';
		this.pointer += delimiter.length;
	}

	/**
	 * Lazily tokenize the input.
	 *
	 * Every token has the shape `{type, subType, value, startIndex, endIndex}`
	 * where `endIndex` is exclusive. The final token is always the EOF
	 * structure token, whose value is null.
	 *
	 * @returns {Iterator<Object>} The tokens in input order
	 * @throws {LexerError} If the lexer is in an unknown state
	 */
	* tokens() {
		while (this.pointer < this.input.length) {
			if (this.state === STATE_TEXT_LITERAL) {
				const startIndex = this.pointer;
				this._scanTextLiteral();
				this.state = STATE_TAG;
				const value = this.input.substring(startIndex, this.pointer);
				// nothing to report when a tag directly follows another tag
				if (value.length) {
					yield this._createToken(TOKEN_TYPE_STRUCTURE, TOKEN_STRUCTURE_TEXT_LITERAL, value, startIndex);
				}
			}
			else if (this.state === STATE_TAG || this.state === STATE_END_STRING) {
				this._skipWhitespace();
				// only whitespace was left in the input: there is no further
				// token, so stop instead of scanning beyond EOF
				if (this.pointer >= this.input.length) {
					break;
				}

				const wordStart = this.pointer;
				this._scanToNextDelimiter();
				const word = this.input.substring(wordStart, this.pointer);
				if (word.length) {
					// own-property check: template words such as `toString`
					// must not match members inherited by the lookup table
					if (Object.prototype.hasOwnProperty.call(statementTypeLookup, word)) {
						yield this._createToken(TOKEN_TYPE_STATEMENT, statementTypeLookup[word], word, wordStart);
					}
					else {
						yield this._createToken(TOKEN_TYPE_VALUE, this._classifyValue(word), word, wordStart);
					}
					continue;
				}

				// no word was read, so the pointer is at a delimiter
				const startIndex = this.pointer;
				this._scanNextDelimiter();
				const value = this.input.substring(startIndex, this.pointer);

				const boundaryType = boundaryTypeLookup[value];
				if (boundaryType) {
					if (boundaryType === TOKEN_BOUNDARY_STRING_DOUBLE || boundaryType === TOKEN_BOUNDARY_STRING_SINGLE) {
						this._stringDelimiter = value;
						// a quote directly after a string closes it, any other
						// quote opens a new string
						this.state = this.state === STATE_END_STRING ? STATE_TAG : STATE_STRING;
					}
					else if (boundaryType === TOKEN_BOUNDARY_TAG_END) {
						this.state = STATE_TEXT_LITERAL;
					}
					yield this._createToken(TOKEN_TYPE_BOUNDARY, boundaryType, value, startIndex);
					continue;
				}

				const binaryType = binaryOperatorTypeLookup[value];
				if (binaryType) {
					yield this._createToken(TOKEN_TYPE_BINARY_OPERATOR, binaryType, value, startIndex);
					continue;
				}

				yield this._createToken(TOKEN_TYPE_UNARY_OPERATOR, unaryOperatorTypeLookup[value], value, startIndex);
			}
			else if (this.state === STATE_STRING) {
				const startIndex = this.pointer;
				this._scanString();
				this.state = STATE_END_STRING;
				yield this._createToken(
					TOKEN_TYPE_VALUE,
					TOKEN_VALUE_STRING,
					this.input.substring(startIndex, this.pointer),
					startIndex
				);
			}
			else {
				throw new LexerError('Invalid state incurred');
			}
		}

		yield {
			type: TOKEN_TYPE_STRUCTURE,
			subType: TOKEN_STRUCTURE_EOF,
			value: null,
			startIndex: this.pointer,
			endIndex: this.pointer
		};
	}
}
331+
332+
module.exports = Lexer;

0 commit comments

Comments
 (0)