Commit 91340f9

a tokenizer without 'type' and 'loc'

1 parent 8eac198

File tree: 5 files changed, +237 −7 lines

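The commit title names the design change: each token is now a lightweight tuple of a value plus a packed Uint16Array, instead of an object carrying 'type' and 'loc' properties. The actual declarations in src/common are not part of this diff, so the following sketch of what Token and TokenTypes might look like is an assumption inferred from how the tokenizer constructs tokens below:

// Hypothetical sketch of the declarations imported from './common'.
// tokenizer.ts below emits Uint16Array.of(type, 0, 0, 0, 0), so the
// four zeroed slots (presumably start/end line and column) are guesses.
export enum TokenTypes {
    LiteralString = 0
    // ...other kinds are not visible in this commit
}

// [value, packed metadata: token type plus four position slots]
export type Token = [string | number | boolean, Uint16Array];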

.vscode/launch.json
Lines changed: 15 additions & 0 deletions

@@ -0,0 +1,15 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "chrome",
+            "request": "launch",
+            "name": "Launch Chrome against localhost",
+            "url": "http://localhost:10001",
+            "webRoot": "${workspaceFolder}"
+        }
+    ]
+}

index.html
Lines changed: 19 additions & 1 deletion

@@ -29,8 +29,9 @@
     <div class="container">
         <h4>JSPython development console</h4>
         <div id="editor">a = 1
-b = a+2*3
+b = a+2*3
         </div>
+        <button onclick="tokenize()">Tokenize</button>
         <button onclick="parse()">Parse</button>
         <button onclick="runInterpreter()">Run</button>
         <textarea id="result"></textarea>
@@ -42,6 +43,23 @@ <h4>JSPython development console</h4>
         editor.session.setMode("ace/mode/python");

         const jsPython = jspython.jsPython;
+        function tokenize() {
+            tokenizer = (s) => console.log(`tokens => ${s}`, jsPython().tokenize(s))
+
+            tokenizer("x='hello' + ' ' + 'world'")
+
+            const scripts = editor.getValue();
+            try {
+                const result = jsPython()
+                    .tokenize(scripts).map(t => t[0]);
+
+                document.getElementById('result').value = typeof result === 'object' ? JSON.stringify(result, null, '\t') : result
+                console.log('Result => ', result);
+            } catch (err) {
+                document.getElementById('result').value = err;
+                console.error(err);
+            }
+        }

         async function parse() {

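To see what the new Tokenize button produces, here is the same call in isolation. This is an illustration only; it assumes the page's global jsPython() factory, and the expected values follow from tokenizer.spec.ts below:

// Values-only view of the token stream, as the button handler builds it.
const values = jsPython().tokenize("x='hello' + ' ' + 'world'").map(t => t[0]);
console.log(values); // expected: ['x', '=', 'hello', '+', ' ', '+', 'world']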
src/interpreter.ts
Lines changed: 6 additions & 1 deletion

@@ -1,4 +1,4 @@
-import { Ast } from './common';
+import { Ast, Token } from './common';
 import { Evaluator } from './evaluator';
 import { Parser } from './parser';
 import { Tokenizer } from './tokenizer';
@@ -17,6 +17,11 @@ export class Interpreter {
         return new Interpreter();
     }

+    tokenize(script: string): Token[] {
+        const tokenizer = new Tokenizer();
+        return tokenizer.tokenize(script);
+    }
+
     parse(script: string): Ast {
         const tokenizer = new Tokenizer();
         const parser = new Parser();
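The new tokenize() gives the interpreter facade the same shape as parse(): construct a Tokenizer internally and return its raw output. A minimal usage sketch, assuming the static create() factory shown in the context lines above:

import { Interpreter } from './interpreter';

// Tokenize through the public facade rather than
// instantiating the Tokenizer directly.
const tokens = Interpreter.create().tokenize('a + b + 55');
console.log(tokens.length); // 5, per the first case in tokenizer.spec.ts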

src/tokenizer/tokenizer.spec.ts
Lines changed: 93 additions & 0 deletions

@@ -0,0 +1,93 @@
+import { Tokenizer } from "./tokenizer";
+
+describe('Tokenizer => ', () => {
+
+
+    beforeEach(() => {});
+
+    it('a + b + 55', async () => {
+        let tokens = new Tokenizer().tokenize("a + b + 55")
+        expect(tokens.length).toBe(5);
+        tokens = new Tokenizer().tokenize("a+b+55")
+        expect(tokens.length).toBe(5);
+    });
+
+    it('s = 255 + 23 * 45', async () => {
+        let tokens = new Tokenizer().tokenize("s = 255 + 23 * 45")
+        expect(tokens.length).toBe(7);
+        tokens = new Tokenizer().tokenize("s =255+23*45")
+        expect(tokens.length).toBe(7);
+    });
+
+    it('s=(255 + 23) * 45', async () => {
+        let tokens = new Tokenizer().tokenize("s = (255 + 23 ) * 45")
+        expect(tokens.length).toBe(9);
+        tokens = new Tokenizer().tokenize("s=(255 + 23) * 45")
+        expect(tokens.length).toBe(9);
+        tokens = new Tokenizer().tokenize("s=(255 \n +\n 23) \n * 45")
+        expect(tokens.length).toBe(9);
+    });
+
+
+    it('if someVar == 20/40:\n someVar = 55', async () => {
+        let tokens = new Tokenizer().tokenize("if someVar == 20/40:\n someVar = 55")
+        expect(tokens.length).toBe(10);
+        tokens = new Tokenizer().tokenize("if someVar== 20/40:\n someVar=55")
+        expect(tokens.length).toBe(10);
+        tokens = new Tokenizer().tokenize("if someVar==20/40:\n someVar= 55")
+        expect(tokens.length).toBe(10);
+    });
+
+    it('x="test1"', async () => {
+        let tokens = new Tokenizer().tokenize('x="test1"')
+        expect(tokens.length).toBe(3);
+        expect(tokens[2][0]).toBe('test1');
+        tokens = new Tokenizer().tokenize('x ="test1" ')
+        expect(tokens.length).toBe(3);
+        expect(tokens[2][0]).toBe('test1');
+        tokens = new Tokenizer().tokenize('x="test1" ')
+        expect(tokens.length).toBe(3);
+        expect(tokens[2][0]).toBe('test1');
+    });
+
+    it('x="hello" + " " + "world"', async () => {
+        let tokens = new Tokenizer().tokenize('x="hello"+" "+"world"')
+        expect(tokens.length).toBe(7);
+        expect(tokens[2][0]).toBe('hello');
+        expect(tokens[4][0]).toBe(' ');
+        expect(tokens[6][0]).toBe('world');
+
+        tokens = new Tokenizer().tokenize('x="hello" + " "+"world"')
+        expect(tokens.length).toBe(7);
+        expect(tokens[2][0]).toBe('hello');
+        expect(tokens[4][0]).toBe(' ');
+        expect(tokens[5][0]).toBe('+');
+        expect(tokens[6][0]).toBe('world');
+        tokens = new Tokenizer().tokenize("x='hello' + ' ' + 'world'")
+        expect(tokens.length).toBe(7);
+        expect(tokens[2][0]).toBe('hello');
+        expect(tokens[4][0]).toBe(' ');
+        expect(tokens[6][0]).toBe('world');
+    });
+
+    it('x=""', async () => {
+        let tokens = new Tokenizer().tokenize('x=""')
+        expect(tokens.length).toBe(3);
+        expect(tokens[2][0]).toBe('');
+    });
+
+    it('x="" # this is comment', async () => {
+        let tokens = new Tokenizer().tokenize('x="" # this is comment')
+        expect(tokens.length).toBe(4);
+        expect(tokens[3][0]).toBe(' this is comment');
+    });
+
+    it('x= # this is comment \n 5+6', async () => {
+        let tokens = new Tokenizer().tokenize('x= # this is comment \n 5+6')
+        expect(tokens.length).toBe(6);
+        expect(tokens[4][0]).toBe('+');
+    });
+
+
+
+});
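Every assertion reads tokens[i][0]; the tests pin down the tuple layout without ever touching a 'type' or 'loc' property. A small illustration of that access pattern, with the metadata slot shown for contrast:

import { Tokenizer } from "./tokenizer";

// Slot 0 holds the value; slot 1 is the packed Uint16Array.
const [value, meta] = new Tokenizer().tokenize('x="test1"')[2];
console.log(value);   // 'test1'
console.log(meta[0]); // the numeric token type (always LiteralString while recognizeToken is a stub)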

src/tokenizer/tokenizer.ts
Lines changed: 104 additions & 5 deletions

@@ -1,13 +1,112 @@
-import { Token } from "../common";
+import { Token, TokenTypes } from "../common";
 
-export class Tokenizer
-{
+const SeparatorsMap: Record<string, string[]> = {
+    '=': ['=', '==', '=>'],
+
+    '+': ['+', '++', '+='],
+    '-': ['-', '--', '-='],
+    '*': ['*', '**', '*='],
+    '/': ['/', '//', '/='],
+
+    '.': ['.'],
+    '?': ['?'],
+    '!': ['!='],
+    ':': [':'],
+    ',': [','],
+
+    '>': ['>', '>='],
+    '<': ['<', '<=', '<>'],
+
+    '(': ['('],
+    ')': [')'],
+    '{': ['{'],
+    '}': ['}'],
+    '[': ['['],
+    ']': [']'],
+};
+
+export class Tokenizer {
+    private recognizeToken(tokenText: string): { value: string | number | boolean, type: TokenTypes } {
+        return {
+            value: tokenText,
+            type: TokenTypes.LiteralString
+        }
+
+    }
+    private processToken(strToken: string, tokens: Token[], allowEmptyString = false): string {
+        // ignore empty tokens
+        if (!strToken.length && !allowEmptyString) return "";
+
+        var token = this.recognizeToken(strToken);
+        tokens.push([token.value, Uint16Array.of(token.type as number, 0, 0, 0, 0)] as Token)
+        return "";
+    }
     /**
      * Splits script code into tokens
      * @param script A jsPython text
      */
-    tokenize(script: string): Token[] {
-        return [];
+    tokenize(script: string): Token[] {
+        if (!script || !script.length) { return []; }
+
+        let cursor = 0;
+        const tokens: Token[] = [];
+        let tokenText = "";
+        let currentLine = 1;
+        let currentColumn = 1;
+
+        do {
+            let symbol = script[cursor]
+            currentColumn++;
+            if (symbol == '\n') {
+                currentLine++;
+                currentColumn = 1
+                continue;
+            } else if (symbol == ' ' && tokenText.length !== 0) {
+                tokenText = this.processToken(tokenText, tokens);
+                continue;
+            } else if (SeparatorsMap[symbol] !== undefined) {
+                tokenText = this.processToken(tokenText, tokens);
+                tokenText = symbol;
+
+                const sepsMap = SeparatorsMap[symbol];
+
+                if (sepsMap.length > 1) {
+                    // process longer operators
+                    while (sepsMap.includes(tokenText + script[cursor + 1])) {
+                        tokenText += script[++cursor];
+                    }
+                }
+                tokenText = this.processToken(tokenText, tokens);
+
+            } else if (symbol === '#') {
+
+                while (script[++cursor] !== '\n') {
+                    tokenText += script[cursor];
+                    if (cursor + 1 >= script.length) break;
+                }
+                tokenText = this.processToken(tokenText, tokens, true);
+
+            } else if (symbol === '"' || symbol === "'") {
+                // remember whether it is a single or a double quote
+                const q = symbol;
+                // we are not expecting a token to be added here;
+                // it should pass a fault to the parser
+                tokenText = this.processToken(tokenText, tokens);
+
+                while (script[++cursor] !== q) {
+                    tokenText += script[cursor];
+                    if (cursor + 1 >= script.length) break;
+                }
+                tokenText = this.processToken(tokenText, tokens, true);
+            } else if (symbol != ' ') {
+                tokenText += symbol;
+            }
+        }
+        while (++cursor < script.length)
+
+        this.processToken(tokenText, tokens);
+
+        return tokens;
     }
 
 }
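The subtlest part of tokenize() is the operator lookahead: starting from a one-character separator, it keeps absorbing the next character while the longer string is still in that separator's candidate list, which is how =, == and => come out as single tokens in one pass. A standalone sketch of just that loop, reusing the '=' entry from SeparatorsMap (readOperator is a hypothetical helper for illustration, not part of the commit):

// Greedy longest-match over a candidate list, as in tokenize().
const candidates = ['=', '==', '=>'];

function readOperator(script: string, start: number): string {
    let op = script[start];
    let cursor = start;
    // extend while the next character still forms a known operator;
    // past the end, script[cursor + 1] is undefined and the match fails
    while (candidates.includes(op + script[cursor + 1])) {
        op += script[++cursor];
    }
    return op;
}

console.log(readOperator('a == b', 2)); // '=='
console.log(readOperator('x => y', 2)); // '=>'
console.log(readOperator('x = y', 2));  // '='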
