Commit 16d4361

Initial commit; tutorial finished

12 files changed: 622 additions & 0 deletions

README.md

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
# Teeny Tiny Compiler
This is just my code from following [this](https://web.eecs.utk.edu/~azh/blog/teenytinycompiler1.html) awesome tutorial by Dr. Austin Henley.

I will probably make a few changes or add some stuff just to practice, but this is basically for me to figure out how lexers/parsers/emitters work in a language akin to pseudocode (Python) so that I can make one in Rust later.

__pycache__/emitter.cpython-38.pyc

1.04 KB
Binary file not shown.

__pycache__/lexer.cpython-38.pyc

4.31 KB
Binary file not shown.

__pycache__/parser.cpython-38.pyc

6.64 KB
Binary file not shown.

emitter.py

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
# Emitter object keeps track of the generated code and outputs it.
class Emitter:
    def __init__(self, fullPath):
        self.fullPath = fullPath
        self.header = ""
        self.code = ""

    def emit(self, code):
        self.code += code

    def emitLine(self, code):
        self.code += code + '\n'

    def headerLine(self, code):
        self.header += code + '\n'

    def writeFile(self):
        with open(self.fullPath, 'w') as outputFile:
            outputFile.write(self.header + self.code)
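
A minimal usage sketch (not part of this commit; the file name and emitted C lines are made up) of how a parser ends up driving this class: headerLine() collects the declarations that must precede the body, emit()/emitLine() append statements in order, and writeFile() flushes header then code to disk.

# Hypothetical usage, not in the commit.
emitter = Emitter("out.c")                  # assumed output path
emitter.headerLine("#include <stdio.h>")    # header lines land before the body
emitter.headerLine("int main(void){")
emitter.emitLine("printf(\"hello\\n\");")   # body lines, in emit order
emitter.emitLine("return 0;")
emitter.emitLine("}")
emitter.writeFile()                         # writes header + code together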

example.bs

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
PRINT "How many fibonacci numbers do you want?"
INPUT nums
PRINT ""

LET a = 0
LET b = 1
WHILE nums > 0 REPEAT
    PRINT a
    LET c = a + b
    LET a = b
    LET b = c
    LET nums = nums - 1
ENDWHILE

example.bs-out

16.5 KB
Binary file not shown.

example.bs.c

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
/* Emitted by Teeny Compiler */
#include <stdio.h>
int main(void){
float nums;
float a;
float b;
float c;
printf("How many fibonacci numbers do you want?\n");
if(0 == scanf("%f", &nums)) {
nums = 0;
scanf("%*s");
}
printf("\n");
a = 0;
b = 1;
while(nums>0){
printf("%.2f\n", (float)(a));
c = a+b;
a = b;
b = c;
nums = nums-1;
}
return 0;
}
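
For reference, a run of the compiled binary behaves like this (the input 5 is hypothetical; the output follows directly from the loop above, which prints each fibonacci value with %.2f):

How many fibonacci numbers do you want?
5

0.00
1.00
1.00
2.00
3.00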

lexer.py

Lines changed: 198 additions & 0 deletions
@@ -0,0 +1,198 @@
import enum
import sys

class LexingError(enum.Enum):
    UNKNOWN_TOKEN = 1
    ILLEGAL_STRING_CHAR = 2
    ILLEGAL_NUMBER_CHAR = 3

class TokenType(enum.Enum):
    EOF = -1
    NEWLINE = 0
    NUMBER = 1
    IDENT = 2
    STRING = 3
    # Keywords.
    LABEL = 101
    GOTO = 102
    PRINT = 103
    INPUT = 104
    LET = 105
    IF = 106
    THEN = 107
    ENDIF = 108
    WHILE = 109
    REPEAT = 110
    ENDWHILE = 111
    FUNC = 112
    RETURN = 113
    # Operators.
    EQ = 201
    PLUS = 202
    MINUS = 203
    ASTERISK = 204
    SLASH = 205
    EQEQ = 206
    NOTEQ = 207
    LT = 208
    LTEQ = 209
    GT = 210
    GTEQ = 211
    LOGNOT = 212


class Token:
    def __init__(self, tokenText, tokenKind):
        self.text = tokenText
        self.kind = tokenKind

    @staticmethod
    def checkIfKeyword(tokenText):
        for kind in TokenType:
            # Relies on all keyword enum values being 1XX.
            if kind.name == tokenText and kind.value >= 100 and kind.value < 200:
                return kind
        return None

class Lexer:
    def __init__(self, _input):
        self.source = _input + '\n'  # Newline simplifies lexing the last statement
        self.curChar = ''            # Current char in the input
        self.curPos = -1             # Current position in the string
        self.nextChar()

    # Process next character in input
    def nextChar(self):
        self.curPos += 1
        if self.curPos >= len(self.source):
            self.curChar = '\0'  # null terminator EOF char
        else:
            self.curChar = self.source[self.curPos]

    # Return the next character in a look-ahead
    def peek(self):
        if self.curPos + 1 >= len(self.source):
            return '\0'
        else:
            return self.source[self.curPos + 1]

    # Crash if a token is invalid.
    def abort(self, error):
        message = "Unknown error (0x0)."
        if error == LexingError.UNKNOWN_TOKEN:
            message = f"Unknown token error (0x1) at position {self.curPos}: '{self.curChar}'."
        elif error == LexingError.ILLEGAL_STRING_CHAR:
            message = f"Illegal character in string (0x2) at position {self.curPos}: '{self.curChar}'."
        elif error == LexingError.ILLEGAL_NUMBER_CHAR:
            message = f"Illegal character in number (0x3) at position {self.curPos}: '{self.curChar}'."

        sys.exit("LEXING ERROR:\n" + message)

    # Skip whitespace (except newlines, which are not ignored and indicate end-of-statement)
    def skipWhitespace(self):
        while self.curChar == ' ' or self.curChar == '\t' or self.curChar == '\r':
            self.nextChar()

    # Skip comments (delimited by #)
    def skipComment(self):
        if self.curChar == '#':
            while self.curChar != '\n':
                self.nextChar()

    # Return the next token
    def getToken(self):
        self.skipWhitespace()
        self.skipComment()
        token = None

        if self.curChar == '+':
            token = Token(self.curChar, TokenType.PLUS)
        elif self.curChar == '-':
            token = Token(self.curChar, TokenType.MINUS)
        elif self.curChar == '*':
            token = Token(self.curChar, TokenType.ASTERISK)
        elif self.curChar == '/':
            token = Token(self.curChar, TokenType.SLASH)
        elif self.curChar == '\n':
            token = Token(self.curChar, TokenType.NEWLINE)
        elif self.curChar == '\0':
            token = Token(self.curChar, TokenType.EOF)
        elif self.curChar == '=':
            # Multiple tokens possible
            if self.peek() == '=':
                lastChar = self.curChar
                self.nextChar()
                token = Token(lastChar + self.curChar, TokenType.EQEQ)
            else:
                token = Token(self.curChar, TokenType.EQ)
        elif self.curChar == '>':
            if self.peek() == '=':
                lastChar = self.curChar
                self.nextChar()
                token = Token(lastChar + self.curChar, TokenType.GTEQ)
            else:
                token = Token(self.curChar, TokenType.GT)
        elif self.curChar == '<':
            if self.peek() == '=':
                lastChar = self.curChar
                self.nextChar()
                token = Token(lastChar + self.curChar, TokenType.LTEQ)
            else:
                token = Token(self.curChar, TokenType.LT)
        elif self.curChar == '!':
            if self.peek() == '=':
                lastChar = self.curChar
                self.nextChar()
                token = Token(lastChar + self.curChar, TokenType.NOTEQ)
            else:
                token = Token(self.curChar, TokenType.LOGNOT)
        elif self.curChar == '\"':
            # Get characters between quotations.
            self.nextChar()
            startPos = self.curPos

            while self.curChar != '\"':
                # Don't allow special characters in the string. No escape characters, newlines, tabs, or %.
                # We will be using C's printf on this string.
                if self.curChar == '\r' or self.curChar == '\n' or self.curChar == '\t' or self.curChar == '\\' or self.curChar == '%':
                    self.abort(LexingError.ILLEGAL_STRING_CHAR)
                self.nextChar()

            tokText = self.source[startPos : self.curPos]  # Get the substring.
            token = Token(tokText, TokenType.STRING)

        elif self.curChar.isdigit():
            # Leading character is a digit, so this must be a number.
            # Get all consecutive digits and the decimal if there is one.
            startPos = self.curPos
            while self.peek().isdigit():
                self.nextChar()
            if self.peek() == '.':  # Decimal!
                self.nextChar()

                # Must have at least one digit after the decimal.
                if not self.peek().isdigit():
                    self.abort(LexingError.ILLEGAL_NUMBER_CHAR)
                while self.peek().isdigit():
                    self.nextChar()

            tokText = self.source[startPos : self.curPos + 1]  # Get the substring.
            token = Token(tokText, TokenType.NUMBER)

        elif self.curChar.isalpha():
            # Leading character is a letter, so this must be an identifier or a keyword.
            # Get all consecutive alphanumeric characters.
            startPos = self.curPos
            while self.peek().isalnum():
                self.nextChar()

            # Check if the token is in the list of keywords.
            tokText = self.source[startPos : self.curPos + 1]  # Get the substring.
            keyword = Token.checkIfKeyword(tokText)
            if keyword is None:  # Identifier
                token = Token(tokText, TokenType.IDENT)
            else:  # Keyword
                token = Token(tokText, keyword)

        else:
            # Unknown token!
            self.abort(LexingError.UNKNOWN_TOKEN)

        self.nextChar()
        return token
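
A quick driver sketch (hypothetical, not a file in this commit) showing the intended calling pattern: loop on getToken() until EOF comes back.

# Hypothetical driver, not part of the commit.
from lexer import Lexer, TokenType

lexer = Lexer("LET foobar = 123")
token = lexer.getToken()
while token.kind != TokenType.EOF:
    print(token.kind)  # LET, IDENT, EQ, NUMBER, NEWLINE, ...
    token = lexer.getToken()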

maketiny.sh

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
#!/bin/bash
# Author: Stephen Marz

PYTHON="python3"
COMPILER="tiny.py"
CC="gcc"

function comp {
    BN=$(basename -s .teeny "$1")
    TTOUTPUT=$(${PYTHON} ${COMPILER} "$1" 2>&1)
    if [ $? -ne 0 ]; then
        echo "${TTOUTPUT}"
    else
        mv out.c "${BN}.c"
        CCOUTPUT=$(${CC} -o "${BN}-out" "${BN}.c")
        if [ $? -ne 0 ]; then
            echo "${CCOUTPUT}"
        else
            echo "${TTOUTPUT}"
        fi
    fi
}

if [ $# -eq 0 ]; then
    for i in examples/*.teeny; do
        comp "$i"
    done
else
    comp "$1"
fi
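
Usage is either a single file or everything under examples/. Note the script assumes a .teeny extension and an out.c output from tiny.py, while the example committed here uses .bs, so one or the other may need adjusting; the file name below is made up.

./maketiny.sh fibonacci.teeny   # compile one program to fibonacci.c and fibonacci-out
./maketiny.sh                   # compile every examples/*.teeny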
