Elsklivet
diff --git a/‎README.md
Lines changed: 4 additions & 0 deletions b/‎README.md
Lines changed: 4 additions & 0 deletions
diff --git a/‎__pycache__/emitter.cpython-38.pyc
1.04 KB b/‎__pycache__/emitter.cpython-38.pyc
1.04 KB
diff --git a/‎__pycache__/lexer.cpython-38.pyc
4.31 KB b/‎__pycache__/lexer.cpython-38.pyc
4.31 KB
diff --git a/‎__pycache__/parser.cpython-38.pyc
6.64 KB b/‎__pycache__/parser.cpython-38.pyc
6.64 KB
diff --git a/‎emitter.py
Lines changed: 21 additions & 0 deletions b/‎emitter.py
Lines changed: 21 additions & 0 deletions
diff --git a/‎example.bs
Lines changed: 13 additions & 0 deletions b/‎example.bs
Lines changed: 13 additions & 0 deletions
diff --git a/‎example.bs-out
16.5 KB b/‎example.bs-out
16.5 KB
diff --git a/‎example.bs.c
Lines changed: 24 additions & 0 deletions b/‎example.bs.c
Lines changed: 24 additions & 0 deletions
diff --git a/‎lexer.py
Lines changed: 198 additions & 0 deletions b/‎lexer.py
Lines changed: 198 additions & 0 deletions
diff --git a/‎maketiny.sh
Lines changed: 29 additions & 0 deletions b/‎maketiny.sh
Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,4 @@
+# Teeny Tiny Compiler
+This is my code from following the awesome [Teeny Tiny compiler tutorial](https://web.eecs.utk.edu/~azh/blog/teenytinycompiler1.html) by Dr. Austin Henley.
+
+I will probably make a few changes and additions for practice, but the main goal is to figure out how lexers, parsers, and emitters work in a language akin to pseudocode (Python), so that I can write one in Rust later.
@@ -0,0 +1,21 @@
+
+
+# Emitter object keeps track of the generated code and outputs it.
+class Emitter:
+    def __init__(self, fullPath):
+        self.fullPath = fullPath
+        self.header = ""
+        self.code = ""
+
+    def emit(self, code):
+        self.code += code
+
+    def emitLine(self, code):
+        self.code += code + '\n'
+
+    def headerLine(self, code):
+        self.header += code + '\n'
+
+    def writeFile(self):
+        with open(self.fullPath, 'w') as outputFile:
+            outputFile.write(self.header + self.code)
@@ -0,0 +1,13 @@
+PRINT "How many fibonacci numbers do you want?"  # Prompt the user.
+INPUT nums  # Read how many terms to print.
+PRINT ""  # Emit an empty line after the prompt.
+
+LET a = 0  # Current term.
+LET b = 1  # Next term.
+WHILE nums > 0 REPEAT  # One iteration per requested term.
+    PRINT a
+    LET c = a + b  # c temporarily holds the term after b.
+    LET a = b
+    LET b = c
+    LET nums = nums - 1  # Count down the remaining terms.
+ENDWHILE
@@ -0,0 +1,24 @@
+/* Emitted by Teeny Compiler */
+#include <stdio.h>
+/* Generated program: prompts for a count, then prints that many terms of
+   the Fibonacci sequence, one per line. */
+int main(void){
+/* All variables are declared up front as float. */
+float nums;
+float a;
+float b;
+float c;
+printf("How many fibonacci numbers do you want?\n");
+/* scanf returns 0 when the input cannot be converted to a float. In that
+   case fall back to 0 and discard the offending token with %*s so it does
+   not stay in stdin. */
+if(0 == scanf("%f", &nums)) {
+nums = 0;
+scanf("%*s");
+}
+printf("\n");
+a = 0;
+b = 1;
+/* Each pass prints the current term, advances the pair (a, b) and
+   decrements the remaining count. */
+while(nums>0){
+/* Values are printed with two decimal places. */
+printf("%.2f\n", (float)(a));
+c = a+b;
+a = b;
+b = c;
+nums = nums-1;
+}
+	return 0;
+}
@@ -0,0 +1,198 @@
+import enum
+import sys
+
+class LexingError(enum.Enum):
+    """Error codes that can be passed to ``Lexer.abort``."""
+
+    # The lexer met a character that does not start any known token.
+    UNKNOWN_TOKEN = 1
+
+class TokenType(enum.Enum):
+    """Every kind of token the lexer can produce.
+
+    The numeric ranges are significant: ``Token.checkIfKeyword`` treats a
+    member as a keyword only when its value is in the 100-199 range, so new
+    keywords must be given 1XX values.
+    """
+
+    # Structural tokens and literals.
+    EOF = -1
+    NEWLINE = 0
+    NUMBER = 1
+    IDENT = 2
+    STRING = 3
+    # Keywords.
+    LABEL = 101
+    GOTO = 102
+    PRINT = 103
+    INPUT = 104
+    LET = 105
+    IF = 106
+    THEN = 107
+    ENDIF = 108
+    WHILE = 109
+    REPEAT = 110
+    ENDWHILE = 111
+    FUNC = 112
+    RETURN = 113
+    # Operators.
+    EQ = 201  
+    PLUS = 202
+    MINUS = 203
+    ASTERISK = 204
+    SLASH = 205
+    EQEQ = 206
+    NOTEQ = 207
+    LT = 208
+    LTEQ = 209
+    GT = 210
+    GTEQ = 211
+    LOGNOT = 212
+
+
+class Token:
+    def __init__(self, tokenText, tokenKind):
+        self.text = tokenText
+        self.kind = tokenKind
+
+    @staticmethod
+    def checkIfKeyword(tokenText):
+        for kind in TokenType:
+            # Relies on all keyword enum values being 1XX.
+            if kind.name == tokenText and kind.value >= 100 and kind.value < 200:
+                return kind
+        return None
+
+class Lexer:
+    def __init__(self, _input):
+        self.source = _input + '\n' # Newline simplifies last statement
+        self.curChar = ''           # Current char in the input
+        self.curPos = -1            # Current position in the string
+        self.nextChar()
+
+
+    # Process next character in input
+    def nextChar(self):
+        self.curPos += 1
+        if self.curPos >= len(self.source):
+            self.curChar = '\0' # null terminator EOF char
+        else:
+            self.curChar = self.source[self.curPos]
+
+    # Return the next character in a look-ahead
+    def peek(self):
+        if self.curPos + 1 >= len(self.source):
+            return '\0'
+        else:
+            return self.source[self.curPos + 1]
+
+    # Crash if a token is invalid.
+    def abort(self, error):
+        message = f"Unknown error. 0x{0:x}"
+        if error == LexingError.UNKNOWN_TOKEN:
+            message = f"Unknown token error (0x1) at position {self.curPos}: '{self.curChar}'."
+
+        sys.exit("LEXING ERROR:\n"+message)
+
+    # Skip whitespace (except newlines, which are not ignored and indicate end-of-statement)
+    def skipWhitespace(self):
+        while self.curChar == ' ' or self.curChar == '\t' or self.curChar == '\r':
+            self.nextChar()
+
+    # Skip comments (delimited by #)
+    def skipComment(self):
+        if self.curChar == '#':
+            while self.curChar != '\n':
+                self.nextChar()
+
+    # Return the next token
+    def getToken(self):
+        self.skipWhitespace()
+        self.skipComment()
+        token = None
+
+        if self.curChar == '+':
+            token = Token(self.curChar, TokenType.PLUS)
+        elif self.curChar == '-':
+            token = Token(self.curChar, TokenType.MINUS)
+        elif self.curChar == '*':
+            token = Token(self.curChar, TokenType.ASTERISK)
+        elif self.curChar == '/':
+            token = Token(self.curChar, TokenType.SLASH)
+        elif self.curChar == '\n':
+            token = Token(self.curChar, TokenType.NEWLINE)
+        elif self.curChar == '\0':
+            token = Token(self.curChar, TokenType.EOF)
+        elif self.curChar == '=':
+            # Multiple tokens possible
+            if self.peek() == '=':
+                lastChar = self.curChar
+                self.nextChar()
+                token = Token(lastChar + self.curChar, TokenType.EQEQ)
+            else:
+                token = Token(self.curChar, TokenType.EQ)
+        elif self.curChar == '>':
+            if self.peek() == '=':
+                lastChar = self.curChar
+                self.nextChar()
+                token = Token(lastChar + self.curChar, TokenType.GTEQ)
+            else:
+                token = Token(self.curChar, TokenType.GT)
+        elif self.curChar == '<':
+            if self.peek() == '=':
+                lastChar = self.curChar
+                self.nextChar()
+                token = Token(lastChar + self.curChar, TokenType.LTEQ)
+            else:
+                token = Token(self.curChar, TokenType.LT)
+        elif self.curChar == '!':
+            if self.peek() == '=':
+                lastChar = self.curChar
+                self.nextChar()
+                token = Token(lastChar + self.curChar, TokenType.NOTEQ)
+            else:
+                token = Token(self.curChar, TokenType.LOGNOT)
+        elif self.curChar == '\"':
+            # Get characters between quotations.
+            self.nextChar()
+            startPos = self.curPos
+
+            while self.curChar != '\"':
+                # Don't allow special characters in the string. No escape characters, newlines, tabs, or %.
+                # We will be using C's printf on this string.
+                if self.curChar == '\r' or self.curChar == '\n' or self.curChar == '\t' or self.curChar == '\\' or self.curChar == '%':
+                    self.abort("Illegal character in string.")
+                self.nextChar()
+
+            tokText = self.source[startPos : self.curPos] # Get the substring.
+            token = Token(tokText, TokenType.STRING)
+
+        elif self.curChar.isdigit():
+            # Leading character is a digit, so this must be a number.
+            # Get all consecutive digits and decimal if there is one.
+            startPos = self.curPos
+            while self.peek().isdigit():
+                self.nextChar()
+            if self.peek() == '.': # Decimal!
+                self.nextChar()
+
+                # Must have at least one digit after decimal.
+                if not self.peek().isdigit(): 
+                    # Error!
+                    self.abort("Illegal character in number.")
+                while self.peek().isdigit():
+                    self.nextChar()
+
+            tokText = self.source[startPos : self.curPos + 1] # Get the substring.
+            token = Token(tokText, TokenType.NUMBER)
+
+        elif self.curChar.isalpha():
+            # Leading character is a letter, so this must be an identifier or a keyword.
+            # Get all consecutive alpha numeric characters.
+            startPos = self.curPos
+            while self.peek().isalnum():
+                self.nextChar()
+
+            # Check if the token is in the list of keywords.
+            tokText = self.source[startPos : self.curPos + 1] # Get the substring.
+            keyword = Token.checkIfKeyword(tokText)
+            if keyword == None: # Identifier
+                token = Token(tokText, TokenType.IDENT)
+            else:   # Keyword
+                token = Token(tokText, keyword)
+
+        else:
+            # Unknown token!
+            self.abort(LexingError.UNKNOWN_TOKEN)
+
+        self.nextChar()
+        return token
@@ -0,0 +1,29 @@
+# Author: Stephen Marz
+
+PYTHON="python3"
+COMPILER="tiny.py"
+CC="gcc"
+
+function comp {
+    BN=$(basename -s .teeny $1)
+    TTOUTPUT=$(${PYTHON} ${COMPILER} $1 2>&1)
+    if [ $? -ne 0 ]; then
+        echo "${TTOUTPUT}"
+    else
+        mv out.c ${BN}.c
+        CCOUTPUT=$(${CC} -o ${BN}-out ${BN}.c)
+        if [ $? -ne 0 ]; then
+            echo "${CCOUTPUT}"
+        else
+            echo "${TTOUTPUT}"
+        fi
+    fi
+}
+
+# With no arguments, compile every example; otherwise compile each file
+# named on the command line.
+if [ $# -eq 0 ]; then
+    # Glob directly instead of parsing ls output so names containing
+    # whitespace survive intact.
+    for i in examples/*.teeny; do
+        # An unmatched glob stays literal; skip it rather than compile it.
+        [ -e "$i" ] || continue
+        comp "$i"
+    done
+else
+    for i in "$@"; do
+        comp "$i"
+    done
+fi