diff --git a/fixtures/fibonacci.mk b/fixtures/fibonacci.mk new file mode 100644 index 0000000..1bec49b --- /dev/null +++ b/fixtures/fibonacci.mk @@ -0,0 +1,13 @@ +let fibonacci = fn(x) { + if (x == 0) { + 0 + } else { + if (x == 1) { + 1 + } else { + fibonacci(x - 1) + fibonacci(x - 2) + } + } +}; +puts("fibonacci(10)", fibonacci(10)); +puts("fibonacci(15)", fibonacci(15)); diff --git a/src/benchmark.rs b/src/benchmark.rs index 80ce1bc..95598b6 100644 --- a/src/benchmark.rs +++ b/src/benchmark.rs @@ -24,8 +24,8 @@ pub fn run(mode: Mode) { fibonacci(35);"; let lexer = Lexer::new(code.to_owned()); - let mut parser = Parser::new(lexer); - let program = parser.parse_program(); + let parser = Parser::new(lexer); + let program = parser.parse_program().expect("parser error"); match mode { Mode::Eval => { diff --git a/src/code.rs b/src/code/mod.rs similarity index 67% rename from src/code.rs rename to src/code/mod.rs index 3b7ca43..2a8d90a 100644 --- a/src/code.rs +++ b/src/code/mod.rs @@ -1,5 +1,12 @@ +use std::convert::TryInto; use std::fmt; +use std::io; +pub trait Serializable { + fn serialize(&self, f: &mut dyn io::Write) -> io::Result<()>; +} + +#[derive(Debug, PartialEq)] pub struct Bytecode { pub instructions: Instructions, pub constants: Vec, @@ -12,6 +19,87 @@ impl Bytecode { constants, } } + + pub fn from_bytes(bytes: &[u8]) -> Result { + let mut cur = 0; + // TODO: What happens on 32-bit systems? + let instructions_size = read_be_u64(bytes, cur) as usize; + cur += 8; + let instructions = bytes[cur..(cur + instructions_size)].to_vec(); + cur += instructions_size; + + let constants_count = read_be_u64(bytes, cur) as usize; + cur += 8; + + let mut constants = Vec::new(); + while cur < bytes.len() { + let tag = bytes[cur]; + cur += 1; + + if tag == TAG_INTEGER { + constants.push(Constant::Integer(read_be_i64(bytes, cur))); + cur += 8; + } else if tag == TAG_FLOAT { + constants.push(Constant::Float(f64::from_bits(read_be_u64(bytes, cur)))); + cur += 8; + } else if tag == TAG_STRING { + let string_len = read_be_u64(bytes, cur) as usize; + cur += 8; + let string = String::from_utf8(bytes[cur..(cur + string_len)].to_vec()) + .expect("error: Failed to read string constant"); + cur += string_len; + constants.push(Constant::String(string)); + } else if tag == TAG_FUNCTION { + let num_locals = bytes[cur]; + let num_parameters = bytes[cur + 1]; + cur += 2; + let instructions_size = read_be_u64(bytes, cur) as usize; + cur += 8; + let instructions = bytes[cur..(cur + instructions_size)].to_vec(); + cur += instructions_size; + let cf = CompiledFunction { + instructions, + num_locals, + num_parameters, + }; + constants.push(Constant::CompiledFunction(cf)); + } else { + return Err(format!("Unexpected tag {} at position {}", tag, cur)); + } + } + + if constants.len() != constants_count { + return Err(format!( + "Invalid constants cound: expect {} but got {}", + constants_count, + constants.len() + )); + } + + Ok(Bytecode::new(instructions, constants)) + } +} + +fn read_be_u64(bytes: &[u8], start: usize) -> u64 { + // https://doc.rust-lang.org/std/primitive.u64.html#method.from_be_bytes + u64::from_be_bytes(bytes[start..(start + 8)].try_into().unwrap()) +} + +fn read_be_i64(bytes: &[u8], start: usize) -> i64 { + // https://doc.rust-lang.org/std/primitive.i64.html#method.from_be_bytes + i64::from_be_bytes(bytes[start..(start + 8)].try_into().unwrap()) +} + +impl Serializable for Bytecode { + fn serialize(&self, w: &mut dyn io::Write) -> io::Result<()> { + w.write_all(&(self.instructions.len() as u64).to_be_bytes())?; + w.write_all(&self.instructions)?; + w.write_all(&(self.constants.len() as u64).to_be_bytes())?; + for constant in &self.constants { + constant.serialize(w)?; + } + io::Result::Ok(()) + } } // Instructions are a series of bytes. @@ -326,6 +414,38 @@ impl Constant { } } +const TAG_INTEGER: u8 = 1; +const TAG_FLOAT: u8 = 2; +const TAG_STRING: u8 = 3; +const TAG_FUNCTION: u8 = 4; + +impl Serializable for Constant { + fn serialize(&self, w: &mut dyn io::Write) -> io::Result<()> { + match self { + Constant::Integer(value) => { + w.write_all(&TAG_INTEGER.to_be_bytes())?; + w.write_all(&value.to_be_bytes())?; + } + Constant::Float(value) => { + w.write_all(&TAG_FLOAT.to_be_bytes())?; + // https://github.com/rust-lang/rust/issues/60446 + w.write_all(&value.to_bits().to_be_bytes())?; + } + Constant::String(value) => { + w.write_all(&TAG_STRING.to_be_bytes())?; + w.write_all(&(value.len() as u64).to_be_bytes())?; + w.write_all(value.as_bytes())?; + } + Constant::CompiledFunction(cf) => { + w.write_all(&[TAG_FUNCTION, cf.num_locals, cf.num_parameters])?; + w.write_all(&(cf.instructions.len() as u64).to_be_bytes())?; + w.write_all(&cf.instructions)?; + } + } + io::Result::Ok(()) + } +} + impl fmt::Display for Constant { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -358,7 +478,7 @@ impl fmt::Display for CompiledFunction { #[cfg(test)] mod tests { - use crate::code::{make, make_u16, make_u16_u8, make_u8, print_instructions, OpCode}; + use crate::code::{make, make_u16, make_u16_u8, make_u8, print_instructions, Bytecode, OpCode}; #[test] fn test_print_instructions() { @@ -380,4 +500,17 @@ mod tests { assert_eq!(&print_instructions(&insts), expected); } + + #[test] + fn empty_bytecode_from_bytes() { + let bytes = vec![(0 as u64).to_be_bytes(), (0 as u64).to_be_bytes()].concat(); + let bytecode = Bytecode::from_bytes(&bytes).expect("Failed to parse bytecode"); + + let expected = Bytecode { + instructions: vec![], + constants: vec![], + }; + + assert_eq!(bytecode, expected); + } } diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index a3743b1..17009ca 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -1284,22 +1284,9 @@ mod tests { fn parse(input: &str) -> Program { let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); + let parser = Parser::new(lexer); - let program = parser.parse_program(); - check_parser_errors(&parser); - program - } - - fn check_parser_errors(parser: &Parser) { - let errors = parser.errors(); - if errors.len() > 0 { - panic!( - "for input '{}', got parser errors: {:?}", - parser.input(), - errors - ); - } + parser.parse_program().expect("parser error") } fn compiled_function( diff --git a/src/evaluator.rs b/src/evaluator.rs index 457f572..316d07e 100644 --- a/src/evaluator.rs +++ b/src/evaluator.rs @@ -679,9 +679,9 @@ mod evalator_tests { fn eval_input(input: &str) -> EvalResult { let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); + let parser = Parser::new(lexer); - let program = parser.parse_program(); + let program = parser.parse_program().expect("parser error"); let env = Rc::new(RefCell::new(Environment::new())); evaluator::eval(&program, env) } diff --git a/src/main.rs b/src/main.rs index fa553f6..02b82fe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,19 +1,31 @@ use cymbal::benchmark; +use cymbal::code::{Bytecode, Serializable}; +use cymbal::compiler::Compiler; +use cymbal::lexer::Lexer; use cymbal::mode::Mode; +use cymbal::parser::Parser; use cymbal::repl; +use cymbal::vm::Vm; use std::env; +use std::fs; use std::process; fn main() { - let maybe_subcommand = env::args().nth(1); - match maybe_subcommand { + match env::args().nth(1) { Some(subcommand) => match subcommand.as_ref() { "repl" => repl::start(eval_or_compile()), "benchmark" => benchmark::run(eval_or_compile()), - unknown => { - println!("cymbal: '{}' is not a valid subcommand\n", unknown); + "compile" => { + compile(); + } + "run" => { + run(); + } + "help" => { help(); - process::exit(1); + } + unknown => { + unknown_subcommand(unknown); } }, None => { @@ -22,6 +34,40 @@ fn main() { } } +// -- Actions + +fn compile() { + let source_path = env::args() + .nth(2) + .expect("error: specify a source file to compile"); + + let source = fs::read_to_string(source_path).expect("error: failed to read a source file"); + let parser = Parser::new(Lexer::new(source)); + let program = parser + .parse_program() + .expect("error: Failed to parse source"); + let compiler = Compiler::new(); + let bytecode = compiler + .compile(&program) + .expect("error: Failed to compile"); + // TODO: Make the output path flexible. + let mut file = fs::File::create("out.mo").expect("error: Failed to open an output file"); + bytecode + .serialize(&mut file) + .expect("error: Failed to serialize bytecode"); + println!("Wrote bytecode into 'out.mo'"); +} + +fn run() { + let source_path = env::args() + .nth(2) + .expect("error: specify a bytecode file to run"); + let bytes = fs::read(source_path).expect("error: failed to read a bytecode file"); + let bytecode = Bytecode::from_bytes(&bytes).expect("error: Failed to deserialize bytecode"); + let vm = Vm::new(bytecode); + vm.run().expect("error: Failed to run bytecode"); +} + fn help() { println!( r#"Usage: cymbal SUBCOMMAND [OPTIONS] @@ -37,6 +83,14 @@ Subcommands: ); } +fn unknown_subcommand(subcommand: &str) { + println!("cymbal: '{}' is not a valid subcommand\n", subcommand); + help(); + process::exit(1); +} + +// -- Helpers + fn has_flag(flag: &str) -> bool { env::args().any(|arg| arg == flag) } diff --git a/src/parser.rs b/src/parser.rs index 1dd0abf..4709103 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -17,7 +17,7 @@ pub enum Precedence { type Result = std::result::Result; -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum ParserError { ExpectedPrefixToken(Token), ExpectedInfixToken(Token), @@ -75,7 +75,7 @@ impl Parser { self.cur_token = mem::replace(&mut self.peek_token, self.lexer.next_token()); } - pub fn parse_program(&mut self) -> Program { + pub fn parse_program(mut self) -> Result { let mut statements = vec![]; while self.cur_token != Token::Eof { @@ -90,7 +90,12 @@ impl Parser { self.next_token(); } - Program { statements } + if self.errors.is_empty() { + Ok(Program { statements }) + } else { + // TODO: Can we do this without clone()? + Err(self.errors[0].clone()) + } } fn parse_statement(&mut self) -> Result { @@ -549,10 +554,9 @@ mod tests { let foobar = x + y; "; let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); + let parser = Parser::new(lexer); - let program = parser.parse_program(); - check_parser_errors(&parser); + let program = parser.parse_program().expect("parser error"); assert_eq!( program.statements, @@ -581,10 +585,9 @@ mod tests { "; let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); + let parser = Parser::new(lexer); - let program = parser.parse_program(); - check_parser_errors(&parser); + let program = parser.parse_program().expect("parser error"); assert_eq!( program.statements, @@ -602,10 +605,9 @@ mod tests { let input = "foobar;"; let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); + let parser = Parser::new(lexer); - let program = parser.parse_program(); - check_parser_errors(&parser); + let program = parser.parse_program().expect("parser error"); assert_eq!( program.statements, @@ -620,10 +622,9 @@ mod tests { let input = "5;"; let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); + let parser = Parser::new(lexer); - let program = parser.parse_program(); - check_parser_errors(&parser); + let program = parser.parse_program().expect("parser error"); assert_eq!( program.statements, @@ -642,10 +643,9 @@ mod tests { ]; for (input, operator, value) in tests { let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); + let parser = Parser::new(lexer); - let program = parser.parse_program(); - check_parser_errors(&parser); + let program = parser.parse_program().expect("parser error"); assert_eq!( program.statements, @@ -671,10 +671,9 @@ mod tests { ]; for (input, left, operator, right) in tests { let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); + let parser = Parser::new(lexer); - let program = parser.parse_program(); - check_parser_errors(&parser); + let program = parser.parse_program().expect("parser error"); assert_eq!( program.statements, @@ -696,10 +695,9 @@ mod tests { ]; for (input, left, operator, right) in tests { let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); + let parser = Parser::new(lexer); - let program = parser.parse_program(); - check_parser_errors(&parser); + let program = parser.parse_program().expect("parser error"); assert_eq!( program.statements, @@ -798,23 +796,11 @@ mod tests { fn test_parsing(tests: Vec<(&str, &str)>) { for (input, expected) in tests { let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); + let parser = Parser::new(lexer); - let program = parser.parse_program(); - check_parser_errors(&parser); + let program = parser.parse_program().expect("parser error"); assert_eq!(program.to_string(), expected); } } - - fn check_parser_errors(parser: &Parser) { - let errors = parser.errors(); - if errors.len() > 0 { - panic!( - "for input '{}', got parser errors: {:?}", - parser.input(), - errors - ); - } - } } diff --git a/src/repl.rs b/src/repl.rs index a976534..7776412 100644 --- a/src/repl.rs +++ b/src/repl.rs @@ -33,17 +33,16 @@ pub fn start(mode: Mode) { loop { let input = ask_input(">> "); - let mut parser = Parser::new(Lexer::new(input)); + let parser = Parser::new(Lexer::new(input)); - let program = parser.parse_program(); - if !parser.errors().is_empty() { - println!("Woops! We ran into some monkey business here!"); - println!(" parser errors:"); - for error in parser.errors() { - println!("\t{:?}", error); + let program = match parser.parse_program() { + Ok(pg) => pg, + Err(err) => { + println!("Woops! We ran into some monkey business here!"); + println!("\t{:?}", err); + continue; } - continue; - } + }; match mode { Mode::Eval => match evaluator::eval(&program, Rc::clone(&env)) { diff --git a/src/vm/mod.rs b/src/vm/mod.rs index a8bc3e4..2d90770 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -1111,12 +1111,8 @@ mod tests { fn make_vm(input: &str) -> Vm { let lexer = Lexer::new(input.to_owned()); - let mut parser = Parser::new(lexer); - let program = parser.parse_program(); - let errors = parser.errors(); - if errors.len() > 0 { - panic!("for input '{}', got parser errors: {:?}", input, errors); - } + let parser = Parser::new(lexer); + let program = parser.parse_program().expect("parser error"); let compiler = Compiler::new(); let bytecode = match compiler.compile(&program) {