From 0b913d434af99ab88e5072da124794a2bf31e367 Mon Sep 17 00:00:00 2001 From: Shadowfacts Date: Mon, 3 May 2021 17:47:36 -0400 Subject: [PATCH] Add Part 9: Statements --- site/posts/2021-05-03-statements.md | 97 +++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 site/posts/2021-05-03-statements.md diff --git a/site/posts/2021-05-03-statements.md b/site/posts/2021-05-03-statements.md new file mode 100644 index 0000000..850482e --- /dev/null +++ b/site/posts/2021-05-03-statements.md @@ -0,0 +1,97 @@ +``` +metadata.title = "Part 9: Statements" +metadata.tags = ["build a programming language", "rust"] +metadata.date = "2021-05-03 17:46:42 -0400" +metadata.shortDesc = "" +metadata.slug = "statements" +metadata.preamble = `

This post is part of a series about learning Rust and building a small programming language.


` +``` + +So the parser can handle a single expression, but since we're not building a Lisp, that's not enough. It needs to handle multiple statements. For context, an expression is a piece of code that represents a value whereas a statement is a piece of code that can be executed but does not result in a value. + + + +In the AST, there's a new top-level type: `Statement`. For now, the only type of statement is one that contains an expression and nothing else. + +```rust +enum Statement { + Expr(Node), +} +``` + +The top-level `parse` function has also changed to reflect this. It now returns a vector of statements, instead of a single expression node. The `do_parse` function continues to work exactly as it has, but is renamed `parse_expression`, since that's what it's actually doing. + +```rust +fn parse(tokens: &[Token]) -> Vec<Statement> { + let mut it = tokens.iter().peekable(); + let mut statements: Vec<Statement> = vec![]; + while let Some(_) = it.peek() { + match parse_statement(&mut it) { + Some(statement) => statements.push(statement), + None => (), + } + } + statements +} +``` + +The `parse_statement` function does exactly what the name suggests. + +```rust +fn parse_statement<'a, I: Iterator<Item = &'a Token>>(it: &mut Peekable<'a, I>) -> Option<Statement> { + if it.peek().is_none() { + return None; + } + + let node = parse_expression(it).map(|node| Statement::Expr(node)); + node +} +``` + +With that in place, parsing multiple statements is easy. The only change is that, after successfully parsing a statement, we need to consume a semicolon if there is one. Then, the `parse` loop will continue and the next statement can be parsed. + +```rust +fn parse_statement<'a, I: Iterator<Item = &'a Token>>(it: &mut Peekable<'a, I>) -> Option<Statement> { + // ... 
+ match it.peek() { + Some(Token::Semicolon) => { + it.next(); + } + Some(tok) => { + panic!("unexpected token {:?} after statement", tok); + } + None => (), + } + + node +} +``` + +I intend to make semicolons optional and allow newline-delimited statements, but that is more complicated and will have to wait for another time. For now, this is good enough: + +```rust +fn main() { + let tokens = tokenize("1 + 2; foo();"); + println!("statements: {:#?}", parse(&tokens)); +} +``` + +```sh +$ cargo run +statements: [ + Expr( + BinaryOp { + left: Integer(1), + op: Add, + right: Integer(2), + }, + ), + Expr( + Call { + name: "foo", + params: [], + }, + ), +] +``` +