grass/src/lexer.rs
ConnorSkees 1127897bd8 rustfmt
2020-03-22 00:56:41 -04:00

365 lines
12 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use std::convert::TryFrom;
use std::iter::Peekable;
use std::str::Chars;
use std::sync::atomic::{AtomicBool, Ordering};
use crate::atrule::AtRuleKind;
use crate::common::{Keyword, Op, Pos, Symbol};
use crate::{Token, TokenKind, Whitespace};
// Rust has no `\f` escape sequence, so the form feed is spelled as a hex escape.
/// The ASCII form feed character (U+000C).
///
/// The lexer treats it as a line break: it is matched together with `\n`
/// at the top level and converted to `\n` inside multi-line comments.
const FORM_FEED: char = '\x0C';
/// Process-wide flag that `Lexer::lex_ident` sets to `true` whenever it
/// consumes a non-ASCII character.
///
/// Nothing in this file ever resets it to `false`.
/// NOTE(review): presumably read by the output stage to decide how to encode
/// the result — confirm against the consumers of this static.
pub static IS_UTF8: AtomicBool = AtomicBool::new(false);
/// Character-level tokenizer over a borrowed source string.
///
/// `Lexer` implements `Iterator<Item = Token>`: each call to `next` consumes
/// one or more characters from `buf` and yields the corresponding token.
#[derive(Debug, Clone)]
pub(crate) struct Lexer<'a> {
/// Token storage initialised by `new`; nothing visible in this file pushes
/// to it or reads from it.
tokens: Vec<Token>,
/// Peekable stream of the characters that have not been consumed yet.
buf: Peekable<Chars<'a>>,
/// Running source position; advanced as characters are consumed and copied
/// into every emitted `Token`.
pos: Pos,
}
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    /// Lexes and returns the next token, or `None` once the input is exhausted.
    ///
    /// The first unconsumed character selects the kind of token. Simple
    /// one-character tokens are handled inline; everything else is delegated
    /// to the `lex_*` helpers. Malformed input is reported through
    /// `TokenKind::Error` tokens rather than by panicking.
    ///
    /// Note that a literal NUL character in the input is indistinguishable
    /// from end of input and also terminates iteration.
    fn next(&mut self) -> Option<Self::Item> {
        // Consume one character and produce the given `Symbol` variant.
        macro_rules! symbol {
            ($self:ident, $symbol:ident) => {{
                $self.buf.next();
                $self.pos.next_char();
                TokenKind::Symbol(Symbol::$symbol)
            }};
        }
        // Consume one character and produce the given `Whitespace` variant.
        macro_rules! whitespace {
            ($self:ident, $whitespace:ident) => {{
                $self.buf.next();
                $self.pos.next_char();
                TokenKind::Whitespace(Whitespace::$whitespace)
            }};
        }

        // `'\0'` doubles as the end-of-input sentinel.
        let kind: TokenKind = match self.buf.peek().copied().unwrap_or('\0') {
            'a'..='z' | 'A'..='Z' | '_' => self.lex_ident(),
            '-' => {
                self.buf.next();
                self.pos.next_char();
                // Peek without unwrapping: a `-` at the very end of the input
                // is simply a minus symbol.
                match self.buf.peek().copied() {
                    // Negative number: lex the magnitude and prepend the sign.
                    Some('0'..='9') | Some('.') => match self.lex_num() {
                        TokenKind::Number(n) => {
                            let mut s = String::from("-");
                            s.push_str(&n);
                            TokenKind::Number(s)
                        }
                        e @ TokenKind::Error(..) => e,
                        _ => unreachable!("lex_num only returns Number or Error"),
                    },
                    // Identifier starting with `-` (including `--custom`).
                    Some('a'..='z') | Some('A'..='Z') | Some('_') | Some('-') => {
                        match self.lex_ident() {
                            TokenKind::Ident(i) => {
                                let mut s = String::from("-");
                                s.push_str(&i);
                                TokenKind::Ident(s)
                            }
                            // `-` followed by a keyword is an ordinary identifier.
                            TokenKind::Keyword(kw) => {
                                let mut s = String::from("-");
                                s.push_str(&kw.to_string());
                                TokenKind::Ident(s)
                            }
                            // `lex_ident` saw a lone `-`, so the input was `--`.
                            TokenKind::Symbol(Symbol::Minus) => {
                                TokenKind::Ident(String::from("--"))
                            }
                            e @ TokenKind::Error(..) => e,
                            _ => unreachable!(
                                "lex_ident only returns Ident, Keyword or Symbol::Minus"
                            ),
                        }
                    }
                    _ => TokenKind::Symbol(Symbol::Minus),
                }
            }
            '@' => self.lex_at_rule(),
            '0'..='9' => self.lex_num(),
            '.' => {
                self.buf.next();
                self.pos.next_char();
                // Peek without unwrapping: a `.` at the very end of the input
                // is simply a period symbol.
                match self.buf.peek().copied() {
                    // Leading-dot decimal such as `.5`, normalised to `0.5`.
                    Some('0'..='9') => match self.lex_num() {
                        TokenKind::Number(n) => {
                            let mut s = String::from("0.");
                            s.push_str(&n);
                            TokenKind::Number(s)
                        }
                        e @ TokenKind::Error(..) => e,
                        _ => unreachable!("lex_num only returns Number or Error"),
                    },
                    _ => TokenKind::Symbol(Symbol::Period),
                }
            }
            '$' => self.lex_variable(),
            ':' => symbol!(self, Colon),
            ',' => symbol!(self, Comma),
            ';' => symbol!(self, SemiColon),
            '(' => symbol!(self, OpenParen),
            ')' => symbol!(self, CloseParen),
            '+' => symbol!(self, Plus),
            '=' => {
                self.buf.next();
                self.pos.next_char();
                match self.buf.peek() {
                    Some('=') => {
                        self.buf.next();
                        self.pos.next_char();
                        TokenKind::Op(Op::Equal)
                    }
                    _ => TokenKind::Symbol(Symbol::Equal),
                }
            }
            '?' => symbol!(self, QuestionMark),
            '\\' => symbol!(self, BackSlash),
            '~' => symbol!(self, Tilde),
            '\'' => symbol!(self, SingleQuote),
            '"' => symbol!(self, DoubleQuote),
            ' ' => whitespace!(self, Space),
            '\t' => whitespace!(self, Tab),
            '\n' | FORM_FEED => {
                self.buf.next();
                self.pos.newline();
                TokenKind::Whitespace(Whitespace::Newline)
            }
            // A carriage return yields a newline token of its own and does
            // not start a new line in `pos`.
            '\r' => {
                self.buf.next();
                TokenKind::Whitespace(Whitespace::Newline)
            }
            '#' => self.lex_hash(),
            '{' => symbol!(self, OpenCurlyBrace),
            '*' => symbol!(self, Mul),
            '}' => symbol!(self, CloseCurlyBrace),
            '&' => symbol!(self, BitAnd),
            '|' => symbol!(self, BitOr),
            '/' => self.lex_forward_slash(),
            '%' => symbol!(self, Percent),
            '[' => symbol!(self, OpenSquareBrace),
            ']' => symbol!(self, CloseSquareBrace),
            '!' => self.lex_exclamation(),
            '<' => symbol!(self, Lt),
            '>' => symbol!(self, Gt),
            '^' => symbol!(self, Xor),
            '`' => symbol!(self, BackTick),
            '\0' => return None,
            // Any remaining control character cannot start a token.
            c if c.is_control() => {
                self.buf.next();
                TokenKind::Error("Expected expression.".into())
            }
            // Everything else (in practice: non-ASCII text) is lexed as an
            // identifier.
            _ => self.lex_ident(),
        };
        // The position is advanced once more per token before being stamped
        // onto it.
        self.pos.next_char();
        Some(Token {
            kind,
            pos: self.pos,
        })
    }
}
impl<'a> Lexer<'a> {
/// Creates a lexer positioned at the start of `buf`.
///
/// The token vector starts out empty and unallocated: nothing in this
/// file fills it, so reserving one slot per input byte up front would only
/// waste memory proportional to the source size.
pub fn new(buf: &'a str) -> Lexer<'a> {
    Lexer {
        tokens: Vec::new(),
        buf: buf.chars().peekable(),
        pos: Pos::new(),
    }
}
fn lex_exclamation(&mut self) -> TokenKind {
self.buf.next();
self.pos.next_char();
macro_rules! assert_char {
($self:ident, $($char:literal)*) => {
$(
assert_eq!($char, $self.buf.next().expect("expected char").to_ascii_lowercase(), "expected keyword `important`");
)*
}
};
match self.buf.peek() {
Some('i') | Some('I') => {
self.buf.next();
assert_char!(self, 'm' 'p' 'o' 'r' 't' 'a' 'n' 't');
TokenKind::Keyword(Keyword::Important)
}
Some('d') | Some('D') => {
self.buf.next();
assert_char!(self, 'e' 'f' 'a' 'u' 'l' 't');
TokenKind::Keyword(Keyword::Default)
}
Some('g') | Some('G') => {
self.buf.next();
assert_char!(self, 'l' 'o' 'b' 'a' 'l');
TokenKind::Keyword(Keyword::Global)
}
Some('=') => {
self.buf.next();
TokenKind::Op(Op::NotEqual)
}
_ => todo!("expected either `i` or `=` after `!`"),
}
}
fn lex_at_rule(&mut self) -> TokenKind {
self.buf.next();
self.pos.next_char();
if let TokenKind::Ident(s) = self.lex_ident() {
if !s.is_empty() {
TokenKind::AtRule(AtRuleKind::from(s.as_ref()))
} else {
TokenKind::Error("Expected identifier.".into())
}
} else {
TokenKind::Error("Expected identifier.".into())
}
}
/// Lexes a token that starts with `/`.
///
/// * `//` starts a single-line comment; it is discarded up to and including
///   the terminating `\n` (or end of input) and reported as a newline.
/// * `/*` starts a multi-line comment whose text, with `\r`, `\r\n` and
///   form feed normalised to `\n`, is returned as
///   `TokenKind::MultilineComment`. An unterminated comment runs to the end
///   of the input.
/// * Anything else, including end of input, makes the `/` a division
///   symbol.
fn lex_forward_slash(&mut self) -> TokenKind {
    // Consume the leading `/`.
    self.buf.next();
    self.pos.next_char();

    match self.buf.peek().copied() {
        Some('/') => {
            self.buf.by_ref().take_while(|x| x != &'\n').for_each(drop);
            self.pos.newline();
            TokenKind::Whitespace(Whitespace::Newline)
        }
        Some('*') => {
            // Consume the `*` that opens the comment.
            self.buf.next();
            self.pos.next_char();
            let mut comment = String::new();
            while let Some(tok) = self.buf.next() {
                match tok {
                    '\n' => self.pos.newline(),
                    FORM_FEED => {
                        self.pos.newline();
                        comment.push('\n');
                        continue;
                    }
                    '\r' => {
                        // Treat `\r\n` as a single line break.
                        if self.buf.peek() == Some(&'\n') {
                            self.buf.next();
                        }
                        self.pos.newline();
                        comment.push('\n');
                        continue;
                    }
                    '*' if self.buf.peek() == Some(&'/') => {
                        // Consume the closing `/`; the terminator is not part
                        // of the comment text.
                        self.buf.next();
                        break;
                    }
                    _ => self.pos.next_char(),
                }
                comment.push(tok);
            }
            TokenKind::MultilineComment(comment)
        }
        // Also covers end of input: a trailing `/` is a plain division
        // symbol rather than a reason to panic.
        _ => TokenKind::Symbol(Symbol::Div),
    }
}
/// Lexes an unsigned number: a run of ASCII digits optionally followed by
/// `.` and a further run of ASCII digits.
///
/// Returns `TokenKind::Number` holding the literal text, or an
/// "Expected digit." error token when a `.` is not followed by any digit.
///
/// Only ASCII digits are accepted. `char::is_numeric` would also admit
/// characters such as `²`, `½` or Arabic-Indic digits, which are not valid
/// in a number literal.
fn lex_num(&mut self) -> TokenKind {
    let mut whole = String::new();
    while let Some(&c) = self.buf.peek() {
        if !c.is_ascii_digit() {
            break;
        }
        self.buf.next();
        self.pos.next_char();
        whole.push(c);
    }

    let mut dec = String::new();
    if self.buf.peek() == Some(&'.') {
        self.buf.next();
        dec.push('.');
        while let Some(&c) = self.buf.peek() {
            if !c.is_ascii_digit() {
                break;
            }
            self.buf.next();
            self.pos.next_char();
            dec.push(c);
        }
    }

    // `dec` holding only the `.` means no fractional digit followed it.
    if dec.len() == 1 {
        return TokenKind::Error("Expected digit.".into());
    }

    whole.push_str(&dec);
    TokenKind::Number(whole)
}
/// Lexes a token that starts with `#`.
///
/// `#{` opens an interpolation and is returned as `TokenKind::Interpolation`
/// with both characters consumed; a `#` followed by anything else is a plain
/// hash symbol.
fn lex_hash(&mut self) -> TokenKind {
    // Step over the `#`.
    self.buf.next();
    self.pos.next_char();

    match self.buf.peek() {
        Some('{') => {
            self.buf.next();
            self.pos.next_char();
            TokenKind::Interpolation
        }
        _ => TokenKind::Symbol(Symbol::Hash),
    }
}
fn lex_variable(&mut self) -> TokenKind {
self.buf.next();
self.pos.next_char();
let mut name = String::with_capacity(99);
if let Some(c) = self.buf.peek() {
if c == &'=' {
return TokenKind::Symbol(Symbol::Dollar);
} else if !c.is_alphabetic() && c != &'-' && c != &'_' {
return TokenKind::Error("Expected identifier.".into());
} else {
self.pos.next_char();
name.push(*c);
}
self.buf.next();
}
while let Some(c) = self.buf.peek() {
if !c.is_alphanumeric() && c != &'-' && c != &'_' {
break;
}
let tok = self
.buf
.next()
.expect("this is impossible because we have already peeked");
self.pos.next_char();
name.push(tok);
}
if name.is_empty() {
TokenKind::Symbol(Symbol::Dollar)
} else {
TokenKind::Variable(name)
}
}
// TODO: handle weird characters that *are* ascii
// e.g. how do we handle `color: ;`
/// Lexes an identifier-like run of characters.
///
/// Characters are consumed while they are alphanumeric, `-`, `_`, or any
/// non-ASCII character; the first non-ASCII character seen also raises the
/// global `IS_UTF8` flag. The run may be empty when the next character
/// cannot be part of an identifier.
///
/// The collected text becomes a `Keyword` when it names one, the minus
/// symbol when it is exactly `-`, and an `Ident` otherwise.
fn lex_ident(&mut self) -> TokenKind {
    let mut ident = String::with_capacity(99);

    while let Some(&c) = self.buf.peek() {
        let non_ascii = !c.is_ascii();
        let continues = c.is_alphanumeric() || c == '-' || c == '_' || non_ascii;
        if !continues {
            break;
        }
        if non_ascii {
            IS_UTF8.store(true, Ordering::Relaxed);
        }
        self.buf.next();
        self.pos.next_char();
        ident.push(c);
    }

    // The keyword lookup takes precedence over the lone `-` check.
    let keyword = Keyword::try_from(ident.as_ref()).ok();
    match keyword {
        Some(kw) => TokenKind::Keyword(kw),
        None if ident == "-" => TokenKind::Symbol(Symbol::Minus),
        None => TokenKind::Ident(ident),
    }
}
}