2020-01-04 22:55:04 -05:00
|
|
|
|
use std::convert::TryFrom;
|
|
|
|
|
use std::iter::Peekable;
|
|
|
|
|
use std::str::Chars;
|
2020-02-28 18:27:32 -05:00
|
|
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
2020-01-04 22:55:04 -05:00
|
|
|
|
|
2020-01-25 12:43:07 -05:00
|
|
|
|
use crate::atrule::AtRuleKind;
|
|
|
|
|
use crate::common::{Keyword, Op, Pos, Symbol};
|
2020-01-04 22:55:04 -05:00
|
|
|
|
use crate::{Token, TokenKind, Whitespace};
|
|
|
|
|
|
2020-01-26 16:50:08 -05:00
|
|
|
|
// Rust does not allow us to escape '\f'
|
|
|
|
|
/// The form feed character (U+000C), spelled as a hex escape.
///
/// The lexer in this file treats it as a line break: it is matched alongside
/// `'\n'` when producing newline whitespace tokens and inside multiline
/// comments.
const FORM_FEED: char = '\x0C';
|
|
|
|
|
|
2020-02-28 18:27:32 -05:00
|
|
|
|
/// Global flag, initially `false`, that `Lexer::lex_ident` sets to `true`
/// (with `Ordering::Relaxed`) the first time it consumes a non-ASCII
/// character. Nothing in this file resets it.
// NOTE(review): presumably read by the output stage to decide whether the
// result contains non-ASCII text -- confirm against the readers of this flag.
pub static IS_UTF8: AtomicBool = AtomicBool::new(false);
|
|
|
|
|
|
2020-01-04 22:55:04 -05:00
|
|
|
|
#[derive(Debug, Clone)]
|
2020-01-20 13:15:47 -05:00
|
|
|
|
pub(crate) struct Lexer<'a> {
|
2020-01-04 22:55:04 -05:00
|
|
|
|
tokens: Vec<Token>,
|
|
|
|
|
buf: Peekable<Chars<'a>>,
|
|
|
|
|
pos: Pos,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a> Iterator for Lexer<'a> {
|
|
|
|
|
type Item = Token;
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
|
macro_rules! symbol {
|
|
|
|
|
($self:ident, $symbol:ident) => {{
|
|
|
|
|
$self.buf.next();
|
|
|
|
|
$self.pos.next_char();
|
|
|
|
|
TokenKind::Symbol(Symbol::$symbol)
|
|
|
|
|
}};
|
|
|
|
|
}
|
|
|
|
|
macro_rules! whitespace {
|
|
|
|
|
($self:ident, $whitespace:ident) => {{
|
|
|
|
|
$self.buf.next();
|
|
|
|
|
$self.pos.next_char();
|
|
|
|
|
TokenKind::Whitespace(Whitespace::$whitespace)
|
|
|
|
|
}};
|
|
|
|
|
}
|
|
|
|
|
let kind: TokenKind = match self.buf.peek().unwrap_or(&'\0') {
|
2020-02-09 15:08:23 -05:00
|
|
|
|
'a'..='z' | 'A'..='Z' | '_' => self.lex_ident(),
|
|
|
|
|
'-' => {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
match self.buf.peek().unwrap() {
|
2020-02-16 15:20:38 -05:00
|
|
|
|
'0'..='9' | '.' => match self.lex_num() {
|
2020-02-09 15:08:23 -05:00
|
|
|
|
TokenKind::Number(n) => {
|
|
|
|
|
let mut s = String::from("-");
|
|
|
|
|
s.push_str(&n);
|
|
|
|
|
TokenKind::Number(s)
|
|
|
|
|
}
|
|
|
|
|
_ => unsafe { std::hint::unreachable_unchecked() },
|
|
|
|
|
},
|
|
|
|
|
'a'..='z' | 'A'..='Z' | '_' | '-' => match self.lex_ident() {
|
|
|
|
|
TokenKind::Ident(i) => {
|
|
|
|
|
let mut s = String::from("-");
|
|
|
|
|
s.push_str(&i);
|
|
|
|
|
TokenKind::Ident(s)
|
|
|
|
|
}
|
|
|
|
|
TokenKind::Keyword(kw) => {
|
|
|
|
|
let mut s = String::from("-");
|
|
|
|
|
s.push_str(&kw.to_string());
|
|
|
|
|
TokenKind::Ident(s)
|
|
|
|
|
}
|
2020-02-14 13:27:08 -05:00
|
|
|
|
TokenKind::Symbol(Symbol::Minus) => TokenKind::Ident(String::from("--")),
|
2020-02-09 15:08:23 -05:00
|
|
|
|
_ => unsafe { std::hint::unreachable_unchecked() },
|
2020-02-09 18:28:24 -05:00
|
|
|
|
},
|
2020-02-09 15:08:23 -05:00
|
|
|
|
_ => TokenKind::Symbol(Symbol::Minus),
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-04 22:55:04 -05:00
|
|
|
|
'@' => self.lex_at_rule(),
|
|
|
|
|
'0'..='9' => self.lex_num(),
|
2020-02-08 20:07:20 -05:00
|
|
|
|
'.' => {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
match self.buf.peek().unwrap() {
|
|
|
|
|
'0'..='9' => match self.lex_num() {
|
|
|
|
|
TokenKind::Number(n) => {
|
|
|
|
|
let mut s = String::from("0.");
|
|
|
|
|
s.push_str(&n);
|
|
|
|
|
TokenKind::Number(s)
|
|
|
|
|
}
|
2020-02-08 20:20:03 -05:00
|
|
|
|
_ => unsafe { std::hint::unreachable_unchecked() },
|
|
|
|
|
},
|
|
|
|
|
_ => TokenKind::Symbol(Symbol::Period),
|
2020-02-08 20:07:20 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
2020-01-04 22:55:04 -05:00
|
|
|
|
'$' => self.lex_variable(),
|
|
|
|
|
':' => symbol!(self, Colon),
|
|
|
|
|
',' => symbol!(self, Comma),
|
|
|
|
|
';' => symbol!(self, SemiColon),
|
2020-01-11 19:16:59 -05:00
|
|
|
|
'(' => symbol!(self, OpenParen),
|
|
|
|
|
')' => symbol!(self, CloseParen),
|
2020-01-04 22:55:04 -05:00
|
|
|
|
'+' => symbol!(self, Plus),
|
2020-02-08 15:53:49 -05:00
|
|
|
|
'=' => {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
match self.buf.peek() {
|
|
|
|
|
Some('=') => {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
TokenKind::Op(Op::Equal)
|
|
|
|
|
}
|
2020-02-08 16:08:35 -05:00
|
|
|
|
_ => TokenKind::Symbol(Symbol::Equal),
|
2020-02-08 15:53:49 -05:00
|
|
|
|
}
|
2020-02-08 16:08:35 -05:00
|
|
|
|
}
|
2020-01-20 11:39:05 -05:00
|
|
|
|
'?' => symbol!(self, QuestionMark),
|
|
|
|
|
'\\' => symbol!(self, BackSlash),
|
2020-01-04 22:55:04 -05:00
|
|
|
|
'~' => symbol!(self, Tilde),
|
|
|
|
|
'\'' => symbol!(self, SingleQuote),
|
|
|
|
|
'"' => symbol!(self, DoubleQuote),
|
|
|
|
|
' ' => whitespace!(self, Space),
|
|
|
|
|
'\t' => whitespace!(self, Tab),
|
2020-01-26 16:50:08 -05:00
|
|
|
|
'\n' | &FORM_FEED => {
|
2020-01-07 18:37:28 -05:00
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.newline();
|
|
|
|
|
TokenKind::Whitespace(Whitespace::Newline)
|
|
|
|
|
}
|
|
|
|
|
'\r' => {
|
|
|
|
|
self.buf.next();
|
2020-02-22 17:26:30 -05:00
|
|
|
|
TokenKind::Whitespace(Whitespace::Newline)
|
2020-01-07 18:37:28 -05:00
|
|
|
|
}
|
2020-01-12 10:52:51 -05:00
|
|
|
|
'#' => self.lex_hash(),
|
2020-01-12 19:56:58 -05:00
|
|
|
|
'{' => symbol!(self, OpenCurlyBrace),
|
2020-01-04 22:55:04 -05:00
|
|
|
|
'*' => symbol!(self, Mul),
|
2020-01-12 19:56:58 -05:00
|
|
|
|
'}' => symbol!(self, CloseCurlyBrace),
|
2020-01-11 14:51:31 -05:00
|
|
|
|
'&' => symbol!(self, BitAnd),
|
2020-01-20 11:39:05 -05:00
|
|
|
|
'|' => symbol!(self, BitOr),
|
2020-01-05 12:22:38 -05:00
|
|
|
|
'/' => self.lex_forward_slash(),
|
2020-01-22 00:40:32 -05:00
|
|
|
|
'%' => symbol!(self, Percent),
|
2020-02-24 15:07:18 -05:00
|
|
|
|
'[' => symbol!(self, OpenSquareBrace),
|
|
|
|
|
']' => symbol!(self, CloseSquareBrace),
|
2020-01-06 00:39:49 -05:00
|
|
|
|
'!' => self.lex_exclamation(),
|
2020-01-04 22:55:04 -05:00
|
|
|
|
'<' => symbol!(self, Lt),
|
|
|
|
|
'>' => symbol!(self, Gt),
|
2020-02-24 15:07:18 -05:00
|
|
|
|
'^' => symbol!(self, Xor),
|
2020-03-21 17:29:12 -04:00
|
|
|
|
'`' => symbol!(self, BackTick),
|
2020-01-04 22:55:04 -05:00
|
|
|
|
'\0' => return None,
|
2020-03-21 17:29:12 -04:00
|
|
|
|
c if c.is_control() => {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
TokenKind::Error("Expected expression.".into())
|
|
|
|
|
},
|
2020-03-20 23:53:26 -04:00
|
|
|
|
_ => self.lex_ident(),
|
2020-01-04 22:55:04 -05:00
|
|
|
|
};
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
Some(Token {
|
|
|
|
|
kind,
|
|
|
|
|
pos: self.pos,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a> Lexer<'a> {
|
|
|
|
|
pub fn new(buf: &'a str) -> Lexer<'a> {
|
|
|
|
|
Lexer {
|
|
|
|
|
tokens: Vec::with_capacity(buf.len()),
|
|
|
|
|
buf: buf.chars().peekable(),
|
|
|
|
|
pos: Pos::new(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-06 00:39:49 -05:00
|
|
|
|
fn lex_exclamation(&mut self) -> TokenKind {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
macro_rules! assert_char {
|
|
|
|
|
($self:ident, $($char:literal)*) => {
|
|
|
|
|
$(
|
|
|
|
|
assert_eq!($char, $self.buf.next().expect("expected char").to_ascii_lowercase(), "expected keyword `important`");
|
|
|
|
|
)*
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
match self.buf.peek() {
|
|
|
|
|
Some('i') | Some('I') => {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
assert_char!(self, 'm' 'p' 'o' 'r' 't' 'a' 'n' 't');
|
2020-01-29 21:02:32 -05:00
|
|
|
|
TokenKind::Keyword(Keyword::Important)
|
|
|
|
|
}
|
|
|
|
|
Some('d') | Some('D') => {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
assert_char!(self, 'e' 'f' 'a' 'u' 'l' 't');
|
|
|
|
|
TokenKind::Keyword(Keyword::Default)
|
2020-01-06 00:39:49 -05:00
|
|
|
|
}
|
2020-03-17 20:13:53 -04:00
|
|
|
|
Some('g') | Some('G') => {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
assert_char!(self, 'l' 'o' 'b' 'a' 'l');
|
|
|
|
|
TokenKind::Keyword(Keyword::Global)
|
|
|
|
|
}
|
2020-01-06 00:39:49 -05:00
|
|
|
|
Some('=') => {
|
|
|
|
|
self.buf.next();
|
2020-01-29 21:02:32 -05:00
|
|
|
|
TokenKind::Op(Op::NotEqual)
|
2020-01-06 00:39:49 -05:00
|
|
|
|
}
|
|
|
|
|
_ => todo!("expected either `i` or `=` after `!`"),
|
2020-01-29 21:02:32 -05:00
|
|
|
|
}
|
2020-01-06 00:39:49 -05:00
|
|
|
|
}
|
|
|
|
|
|
2020-01-04 22:55:04 -05:00
|
|
|
|
fn lex_at_rule(&mut self) -> TokenKind {
|
2020-01-07 18:37:28 -05:00
|
|
|
|
self.buf.next();
|
2020-01-17 08:14:10 -05:00
|
|
|
|
self.pos.next_char();
|
2020-03-21 23:57:36 -04:00
|
|
|
|
if let TokenKind::Ident(s) = self.lex_ident() {
|
|
|
|
|
if !s.is_empty() {
|
|
|
|
|
TokenKind::AtRule(AtRuleKind::from(s.as_ref()))
|
|
|
|
|
} else {
|
|
|
|
|
TokenKind::Error("Expected identifier.".into())
|
2020-01-04 22:55:04 -05:00
|
|
|
|
}
|
2020-03-21 23:57:36 -04:00
|
|
|
|
} else {
|
|
|
|
|
TokenKind::Error("Expected identifier.".into())
|
2020-01-04 22:55:04 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-05 12:22:38 -05:00
|
|
|
|
fn lex_forward_slash(&mut self) -> TokenKind {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
match self.buf.peek().expect("expected something after '/'") {
|
|
|
|
|
'/' => {
|
2020-01-17 10:38:59 -05:00
|
|
|
|
self.buf.by_ref().take_while(|x| x != &'\n').for_each(drop);
|
2020-01-17 08:14:10 -05:00
|
|
|
|
self.pos.newline();
|
2020-01-05 12:22:38 -05:00
|
|
|
|
}
|
|
|
|
|
'*' => {
|
2020-01-08 20:39:05 -05:00
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.next_char();
|
2020-01-07 19:58:13 -05:00
|
|
|
|
let mut comment = String::new();
|
2020-01-05 12:22:38 -05:00
|
|
|
|
while let Some(tok) = self.buf.next() {
|
2020-01-26 16:50:08 -05:00
|
|
|
|
match tok {
|
|
|
|
|
'\n' => self.pos.newline(),
|
|
|
|
|
FORM_FEED => {
|
|
|
|
|
self.pos.newline();
|
|
|
|
|
comment.push('\n');
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
'\r' => {
|
|
|
|
|
if self.buf.peek() == Some(&'\n') {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
}
|
|
|
|
|
self.pos.newline();
|
|
|
|
|
comment.push('\n');
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
'*' if self.buf.peek() == Some(&'/') => {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
_ => self.pos.next_char(),
|
2020-01-05 12:22:38 -05:00
|
|
|
|
}
|
2020-01-07 19:58:13 -05:00
|
|
|
|
comment.push(tok);
|
2020-01-05 12:22:38 -05:00
|
|
|
|
}
|
2020-01-08 20:39:05 -05:00
|
|
|
|
return TokenKind::MultilineComment(comment);
|
2020-01-05 12:22:38 -05:00
|
|
|
|
}
|
|
|
|
|
_ => return TokenKind::Symbol(Symbol::Div),
|
|
|
|
|
}
|
|
|
|
|
TokenKind::Whitespace(Whitespace::Newline)
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-04 22:55:04 -05:00
|
|
|
|
fn lex_num(&mut self) -> TokenKind {
|
2020-03-21 14:39:16 -04:00
|
|
|
|
let mut whole = String::new();
|
2020-01-04 22:55:04 -05:00
|
|
|
|
while let Some(c) = self.buf.peek() {
|
2020-03-21 14:39:16 -04:00
|
|
|
|
if !c.is_numeric() {
|
2020-01-04 22:55:04 -05:00
|
|
|
|
break;
|
|
|
|
|
}
|
2020-03-21 14:39:16 -04:00
|
|
|
|
let tok = self.buf.next().unwrap();
|
2020-01-04 22:55:04 -05:00
|
|
|
|
self.pos.next_char();
|
2020-03-21 14:39:16 -04:00
|
|
|
|
whole.push(tok);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut dec = String::new();
|
|
|
|
|
|
|
|
|
|
if self.buf.peek() == Some(&'.') {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
dec.push('.');
|
|
|
|
|
while let Some(c) = self.buf.peek() {
|
|
|
|
|
if !c.is_numeric() {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
let tok = self.buf.next().unwrap();
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
dec.push(tok);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if dec.len() == 1 {
|
|
|
|
|
return TokenKind::Error("Expected digit.".into());
|
2020-01-04 22:55:04 -05:00
|
|
|
|
}
|
|
|
|
|
|
2020-03-21 14:39:16 -04:00
|
|
|
|
whole.push_str(&dec);
|
|
|
|
|
|
|
|
|
|
TokenKind::Number(whole)
|
2020-01-04 22:55:04 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn lex_hash(&mut self) -> TokenKind {
|
2020-01-12 10:52:51 -05:00
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
if self.buf.peek() == Some(&'{') {
|
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
return TokenKind::Interpolation;
|
|
|
|
|
}
|
|
|
|
|
TokenKind::Symbol(Symbol::Hash)
|
2020-01-04 22:55:04 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn lex_variable(&mut self) -> TokenKind {
|
2020-01-05 19:10:43 -05:00
|
|
|
|
self.buf.next();
|
|
|
|
|
self.pos.next_char();
|
|
|
|
|
let mut name = String::with_capacity(99);
|
2020-03-21 14:20:33 -04:00
|
|
|
|
if let Some(c) = self.buf.peek() {
|
|
|
|
|
if c == &'=' {
|
|
|
|
|
return TokenKind::Symbol(Symbol::Dollar);
|
|
|
|
|
} else if !c.is_alphabetic() && c != &'-' && c != &'_' {
|
2020-03-21 14:33:27 -04:00
|
|
|
|
return TokenKind::Error("Expected identifier.".into());
|
2020-03-21 13:02:24 -04:00
|
|
|
|
} else {
|
|
|
|
|
self.pos.next_char();
|
2020-03-21 14:20:33 -04:00
|
|
|
|
name.push(*c);
|
2020-03-21 13:02:24 -04:00
|
|
|
|
}
|
2020-03-21 14:20:33 -04:00
|
|
|
|
self.buf.next();
|
2020-03-21 13:02:24 -04:00
|
|
|
|
}
|
2020-01-04 22:55:04 -05:00
|
|
|
|
while let Some(c) = self.buf.peek() {
|
2020-01-27 17:21:18 -05:00
|
|
|
|
if !c.is_alphanumeric() && c != &'-' && c != &'_' {
|
2020-01-04 22:55:04 -05:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
let tok = self
|
|
|
|
|
.buf
|
|
|
|
|
.next()
|
|
|
|
|
.expect("this is impossible because we have already peeked");
|
|
|
|
|
self.pos.next_char();
|
2020-03-21 13:02:24 -04:00
|
|
|
|
name.push(tok);
|
2020-01-04 22:55:04 -05:00
|
|
|
|
}
|
2020-02-24 15:07:18 -05:00
|
|
|
|
if name.is_empty() {
|
|
|
|
|
TokenKind::Symbol(Symbol::Dollar)
|
|
|
|
|
} else {
|
|
|
|
|
TokenKind::Variable(name)
|
|
|
|
|
}
|
2020-01-04 22:55:04 -05:00
|
|
|
|
}
|
|
|
|
|
|
2020-03-21 00:10:15 -04:00
|
|
|
|
// TODO: handle weird characters that *are* ascii
|
|
|
|
|
// e.g. how do we handle `color: ;`
|
2020-01-04 22:55:04 -05:00
|
|
|
|
fn lex_ident(&mut self) -> TokenKind {
|
|
|
|
|
let mut string = String::with_capacity(99);
|
|
|
|
|
while let Some(c) = self.buf.peek() {
|
|
|
|
|
// we know that the first char is alphabetic from peeking
|
2020-03-20 23:53:26 -04:00
|
|
|
|
if !c.is_alphanumeric() && c != &'-' && c != &'_' && c.is_ascii() {
|
2020-01-04 22:55:04 -05:00
|
|
|
|
break;
|
|
|
|
|
}
|
2020-03-20 23:53:26 -04:00
|
|
|
|
if !c.is_ascii() {
|
|
|
|
|
IS_UTF8.store(true, Ordering::Relaxed);
|
|
|
|
|
}
|
2020-01-04 22:55:04 -05:00
|
|
|
|
let tok = self
|
|
|
|
|
.buf
|
|
|
|
|
.next()
|
|
|
|
|
.expect("this is impossible because we have already peeked");
|
|
|
|
|
self.pos.next_char();
|
2020-01-20 16:00:37 -05:00
|
|
|
|
string.push(tok);
|
2020-01-04 22:55:04 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Ok(kw) = Keyword::try_from(string.as_ref()) {
|
|
|
|
|
return TokenKind::Keyword(kw);
|
|
|
|
|
}
|
2020-02-08 08:48:31 -05:00
|
|
|
|
|
2020-02-02 14:46:58 -05:00
|
|
|
|
if string == "-" {
|
|
|
|
|
return TokenKind::Symbol(Symbol::Minus);
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-04 22:55:04 -05:00
|
|
|
|
TokenKind::Ident(string)
|
|
|
|
|
}
|
|
|
|
|
}
|