2020-01-04 22:55:04 -05:00
|
|
|
use std::convert::TryFrom;
|
|
|
|
use std::iter::Peekable;
|
|
|
|
use std::str::Chars;
|
|
|
|
|
2020-01-20 08:36:06 -05:00
|
|
|
use crate::common::{AtRuleKind, Keyword, Op, Pos, Symbol};
|
2020-01-11 14:51:31 -05:00
|
|
|
use crate::selector::{Attribute, AttributeKind};
|
2020-01-04 22:55:04 -05:00
|
|
|
use crate::units::Unit;
|
|
|
|
use crate::{Token, TokenKind, Whitespace};
|
|
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
pub struct Lexer<'a> {
|
|
|
|
tokens: Vec<Token>,
|
|
|
|
buf: Peekable<Chars<'a>>,
|
|
|
|
pos: Pos,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Iterator for Lexer<'a> {
|
|
|
|
type Item = Token;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
macro_rules! symbol {
|
|
|
|
($self:ident, $symbol:ident) => {{
|
|
|
|
$self.buf.next();
|
|
|
|
$self.pos.next_char();
|
|
|
|
TokenKind::Symbol(Symbol::$symbol)
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
macro_rules! whitespace {
|
|
|
|
($self:ident, $whitespace:ident) => {{
|
|
|
|
$self.buf.next();
|
|
|
|
$self.pos.next_char();
|
|
|
|
TokenKind::Whitespace(Whitespace::$whitespace)
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
let kind: TokenKind = match self.buf.peek().unwrap_or(&'\0') {
|
2020-01-06 00:39:49 -05:00
|
|
|
'a'..='z' | 'A'..='Z' | '-' | '_' => self.lex_ident(),
|
2020-01-04 22:55:04 -05:00
|
|
|
'@' => self.lex_at_rule(),
|
|
|
|
'0'..='9' => self.lex_num(),
|
|
|
|
'$' => self.lex_variable(),
|
|
|
|
':' => symbol!(self, Colon),
|
|
|
|
',' => symbol!(self, Comma),
|
|
|
|
'.' => symbol!(self, Period),
|
|
|
|
';' => symbol!(self, SemiColon),
|
2020-01-11 19:16:59 -05:00
|
|
|
'(' => symbol!(self, OpenParen),
|
|
|
|
')' => symbol!(self, CloseParen),
|
2020-01-04 22:55:04 -05:00
|
|
|
'+' => symbol!(self, Plus),
|
|
|
|
'~' => symbol!(self, Tilde),
|
|
|
|
'\'' => symbol!(self, SingleQuote),
|
|
|
|
'"' => symbol!(self, DoubleQuote),
|
|
|
|
' ' => whitespace!(self, Space),
|
|
|
|
'\t' => whitespace!(self, Tab),
|
2020-01-07 18:37:28 -05:00
|
|
|
'\n' => {
|
|
|
|
self.buf.next();
|
|
|
|
self.pos.newline();
|
|
|
|
TokenKind::Whitespace(Whitespace::Newline)
|
|
|
|
}
|
|
|
|
'\r' => {
|
|
|
|
self.buf.next();
|
|
|
|
TokenKind::Whitespace(Whitespace::CarriageReturn)
|
|
|
|
}
|
2020-01-12 10:52:51 -05:00
|
|
|
'#' => self.lex_hash(),
|
2020-01-12 19:56:58 -05:00
|
|
|
'{' => symbol!(self, OpenCurlyBrace),
|
2020-01-04 22:55:04 -05:00
|
|
|
'*' => symbol!(self, Mul),
|
2020-01-12 19:56:58 -05:00
|
|
|
'}' => symbol!(self, CloseCurlyBrace),
|
2020-01-11 14:51:31 -05:00
|
|
|
'&' => symbol!(self, BitAnd),
|
2020-01-05 12:22:38 -05:00
|
|
|
'/' => self.lex_forward_slash(),
|
2020-01-04 22:55:04 -05:00
|
|
|
'%' => {
|
|
|
|
self.buf.next();
|
|
|
|
self.pos.next_char();
|
|
|
|
TokenKind::Unit(Unit::Percent)
|
|
|
|
}
|
|
|
|
'[' => {
|
|
|
|
self.buf.next();
|
|
|
|
self.pos.next_char();
|
|
|
|
self.lex_attr()
|
|
|
|
}
|
2020-01-06 00:39:49 -05:00
|
|
|
'!' => self.lex_exclamation(),
|
2020-01-04 22:55:04 -05:00
|
|
|
'<' => symbol!(self, Lt),
|
|
|
|
'>' => symbol!(self, Gt),
|
|
|
|
'\0' => return None,
|
|
|
|
_ => todo!("unknown char"),
|
|
|
|
};
|
|
|
|
self.pos.next_char();
|
|
|
|
Some(Token {
|
|
|
|
kind,
|
|
|
|
pos: self.pos,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns `true` when `c` is a whitespace character that may be skipped
/// between the parts of a construct: space, horizontal tab, line feed or
/// carriage return.
///
/// Tab is included because the lexer itself tokenizes `'\t'` as whitespace;
/// leaving it out made whitespace skipping stop at tabs.
fn is_whitespace(c: char) -> bool {
    match c {
        ' ' | '\t' | '\n' | '\r' => true,
        _ => false,
    }
}
|
|
|
|
|
|
|
|
impl<'a> Lexer<'a> {
|
|
|
|
pub fn new(buf: &'a str) -> Lexer<'a> {
|
|
|
|
Lexer {
|
|
|
|
tokens: Vec::with_capacity(buf.len()),
|
|
|
|
buf: buf.chars().peekable(),
|
|
|
|
pos: Pos::new(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-06 00:39:49 -05:00
|
|
|
fn lex_exclamation(&mut self) -> TokenKind {
|
|
|
|
self.buf.next();
|
|
|
|
self.pos.next_char();
|
|
|
|
macro_rules! assert_char {
|
|
|
|
($self:ident, $($char:literal)*) => {
|
|
|
|
$(
|
|
|
|
assert_eq!($char, $self.buf.next().expect("expected char").to_ascii_lowercase(), "expected keyword `important`");
|
|
|
|
)*
|
|
|
|
}
|
|
|
|
};
|
|
|
|
match self.buf.peek() {
|
|
|
|
Some('i') | Some('I') => {
|
|
|
|
self.buf.next();
|
|
|
|
assert_char!(self, 'm' 'p' 'o' 'r' 't' 'a' 'n' 't');
|
|
|
|
}
|
|
|
|
Some('=') => {
|
|
|
|
self.buf.next();
|
|
|
|
return TokenKind::Op(Op::NotEqual);
|
|
|
|
}
|
|
|
|
_ => todo!("expected either `i` or `=` after `!`"),
|
|
|
|
};
|
|
|
|
TokenKind::Keyword(Keyword::Important)
|
|
|
|
}
|
|
|
|
|
2020-01-04 22:55:04 -05:00
|
|
|
fn devour_whitespace(&mut self) {
|
|
|
|
while let Some(c) = self.buf.peek() {
|
|
|
|
if !is_whitespace(*c) {
|
|
|
|
break;
|
|
|
|
}
|
2020-01-05 19:10:43 -05:00
|
|
|
self.buf.next();
|
|
|
|
self.pos.next_char();
|
2020-01-04 22:55:04 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn lex_at_rule(&mut self) -> TokenKind {
|
2020-01-07 18:37:28 -05:00
|
|
|
self.buf.next();
|
2020-01-17 08:14:10 -05:00
|
|
|
self.pos.next_char();
|
2020-01-04 22:55:04 -05:00
|
|
|
let mut string = String::with_capacity(99);
|
|
|
|
while let Some(c) = self.buf.peek() {
|
2020-01-05 20:23:35 -05:00
|
|
|
if !c.is_alphabetic() && c != &'-' && c != &'_' {
|
2020-01-04 22:55:04 -05:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
let tok = self
|
|
|
|
.buf
|
|
|
|
.next()
|
|
|
|
.expect("this is impossible because we have already peeked");
|
|
|
|
self.pos.next_char();
|
|
|
|
string.push(tok);
|
|
|
|
}
|
|
|
|
|
2020-01-20 08:36:06 -05:00
|
|
|
if let Ok(rule) = AtRuleKind::try_from(string.as_ref()) {
|
2020-01-07 18:37:28 -05:00
|
|
|
TokenKind::AtRule(rule)
|
2020-01-04 22:55:04 -05:00
|
|
|
} else {
|
|
|
|
panic!("expected ident after `@`")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-05 12:22:38 -05:00
|
|
|
fn lex_forward_slash(&mut self) -> TokenKind {
|
|
|
|
self.buf.next();
|
|
|
|
self.pos.next_char();
|
|
|
|
match self.buf.peek().expect("expected something after '/'") {
|
|
|
|
'/' => {
|
2020-01-17 10:38:59 -05:00
|
|
|
self.buf.by_ref().take_while(|x| x != &'\n').for_each(drop);
|
2020-01-17 08:14:10 -05:00
|
|
|
self.pos.newline();
|
2020-01-05 12:22:38 -05:00
|
|
|
}
|
|
|
|
'*' => {
|
2020-01-08 20:39:05 -05:00
|
|
|
self.buf.next();
|
|
|
|
self.pos.next_char();
|
2020-01-07 19:58:13 -05:00
|
|
|
let mut comment = String::new();
|
2020-01-05 12:22:38 -05:00
|
|
|
while let Some(tok) = self.buf.next() {
|
2020-01-07 19:58:13 -05:00
|
|
|
if tok == '\n' {
|
|
|
|
self.pos.newline()
|
|
|
|
} else {
|
|
|
|
self.pos.next_char();
|
|
|
|
}
|
|
|
|
if tok == '*' && self.buf.peek() == Some(&'/') {
|
|
|
|
self.buf.next();
|
2020-01-05 12:52:50 -05:00
|
|
|
break;
|
2020-01-05 12:22:38 -05:00
|
|
|
}
|
2020-01-07 19:58:13 -05:00
|
|
|
comment.push(tok);
|
2020-01-05 12:22:38 -05:00
|
|
|
}
|
2020-01-08 20:39:05 -05:00
|
|
|
return TokenKind::MultilineComment(comment);
|
2020-01-05 12:22:38 -05:00
|
|
|
}
|
|
|
|
_ => return TokenKind::Symbol(Symbol::Div),
|
|
|
|
}
|
|
|
|
TokenKind::Whitespace(Whitespace::Newline)
|
|
|
|
}
|
|
|
|
|
2020-01-04 22:55:04 -05:00
|
|
|
fn lex_num(&mut self) -> TokenKind {
|
|
|
|
let mut string = String::with_capacity(99);
|
|
|
|
while let Some(c) = self.buf.peek() {
|
|
|
|
if !c.is_numeric() && c != &'.' {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
let tok = self
|
|
|
|
.buf
|
|
|
|
.next()
|
|
|
|
.expect("this is impossible because we have already peeked");
|
|
|
|
self.pos.next_char();
|
|
|
|
string.push(tok);
|
|
|
|
}
|
|
|
|
|
|
|
|
TokenKind::Number(string)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn lex_hash(&mut self) -> TokenKind {
|
2020-01-12 10:52:51 -05:00
|
|
|
self.buf.next();
|
|
|
|
self.pos.next_char();
|
|
|
|
if self.buf.peek() == Some(&'{') {
|
|
|
|
self.buf.next();
|
|
|
|
self.pos.next_char();
|
|
|
|
return TokenKind::Interpolation;
|
|
|
|
}
|
|
|
|
TokenKind::Symbol(Symbol::Hash)
|
2020-01-04 22:55:04 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn lex_attr(&mut self) -> TokenKind {
|
|
|
|
let mut attr = String::with_capacity(99);
|
2020-01-14 20:23:05 -05:00
|
|
|
self.devour_whitespace();
|
2020-01-04 22:55:04 -05:00
|
|
|
while let Some(c) = self.buf.peek() {
|
2020-01-05 20:23:35 -05:00
|
|
|
if !c.is_alphabetic() && c != &'-' && c != &'_' {
|
2020-01-04 22:55:04 -05:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
let tok = self
|
|
|
|
.buf
|
|
|
|
.next()
|
|
|
|
.expect("this is impossible because we have already peeked");
|
|
|
|
self.pos.next_char();
|
|
|
|
attr.push(tok);
|
|
|
|
}
|
|
|
|
|
|
|
|
self.devour_whitespace();
|
|
|
|
|
|
|
|
let kind = match self
|
|
|
|
.buf
|
|
|
|
.next()
|
|
|
|
.expect("todo! expected kind (should be error)")
|
|
|
|
{
|
|
|
|
']' => {
|
2020-01-11 14:51:31 -05:00
|
|
|
return TokenKind::Attribute(Attribute {
|
2020-01-04 22:55:04 -05:00
|
|
|
kind: AttributeKind::Any,
|
|
|
|
attr,
|
|
|
|
value: String::new(),
|
|
|
|
case_sensitive: true,
|
2020-01-11 14:51:31 -05:00
|
|
|
})
|
2020-01-04 22:55:04 -05:00
|
|
|
}
|
|
|
|
'i' => {
|
|
|
|
self.devour_whitespace();
|
|
|
|
assert!(self.buf.next() == Some(']'));
|
2020-01-11 14:51:31 -05:00
|
|
|
return TokenKind::Attribute(Attribute {
|
2020-01-04 22:55:04 -05:00
|
|
|
kind: AttributeKind::Any,
|
|
|
|
attr,
|
|
|
|
value: String::new(),
|
|
|
|
case_sensitive: false,
|
2020-01-11 14:51:31 -05:00
|
|
|
});
|
2020-01-04 22:55:04 -05:00
|
|
|
}
|
|
|
|
'=' => AttributeKind::Equals,
|
|
|
|
'~' => AttributeKind::InList,
|
|
|
|
'|' => AttributeKind::BeginsWithHyphenOrExact,
|
|
|
|
'^' => AttributeKind::StartsWith,
|
|
|
|
'$' => AttributeKind::EndsWith,
|
|
|
|
'*' => AttributeKind::Contains,
|
|
|
|
_ => todo!("expected kind (should be error)"),
|
|
|
|
};
|
|
|
|
|
|
|
|
if kind != AttributeKind::Equals {
|
|
|
|
assert!(self.buf.next() == Some('='));
|
|
|
|
}
|
|
|
|
|
|
|
|
self.devour_whitespace();
|
|
|
|
|
|
|
|
let mut value = String::with_capacity(99);
|
|
|
|
let mut case_sensitive = true;
|
|
|
|
|
|
|
|
while let Some(c) = self.buf.peek() {
|
2020-01-05 20:51:14 -05:00
|
|
|
if !c.is_alphabetic() && c != &'-' && c != &'_' && c != &'"' && c != &'\'' {
|
2020-01-04 22:55:04 -05:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if c == &'i' {
|
|
|
|
let tok = self
|
|
|
|
.buf
|
|
|
|
.next()
|
|
|
|
.expect("this is impossible because we have already peeked");
|
|
|
|
self.pos.next_char();
|
|
|
|
self.devour_whitespace();
|
|
|
|
match self.buf.next() {
|
|
|
|
Some(']') => case_sensitive = false,
|
|
|
|
Some(val) => {
|
|
|
|
self.pos.next_char();
|
|
|
|
value.push(tok);
|
|
|
|
value.push(val);
|
|
|
|
}
|
|
|
|
None => todo!("expected something to come after "),
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
let tok = self
|
|
|
|
.buf
|
|
|
|
.next()
|
|
|
|
.expect("this is impossible because we have already peeked");
|
|
|
|
self.pos.next_char();
|
|
|
|
value.push(tok);
|
|
|
|
}
|
|
|
|
|
|
|
|
self.devour_whitespace();
|
|
|
|
|
|
|
|
assert!(self.buf.next() == Some(']'));
|
|
|
|
|
2020-01-11 14:51:31 -05:00
|
|
|
TokenKind::Attribute(Attribute {
|
2020-01-04 22:55:04 -05:00
|
|
|
kind,
|
|
|
|
attr,
|
|
|
|
value,
|
|
|
|
case_sensitive,
|
2020-01-11 14:51:31 -05:00
|
|
|
})
|
2020-01-04 22:55:04 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn lex_variable(&mut self) -> TokenKind {
|
2020-01-05 19:10:43 -05:00
|
|
|
self.buf.next();
|
|
|
|
self.pos.next_char();
|
|
|
|
let mut name = String::with_capacity(99);
|
2020-01-04 22:55:04 -05:00
|
|
|
while let Some(c) = self.buf.peek() {
|
2020-01-05 20:23:35 -05:00
|
|
|
if !c.is_alphabetic() && c != &'-' && c != &'_' {
|
2020-01-04 22:55:04 -05:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
let tok = self
|
|
|
|
.buf
|
|
|
|
.next()
|
|
|
|
.expect("this is impossible because we have already peeked");
|
|
|
|
self.pos.next_char();
|
2020-01-11 20:41:36 -05:00
|
|
|
if tok == '_' {
|
|
|
|
name.push('-');
|
|
|
|
} else {
|
|
|
|
name.push(tok);
|
|
|
|
}
|
2020-01-04 22:55:04 -05:00
|
|
|
}
|
2020-01-05 19:10:43 -05:00
|
|
|
TokenKind::Variable(name)
|
2020-01-04 22:55:04 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn lex_ident(&mut self) -> TokenKind {
|
|
|
|
let mut string = String::with_capacity(99);
|
|
|
|
while let Some(c) = self.buf.peek() {
|
|
|
|
// we know that the first char is alphabetic from peeking
|
2020-01-05 20:23:35 -05:00
|
|
|
if !c.is_alphanumeric() && c != &'-' && c != &'_' {
|
2020-01-04 22:55:04 -05:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
let tok = self
|
|
|
|
.buf
|
|
|
|
.next()
|
|
|
|
.expect("this is impossible because we have already peeked");
|
|
|
|
self.pos.next_char();
|
2020-01-11 20:41:36 -05:00
|
|
|
if tok == '_' {
|
|
|
|
string.push('-');
|
|
|
|
} else {
|
|
|
|
string.push(tok);
|
|
|
|
}
|
2020-01-04 22:55:04 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
if let Ok(kw) = Keyword::try_from(string.as_ref()) {
|
|
|
|
return TokenKind::Keyword(kw);
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Ok(kw) = Unit::try_from(string.as_ref()) {
|
|
|
|
return TokenKind::Unit(kw);
|
|
|
|
}
|
|
|
|
|
|
|
|
TokenKind::Ident(string)
|
|
|
|
}
|
|
|
|
}
|