HACK: somewhat handle unicode escapes

This commit is contained in:
ConnorSkees 2020-03-22 13:45:41 -04:00
parent 2f0fbd2415
commit f9730b9ec5
4 changed files with 117 additions and 5 deletions

View File

@ -17,6 +17,7 @@ pub(crate) struct Lexer<'a> {
tokens: Vec<Token>,
buf: Peekable<Chars<'a>>,
pos: Pos,
should_emit_backslash: usize,
}
impl<'a> Iterator for Lexer<'a> {
@ -36,6 +37,13 @@ impl<'a> Iterator for Lexer<'a> {
TokenKind::Whitespace(Whitespace::$whitespace)
}};
}
if self.should_emit_backslash > 0 {
self.should_emit_backslash -= 1;
return Some(Token {
kind: TokenKind::Symbol(Symbol::BackSlash),
pos: self.pos,
});
}
let kind: TokenKind = match self.buf.peek().unwrap_or(&'\0') {
'a'..='z' | 'A'..='Z' | '_' => self.lex_ident(),
'-' => {
@ -107,7 +115,7 @@ impl<'a> Iterator for Lexer<'a> {
}
}
'?' => symbol!(self, QuestionMark),
'\\' => symbol!(self, BackSlash),
'\\' => self.lex_back_slash().0,
'~' => symbol!(self, Tilde),
'\'' => symbol!(self, SingleQuote),
'"' => symbol!(self, DoubleQuote),
@ -158,6 +166,7 @@ impl<'a> Lexer<'a> {
tokens: Vec::with_capacity(buf.len()),
buf: buf.chars().peekable(),
pos: Pos::new(),
should_emit_backslash: 0,
}
}
@ -209,6 +218,51 @@ impl<'a> Lexer<'a> {
}
}
/// Lexes a backslash and the escape sequence (if any) that follows it.
///
/// The next character in `self.buf` is consumed unconditionally; the caller
/// is expected to have peeked a `\` there.
///
/// Returns a `(kind, escaped_backslash)` pair:
///
/// * `\\` — both backslashes are consumed, `should_emit_backslash` is set to
///   `1`, and `(Symbol(BackSlash), true)` is returned.
/// * `\` followed by one to six hex digits — the digits are decoded into a
///   single character, one optional trailing whitespace character is
///   consumed, and whatever identifier text `lex_ident` reads next is
///   appended. Returns `(Ident(..), false)`.
/// * anything else — only the backslash is consumed and
///   `(Symbol(BackSlash), false)` is returned.
///
/// Following the CSS "consume an escaped code point" algorithm, a code point
/// that is zero, a surrogate, or above U+10FFFF is replaced with U+FFFD
/// instead of panicking on user input.
fn lex_back_slash(&mut self) -> (TokenKind, bool) {
    // CSS allows at most six hex digits in an escape; a seventh digit
    // belongs to whatever follows the escape.
    const MAX_HEX_DIGITS: usize = 6;

    self.buf.next();
    self.pos.next_char();

    if self.buf.peek() == Some(&'\\') {
        self.buf.next();
        self.pos.next_char();
        self.should_emit_backslash = 1;
        return (TokenKind::Symbol(Symbol::BackSlash), true);
    }

    let mut hex = String::with_capacity(MAX_HEX_DIGITS);
    while let Some(c) = self.buf.peek() {
        if hex.len() >= MAX_HEX_DIGITS || !c.is_ascii_hexdigit() {
            break;
        }
        hex.push(*c);
        self.buf.next();
        self.pos.next_char();
    }

    if hex.is_empty() {
        return (TokenKind::Symbol(Symbol::BackSlash), false);
    }

    // One to six hex digits always fit in a `u32`, so parsing cannot fail;
    // the decoded value may still not be a valid Unicode scalar value.
    let escaped = u32::from_str_radix(&hex, 16)
        .ok()
        .filter(|&code| code != 0)
        .and_then(std::char::from_u32)
        .unwrap_or(std::char::REPLACEMENT_CHARACTER);
    let mut string = escaped.to_string();

    // A single whitespace character terminates a hex escape and is part of
    // it; any further whitespace is significant and must be left in place.
    if self.buf.peek().map_or(false, |c| c.is_ascii_whitespace()) {
        self.buf.next();
        self.pos.next_char();
    }

    // NOTE(review): any non-`Ident` result from `lex_ident` is discarded
    // here, exactly as before — confirm that is intended.
    if let TokenKind::Ident(rest) = self.lex_ident() {
        string.push_str(&rest);
    }
    (TokenKind::Ident(string), false)
}
/// Consumes the run of ASCII whitespace at the front of the input, advancing
/// the position tracker once for every character that is skipped.
///
/// Stops at the first non-whitespace character or at end of input, leaving
/// that character unconsumed.
fn devour_whitespace(&mut self) {
    loop {
        match self.buf.peek() {
            Some(ch) if ch.is_ascii_whitespace() => {
                self.buf.next();
                self.pos.next_char();
            }
            _ => break,
        }
    }
}
fn lex_forward_slash(&mut self) -> TokenKind {
self.buf.next();
self.pos.next_char();
@ -331,18 +385,30 @@ impl<'a> Lexer<'a> {
}
}
// TODO: handle weird characters that *are* ascii
// e.g. how do we handle `color: ;`
fn lex_ident(&mut self) -> TokenKind {
let mut string = String::with_capacity(99);
while let Some(c) = self.buf.peek() {
// we know that the first char is alphabetic from peeking
if !c.is_alphanumeric() && c != &'-' && c != &'_' && c.is_ascii() {
if !c.is_alphanumeric() && c != &'-' && c != &'_' && c != &'\\' && c.is_ascii() {
break;
}
if !c.is_ascii() {
IS_UTF8.store(true, Ordering::Relaxed);
}
if c == &'\\' {
match self.lex_back_slash() {
(TokenKind::Ident(s), _) => string.push_str(&s),
(TokenKind::Symbol(..), true) => {
self.should_emit_backslash = 2;
break;
}
(TokenKind::Symbol(..), false) => {
self.should_emit_backslash = 1;
break;
}
_ => unreachable!(),
}
continue;
}
let tok = self
.buf
.next()

View File

@ -160,6 +160,19 @@ pub(crate) fn flatten_ident<I: Iterator<Item = Token>>(
toks.next();
s.push_str(n)
}
TokenKind::Symbol(Symbol::BackSlash) => {
s.push('\\');
toks.next();
if let Some(tok) = toks.next() {
match tok.kind {
TokenKind::Symbol(Symbol::Plus) => s.push('+'),
TokenKind::Symbol(Symbol::BackSlash) => s.push('\\'),
_ => todo!("value after \\"),
}
} else {
todo!()
}
}
_ => break,
}
}

View File

@ -294,6 +294,23 @@ impl Value {
TokenKind::Keyword(Keyword::To(s)) => Ok(Value::Ident(s, QuoteKind::None)),
TokenKind::AtRule(_) => Err("expected \";\".".into()),
TokenKind::Error(e) => return Err(e),
TokenKind::Symbol(Symbol::BackSlash) => {
if let Some(tok) = toks.next() {
match tok.kind {
TokenKind::Symbol(Symbol::Plus) => Ok(Value::Ident(
"\\+".to_string() + &flatten_ident(toks, scope, super_selector)?,
QuoteKind::None,
)),
TokenKind::Symbol(Symbol::BackSlash) => Ok(Value::Ident(
"\\\\".to_string() + &flatten_ident(toks, scope, super_selector)?,
QuoteKind::None,
)),
_ => todo!("value after \\"),
}
} else {
todo!()
}
}
TokenKind::Op(Op::Plus) | TokenKind::Symbol(Symbol::Plus) => {
devour_whitespace_or_comment(toks);
let v = Self::_from_tokens(toks, scope, super_selector)?;

View File

@ -66,3 +66,19 @@ test!(
"a {\n color: red😁\n}\n",
"@charset \"UTF-8\";\na {\n color: red😁;\n}\n"
);
// NOTE(review): the `test!` macro is defined outside this view; these
// comments assume the form `test!(name, input, expected_output)` — confirm.

// `\69` is the hex escape for `i`, so the at-rule name `@\69 f` spells `@if`;
// the space after `69` sits between the escape and the following `f`.
test!(
escape_recognized_as_at_rule,
"@\\69 f true {\n a {\n b: c;\n }\n}\n",
"a {\n b: c;\n}\n"
);
// `\6c` is the hex escape for `l`: an escape in the middle of an identifier
// (`b\6cue`) with more identifier text directly after it.
test!(
escape_in_middle,
"a {\n color: b\\6cue;\n}\n",
"a {\n color: blue;\n}\n"
);
// `\65` is the hex escape for `e`: the escape is the last thing in the
// identifier and is followed immediately by `;`.
test!(
escape_at_end,
"a {\n color: blu\\65;\n}\n",
"a {\n color: blue;\n}\n"
);
// The input contains two consecutive backslashes before `65`. Only one string
// is passed here; presumably the macro then expects the output to equal the
// input — verify against the macro definition.
test!(double_escape_is_preserved, "a {\n color: r\\\\65;\n}\n");