From 15edae53d6e3b45b18547213a49b344e44b56a14 Mon Sep 17 00:00:00 2001 From: ConnorSkees <39542938+ConnorSkees@users.noreply.github.com> Date: Sun, 26 Jan 2020 19:07:24 -0500 Subject: [PATCH] Lex attributes in a much more robust way :) --- src/lexer.rs | 86 ++++++++++++++++++++++++++++--------------------- src/selector.rs | 4 +-- tests/main.rs | 13 +++++--- 3 files changed, 60 insertions(+), 43 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index df90cba..aa49ba6 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -127,14 +127,17 @@ impl<'a> Lexer<'a> { TokenKind::Keyword(Keyword::Important) } - fn devour_whitespace(&mut self) { + fn devour_whitespace(&mut self) -> bool { + let mut found_whitespace = false; while let Some(c) = self.buf.peek() { if !is_whitespace(*c) { - break; + return found_whitespace; } + found_whitespace = true; self.buf.next(); self.pos.next_char(); } + found_whitespace } fn lex_at_rule(&mut self) -> TokenKind { @@ -300,41 +303,10 @@ impl<'a> Lexer<'a> { let mut case_sensitive = CaseKind::Sensitive; while let Some(c) = self.buf.peek() { - if c == &']' && !c.is_whitespace() { + if c == &']' || c.is_whitespace() { break; } - if c == &'i' || c == &'I' { - if c == &'i' { - case_sensitive = CaseKind::InsensitiveLowercase; - } else if c == &'I' { - case_sensitive = CaseKind::InsensitiveCapital; - } - let tok = self - .buf - .next() - .expect("this is impossible because we have already peeked"); - self.pos.next_char(); - self.devour_whitespace(); - match self.buf.next() { - Some(']') => { - return TokenKind::Attribute(Attribute { - kind, - attr, - value, - case_sensitive, - }) - } - Some(val) => { - self.pos.next_char(); - value.push(tok); - value.push(val); - } - None => todo!("expected something to come after "), - } - continue; - } - let tok = self .buf .next() @@ -343,9 +315,49 @@ impl<'a> Lexer<'a> { value.push(tok); } - self.devour_whitespace(); - - assert!(self.buf.next() == Some(']')); + if self.devour_whitespace() { + let n = self.buf.next(); + match n { + Some('i') | Some('I') => { + let case_sensitive = match n { + Some('i') => CaseKind::InsensitiveLowercase, + Some('I') => CaseKind::InsensitiveCapital, + _ => unsafe { std::hint::unreachable_unchecked() }, + }; + self.pos.next_char(); + self.devour_whitespace(); + match self.buf.next() { + Some(']') => { + return TokenKind::Attribute(Attribute { + kind, + attr, + value, + case_sensitive, + }) + } + Some(_) => todo!("modifier must be 1 character"), + None => todo!("unexpected EOF"), + } + } + Some(']') => { + return TokenKind::Attribute(Attribute { + kind, + attr, + value, + case_sensitive, + }) + } + Some(c) => { + value.push(' '); + value.push(c.clone()); + self.devour_whitespace(); + assert!(self.buf.next() == Some(']')); + } + None => todo!(), + } + } else { + assert!(self.buf.next() == Some(']')); + } TokenKind::Attribute(Attribute { kind, diff --git a/src/selector.rs b/src/selector.rs index 2fc9ac9..9cfe642 100644 --- a/src/selector.rs +++ b/src/selector.rs @@ -348,8 +348,8 @@ pub(crate) enum CaseKind { impl Display for CaseKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::InsensitiveCapital => write!(f, "I"), - Self::InsensitiveLowercase => write!(f, "i"), + Self::InsensitiveCapital => write!(f, " I"), + Self::InsensitiveLowercase => write!(f, " i"), Self::Sensitive => write!(f, ""), } } diff --git a/tests/main.rs b/tests/main.rs index 34a6399..a5b9e66 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -143,10 +143,15 @@ mod test_selectors { selector_attribute_i_in_attr, "[atitr=val] {\n color: red;\n}\n" ); - // test!( - // selector_attribute_i_in_val, - // "[attr=vail] {\n color: red;\n}\n" - // ); + test!( + selector_attribute_i_in_val, + "[attr=vail] {\n color: red;\n}\n" + ); + test!( + selector_attribute_whitespace, + "[attr *= val ] {\n color: red;\n}\n", + "[attr*=val] {\n color: red;\n}\n" + ); test!( selector_attribute_equals, "[attr=val] {\n color: red;\n}\n"