From 16ad7298fe36d3f47182ab09e187cf2050a51484 Mon Sep 17 00:00:00 2001 From: connorskees Date: Sat, 7 Jan 2023 08:56:24 +0000 Subject: [PATCH] lazily compute token spans --- grass_internal/src/ast/media.rs | 9 ++- grass_internal/src/evaluate/visitor.rs | 13 ++-- grass_internal/src/lexer.rs | 85 +++++++++++++------------- grass_internal/src/lib.rs | 3 +- grass_internal/src/parse/base.rs | 25 ++++---- grass_internal/src/parse/keyframes.rs | 2 +- grass_internal/src/parse/sass.rs | 2 +- grass_internal/src/parse/stylesheet.rs | 20 +++--- grass_internal/src/parse/value.rs | 30 ++++----- grass_internal/src/token.rs | 19 ------ grass_internal/src/value/mod.rs | 8 +-- 11 files changed, 98 insertions(+), 118 deletions(-) delete mode 100644 grass_internal/src/token.rs diff --git a/grass_internal/src/ast/media.rs b/grass_internal/src/ast/media.rs index 03c3449..f96709b 100644 --- a/grass_internal/src/ast/media.rs +++ b/grass_internal/src/ast/media.rs @@ -2,7 +2,12 @@ use std::fmt::{self, Write}; use codemap::Span; -use crate::{ast::CssStmt, error::SassResult, lexer::Lexer, parse::MediaQueryParser, token::Token}; +use crate::{ + ast::CssStmt, + error::SassResult, + lexer::{Lexer, TokenLexer}, + parse::MediaQueryParser, +}; #[derive(Debug, Clone)] pub(crate) struct MediaRule { @@ -54,7 +59,7 @@ impl MediaQuery { } pub fn parse_list(list: &str, span: Span) -> SassResult> { - let toks = Lexer::new(list.chars().map(|x| Token::new(span, x)).collect(), span); + let toks = Lexer::new(TokenLexer::new(list.chars().peekable()).collect(), span); MediaQueryParser::new(toks).parse() } diff --git a/grass_internal/src/evaluate/visitor.rs b/grass_internal/src/evaluate/visitor.rs index 0ffee6a..dafbc96 100644 --- a/grass_internal/src/evaluate/visitor.rs +++ b/grass_internal/src/evaluate/visitor.rs @@ -25,7 +25,7 @@ use crate::{ common::{unvendor, BinaryOp, Identifier, ListSeparator, QuoteKind, UnaryOp}, error::{SassError, SassResult}, interner::InternedString, - lexer::Lexer, + lexer::{Lexer, TokenLexer}, parse::{ AtRootQueryParser, CssParser, KeyframesSelectorParser, SassParser, ScssParser, StylesheetParser, @@ -34,7 +34,6 @@ use crate::{ ComplexSelectorComponent, ExtendRule, ExtendedSelector, ExtensionStore, SelectorList, SelectorParser, }, - token::Token, utils::{to_sentence, trim_ascii}, value::{ ArgList, CalculationArg, CalculationName, Number, SassCalculation, SassFunction, SassMap, @@ -978,10 +977,8 @@ impl<'a> Visitor<'a> { let span = query.span; - let query_toks = Lexer::new( - resolved.chars().map(|x| Token::new(span, x)).collect(), - span, - ); + let query_toks = + Lexer::new(TokenLexer::new(resolved.chars().peekable()).collect(), span); AtRootQueryParser::new(query_toks).parse()? } @@ -1142,7 +1139,7 @@ impl<'a> Visitor<'a> { span: Span, ) -> SassResult { let sel_toks = Lexer::new( - selector_text.chars().map(|x| Token::new(span, x)).collect(), + TokenLexer::new(selector_text.chars().peekable()).collect(), span, ); @@ -2792,7 +2789,7 @@ impl<'a> Visitor<'a> { if self.flags.in_keyframes() { let span = ruleset.selector_span; let sel_toks = Lexer::new( - selector_text.chars().map(|x| Token::new(span, x)).collect(), + TokenLexer::new(selector_text.chars().peekable()).collect(), span, ); let parsed_selector = diff --git a/grass_internal/src/lexer.rs b/grass_internal/src/lexer.rs index a85c205..a19ddbb 100644 --- a/grass_internal/src/lexer.rs +++ b/grass_internal/src/lexer.rs @@ -2,16 +2,19 @@ use std::{borrow::Cow, iter::Peekable, str::Chars, sync::Arc}; use codemap::{File, Span}; -use crate::Token; - const FORM_FEED: char = '\x0C'; +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(crate) struct Token { + pub kind: char, + pos: u32, +} + #[derive(Debug, Clone)] // todo: remove lifetime as Cow is now superfluous pub(crate) struct Lexer<'a> { buf: Cow<'a, [Token]>, - /// The span to be used in the case that `buf` is empty - empty_span: Span, + entire_span: Span, cursor: usize, } @@ -27,36 +30,35 @@ impl<'a> Lexer<'a> { matches!(self.peek(), Some(Token { kind, .. }) if kind == c) } - pub fn span_from(&mut self, start: usize) -> Span { - let start = match self.buf.get(start) { - Some(start) => start.pos, - None => return self.current_span(), + /// Gets the span of the character at the given index. If the index is out of + /// bounds, it returns the span of the last character. If the input is empty, + /// it returns an empty span + fn span_at_index(&self, idx: usize) -> Span { + let (start, len) = match self.buf.get(idx) { + Some(tok) => (tok.pos, tok.kind.len_utf8()), + None => match self.buf.last() { + Some(tok) => (tok.pos, tok.kind.len_utf8()), + None => (0, 0), + }, }; - self.cursor = self.cursor.saturating_sub(1); - let end = self.current_span(); - self.cursor += 1; + + self.entire_span + .subspan(start as u64, start as u64 + len as u64) + } + + pub fn span_from(&self, start: usize) -> Span { + let start = self.span_at_index(start); + let end = self.prev_span(); start.merge(end) } pub fn prev_span(&self) -> Span { - match self.buf.get(self.cursor.saturating_sub(1)) { - Some(tok) => tok.pos, - None => match self.buf.last() { - Some(tok) => tok.pos, - None => self.empty_span, - }, - } + self.span_at_index(self.cursor.saturating_sub(1)) } pub fn current_span(&self) -> Span { - match self.buf.get(self.cursor) { - Some(tok) => tok.pos, - None => match self.buf.last() { - Some(tok) => tok.pos, - None => self.empty_span, - }, - } + self.span_at_index(self.cursor) } pub fn peek(&self) -> Option { @@ -104,10 +106,16 @@ impl<'a> Iterator for Lexer<'a> { } } -struct TokenLexer<'a> { +/// Lex a string into a series of tokens +pub(crate) struct TokenLexer<'a> { buf: Peekable>, - cursor: usize, - file: Arc, + cursor: u32, +} + +impl<'a> TokenLexer<'a> { + pub fn new(buf: Peekable>) -> TokenLexer<'a> { + Self { buf, cursor: 0 } + } } impl<'a> Iterator for TokenLexer<'a> { @@ -124,11 +132,8 @@ impl<'a> Iterator for TokenLexer<'a> { } c => c, }; - let len = kind.len_utf8(); - let pos = self - .file - .span - .subspan(self.cursor as u64, (self.cursor + len) as u64); + let len = kind.len_utf8() as u32; + let pos = self.cursor; self.cursor += len; Some(Token { pos, kind }) } @@ -140,21 +145,15 @@ impl<'a> Iterator for TokenLexer<'a> { impl<'a> Lexer<'a> { pub fn new_from_file(file: &Arc) -> Self { - let buf = TokenLexer { - file: Arc::clone(file), - buf: file.source().chars().peekable(), - cursor: 0, - } - .collect(); - - Self::new(buf, file.span.subspan(0, 0)) + let buf = TokenLexer::new(file.source().chars().peekable()).collect(); + Self::new(buf, file.span) } - pub fn new(buf: Vec, empty_span: Span) -> Self { + pub fn new(buf: Vec, entire_span: Span) -> Self { Lexer { buf: Cow::Owned(buf), cursor: 0, - empty_span, + entire_span, } } } diff --git a/grass_internal/src/lib.rs b/grass_internal/src/lib.rs index c0789ca..e1e9c28 100644 --- a/grass_internal/src/lib.rs +++ b/grass_internal/src/lib.rs @@ -93,7 +93,7 @@ pub use crate::error::{ }; pub use crate::fs::{Fs, NullFs, StdFs}; pub use crate::options::{InputSyntax, Options, OutputStyle}; -pub(crate) use crate::{context_flags::ContextFlags, token::Token}; +pub(crate) use crate::{context_flags::ContextFlags, lexer::Token}; use crate::{evaluate::Visitor, lexer::Lexer, parse::ScssParser}; mod ast; @@ -110,7 +110,6 @@ mod options; mod parse; mod selector; mod serializer; -mod token; mod unit; mod utils; mod value; diff --git a/grass_internal/src/parse/base.rs b/grass_internal/src/parse/base.rs index 17796d6..186e7e6 100644 --- a/grass_internal/src/parse/base.rs +++ b/grass_internal/src/parse/base.rs @@ -211,15 +211,15 @@ pub(crate) trait BaseParser<'a> { } fn parse_escape(&mut self, identifier_start: bool) -> SassResult { + let start = self.toks().cursor(); self.expect_char('\\')?; let mut value = 0; let first = match self.toks().peek() { Some(t) => t, None => return Err(("Expected expression.", self.toks().current_span()).into()), }; - let mut span = first.pos(); if first.kind == '\n' { - return Err(("Expected escape sequence.", span).into()); + return Err(("Expected escape sequence.", self.toks().current_span()).into()); } else if first.kind.is_ascii_hexdigit() { for _ in 0..6 { let next = match self.toks().peek() { @@ -230,7 +230,6 @@ pub(crate) trait BaseParser<'a> { break; } value *= 16; - span = span.merge(next.pos); value += as_hex(next.kind); self.toks_mut().next(); } @@ -243,12 +242,12 @@ pub(crate) trait BaseParser<'a> { self.toks_mut().next(); } } else { - span = span.merge(first.pos); value = first.kind as u32; self.toks_mut().next(); } - let c = std::char::from_u32(value).ok_or(("Invalid Unicode code point.", span))?; + let c = std::char::from_u32(value) + .ok_or(("Invalid Unicode code point.", self.toks().span_from(start)))?; if (identifier_start && is_name_start(c) && !c.is_ascii_digit()) || (!identifier_start && is_name(c)) { @@ -273,8 +272,9 @@ pub(crate) trait BaseParser<'a> { self.toks_mut().next(); Ok(()) } - Some(Token { pos, .. }) => Err((format!("expected \"{}\".", c), pos).into()), - None => Err((format!("expected \"{}\".", c), self.toks().current_span()).into()), + Some(..) | None => { + Err((format!("expected \"{}\".", c), self.toks().current_span()).into()) + } } } @@ -284,8 +284,7 @@ pub(crate) trait BaseParser<'a> { self.toks_mut().next(); Ok(()) } - Some(Token { pos, .. }) => Err((format!("expected {}.", msg), pos).into()), - None => Err((format!("expected {}.", msg), self.toks().prev_span()).into()), + Some(..) | None => Err((format!("expected {}.", msg), self.toks().prev_span()).into()), } } @@ -295,8 +294,7 @@ pub(crate) trait BaseParser<'a> { kind: q @ ('\'' | '"'), .. }) => q, - Some(Token { pos, .. }) => return Err(("Expected string.", pos).into()), - None => return Err(("Expected string.", self.toks().current_span()).into()), + Some(..) | None => return Err(("Expected string.", self.toks().current_span()).into()), }; let mut buffer = String::new(); @@ -346,9 +344,8 @@ pub(crate) trait BaseParser<'a> { match self.toks().peek() { None => Ok('\u{FFFD}'), Some(Token { - kind: '\n' | '\r', - pos, - }) => Err(("Expected escape sequence.", pos).into()), + kind: '\n' | '\r', .. + }) => Err(("Expected escape sequence.", self.toks().current_span()).into()), Some(Token { kind, .. }) if kind.is_ascii_hexdigit() => { let mut value = 0; for _ in 0..6 { diff --git a/grass_internal/src/parse/keyframes.rs b/grass_internal/src/parse/keyframes.rs index 0a638c5..7124a00 100644 --- a/grass_internal/src/parse/keyframes.rs +++ b/grass_internal/src/parse/keyframes.rs @@ -1,6 +1,6 @@ use std::fmt; -use crate::{ast::KeyframesSelector, error::SassResult, lexer::Lexer, token::Token}; +use crate::{ast::KeyframesSelector, error::SassResult, lexer::Lexer, Token}; use super::BaseParser; diff --git a/grass_internal/src/parse/sass.rs b/grass_internal/src/parse/sass.rs index 905cd94..5307a4f 100644 --- a/grass_internal/src/parse/sass.rs +++ b/grass_internal/src/parse/sass.rs @@ -2,7 +2,7 @@ use std::path::Path; use codemap::{CodeMap, Span}; -use crate::{ast::*, error::SassResult, lexer::Lexer, token::Token, ContextFlags, Options}; +use crate::{ast::*, error::SassResult, lexer::Lexer, Token, ContextFlags, Options}; use super::{BaseParser, StylesheetParser}; diff --git a/grass_internal/src/parse/stylesheet.rs b/grass_internal/src/parse/stylesheet.rs index fe62434..2ed9494 100644 --- a/grass_internal/src/parse/stylesheet.rs +++ b/grass_internal/src/parse/stylesheet.rs @@ -13,7 +13,7 @@ use crate::{ ast::*, common::{unvendor, Identifier, QuoteKind}, error::SassResult, - lexer::Lexer, + lexer::{Lexer, TokenLexer}, utils::{is_name, is_name_start, is_plain_css_import, opposite_bracket}, ContextFlags, Options, Token, }; @@ -1505,7 +1505,12 @@ pub(crate) trait StylesheetParser<'a>: BaseParser<'a> + Sized { self.parse_string() } - fn use_namespace(&mut self, url: &Path, _start: usize) -> SassResult> { + fn use_namespace( + &mut self, + url: &Path, + _start: usize, + url_span: Span, + ) -> SassResult> { if self.scan_identifier("as", false)? { self.whitespace()?; return Ok(if self.scan_char('*') { @@ -1530,11 +1535,8 @@ pub(crate) trait StylesheetParser<'a>: BaseParser<'a> + Sized { }; let mut toks = Lexer::new( - namespace - .chars() - .map(|x| Token::new(self.span_before(), x)) - .collect(), - self.span_before(), + TokenLexer::new(namespace.chars().peekable()).collect(), + url_span, ); // if namespace is empty, avoid attempting to parse an identifier from @@ -1630,12 +1632,14 @@ pub(crate) trait StylesheetParser<'a>: BaseParser<'a> + Sized { } fn parse_use_rule(&mut self, start: usize) -> SassResult { + let url_start = self.toks().cursor(); let url = self.parse_url_string()?; + let url_span = self.toks().span_from(url_start); self.whitespace()?; let path = PathBuf::from(url); - let namespace = self.use_namespace(path.as_ref(), start)?; + let namespace = self.use_namespace(path.as_ref(), start, url_span)?; self.whitespace()?; let configuration = self.parse_configuration(false)?; diff --git a/grass_internal/src/parse/value.rs b/grass_internal/src/parse/value.rs index 307dc20..e41fc2b 100644 --- a/grass_internal/src/parse/value.rs +++ b/grass_internal/src/parse/value.rs @@ -229,12 +229,12 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> { parser, )?; } - Some(Token { kind: '*', pos }) => { + Some(Token { kind: '*', .. }) => { parser.toks_mut().next(); self.add_operator( Spanned { node: BinaryOp::Mul, - span: pos, + span: parser.toks().current_span(), }, parser, )?; @@ -304,12 +304,12 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> { )?; } } - Some(Token { kind: '%', pos }) => { + Some(Token { kind: '%', .. }) => { parser.toks_mut().next(); self.add_operator( Spanned { node: BinaryOp::Rem, - span: pos, + span: parser.toks().current_span(), }, parser, )?; @@ -1019,12 +1019,12 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> { } match parser.toks().peek_n(1) { - Some(Token { kind, pos }) if !kind.is_ascii_digit() => { + Some(Token { kind, .. }) if !kind.is_ascii_digit() => { if allow_trailing_dot { return Ok(None); } - return Err(("Expected digit.", pos).into()); + return Err(("Expected digit.", parser.toks().current_span()).into()); } Some(..) => {} None => return Err(("Expected digit.", parser.toks().current_span()).into()), @@ -1234,12 +1234,14 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> { .span(parser.toks_mut().span_from(start))) } else { let arguments = parser.parse_argument_invocation(false, false)?; - Ok(AstExpr::InterpolatedFunction(Arc::new(InterpolatedFunction { - name: identifier, - arguments, - span: parser.toks_mut().span_from(start), - })) - .span(parser.toks_mut().span_from(start))) + Ok( + AstExpr::InterpolatedFunction(Arc::new(InterpolatedFunction { + name: identifier, + arguments, + span: parser.toks_mut().span_from(start), + })) + .span(parser.toks_mut().span_from(start)), + ) } } _ => Ok(AstExpr::String( @@ -1625,7 +1627,7 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> { match parser.toks().peek() { Some(Token { kind: next @ ('+' | '-'), - pos, + .. }) => { if !matches!( parser.toks().peek_n_backwards(1), @@ -1642,7 +1644,7 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> { ) { return Err(( "\"+\" and \"-\" must be surrounded by whitespace in calculations.", - pos, + parser.toks().current_span(), ) .into()); } diff --git a/grass_internal/src/token.rs b/grass_internal/src/token.rs deleted file mode 100644 index bdddad4..0000000 --- a/grass_internal/src/token.rs +++ /dev/null @@ -1,19 +0,0 @@ -use codemap::Span; - -// todo: remove span from tokens - -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub(crate) struct Token { - pub pos: Span, - pub kind: char, -} - -impl Token { - pub const fn new(pos: Span, kind: char) -> Self { - Self { pos, kind } - } - - pub const fn pos(&self) -> Span { - self.pos - } -} diff --git a/grass_internal/src/value/mod.rs b/grass_internal/src/value/mod.rs index 2a62006..be90374 100644 --- a/grass_internal/src/value/mod.rs +++ b/grass_internal/src/value/mod.rs @@ -312,15 +312,11 @@ impl Value { pub fn cmp(&self, other: &Self, span: Span, op: BinaryOp) -> SassResult> { Ok(match self { - Value::Dimension(SassNumber { - num, - unit, - as_slash: _, - }) => match &other { + Value::Dimension(SassNumber { num, unit, .. }) => match &other { Value::Dimension(SassNumber { num: num2, unit: unit2, - as_slash: _, + .. }) => { if !unit.comparable(unit2) { return Err(