lazily compute token spans

connorskees 2023-01-07 08:56:24 +00:00
parent 4f6bff8716
commit 16ad7298fe
11 changed files with 98 additions and 118 deletions
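
The gist of the change: Token previously stored a full codemap::Span per
character; it now stores only a u32 byte offset, and a span is materialized on
demand by slicing the file-wide span. Below is a minimal standalone sketch of
the idea; the Span type is a simplified stand-in for codemap::Span and subspan
mirrors its byte-offset signature, so this models the technique rather than
the crate's real API.

    // Sketch: spans are computed lazily from a per-token byte offset.
    #[derive(Copy, Clone, Debug, PartialEq)]
    struct Span {
        lo: u64,
        hi: u64,
    }

    impl Span {
        // Slice the byte range [begin, end) out of self, measured from
        // the start of self (modeled on codemap::Span::subspan).
        fn subspan(self, begin: u64, end: u64) -> Span {
            assert!(begin <= end && self.lo + end <= self.hi);
            Span {
                lo: self.lo + begin,
                hi: self.lo + end,
            }
        }
    }

    #[derive(Copy, Clone, Debug, PartialEq)]
    struct Token {
        kind: char,
        pos: u32, // byte offset into the source, replacing a stored Span
    }

    struct Lexer {
        buf: Vec<Token>,
        entire_span: Span, // span covering the whole file
    }

    impl Lexer {
        // Same shape as the new span_at_index: recover the span from the
        // token's offset and the UTF-8 width of its char, falling back to
        // the last token (or an empty span) when the index is out of bounds.
        fn span_at_index(&self, idx: usize) -> Span {
            let (start, len) = match self.buf.get(idx).or_else(|| self.buf.last()) {
                Some(tok) => (tok.pos, tok.kind.len_utf8()),
                None => (0, 0),
            };
            self.entire_span
                .subspan(u64::from(start), u64::from(start) + len as u64)
        }
    }

    fn main() {
        let src = "a*é";
        let mut pos = 0u32;
        let buf: Vec<Token> = src
            .chars()
            .map(|kind| {
                let tok = Token { kind, pos };
                pos += kind.len_utf8() as u32;
                tok
            })
            .collect();
        let lexer = Lexer {
            buf,
            entire_span: Span { lo: 0, hi: src.len() as u64 },
        };
        // 'é' is the third char, starts at byte 2, and is 2 bytes wide.
        assert_eq!(lexer.span_at_index(2), Span { lo: 2, hi: 4 });
    }

The payoff is smaller tokens (one u32 instead of a span) and no subspan call
per character while lexing; the cost is recomputing a span each time one is
requested, plus the approximate fallback to the last token's span.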

View File

@@ -2,7 +2,12 @@ use std::fmt::{self, Write};

 use codemap::Span;

-use crate::{ast::CssStmt, error::SassResult, lexer::Lexer, parse::MediaQueryParser, token::Token};
+use crate::{
+    ast::CssStmt,
+    error::SassResult,
+    lexer::{Lexer, TokenLexer},
+    parse::MediaQueryParser,
+};

 #[derive(Debug, Clone)]
 pub(crate) struct MediaRule {
@@ -54,7 +59,7 @@ impl MediaQuery {
     }

     pub fn parse_list(list: &str, span: Span) -> SassResult<Vec<Self>> {
-        let toks = Lexer::new(list.chars().map(|x| Token::new(span, x)).collect(), span);
+        let toks = Lexer::new(TokenLexer::new(list.chars().peekable()).collect(), span);
         MediaQueryParser::new(toks).parse()
     }
 }

View File

@@ -25,7 +25,7 @@ use crate::{
     common::{unvendor, BinaryOp, Identifier, ListSeparator, QuoteKind, UnaryOp},
     error::{SassError, SassResult},
     interner::InternedString,
-    lexer::Lexer,
+    lexer::{Lexer, TokenLexer},
     parse::{
         AtRootQueryParser, CssParser, KeyframesSelectorParser, SassParser, ScssParser,
         StylesheetParser,
@@ -34,7 +34,6 @@ use crate::{
         ComplexSelectorComponent, ExtendRule, ExtendedSelector, ExtensionStore, SelectorList,
         SelectorParser,
     },
-    token::Token,
     utils::{to_sentence, trim_ascii},
     value::{
         ArgList, CalculationArg, CalculationName, Number, SassCalculation, SassFunction, SassMap,
@@ -978,10 +977,8 @@ impl<'a> Visitor<'a> {
                 let span = query.span;

-                let query_toks = Lexer::new(
-                    resolved.chars().map(|x| Token::new(span, x)).collect(),
-                    span,
-                );
+                let query_toks =
+                    Lexer::new(TokenLexer::new(resolved.chars().peekable()).collect(), span);

                 AtRootQueryParser::new(query_toks).parse()?
             }
@@ -1142,7 +1139,7 @@ impl<'a> Visitor<'a> {
         span: Span,
     ) -> SassResult<SelectorList> {
         let sel_toks = Lexer::new(
-            selector_text.chars().map(|x| Token::new(span, x)).collect(),
+            TokenLexer::new(selector_text.chars().peekable()).collect(),
             span,
         );
@@ -2792,7 +2789,7 @@ impl<'a> Visitor<'a> {
         if self.flags.in_keyframes() {
             let span = ruleset.selector_span;
             let sel_toks = Lexer::new(
-                selector_text.chars().map(|x| Token::new(span, x)).collect(),
+                TokenLexer::new(selector_text.chars().peekable()).collect(),
                 span,
             );
             let parsed_selector =

View File

@@ -2,16 +2,19 @@ use std::{borrow::Cow, iter::Peekable, str::Chars, sync::Arc};

 use codemap::{File, Span};

-use crate::Token;
-
 const FORM_FEED: char = '\x0C';

+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub(crate) struct Token {
+    pub kind: char,
+    pos: u32,
+}
+
 #[derive(Debug, Clone)]
 // todo: remove lifetime as Cow is now superfluous
 pub(crate) struct Lexer<'a> {
     buf: Cow<'a, [Token]>,
-    /// The span to be used in the case that `buf` is empty
-    empty_span: Span,
+    entire_span: Span,
     cursor: usize,
 }
@@ -27,36 +30,35 @@ impl<'a> Lexer<'a> {
         matches!(self.peek(), Some(Token { kind, .. }) if kind == c)
     }

-    pub fn span_from(&mut self, start: usize) -> Span {
-        let start = match self.buf.get(start) {
-            Some(start) => start.pos,
-            None => return self.current_span(),
+    /// Gets the span of the character at the given index. If the index is out of
+    /// bounds, it returns the span of the last character. If the input is empty,
+    /// it returns an empty span
+    fn span_at_index(&self, idx: usize) -> Span {
+        let (start, len) = match self.buf.get(idx) {
+            Some(tok) => (tok.pos, tok.kind.len_utf8()),
+            None => match self.buf.last() {
+                Some(tok) => (tok.pos, tok.kind.len_utf8()),
+                None => (0, 0),
+            },
         };

-        self.cursor = self.cursor.saturating_sub(1);
-        let end = self.current_span();
-        self.cursor += 1;
+        self.entire_span
+            .subspan(start as u64, start as u64 + len as u64)
+    }
+
+    pub fn span_from(&self, start: usize) -> Span {
+        let start = self.span_at_index(start);
+        let end = self.prev_span();

         start.merge(end)
     }

     pub fn prev_span(&self) -> Span {
-        match self.buf.get(self.cursor.saturating_sub(1)) {
-            Some(tok) => tok.pos,
-            None => match self.buf.last() {
-                Some(tok) => tok.pos,
-                None => self.empty_span,
-            },
-        }
+        self.span_at_index(self.cursor.saturating_sub(1))
     }

     pub fn current_span(&self) -> Span {
-        match self.buf.get(self.cursor) {
-            Some(tok) => tok.pos,
-            None => match self.buf.last() {
-                Some(tok) => tok.pos,
-                None => self.empty_span,
-            },
-        }
+        self.span_at_index(self.cursor)
     }

     pub fn peek(&self) -> Option<Token> {
@@ -104,10 +106,16 @@ impl<'a> Iterator for Lexer<'a> {
     }
 }

-struct TokenLexer<'a> {
+/// Lex a string into a series of tokens
+pub(crate) struct TokenLexer<'a> {
     buf: Peekable<Chars<'a>>,
-    cursor: usize,
-    file: Arc<File>,
+    cursor: u32,
+}
+
+impl<'a> TokenLexer<'a> {
+    pub fn new(buf: Peekable<Chars<'a>>) -> TokenLexer<'a> {
+        Self { buf, cursor: 0 }
+    }
 }

 impl<'a> Iterator for TokenLexer<'a> {
@@ -124,11 +132,8 @@ impl<'a> Iterator for TokenLexer<'a> {
             }
             c => c,
         };
-        let len = kind.len_utf8();
-        let pos = self
-            .file
-            .span
-            .subspan(self.cursor as u64, (self.cursor + len) as u64);
+        let len = kind.len_utf8() as u32;
+        let pos = self.cursor;
         self.cursor += len;
         Some(Token { pos, kind })
     }
@@ -140,21 +145,15 @@
 impl<'a> Lexer<'a> {
     pub fn new_from_file(file: &Arc<File>) -> Self {
-        let buf = TokenLexer {
-            file: Arc::clone(file),
-            buf: file.source().chars().peekable(),
-            cursor: 0,
-        }
-        .collect();
-        Self::new(buf, file.span.subspan(0, 0))
+        let buf = TokenLexer::new(file.source().chars().peekable()).collect();
+        Self::new(buf, file.span)
     }

-    pub fn new(buf: Vec<Token>, empty_span: Span) -> Self {
+    pub fn new(buf: Vec<Token>, entire_span: Span) -> Self {
         Lexer {
             buf: Cow::Owned(buf),
             cursor: 0,
-            empty_span,
+            entire_span,
         }
     }
 }

View File

@@ -93,7 +93,7 @@ pub use crate::error::{
 };
 pub use crate::fs::{Fs, NullFs, StdFs};
 pub use crate::options::{InputSyntax, Options, OutputStyle};
-pub(crate) use crate::{context_flags::ContextFlags, token::Token};
+pub(crate) use crate::{context_flags::ContextFlags, lexer::Token};
 use crate::{evaluate::Visitor, lexer::Lexer, parse::ScssParser};

 mod ast;
@@ -110,7 +110,6 @@ mod options;
 mod parse;
 mod selector;
 mod serializer;
-mod token;
 mod unit;
 mod utils;
 mod value;

View File

@@ -211,15 +211,15 @@ pub(crate) trait BaseParser<'a> {
     }

     fn parse_escape(&mut self, identifier_start: bool) -> SassResult<String> {
+        let start = self.toks().cursor();
         self.expect_char('\\')?;
         let mut value = 0;
         let first = match self.toks().peek() {
             Some(t) => t,
             None => return Err(("Expected expression.", self.toks().current_span()).into()),
         };
-        let mut span = first.pos();

         if first.kind == '\n' {
-            return Err(("Expected escape sequence.", span).into());
+            return Err(("Expected escape sequence.", self.toks().current_span()).into());
         } else if first.kind.is_ascii_hexdigit() {
             for _ in 0..6 {
                 let next = match self.toks().peek() {
@@ -230,7 +230,6 @@ pub(crate) trait BaseParser<'a> {
                     break;
                 }
                 value *= 16;
-                span = span.merge(next.pos);
                 value += as_hex(next.kind);
                 self.toks_mut().next();
             }
@@ -243,12 +242,12 @@ pub(crate) trait BaseParser<'a> {
             self.toks_mut().next();
             }
         } else {
-            span = span.merge(first.pos);
             value = first.kind as u32;
             self.toks_mut().next();
         }

-        let c = std::char::from_u32(value).ok_or(("Invalid Unicode code point.", span))?;
+        let c = std::char::from_u32(value)
+            .ok_or(("Invalid Unicode code point.", self.toks().span_from(start)))?;

         if (identifier_start && is_name_start(c) && !c.is_ascii_digit())
             || (!identifier_start && is_name(c))
         {
@@ -273,8 +272,9 @@ pub(crate) trait BaseParser<'a> {
                 self.toks_mut().next();
                 Ok(())
             }
-            Some(Token { pos, .. }) => Err((format!("expected \"{}\".", c), pos).into()),
-            None => Err((format!("expected \"{}\".", c), self.toks().current_span()).into()),
+            Some(..) | None => {
+                Err((format!("expected \"{}\".", c), self.toks().current_span()).into())
+            }
         }
     }
@@ -284,8 +284,7 @@ pub(crate) trait BaseParser<'a> {
                 self.toks_mut().next();
                 Ok(())
             }
-            Some(Token { pos, .. }) => Err((format!("expected {}.", msg), pos).into()),
-            None => Err((format!("expected {}.", msg), self.toks().prev_span()).into()),
+            Some(..) | None => Err((format!("expected {}.", msg), self.toks().prev_span()).into()),
         }
     }
@@ -295,8 +294,7 @@ pub(crate) trait BaseParser<'a> {
                 kind: q @ ('\'' | '"'),
                 ..
             }) => q,
-            Some(Token { pos, .. }) => return Err(("Expected string.", pos).into()),
-            None => return Err(("Expected string.", self.toks().current_span()).into()),
+            Some(..) | None => return Err(("Expected string.", self.toks().current_span()).into()),
         };

         let mut buffer = String::new();
@@ -346,9 +344,8 @@ pub(crate) trait BaseParser<'a> {
         match self.toks().peek() {
             None => Ok('\u{FFFD}'),
             Some(Token {
-                kind: '\n' | '\r',
-                pos,
-            }) => Err(("Expected escape sequence.", pos).into()),
+                kind: '\n' | '\r', ..
+            }) => Err(("Expected escape sequence.", self.toks().current_span()).into()),
             Some(Token { kind, .. }) if kind.is_ascii_hexdigit() => {
                 let mut value = 0;
                 for _ in 0..6 {

View File

@@ -1,6 +1,6 @@
 use std::fmt;

-use crate::{ast::KeyframesSelector, error::SassResult, lexer::Lexer, token::Token};
+use crate::{ast::KeyframesSelector, error::SassResult, lexer::Lexer, Token};

 use super::BaseParser;

View File

@@ -2,7 +2,7 @@ use std::path::Path;

 use codemap::{CodeMap, Span};

-use crate::{ast::*, error::SassResult, lexer::Lexer, token::Token, ContextFlags, Options};
+use crate::{ast::*, error::SassResult, lexer::Lexer, Token, ContextFlags, Options};

 use super::{BaseParser, StylesheetParser};

View File

@@ -13,7 +13,7 @@ use crate::{
     ast::*,
     common::{unvendor, Identifier, QuoteKind},
     error::SassResult,
-    lexer::Lexer,
+    lexer::{Lexer, TokenLexer},
     utils::{is_name, is_name_start, is_plain_css_import, opposite_bracket},
     ContextFlags, Options, Token,
 };
@@ -1505,7 +1505,12 @@ pub(crate) trait StylesheetParser<'a>: BaseParser<'a> + Sized {
         self.parse_string()
     }

-    fn use_namespace(&mut self, url: &Path, _start: usize) -> SassResult<Option<String>> {
+    fn use_namespace(
+        &mut self,
+        url: &Path,
+        _start: usize,
+        url_span: Span,
+    ) -> SassResult<Option<String>> {
         if self.scan_identifier("as", false)? {
             self.whitespace()?;
             return Ok(if self.scan_char('*') {
@@ -1530,11 +1535,8 @@ pub(crate) trait StylesheetParser<'a>: BaseParser<'a> + Sized {
         };

         let mut toks = Lexer::new(
-            namespace
-                .chars()
-                .map(|x| Token::new(self.span_before(), x))
-                .collect(),
-            self.span_before(),
+            TokenLexer::new(namespace.chars().peekable()).collect(),
+            url_span,
         );

         // if namespace is empty, avoid attempting to parse an identifier from
@@ -1630,12 +1632,14 @@ pub(crate) trait StylesheetParser<'a>: BaseParser<'a> + Sized {
     }

     fn parse_use_rule(&mut self, start: usize) -> SassResult<AstStmt> {
+        let url_start = self.toks().cursor();
         let url = self.parse_url_string()?;
+        let url_span = self.toks().span_from(url_start);
         self.whitespace()?;

         let path = PathBuf::from(url);

-        let namespace = self.use_namespace(path.as_ref(), start)?;
+        let namespace = self.use_namespace(path.as_ref(), start, url_span)?;
         self.whitespace()?;
         let configuration = self.parse_configuration(false)?;

View File

@@ -229,12 +229,12 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
                     parser,
                 )?;
             }
-            Some(Token { kind: '*', pos }) => {
+            Some(Token { kind: '*', .. }) => {
                 parser.toks_mut().next();
                 self.add_operator(
                     Spanned {
                         node: BinaryOp::Mul,
-                        span: pos,
+                        span: parser.toks().current_span(),
                     },
                     parser,
                 )?;
@@ -304,12 +304,12 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
                     )?;
                 }
             }
-            Some(Token { kind: '%', pos }) => {
+            Some(Token { kind: '%', .. }) => {
                 parser.toks_mut().next();
                 self.add_operator(
                     Spanned {
                         node: BinaryOp::Rem,
-                        span: pos,
+                        span: parser.toks().current_span(),
                     },
                     parser,
                 )?;
@@ -1019,12 +1019,12 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
         }

         match parser.toks().peek_n(1) {
-            Some(Token { kind, pos }) if !kind.is_ascii_digit() => {
+            Some(Token { kind, .. }) if !kind.is_ascii_digit() => {
                 if allow_trailing_dot {
                     return Ok(None);
                 }

-                return Err(("Expected digit.", pos).into());
+                return Err(("Expected digit.", parser.toks().current_span()).into());
             }
             Some(..) => {}
             None => return Err(("Expected digit.", parser.toks().current_span()).into()),
@@ -1234,12 +1234,14 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
                 .span(parser.toks_mut().span_from(start)))
             } else {
                 let arguments = parser.parse_argument_invocation(false, false)?;
-                Ok(AstExpr::InterpolatedFunction(Arc::new(InterpolatedFunction {
-                    name: identifier,
-                    arguments,
-                    span: parser.toks_mut().span_from(start),
-                }))
-                .span(parser.toks_mut().span_from(start)))
+                Ok(
+                    AstExpr::InterpolatedFunction(Arc::new(InterpolatedFunction {
+                        name: identifier,
+                        arguments,
+                        span: parser.toks_mut().span_from(start),
+                    }))
+                    .span(parser.toks_mut().span_from(start)),
+                )
             }
         }
         _ => Ok(AstExpr::String(
@@ -1625,7 +1627,7 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
         match parser.toks().peek() {
             Some(Token {
                 kind: next @ ('+' | '-'),
-                pos,
+                ..
             }) => {
                 if !matches!(
                     parser.toks().peek_n_backwards(1),
@@ -1642,7 +1644,7 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
             ) {
                 return Err((
                     "\"+\" and \"-\" must be surrounded by whitespace in calculations.",
-                    pos,
+                    parser.toks().current_span(),
                 )
                 .into());
             }

View File

@@ -1,19 +0,0 @@
-use codemap::Span;
-
-// todo: remove span from tokens
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
-pub(crate) struct Token {
-    pub pos: Span,
-    pub kind: char,
-}
-
-impl Token {
-    pub const fn new(pos: Span, kind: char) -> Self {
-        Self { pos, kind }
-    }
-
-    pub const fn pos(&self) -> Span {
-        self.pos
-    }
-}

View File

@@ -312,15 +312,11 @@ impl Value {
     pub fn cmp(&self, other: &Self, span: Span, op: BinaryOp) -> SassResult<Option<Ordering>> {
         Ok(match self {
-            Value::Dimension(SassNumber {
-                num,
-                unit,
-                as_slash: _,
-            }) => match &other {
+            Value::Dimension(SassNumber { num, unit, .. }) => match &other {
                 Value::Dimension(SassNumber {
                     num: num2,
                     unit: unit2,
-                    as_slash: _,
+                    ..
                 }) => {
                     if !unit.comparable(unit2) {
                         return Err(