lazily compute token spans

connorskees 2023-01-07 08:56:24 +00:00
parent 4f6bff8716
commit 16ad7298fe
11 changed files with 98 additions and 118 deletions
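In short: a `Token` used to carry a fully materialized `codemap::Span`, built eagerly for every character as it was lexed. After this commit a token is just a `char` plus a `u32` byte offset, and the lexer derives a span on demand by slicing the span of the whole input. A condensed sketch of the idea, trimmed from the diff below (`token_span` is an illustrative stand-in for the new `Lexer::span_at_index`, not a function in the commit):

use codemap::Span;

// Before: eagerly built via `file.span.subspan(..)` once per character.
// pub(crate) struct Token { pub pos: Span, pub kind: char }

// After: just the character and its byte offset into the source.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) struct Token {
    pub kind: char,
    pos: u32,
}

// A span is materialized lazily, only when something (usually an error
// message) actually asks for it, by slicing the file-wide span.
fn token_span(entire_span: Span, tok: Token) -> Span {
    let start = tok.pos as u64;
    entire_span.subspan(start, start + tok.kind.len_utf8() as u64)
}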

View File

@@ -2,7 +2,12 @@ use std::fmt::{self, Write};
 use codemap::Span;
 
-use crate::{ast::CssStmt, error::SassResult, lexer::Lexer, parse::MediaQueryParser, token::Token};
+use crate::{
+    ast::CssStmt,
+    error::SassResult,
+    lexer::{Lexer, TokenLexer},
+    parse::MediaQueryParser,
+};
 
 #[derive(Debug, Clone)]
 pub(crate) struct MediaRule {
@@ -54,7 +59,7 @@ impl MediaQuery {
     }
 
     pub fn parse_list(list: &str, span: Span) -> SassResult<Vec<Self>> {
-        let toks = Lexer::new(list.chars().map(|x| Token::new(span, x)).collect(), span);
+        let toks = Lexer::new(TokenLexer::new(list.chars().peekable()).collect(), span);
 
         MediaQueryParser::new(toks).parse()
     }

View File

@@ -25,7 +25,7 @@ use crate::{
     common::{unvendor, BinaryOp, Identifier, ListSeparator, QuoteKind, UnaryOp},
     error::{SassError, SassResult},
    interner::InternedString,
-    lexer::Lexer,
+    lexer::{Lexer, TokenLexer},
    parse::{
        AtRootQueryParser, CssParser, KeyframesSelectorParser, SassParser, ScssParser,
        StylesheetParser,
@@ -34,7 +34,6 @@ use crate::{
        ComplexSelectorComponent, ExtendRule, ExtendedSelector, ExtensionStore, SelectorList,
        SelectorParser,
    },
-    token::Token,
    utils::{to_sentence, trim_ascii},
    value::{
        ArgList, CalculationArg, CalculationName, Number, SassCalculation, SassFunction, SassMap,
@@ -978,10 +977,8 @@ impl<'a> Visitor<'a> {
                 let span = query.span;
-                let query_toks = Lexer::new(
-                    resolved.chars().map(|x| Token::new(span, x)).collect(),
-                    span,
-                );
+                let query_toks =
+                    Lexer::new(TokenLexer::new(resolved.chars().peekable()).collect(), span);
                 AtRootQueryParser::new(query_toks).parse()?
             }
@@ -1142,7 +1139,7 @@ impl<'a> Visitor<'a> {
         span: Span,
     ) -> SassResult<SelectorList> {
         let sel_toks = Lexer::new(
-            selector_text.chars().map(|x| Token::new(span, x)).collect(),
+            TokenLexer::new(selector_text.chars().peekable()).collect(),
             span,
         );
@@ -2792,7 +2789,7 @@ impl<'a> Visitor<'a> {
         if self.flags.in_keyframes() {
             let span = ruleset.selector_span;
             let sel_toks = Lexer::new(
-                selector_text.chars().map(|x| Token::new(span, x)).collect(),
+                TokenLexer::new(selector_text.chars().peekable()).collect(),
                 span,
             );
             let parsed_selector =

View File

@@ -2,16 +2,19 @@ use std::{borrow::Cow, iter::Peekable, str::Chars, sync::Arc};
 use codemap::{File, Span};
 
-use crate::Token;
-
 const FORM_FEED: char = '\x0C';
 
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub(crate) struct Token {
+    pub kind: char,
+    pos: u32,
+}
+
 #[derive(Debug, Clone)]
 // todo: remove lifetime as Cow is now superfluous
 pub(crate) struct Lexer<'a> {
     buf: Cow<'a, [Token]>,
-    /// The span to be used in the case that `buf` is empty
-    empty_span: Span,
+    entire_span: Span,
     cursor: usize,
 }
@@ -27,36 +30,35 @@ impl<'a> Lexer<'a> {
         matches!(self.peek(), Some(Token { kind, .. }) if kind == c)
     }
 
-    pub fn span_from(&mut self, start: usize) -> Span {
-        let start = match self.buf.get(start) {
-            Some(start) => start.pos,
-            None => return self.current_span(),
+    /// Gets the span of the character at the given index. If the index is out of
+    /// bounds, it returns the span of the last character. If the input is empty,
+    /// it returns an empty span
+    fn span_at_index(&self, idx: usize) -> Span {
+        let (start, len) = match self.buf.get(idx) {
+            Some(tok) => (tok.pos, tok.kind.len_utf8()),
+            None => match self.buf.last() {
+                Some(tok) => (tok.pos, tok.kind.len_utf8()),
+                None => (0, 0),
+            },
         };
-        self.cursor = self.cursor.saturating_sub(1);
-        let end = self.current_span();
-        self.cursor += 1;
+
+        self.entire_span
+            .subspan(start as u64, start as u64 + len as u64)
+    }
+
+    pub fn span_from(&self, start: usize) -> Span {
+        let start = self.span_at_index(start);
+        let end = self.prev_span();
 
         start.merge(end)
     }
 
     pub fn prev_span(&self) -> Span {
-        match self.buf.get(self.cursor.saturating_sub(1)) {
-            Some(tok) => tok.pos,
-            None => match self.buf.last() {
-                Some(tok) => tok.pos,
-                None => self.empty_span,
-            },
-        }
+        self.span_at_index(self.cursor.saturating_sub(1))
     }
 
     pub fn current_span(&self) -> Span {
-        match self.buf.get(self.cursor) {
-            Some(tok) => tok.pos,
-            None => match self.buf.last() {
-                Some(tok) => tok.pos,
-                None => self.empty_span,
-            },
-        }
+        self.span_at_index(self.cursor)
     }
 
     pub fn peek(&self) -> Option<Token> {
@@ -104,10 +106,16 @@ impl<'a> Iterator for Lexer<'a> {
     }
 }
 
-struct TokenLexer<'a> {
+/// Lex a string into a series of tokens
+pub(crate) struct TokenLexer<'a> {
     buf: Peekable<Chars<'a>>,
-    cursor: usize,
-    file: Arc<File>,
+    cursor: u32,
+}
+
+impl<'a> TokenLexer<'a> {
+    pub fn new(buf: Peekable<Chars<'a>>) -> TokenLexer<'a> {
+        Self { buf, cursor: 0 }
+    }
 }
 
 impl<'a> Iterator for TokenLexer<'a> {
@@ -124,11 +132,8 @@ impl<'a> Iterator for TokenLexer<'a> {
             }
             c => c,
         };
-        let len = kind.len_utf8();
-        let pos = self
-            .file
-            .span
-            .subspan(self.cursor as u64, (self.cursor + len) as u64);
+        let len = kind.len_utf8() as u32;
+        let pos = self.cursor;
         self.cursor += len;
         Some(Token { pos, kind })
     }
@@ -140,21 +145,15 @@ impl<'a> Iterator for TokenLexer<'a> {
 impl<'a> Lexer<'a> {
     pub fn new_from_file(file: &Arc<File>) -> Self {
-        let buf = TokenLexer {
-            file: Arc::clone(file),
-            buf: file.source().chars().peekable(),
-            cursor: 0,
-        }
-        .collect();
-        Self::new(buf, file.span.subspan(0, 0))
+        let buf = TokenLexer::new(file.source().chars().peekable()).collect();
+        Self::new(buf, file.span)
     }
 
-    pub fn new(buf: Vec<Token>, empty_span: Span) -> Self {
+    pub fn new(buf: Vec<Token>, entire_span: Span) -> Self {
         Lexer {
             buf: Cow::Owned(buf),
             cursor: 0,
-            empty_span,
+            entire_span,
         }
     }
 }
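The arithmetic above is easy to get wrong for multi-byte characters, since `pos` is a byte offset, not a char index. A hypothetical sanity check (not part of this commit) that exercises only the codemap crate's public API (`CodeMap::add_file`, `Span::subspan`, `File::source_slice`):

#[cfg(test)]
mod tests {
    use codemap::CodeMap;

    // Slicing the file-wide span by (byte offset, offset + len_utf8)
    // should recover exactly the character the token was lexed from.
    #[test]
    fn subspan_recovers_multibyte_char() {
        let mut map = CodeMap::new();
        let file = map.add_file("test.scss".to_owned(), "aé!".to_owned());

        // 'é' starts at byte 1 and is 2 bytes long in UTF-8.
        let (pos, len) = (1u32, 'é'.len_utf8() as u32);
        let span = file.span.subspan(pos as u64, (pos + len) as u64);

        assert_eq!(file.source_slice(span), "é");
    }
}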

View File

@@ -93,7 +93,7 @@ pub use crate::error::{
 };
 pub use crate::fs::{Fs, NullFs, StdFs};
 pub use crate::options::{InputSyntax, Options, OutputStyle};
-pub(crate) use crate::{context_flags::ContextFlags, token::Token};
+pub(crate) use crate::{context_flags::ContextFlags, lexer::Token};
 use crate::{evaluate::Visitor, lexer::Lexer, parse::ScssParser};
 
 mod ast;
@@ -110,7 +110,6 @@ mod options;
 mod parse;
 mod selector;
 mod serializer;
-mod token;
 mod unit;
 mod utils;
 mod value;

View File

@@ -211,15 +211,15 @@ pub(crate) trait BaseParser<'a> {
     }
 
     fn parse_escape(&mut self, identifier_start: bool) -> SassResult<String> {
+        let start = self.toks().cursor();
         self.expect_char('\\')?;
         let mut value = 0;
         let first = match self.toks().peek() {
             Some(t) => t,
             None => return Err(("Expected expression.", self.toks().current_span()).into()),
         };
-        let mut span = first.pos();
         if first.kind == '\n' {
-            return Err(("Expected escape sequence.", span).into());
+            return Err(("Expected escape sequence.", self.toks().current_span()).into());
         } else if first.kind.is_ascii_hexdigit() {
             for _ in 0..6 {
                 let next = match self.toks().peek() {
@@ -230,7 +230,6 @@ pub(crate) trait BaseParser<'a> {
                     break;
                 }
                 value *= 16;
-                span = span.merge(next.pos);
                 value += as_hex(next.kind);
                 self.toks_mut().next();
             }
@@ -243,12 +242,12 @@ pub(crate) trait BaseParser<'a> {
                 self.toks_mut().next();
             }
         } else {
-            span = span.merge(first.pos);
             value = first.kind as u32;
             self.toks_mut().next();
         }
 
-        let c = std::char::from_u32(value).ok_or(("Invalid Unicode code point.", span))?;
+        let c = std::char::from_u32(value)
+            .ok_or(("Invalid Unicode code point.", self.toks().span_from(start)))?;
 
         if (identifier_start && is_name_start(c) && !c.is_ascii_digit())
             || (!identifier_start && is_name(c))
         {
@@ -273,8 +272,9 @@ pub(crate) trait BaseParser<'a> {
                 self.toks_mut().next();
                 Ok(())
             }
-            Some(Token { pos, .. }) => Err((format!("expected \"{}\".", c), pos).into()),
-            None => Err((format!("expected \"{}\".", c), self.toks().current_span()).into()),
+            Some(..) | None => {
+                Err((format!("expected \"{}\".", c), self.toks().current_span()).into())
+            }
         }
     }
@@ -284,8 +284,7 @@ pub(crate) trait BaseParser<'a> {
                 self.toks_mut().next();
                 Ok(())
             }
-            Some(Token { pos, .. }) => Err((format!("expected {}.", msg), pos).into()),
-            None => Err((format!("expected {}.", msg), self.toks().prev_span()).into()),
+            Some(..) | None => Err((format!("expected {}.", msg), self.toks().prev_span()).into()),
         }
     }
@@ -295,8 +294,7 @@ pub(crate) trait BaseParser<'a> {
                 kind: q @ ('\'' | '"'),
                 ..
             }) => q,
-            Some(Token { pos, .. }) => return Err(("Expected string.", pos).into()),
-            None => return Err(("Expected string.", self.toks().current_span()).into()),
+            Some(..) | None => return Err(("Expected string.", self.toks().current_span()).into()),
         };
 
         let mut buffer = String::new();
@@ -346,9 +344,8 @@ pub(crate) trait BaseParser<'a> {
         match self.toks().peek() {
             None => Ok('\u{FFFD}'),
             Some(Token {
-                kind: '\n' | '\r',
-                pos,
-            }) => Err(("Expected escape sequence.", pos).into()),
+                kind: '\n' | '\r', ..
+            }) => Err(("Expected escape sequence.", self.toks().current_span()).into()),
             Some(Token { kind, .. }) if kind.is_ascii_hexdigit() => {
                 let mut value = 0;
                 for _ in 0..6 {
View File

@@ -1,6 +1,6 @@
 use std::fmt;
 
-use crate::{ast::KeyframesSelector, error::SassResult, lexer::Lexer, token::Token};
+use crate::{ast::KeyframesSelector, error::SassResult, lexer::Lexer, Token};
 
 use super::BaseParser;

View File

@@ -2,7 +2,7 @@ use std::path::Path;
 use codemap::{CodeMap, Span};
 
-use crate::{ast::*, error::SassResult, lexer::Lexer, token::Token, ContextFlags, Options};
+use crate::{ast::*, error::SassResult, lexer::Lexer, Token, ContextFlags, Options};
 
 use super::{BaseParser, StylesheetParser};

View File

@@ -13,7 +13,7 @@ use crate::{
     ast::*,
     common::{unvendor, Identifier, QuoteKind},
     error::SassResult,
-    lexer::Lexer,
+    lexer::{Lexer, TokenLexer},
     utils::{is_name, is_name_start, is_plain_css_import, opposite_bracket},
     ContextFlags, Options, Token,
 };
@@ -1505,7 +1505,12 @@ pub(crate) trait StylesheetParser<'a>: BaseParser<'a> + Sized {
         self.parse_string()
     }
 
-    fn use_namespace(&mut self, url: &Path, _start: usize) -> SassResult<Option<String>> {
+    fn use_namespace(
+        &mut self,
+        url: &Path,
+        _start: usize,
+        url_span: Span,
+    ) -> SassResult<Option<String>> {
         if self.scan_identifier("as", false)? {
             self.whitespace()?;
             return Ok(if self.scan_char('*') {
@@ -1530,11 +1535,8 @@ pub(crate) trait StylesheetParser<'a>: BaseParser<'a> + Sized {
         };
 
         let mut toks = Lexer::new(
-            namespace
-                .chars()
-                .map(|x| Token::new(self.span_before(), x))
-                .collect(),
-            self.span_before(),
+            TokenLexer::new(namespace.chars().peekable()).collect(),
+            url_span,
         );
 
         // if namespace is empty, avoid attempting to parse an identifier from
@@ -1630,12 +1632,14 @@ pub(crate) trait StylesheetParser<'a>: BaseParser<'a> + Sized {
     }
 
     fn parse_use_rule(&mut self, start: usize) -> SassResult<AstStmt> {
+        let url_start = self.toks().cursor();
         let url = self.parse_url_string()?;
+        let url_span = self.toks().span_from(url_start);
         self.whitespace()?;
 
         let path = PathBuf::from(url);
 
-        let namespace = self.use_namespace(path.as_ref(), start)?;
+        let namespace = self.use_namespace(path.as_ref(), start, url_span)?;
         self.whitespace()?;
         let configuration = self.parse_configuration(false)?;

View File

@@ -229,12 +229,12 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
                     parser,
                 )?;
             }
-            Some(Token { kind: '*', pos }) => {
+            Some(Token { kind: '*', .. }) => {
                 parser.toks_mut().next();
                 self.add_operator(
                     Spanned {
                         node: BinaryOp::Mul,
-                        span: pos,
+                        span: parser.toks().current_span(),
                     },
                     parser,
                 )?;
@@ -304,12 +304,12 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
                     )?;
                 }
            }
-            Some(Token { kind: '%', pos }) => {
+            Some(Token { kind: '%', .. }) => {
                 parser.toks_mut().next();
                 self.add_operator(
                     Spanned {
                         node: BinaryOp::Rem,
-                        span: pos,
+                        span: parser.toks().current_span(),
                     },
                     parser,
                 )?;
@@ -1019,12 +1019,12 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
         }
 
         match parser.toks().peek_n(1) {
-            Some(Token { kind, pos }) if !kind.is_ascii_digit() => {
+            Some(Token { kind, .. }) if !kind.is_ascii_digit() => {
                 if allow_trailing_dot {
                     return Ok(None);
                 }
 
-                return Err(("Expected digit.", pos).into());
+                return Err(("Expected digit.", parser.toks().current_span()).into());
             }
             Some(..) => {}
             None => return Err(("Expected digit.", parser.toks().current_span()).into()),
@@ -1234,12 +1234,14 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
                 .span(parser.toks_mut().span_from(start)))
             } else {
                 let arguments = parser.parse_argument_invocation(false, false)?;
-                Ok(AstExpr::InterpolatedFunction(Arc::new(InterpolatedFunction {
-                    name: identifier,
-                    arguments,
-                    span: parser.toks_mut().span_from(start),
-                }))
-                .span(parser.toks_mut().span_from(start)))
+                Ok(
+                    AstExpr::InterpolatedFunction(Arc::new(InterpolatedFunction {
+                        name: identifier,
+                        arguments,
+                        span: parser.toks_mut().span_from(start),
+                    }))
+                    .span(parser.toks_mut().span_from(start)),
+                )
             }
         }
         _ => Ok(AstExpr::String(
@@ -1625,7 +1627,7 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
         match parser.toks().peek() {
             Some(Token {
                 kind: next @ ('+' | '-'),
-                pos,
+                ..
             }) => {
                 if !matches!(
                     parser.toks().peek_n_backwards(1),
@@ -1642,7 +1644,7 @@ impl<'a, 'c, P: StylesheetParser<'a>> ValueParser<'a, 'c, P> {
                 ) {
                     return Err((
                         "\"+\" and \"-\" must be surrounded by whitespace in calculations.",
-                        pos,
+                        parser.toks().current_span(),
                     )
                     .into());
                 }

View File

@@ -1,19 +0,0 @@
-use codemap::Span;
-
-// todo: remove span from tokens
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
-pub(crate) struct Token {
-    pub pos: Span,
-    pub kind: char,
-}
-
-impl Token {
-    pub const fn new(pos: Span, kind: char) -> Self {
-        Self { pos, kind }
-    }
-
-    pub const fn pos(&self) -> Span {
-        self.pos
-    }
-}

View File

@@ -312,15 +312,11 @@ impl Value {
     pub fn cmp(&self, other: &Self, span: Span, op: BinaryOp) -> SassResult<Option<Ordering>> {
         Ok(match self {
-            Value::Dimension(SassNumber {
-                num,
-                unit,
-                as_slash: _,
-            }) => match &other {
+            Value::Dimension(SassNumber { num, unit, .. }) => match &other {
                 Value::Dimension(SassNumber {
                     num: num2,
                     unit: unit2,
-                    as_slash: _,
+                    ..
                 }) => {
                     if !unit.comparable(unit2) {
                         return Err(