From bc2c927aa7ce29b683fa32c5723d2ce17b91b98f Mon Sep 17 00:00:00 2001 From: ConnorSkees <39542938+ConnorSkees@users.noreply.github.com> Date: Fri, 28 Feb 2020 18:27:32 -0500 Subject: [PATCH] Properly emit @charset --- src/atrule.rs | 11 +++++------ src/css.rs | 10 +++++----- src/format.rs | 5 ----- src/lexer.rs | 6 ++++++ src/lib.rs | 8 ++------ tests/charset.rs | 20 ++++++++++++++++++++ tests/misc.rs | 1 - 7 files changed, 38 insertions(+), 23 deletions(-) create mode 100644 tests/charset.rs diff --git a/src/atrule.rs b/src/atrule.rs index cd5b279..04a38de 100644 --- a/src/atrule.rs +++ b/src/atrule.rs @@ -18,8 +18,7 @@ pub(crate) enum AtRule { Mixin(String, Box), Function(String, Box), Return(Vec), - // todo: emit only when non-ascii char is found - Charset(Vec), + Charset, Unknown(UnknownAtRule), } @@ -94,10 +93,10 @@ impl AtRule { AtRuleKind::Use => todo!("@use not yet implemented"), AtRuleKind::Annotation => todo!("@annotation not yet implemented"), AtRuleKind::AtRoot => todo!("@at-root not yet implemented"), - AtRuleKind::Charset => AtRule::Charset( - toks.take_while(|t| t.kind != TokenKind::Symbol(Symbol::SemiColon)) - .collect(), - ), + AtRuleKind::Charset => { + toks.take_while(|t| t.kind != TokenKind::Symbol(Symbol::SemiColon)).for_each(drop); + AtRule::Charset + }, AtRuleKind::Each => todo!("@each not yet implemented"), AtRuleKind::Extend => todo!("@extend not yet implemented"), AtRuleKind::If => todo!("@if not yet implemented"), diff --git a/src/css.rs b/src/css.rs index 969cd03..3b860ac 100644 --- a/src/css.rs +++ b/src/css.rs @@ -2,6 +2,8 @@ use crate::atrule::AtRule; use crate::error::SassResult; use crate::{RuleSet, Selector, Stmt, Style, StyleSheet}; +use crate::lexer::IS_UTF8; +use std::sync::atomic::Ordering; use std::fmt; use std::io::Write; @@ -115,6 +117,9 @@ impl Css { pub fn pretty_print(self, buf: &mut W, nesting: usize) -> SassResult<()> { let mut has_written = false; let padding = vec![' '; nesting * 2].iter().collect::(); + if IS_UTF8.swap(false, Ordering::Relaxed) { + writeln!(buf, "@charset \"UTF-8\";")?; + } for block in self.blocks { match block { Toplevel::RuleSet(selector, styles) => { @@ -144,11 +149,6 @@ impl Css { .unwrap(); writeln!(buf, "{}}}", padding)?; } - AtRule::Charset(toks) => write!( - buf, - "@charset {};", - toks.iter().map(|x| x.kind.to_string()).collect::() - )?, _ => todo!(), }, Toplevel::Newline => { diff --git a/src/format.rs b/src/format.rs index 71d653a..79956cc 100644 --- a/src/format.rs +++ b/src/format.rs @@ -36,11 +36,6 @@ impl PrettyPrinter { } Stmt::AtRule(r) => match r { AtRule::Unknown(..) => todo!("Display @rules properly"), - AtRule::Charset(toks) => write!( - self.buf, - "@charset {};", - toks.iter().map(|x| x.kind.to_string()).collect::() - )?, _ => todo!(), }, } diff --git a/src/lexer.rs b/src/lexer.rs index d62453a..33eedee 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,6 +1,7 @@ use std::convert::TryFrom; use std::iter::Peekable; use std::str::Chars; +use std::sync::atomic::{AtomicBool, Ordering}; use crate::atrule::AtRuleKind; use crate::common::{Keyword, Op, Pos, Symbol}; @@ -9,6 +10,8 @@ use crate::{Token, TokenKind, Whitespace}; // Rust does not allow us to escape '\f' const FORM_FEED: char = '\x0C'; +pub static IS_UTF8: AtomicBool = AtomicBool::new(false); + #[derive(Debug, Clone)] pub(crate) struct Lexer<'a> { tokens: Vec, @@ -133,6 +136,9 @@ impl<'a> Iterator for Lexer<'a> { '\0' => return None, &v => { self.buf.next(); + if !v.is_ascii() { + IS_UTF8.store(true, Ordering::Relaxed); + } TokenKind::Unknown(v.clone()) } }; diff --git a/src/lib.rs b/src/lib.rs index 3d7310c..5cc582e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -245,8 +245,6 @@ enum Expr { Style(Box