From 13a96273e4db1a0e66c9be46c4c315c2eb75e6dd Mon Sep 17 00:00:00 2001 From: ConnorSkees <39542938+ConnorSkees@users.noreply.github.com> Date: Sun, 5 Apr 2020 23:20:47 -0400 Subject: [PATCH] emit charset only when output contains utf-8 --- src/lexer.rs | 6 ------ src/lib.rs | 6 +++--- src/output.rs | 19 ++++++++++++------- src/utils.rs | 2 ++ tests/charset.rs | 2 -- tests/misc.rs | 8 ++------ tests/str-escape.rs | 14 ++++++++++++-- tests/strings.rs | 4 +--- tests/variables.rs | 14 +++++--------- 9 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 37f021f..3ec5def 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,11 +1,9 @@ use std::iter::Peekable; use std::str::Chars; -use std::sync::atomic::{AtomicBool, Ordering}; use crate::common::Pos; use crate::Token; -pub static IS_UTF8: AtomicBool = AtomicBool::new(false); pub const FORM_FEED: char = '\x0C'; #[derive(Debug, Clone)] @@ -31,10 +29,6 @@ impl<'a> Iterator for Lexer<'a> { } } '\0' => return None, - c if !c.is_ascii() => { - IS_UTF8.store(true, Ordering::Relaxed); - c - } c => c, }; self.pos.next_char(); diff --git a/src/lib.rs b/src/lib.rs index 451b3c2..6a44c8c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -86,11 +86,11 @@ use std::path::Path; use crate::atrule::{eat_include, AtRule, AtRuleKind, Function, Mixin}; use crate::common::Pos; -use crate::output::Css; pub use crate::error::{SassError, SassResult}; use crate::format::PrettyPrinter; use crate::imports::import; use crate::lexer::Lexer; +use crate::output::Css; use crate::scope::{insert_global_fn, insert_global_mixin, insert_global_var, Scope, GLOBAL_SCOPE}; use crate::selector::Selector; use crate::style::Style; @@ -106,11 +106,11 @@ mod atrule; mod builtin; mod color; mod common; -mod output; mod error; mod format; mod imports; mod lexer; +mod output; mod scope; mod selector; mod style; @@ -264,7 +264,7 @@ impl StyleSheet { /// ``` #[inline] pub fn print_as_css(self, buf: &mut W) -> SassResult<()> { - Css::from_stylesheet(self)?.pretty_print(buf, 0) + Css::from_stylesheet(self)?.pretty_print(buf) } } diff --git a/src/output.rs b/src/output.rs index 9a1bb0a..997052b 100644 --- a/src/output.rs +++ b/src/output.rs @@ -1,11 +1,9 @@ //! # Convert from SCSS AST to CSS use std::fmt; use std::io::Write; -use std::sync::atomic::Ordering; use crate::atrule::AtRule; use crate::error::SassResult; -use crate::lexer::IS_UTF8; use crate::{RuleSet, Selector, Stmt, Style, StyleSheet}; #[derive(Debug, Clone)] @@ -121,12 +119,19 @@ impl Css { Ok(self) } - pub fn pretty_print(self, buf: &mut W, nesting: usize) -> SassResult<()> { - let mut has_written = false; - let padding = vec![' '; nesting * 2].iter().collect::(); - if IS_UTF8.swap(false, Ordering::Relaxed) { + pub fn pretty_print(self, buf: &mut W) -> SassResult<()> { + let mut string = Vec::new(); + self._inner_pretty_print(&mut string, 0)?; + if string.iter().any(|s| !s.is_ascii()) { writeln!(buf, "@charset \"UTF-8\";")?; } + write!(buf, "{}", String::from_utf8(string).unwrap())?; + Ok(()) + } + + fn _inner_pretty_print(self, buf: &mut Vec, nesting: usize) -> SassResult<()> { + let mut has_written = false; + let padding = vec![' '; nesting * 2].iter().collect::(); for block in self.blocks { match block { Toplevel::RuleSet(selector, styles) => { @@ -152,7 +157,7 @@ impl Css { writeln!(buf, "{}@{} {} {{", padding, u.name, u.params)?; } Css::from_stylesheet(StyleSheet::from_stmts(u.body))? - .pretty_print(buf, nesting + 1)?; + ._inner_pretty_print(buf, nesting + 1)?; writeln!(buf, "{}}}", padding)?; } _ => todo!("at-rule other than unknown at toplevel"), diff --git a/src/utils.rs b/src/utils.rs index d2133ba..4f71133 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -603,6 +603,8 @@ pub(crate) fn parse_quoted_string>( let c = std::char::from_u32(u32::from_str_radix(&n, 16).unwrap()).unwrap(); if c.is_control() && c != '\t' && c != '\0' { s.push_str(&format!("\\{}", n.to_ascii_lowercase())); + } else if c == '\0' { + s.push('\u{FFFD}'); } else { s.push(c); } diff --git a/tests/charset.rs b/tests/charset.rs index 51c2f47..90efc30 100644 --- a/tests/charset.rs +++ b/tests/charset.rs @@ -4,13 +4,11 @@ mod macros; test!( - #[ignore] utf8_input, "a {\n color: 🦆;\n}\n", "@charset \"UTF-8\";\na {\n color: 🦆;\n}\n" ); test!( - #[ignore] ascii_charset_utf8, "@charset \"UTF-8\";\na {\n color: red;\n}\n", "a {\n color: red;\n}\n" diff --git a/tests/misc.rs b/tests/misc.rs index ab40a32..0ba0aee 100644 --- a/tests/misc.rs +++ b/tests/misc.rs @@ -47,25 +47,21 @@ test!( "a {\n color: red;\n}\n" ); test!( - #[ignore] utf8_ident_before_len, "a {\n color: length(😀red);\n}\n", - "@charset \"UTF-8\";\na {\n color: 1;\n}\n" + "a {\n color: 1;\n}\n" ); test!( - #[ignore] utf8_ident_before, "a {\n color: 😀red;\n}\n", "@charset \"UTF-8\";\na {\n color: 😀red;\n}\n" ); test!( - #[ignore] utf8_ident_after_len, "a {\n color: length(red😁)\n}\n", - "@charset \"UTF-8\";\na {\n color: 1;\n}\n" + "a {\n color: 1;\n}\n" ); test!( - #[ignore] utf8_ident_after, "a {\n color: red😁\n}\n", "@charset \"UTF-8\";\na {\n color: red😁;\n}\n" diff --git a/tests/str-escape.rs b/tests/str-escape.rs index 254bfe2..be1e5a9 100644 --- a/tests/str-escape.rs +++ b/tests/str-escape.rs @@ -98,8 +98,8 @@ test!( ); test!( single_character_escape_sequence_has_space_after, - "a {\n color: \\0;\n}\n", - "a {\n color: \\0 ;\n}\n" + "a {\n color: \\a;\n}\n", + "a {\n color: \\a ;\n}\n" ); test!( escapes_non_hex_in_string, @@ -127,6 +127,16 @@ test!( "a {\n color: foo == f\\6F\\6F;\n}\n", "a {\n color: true;\n}\n" ); +test!( + quoted_escape_zero, + "a {\n color: \"\\0\";\n}\n", + "@charset \"UTF-8\";\na {\n color: \"�\";\n}\n" +); +test!( + unquoted_escape_zero, + "a {\n color: \\0;\n}\n", + "a {\n color: \\0 ;\n}\n" +); // test!( // quote_escape, // "a {\n color: quote(\\b);\n}\n", diff --git a/tests/strings.rs b/tests/strings.rs index aa90df5..cf29d5c 100644 --- a/tests/strings.rs +++ b/tests/strings.rs @@ -119,10 +119,9 @@ test!( "a {\n color: 7;\n}\n" ); test!( - #[ignore] str_len_double_wide, "a {\n color: str-length(\"👭\");\n}\n", - "@charset \"UTF-8\";\na {\n color: 1;\n}\n" + "a {\n color: 1;\n}\n" ); test!( str_len_combining, @@ -215,7 +214,6 @@ test!( "a {\n color: Xabcd;\n}\n" ); test!( - #[ignore] str_insert_double_width_char, "a {\n color: str-insert(\"👭\", \"c\", 2);\n}\n", "@charset \"UTF-8\";\na {\n color: \"👭c\";\n}\n" diff --git a/tests/variables.rs b/tests/variables.rs index 4ad1d19..1f73ff4 100644 --- a/tests/variables.rs +++ b/tests/variables.rs @@ -98,15 +98,11 @@ test!( "a {\n $a: red\n}\n\nb {\n color: blue;\n}\n", "b {\n color: blue;\n}\n" ); -// TODO: blocked on properly emitting @charset -// right now, we emit @charset if a utf-8 character -// is found *anywhere*, but ideally we would only emit -// it if a utf-8 character is actually in the output -// test!( -// unicode_in_variables, -// "$vär: foo;\na {\n color: $vär;\n}\n", -// "a {\n color: foo;\n}\n" -// ); +test!( + unicode_in_variables, + "$vär: foo;\na {\n color: $vär;\n}\n", + "a {\n color: foo;\n}\n" +); test!( variable_does_not_include_interpolation, "$input: foo;\na {\n color: $input#{\"literal\"};\n}\n",