Fix highlighting when delimiters appear next to string interpolation (#80)

This patch makes Splash correctly highlight strings in which a value
is interpolated next to a delimiter character. The fix is to enable
each `Grammar` implementation to decide whether two tokens should be
*merged*, which in turn enables `SwiftGrammar` to prevent string
interpolation delimiters from being merged with their predecessor.
This commit is contained in:
John Sundell 2019-08-07 15:45:15 +02:00 committed by GitHub
parent 367b8408b1
commit dcf5951d10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 132 additions and 45 deletions

View File

@ -11,11 +11,29 @@ import Foundation
/// of the Swift language grammar.
public protocol Grammar {
/// The set of characters that make up the delimiters that separate
/// tokens within the language, such as punctuation characters.
/// tokens within the language, such as punctuation characters. You
/// can control whether delimiters should be merged when forming
/// tokens by implementing the `isDelimiter(mergableWith:)` method.
var delimiters: CharacterSet { get }
/// The rules that define the syntax of the language. When tokenizing,
/// the rules will be iterated over in sequence, and the first rule
/// that matches a given code segment will be used to determine that
/// segment's token type.
var syntaxRules: [SyntaxRule] { get }
/// Return whether two delimiters should be merged into a single
/// token, or whether they should be treated as separate ones.
/// The delimiters are passed in the order in which they appear
/// in the source code to be highlighted.
/// - Parameter delimiterA: The first delimiter
/// - Parameter delimiterB: The second delimiter
/// - Returns: `true` if the two delimiters should form a single
///   token, `false` if they should be kept as separate tokens.
func isDelimiter(_ delimiterA: Character,
mergableWith delimiterB: Character) -> Bool
}
public extension Grammar {
    /// Default merging policy: every pair of delimiters is considered
    /// mergable. Conforming grammars can supply their own implementation
    /// to keep specific delimiter pairs apart.
    /// - Parameter delimiterA: The delimiter that appears first in the source code
    /// - Parameter delimiterB: The delimiter that appears after `delimiterA`
    /// - Returns: Always `true`.
    func isDelimiter(_ delimiterA: Character, mergableWith delimiterB: Character) -> Bool {
        let mergesByDefault = true
        return mergesByDefault
    }
}

View File

@ -37,6 +37,20 @@ public struct SwiftGrammar: Grammar {
KeywordRule()
]
}
/// Decides whether two delimiters may be combined into one token.
///
/// The rules are evaluated in order:
/// 1. A backslash followed by an opening parenthesis is merged.
/// 2. Any other pair involving a backslash, on either side, is kept apart.
/// 3. A closing parenthesis is never merged with the delimiter after it.
/// 4. Every remaining pair is merged.
/// - Parameter delimiterA: The delimiter that appears first in the source code
/// - Parameter delimiterB: The delimiter that appears after `delimiterA`
/// - Returns: `true` when the pair should be merged into a single token.
public func isDelimiter(_ delimiterA: Character,
                        mergableWith delimiterB: Character) -> Bool {
    let backslash: Character = "\\"

    // The one backslash pair that is allowed to merge.
    if delimiterA == backslash && delimiterB == "(" {
        return true
    }

    // Any other pair that involves a backslash stays separate.
    if delimiterA == backslash || delimiterB == backslash {
        return false
    }

    // A leading closing parenthesis never merges; everything else does.
    return delimiterA != ")"
}
}
private extension SwiftGrammar {
@ -381,11 +395,15 @@ private extension SwiftGrammar {
var tokenType: TokenType { return .dotAccess }
func matches(_ segment: Segment) -> Bool {
guard segment.tokens.previous.isAny(of: ".", "(.", "[.") else {
guard !segment.tokens.onSameLine.isEmpty else {
return false
}
guard !segment.tokens.onSameLine.isEmpty else {
guard segment.isValidSymbol else {
return false
}
guard segment.tokens.previous.isAny(of: ".", "(.", "[.") else {
return false
}
@ -413,6 +431,10 @@ private extension SwiftGrammar {
return false
}
guard segment.isValidSymbol else {
return false
}
guard segment.tokens.previous.isAny(of: ".", "?.", "().", ").") else {
return false
}
@ -510,4 +532,12 @@ private extension Segment {
/// Whether the previous token is exactly `(.`, or the segment's
/// prefix ends with a space followed by a dot.
var prefixedByDotAccess: Bool {
    if tokens.previous == "(." {
        return true
    }

    return prefix.hasSuffix(" .")
}
/// Whether the current token starts like a symbol name: its first
/// character is an underscore or a letter. An empty current token
/// is never a valid symbol.
var isValidSymbol: Bool {
    let startsLikeSymbol = tokens.current.first.map { leading in
        leading == "_" || leading.isLetter
    }

    return startsLikeSymbol ?? false
}
}

View File

@ -41,7 +41,7 @@ public struct SyntaxHighlighter<Format: OutputFormat> {
state = nil
}
for segment in tokenizer.segmentsByTokenizing(code, delimiters: grammar.delimiters) {
for segment in tokenizer.segmentsByTokenizing(code, using: grammar) {
let token = segment.tokens.current
let whitespace = segment.trailingWhitespace

View File

@ -7,9 +7,10 @@
import Foundation
internal struct Tokenizer {
func segmentsByTokenizing(_ code: String, delimiters: CharacterSet) -> AnySequence<Segment> {
func segmentsByTokenizing(_ code: String,
using grammar: Grammar) -> AnySequence<Segment> {
return AnySequence<Segment> {
return Buffer(iterator: Iterator(code: code, delimiters: delimiters))
Buffer(iterator: Iterator(code: code, grammar: grammar))
}
}
}
@ -32,24 +33,29 @@ private extension Tokenizer {
}
struct Iterator: IteratorProtocol {
enum Component {
case token(String)
case delimiter(String)
case whitespace(String)
case newline(String)
struct Component {
enum Kind {
case token
case delimiter
case whitespace
case newline
}
let character: Character
let kind: Kind
}
private let code: String
private let delimiters: CharacterSet
private let grammar: Grammar
private var index: String.Index?
private var tokenCounts = [String: Int]()
private var allTokens = [String]()
private var lineTokens = [String]()
private var segments: (current: Segment?, previous: Segment?)
init(code: String, delimiters: CharacterSet) {
init(code: String, grammar: Grammar) {
self.code = code
self.delimiters = delimiters
self.grammar = grammar
segments = (nil, nil)
}
@ -65,8 +71,8 @@ private extension Tokenizer {
index = nextIndex
let component = makeComponent(at: nextIndex)
switch component {
case .token(let token), .delimiter(let token):
switch component.kind {
case .token, .delimiter:
guard var segment = segments.current else {
segments.current = makeSegment(with: component, at: nextIndex)
return next()
@ -77,22 +83,33 @@ private extension Tokenizer {
return finish(segment, with: component, at: nextIndex)
}
segment.tokens.current.append(token)
if component.isDelimiter {
let previousCharacter = segment.tokens.current.last!
let shouldMerge = grammar.isDelimiter(previousCharacter,
mergableWith: component.character)
guard shouldMerge else {
return finish(segment, with: component, at: nextIndex)
}
}
segment.tokens.current.append(component.character)
segments.current = segment
return next()
case .whitespace(let whitespace), .newline(let whitespace):
case .whitespace, .newline:
guard var segment = segments.current else {
var segment = makeSegment(with: component, at: nextIndex)
segment.trailingWhitespace = whitespace
segment.trailingWhitespace = component.token
segment.isLastOnLine = component.isNewline
segments.current = segment
return next()
}
if let existingWhitespace = segment.trailingWhitespace {
segment.trailingWhitespace = existingWhitespace.appending(whitespace)
if var existingWhitespace = segment.trailingWhitespace {
existingWhitespace.append(component.character)
segment.trailingWhitespace = existingWhitespace
} else {
segment.trailingWhitespace = whitespace
segment.trailingWhitespace = component.token
}
if component.isNewline {
@ -113,22 +130,28 @@ private extension Tokenizer {
}
private func makeComponent(at index: String.Index) -> Component {
func kind(for character: Character) -> Component.Kind {
if character.isWhitespace {
return .whitespace
}
if character.isNewline {
return .newline
}
if grammar.delimiters.contains(character) {
return .delimiter
}
return .token
}
let character = code[index]
let substring = String(character)
if character.isWhitespace {
return .whitespace(substring)
}
if character.isNewline {
return .newline(substring)
}
if delimiters.contains(character) {
return .delimiter(substring)
}
return .token(substring)
return Component(
character: character,
kind: kind(for: character)
)
}
private func makeSegment(with component: Component, at index: String.Index) -> Segment {
@ -175,17 +198,11 @@ private extension Tokenizer {
extension Tokenizer.Iterator.Component {
var token: String {
switch self {
case .token(let token),
.delimiter(let token):
return token
case .whitespace, .newline:
return ""
}
return String(character)
}
var isDelimiter: Bool {
switch self {
switch kind {
case .token, .whitespace, .newline:
return false
case .delimiter:
@ -194,7 +211,7 @@ extension Tokenizer.Iterator.Component {
}
var isNewline: Bool {
switch self {
switch kind {
case .token, .whitespace, .delimiter:
return false
case .newline:

View File

@ -110,6 +110,26 @@ final class LiteralTests: SyntaxHighlighterTestCase {
])
}
// Highlights the string literal "[\(text)]" and expects the bracket on each
// side of the interpolation to stay part of the surrounding string tokens,
// while the interpolation itself is emitted as plain text.
func testStringLiteralWithInterpolationSurroundedByBrackets() {
let components = highlighter.highlight(#""[\(text)]""#)
XCTAssertEqual(components, [
// Opening quote plus the bracket that precedes the interpolation
.token(#""["#, .string),
.plainText(#"\(text)"#),
// Closing bracket plus the closing quote
.token(#"]""#, .string)
])
}
// Highlights the string literal ".\(text)" and expects the dot in front of
// the interpolation to stay part of the leading string token, while the
// interpolation itself is emitted as plain text.
func testStringLiteralWithInterpolationPrefixedByPunctuation() {
let components = highlighter.highlight(#"".\(text)""#)
XCTAssertEqual(components, [
// Opening quote plus the dot that precedes the interpolation
.token("\".", .string),
.plainText(#"\(text)"#),
// Closing quote on its own
.token("\"", .string)
])
}
func testMultiLineStringLiteral() {
let components = highlighter.highlight("""
let string = \"\"\"
@ -266,6 +286,8 @@ extension LiteralTests {
("testStringLiteralInterpolation", testStringLiteralInterpolation),
("testStringLiteralWithInterpolatedClosureArgumentShorthand", testStringLiteralWithInterpolatedClosureArgumentShorthand),
("testStringLiteralWithCustomIterpolation", testStringLiteralWithCustomIterpolation),
("testStringLiteralWithInterpolationSurroundedByBrackets", testStringLiteralWithInterpolationSurroundedByBrackets),
("testStringLiteralWithInterpolationPrefixedByPunctuation", testStringLiteralWithInterpolationPrefixedByPunctuation),
("testMultiLineStringLiteral", testMultiLineStringLiteral),
("testSingleLineRawStringLiteral", testSingleLineRawStringLiteral),
("testMultiLineRawStringLiteral", testMultiLineRawStringLiteral),