Fix highlighting when delimiters appear next to string interpolation (#80)
This patch makes Splash correctly highlight strings in which a value is interpolated next to a delimiter character. The fix is to let each `Grammar` implementation decide whether two adjacent delimiters should be *merged* into a single token, which in turn lets `SwiftGrammar` prevent string interpolation delimiters from being merged with their predecessor.
This commit is contained in:
parent
367b8408b1
commit
dcf5951d10
|
@ -11,11 +11,29 @@ import Foundation
|
|||
/// of the Swift language grammar.
|
||||
public protocol Grammar {
|
||||
/// The set of characters that make up the delimiters that separate
|
||||
/// tokens within the language, such as punctuation characters.
|
||||
/// tokens within the language, such as punctuation characters. You
|
||||
/// can control whether delimiters should be merged when forming
|
||||
/// tokens by implementing the `isDelimiter(_:mergableWith:)` method.
|
||||
var delimiters: CharacterSet { get }
|
||||
/// The rules that define the syntax of the language. When tokenizing,
|
||||
/// the rules will be iterated over in sequence, and the first rule
|
||||
/// that matches a given code segment will be used to determine that
|
||||
/// segment's token type.
|
||||
var syntaxRules: [SyntaxRule] { get }
|
||||
|
||||
/// Return whether two delimiters should be merged into a single
|
||||
/// token, or whether they should be treated as separate ones.
|
||||
/// The delimiters are passed in the order in which they appear
|
||||
/// in the source code to be highlighted.
|
||||
/// - Parameter delimiterA: The first delimiter
|
||||
/// - Parameter delimiterB: The second delimiter
|
||||
func isDelimiter(_ delimiterA: Character,
|
||||
mergableWith delimiterB: Character) -> Bool
|
||||
}
|
||||
|
||||
public extension Grammar {
    /// Default implementation: any two adjacent delimiters are merged
    /// into a single token. Conforming grammars can supply their own
    /// implementation to keep specific delimiter pairs separate.
    /// - Parameter delimiterA: The first delimiter
    /// - Parameter delimiterB: The second delimiter
    /// - Returns: Always `true`.
    func isDelimiter(_ delimiterA: Character, mergableWith delimiterB: Character) -> Bool {
        return true
    }
}
|
||||
|
|
|
@ -37,6 +37,20 @@ public struct SwiftGrammar: Grammar {
|
|||
KeywordRule()
|
||||
]
|
||||
}
|
||||
|
||||
/// Decide whether two adjacent delimiters should form a single token.
///
/// The rules, evaluated in order:
/// 1. A backslash merges only with a following `(`, so the start of a
///    string interpolation (`\(`) stays together as one token.
/// 2. A backslash is never merged onto a preceding delimiter.
/// 3. A closing parenthesis never merges with whatever follows it.
/// 4. Every other pair is merged.
///
/// - Parameter delimiterA: The first delimiter
/// - Parameter delimiterB: The second delimiter
/// - Returns: `true` if the two delimiters should be merged.
public func isDelimiter(_ delimiterA: Character,
                        mergableWith delimiterB: Character) -> Bool {
    let backslash: Character = "\\"

    // A leading backslash is only allowed to absorb an opening parenthesis.
    if delimiterA == backslash {
        return delimiterB == "("
    }

    // A trailing backslash always starts a new token.
    if delimiterB == backslash {
        return false
    }

    // Anything following a closing parenthesis starts a new token.
    return delimiterA != ")"
}
|
||||
}
|
||||
|
||||
private extension SwiftGrammar {
|
||||
|
@ -381,11 +395,15 @@ private extension SwiftGrammar {
|
|||
var tokenType: TokenType { return .dotAccess }
|
||||
|
||||
func matches(_ segment: Segment) -> Bool {
|
||||
guard segment.tokens.previous.isAny(of: ".", "(.", "[.") else {
|
||||
guard !segment.tokens.onSameLine.isEmpty else {
|
||||
return false
|
||||
}
|
||||
|
||||
guard !segment.tokens.onSameLine.isEmpty else {
|
||||
guard segment.isValidSymbol else {
|
||||
return false
|
||||
}
|
||||
|
||||
guard segment.tokens.previous.isAny(of: ".", "(.", "[.") else {
|
||||
return false
|
||||
}
|
||||
|
||||
|
@ -413,6 +431,10 @@ private extension SwiftGrammar {
|
|||
return false
|
||||
}
|
||||
|
||||
guard segment.isValidSymbol else {
|
||||
return false
|
||||
}
|
||||
|
||||
guard segment.tokens.previous.isAny(of: ".", "?.", "().", ").") else {
|
||||
return false
|
||||
}
|
||||
|
@ -510,4 +532,12 @@ private extension Segment {
|
|||
var prefixedByDotAccess: Bool {
|
||||
return tokens.previous == "(." || prefix.hasSuffix(" .")
|
||||
}
|
||||
|
||||
/// Whether the current token starts like a symbol name: its first
/// character is an underscore or a letter. An empty current token
/// is never considered a valid symbol.
var isValidSymbol: Bool {
    if let leadingCharacter = tokens.current.first {
        return leadingCharacter == "_" || leadingCharacter.isLetter
    }

    return false
}
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ public struct SyntaxHighlighter<Format: OutputFormat> {
|
|||
state = nil
|
||||
}
|
||||
|
||||
for segment in tokenizer.segmentsByTokenizing(code, delimiters: grammar.delimiters) {
|
||||
for segment in tokenizer.segmentsByTokenizing(code, using: grammar) {
|
||||
let token = segment.tokens.current
|
||||
let whitespace = segment.trailingWhitespace
|
||||
|
||||
|
|
|
@ -7,9 +7,10 @@
|
|||
import Foundation
|
||||
|
||||
internal struct Tokenizer {
|
||||
func segmentsByTokenizing(_ code: String, delimiters: CharacterSet) -> AnySequence<Segment> {
|
||||
func segmentsByTokenizing(_ code: String,
|
||||
using grammar: Grammar) -> AnySequence<Segment> {
|
||||
return AnySequence<Segment> {
|
||||
return Buffer(iterator: Iterator(code: code, delimiters: delimiters))
|
||||
Buffer(iterator: Iterator(code: code, grammar: grammar))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -32,24 +33,29 @@ private extension Tokenizer {
|
|||
}
|
||||
|
||||
struct Iterator: IteratorProtocol {
|
||||
enum Component {
|
||||
case token(String)
|
||||
case delimiter(String)
|
||||
case whitespace(String)
|
||||
case newline(String)
|
||||
struct Component {
|
||||
enum Kind {
|
||||
case token
|
||||
case delimiter
|
||||
case whitespace
|
||||
case newline
|
||||
}
|
||||
|
||||
let character: Character
|
||||
let kind: Kind
|
||||
}
|
||||
|
||||
private let code: String
|
||||
private let delimiters: CharacterSet
|
||||
private let grammar: Grammar
|
||||
private var index: String.Index?
|
||||
private var tokenCounts = [String: Int]()
|
||||
private var allTokens = [String]()
|
||||
private var lineTokens = [String]()
|
||||
private var segments: (current: Segment?, previous: Segment?)
|
||||
|
||||
init(code: String, delimiters: CharacterSet) {
|
||||
init(code: String, grammar: Grammar) {
|
||||
self.code = code
|
||||
self.delimiters = delimiters
|
||||
self.grammar = grammar
|
||||
segments = (nil, nil)
|
||||
}
|
||||
|
||||
|
@ -65,8 +71,8 @@ private extension Tokenizer {
|
|||
index = nextIndex
|
||||
let component = makeComponent(at: nextIndex)
|
||||
|
||||
switch component {
|
||||
case .token(let token), .delimiter(let token):
|
||||
switch component.kind {
|
||||
case .token, .delimiter:
|
||||
guard var segment = segments.current else {
|
||||
segments.current = makeSegment(with: component, at: nextIndex)
|
||||
return next()
|
||||
|
@ -77,22 +83,33 @@ private extension Tokenizer {
|
|||
return finish(segment, with: component, at: nextIndex)
|
||||
}
|
||||
|
||||
segment.tokens.current.append(token)
|
||||
if component.isDelimiter {
|
||||
let previousCharacter = segment.tokens.current.last!
|
||||
let shouldMerge = grammar.isDelimiter(previousCharacter,
|
||||
mergableWith: component.character)
|
||||
|
||||
guard shouldMerge else {
|
||||
return finish(segment, with: component, at: nextIndex)
|
||||
}
|
||||
}
|
||||
|
||||
segment.tokens.current.append(component.character)
|
||||
segments.current = segment
|
||||
return next()
|
||||
case .whitespace(let whitespace), .newline(let whitespace):
|
||||
case .whitespace, .newline:
|
||||
guard var segment = segments.current else {
|
||||
var segment = makeSegment(with: component, at: nextIndex)
|
||||
segment.trailingWhitespace = whitespace
|
||||
segment.trailingWhitespace = component.token
|
||||
segment.isLastOnLine = component.isNewline
|
||||
segments.current = segment
|
||||
return next()
|
||||
}
|
||||
|
||||
if let existingWhitespace = segment.trailingWhitespace {
|
||||
segment.trailingWhitespace = existingWhitespace.appending(whitespace)
|
||||
if var existingWhitespace = segment.trailingWhitespace {
|
||||
existingWhitespace.append(component.character)
|
||||
segment.trailingWhitespace = existingWhitespace
|
||||
} else {
|
||||
segment.trailingWhitespace = whitespace
|
||||
segment.trailingWhitespace = component.token
|
||||
}
|
||||
|
||||
if component.isNewline {
|
||||
|
@ -113,22 +130,28 @@ private extension Tokenizer {
|
|||
}
|
||||
|
||||
private func makeComponent(at index: String.Index) -> Component {
|
||||
func kind(for character: Character) -> Component.Kind {
|
||||
if character.isWhitespace {
|
||||
return .whitespace
|
||||
}
|
||||
|
||||
if character.isNewline {
|
||||
return .newline
|
||||
}
|
||||
|
||||
if grammar.delimiters.contains(character) {
|
||||
return .delimiter
|
||||
}
|
||||
|
||||
return .token
|
||||
}
|
||||
|
||||
let character = code[index]
|
||||
let substring = String(character)
|
||||
|
||||
if character.isWhitespace {
|
||||
return .whitespace(substring)
|
||||
}
|
||||
|
||||
if character.isNewline {
|
||||
return .newline(substring)
|
||||
}
|
||||
|
||||
if delimiters.contains(character) {
|
||||
return .delimiter(substring)
|
||||
}
|
||||
|
||||
return .token(substring)
|
||||
return Component(
|
||||
character: character,
|
||||
kind: kind(for: character)
|
||||
)
|
||||
}
|
||||
|
||||
private func makeSegment(with component: Component, at index: String.Index) -> Segment {
|
||||
|
@ -175,17 +198,11 @@ private extension Tokenizer {
|
|||
|
||||
extension Tokenizer.Iterator.Component {
|
||||
var token: String {
|
||||
switch self {
|
||||
case .token(let token),
|
||||
.delimiter(let token):
|
||||
return token
|
||||
case .whitespace, .newline:
|
||||
return ""
|
||||
}
|
||||
return String(character)
|
||||
}
|
||||
|
||||
var isDelimiter: Bool {
|
||||
switch self {
|
||||
switch kind {
|
||||
case .token, .whitespace, .newline:
|
||||
return false
|
||||
case .delimiter:
|
||||
|
@ -194,7 +211,7 @@ extension Tokenizer.Iterator.Component {
|
|||
}
|
||||
|
||||
var isNewline: Bool {
|
||||
switch self {
|
||||
switch kind {
|
||||
case .token, .whitespace, .delimiter:
|
||||
return false
|
||||
case .newline:
|
||||
|
|
|
@ -110,6 +110,26 @@ final class LiteralTests: SyntaxHighlighterTestCase {
|
|||
])
|
||||
}
|
||||
|
||||
/// A string literal whose interpolation is wrapped in square brackets:
/// the brackets must stay part of the surrounding string tokens while
/// the interpolation itself is emitted as plain text.
func testStringLiteralWithInterpolationSurroundedByBrackets() {
    let code = #""[\(text)]""#
    let highlighted = highlighter.highlight(code)

    XCTAssertEqual(highlighted, [
        .token(#""["#, .string),
        .plainText(#"\(text)"#),
        .token(#"]""#, .string)
    ])
}
|
||||
|
||||
/// A string literal in which a punctuation character directly precedes
/// the interpolation: the punctuation must stay part of the opening
/// string token rather than being merged into the interpolation.
func testStringLiteralWithInterpolationPrefixedByPunctuation() {
    let code = #"".\(text)""#
    let highlighted = highlighter.highlight(code)

    XCTAssertEqual(highlighted, [
        .token("\".", .string),
        .plainText(#"\(text)"#),
        .token("\"", .string)
    ])
}
|
||||
|
||||
func testMultiLineStringLiteral() {
|
||||
let components = highlighter.highlight("""
|
||||
let string = \"\"\"
|
||||
|
@ -266,6 +286,8 @@ extension LiteralTests {
|
|||
("testStringLiteralInterpolation", testStringLiteralInterpolation),
|
||||
("testStringLiteralWithInterpolatedClosureArgumentShorthand", testStringLiteralWithInterpolatedClosureArgumentShorthand),
|
||||
("testStringLiteralWithCustomIterpolation", testStringLiteralWithCustomIterpolation),
|
||||
("testStringLiteralWithInterpolationSurroundedByBrackets", testStringLiteralWithInterpolationSurroundedByBrackets),
|
||||
("testStringLiteralWithInterpolationPrefixedByPunctuation", testStringLiteralWithInterpolationPrefixedByPunctuation),
|
||||
("testMultiLineStringLiteral", testMultiLineStringLiteral),
|
||||
("testSingleLineRawStringLiteral", testSingleLineRawStringLiteral),
|
||||
("testMultiLineRawStringLiteral", testMultiLineRawStringLiteral),
|
||||
|
|
Loading…
Reference in New Issue