John Sundell cac40caf68
Handle string literals that only have a new line (#97)
Since such new lines are grouped with their trailing marker, and start
with an escaping backslash, they would previously be ignored (since they
were incorrectly treated as string interpolation or escaped markers).
2020-01-29 11:09:14 +01:00

606 lines
19 KiB

/**
 *  Splash
 *  Copyright (c) John Sundell 2018
 *  MIT license - see LICENSE.md
 */
import Foundation
/// Grammar for the Swift language. Use this implementation when
/// highlighting Swift code. This is the default grammar.
public struct SwiftGrammar: Grammar {
/// Characters treated as delimiters; `init()` derives them from the
/// inverse of `CharacterSet.alphanumerics`.
public var delimiters: CharacterSet
/// The syntax rules of this grammar, assigned in `init()`.
public var syntaxRules: [SyntaxRule]
/// Creates the grammar and populates `delimiters` and `syntaxRules`.
public init() {
// Begin with every character that is not alphanumeric.
// NOTE(review): the set is declared `var` yet no mutation is visible
// before it is stored; adjustments may be missing from this copy - confirm.
var delimiters = CharacterSet.alphanumerics.inverted
self.delimiters = delimiters
// NOTE(review): the elements of this array literal and its closing
// bracket are not visible in this copy - confirm against upstream.
syntaxRules = [
/// Returns whether `delimiterA` is mergable with `delimiterB`, decided
/// purely by the pair of characters passed in.
public func isDelimiter(_ delimiterA: Character,
mergableWith delimiterB: Character) -> Bool {
switch (delimiterA, delimiterB) {
// A backslash merges with "(", giving the "\(" marker that
// `isWithinStringInterpolation` looks for.
case ("\\", "("):
return true
// Any other pairing that involves a backslash never merges.
case ("\\", _), (_, "\\"):
return false
// A closing parenthesis as `delimiterA` never merges.
case (")", _):
return false
// Keep "//", "/*" and "*/" together; `CommentRule` matches on them.
case ("/", "/"), ("/", "*"), ("*", "/"):
return true
// A slash as `delimiterA` does not merge with anything else.
case ("/", _):
return false
// An opening parenthesis is rejected here unless `delimiterB` is ".".
case ("(", _) where delimiterB != ".":
return false
// "." does not merge with "/".
case (".", "/"):
return false
// NOTE(review): this fallback presumably sat under a `default:` label that
// is not visible in this copy - confirm. All remaining pairs merge.
return true
private extension SwiftGrammar {
/// Every token the rules treat as a keyword: the literals below combined
/// with `accessControlKeywords`.
static let keywords = ([
"final", "class", "struct", "enum", "protocol",
"extension", "let", "var", "func", "typealias",
"init", "guard", "if", "else", "return", "get",
"throw", "throws", "rethrows", "for", "in", "open", "weak",
"import", "mutating", "nonmutating", "associatedtype",
"case", "switch", "static", "do", "try", "catch", "as",
"super", "self", "set", "true", "false", "nil",
"override", "where", "_", "default", "break",
"#selector", "required", "willSet", "didSet",
"lazy", "subscript", "defer", "inout", "while",
"continue", "fallthrough", "repeat", "indirect",
"deinit", "is", "#file", "#line", "#function",
"dynamic", "some", "#available", "convenience", "unowned"
] as Set<String>).union(accessControlKeywords)
// NOTE(review): the closing bracket of the literal below is not visible
// in this copy - confirm against upstream.
/// Access level modifiers. `CallRule` seeds its `callLikeKeywords` from
/// this set and `KeywordRule` consults it for the `(set)` modifier form.
static let accessControlKeywords: Set<String> = [
"public", "internal", "fileprivate", "private"
// NOTE(review): the closing bracket of the literal below is not visible
// in this copy - confirm against upstream.
/// Keywords that introduce a declaration. `KeywordRule` and `TypeRule`
/// both consult this set.
static let declarationKeywords: Set<String> = [
"class", "struct", "enum", "func",
"protocol", "typealias", "import",
"associatedtype", "subscript"
/// Matches preprocessing tokens and tags them `.preprocessing`.
struct PreprocessingRule: SyntaxRule {
var tokenType: TokenType { return .preprocessing }
/// Conditional compilation directives.
private let controlFlowTokens: Set<String> = ["#if", "#endif", "#elseif", "#else"]
/// Diagnostic directives.
private let directiveTokens: Set<String> = ["#warning", "#error"]
/// Returns `true` when the current token is one of the directives above,
/// or when a conditional compilation directive is on the same line.
func matches(_ segment: Segment) -> Bool {
// The current token is itself a conditional compilation directive.
if segment.tokens.current.isAny(of: controlFlowTokens) {
return true
// The current token is "#warning" or "#error".
if segment.tokens.current.isAny(of: directiveTokens) {
return true
// Otherwise match only if `tokens.onSameLine` contains a conditional
// compilation directive.
return segment.tokens.onSameLine.contains(anyOf: controlFlowTokens)
/// Matches tokens that belong to a comment and tags them `.comment`.
struct CommentRule: SyntaxRule {
var tokenType: TokenType { return .comment }
/// Returns `true` when the current token is judged to be part of a line
/// comment or a block comment.
func matches(_ segment: Segment) -> Bool {
// A token that opens with "/*" and also closes with "*/".
// NOTE(review): closing braces are not visible in this copy, so the
// nesting of these two checks is inferred - confirm.
if segment.tokens.current.hasPrefix("/*") {
if segment.tokens.current.hasSuffix("*/") {
return true
// The token itself starts with a line comment marker.
if segment.tokens.current.hasPrefix("//") {
return true
// A line comment marker is among the tokens on the same line.
if segment.tokens.onSameLine.contains(anyOf: "//", "///") {
return true
// The token is a bare block comment delimiter.
if segment.tokens.current.isAny(of: "/*", "/**", "*/") {
return true
// Otherwise compare how many block comment openers ("/*" and "/**") and
// closers ("*/") the segment's tokens contain; unequal counts match.
let multiLineStartCount = segment.tokens.count(of: "/*") + segment.tokens.count(of: "/**")
return multiLineStartCount != segment.tokens.count(of: "*/")
/// Matches tokens that start with "@" and tags them `.keyword`.
struct AttributeRule: SyntaxRule {
var tokenType: TokenType { return .keyword }
/// Returns `true` when the current token has the prefix "@".
func matches(_ segment: Segment) -> Bool {
return segment.tokens.current.hasPrefix("@")
/// Matches tokens inside raw string literals (`#"..."#` and the
/// triple-quoted `#"""..."""#` form) and tags them `.string`.
struct RawStringRule: SyntaxRule {
var tokenType: TokenType { return .string }
/// Returns `true` when the current token is judged to be inside a raw
/// string literal and not inside a raw string interpolation.
func matches(_ segment: Segment) -> Bool {
// Tokens that are part of a raw string interpolation are never matched.
guard !segment.isWithinRawStringInterpolation else {
return false
// Raw string literal delimited by #" and "#.
if segment.isWithinStringLiteral(withStart: "#\"", end: "\"#") {
return true
// Multi-line raw strings: compare the number of #""" openers with the
// number of """# closers among the segment's tokens; unequal counts match.
let multiLineStartCount = segment.tokens.count(of: "#\"\"\"")
let multiLineEndCount = segment.tokens.count(of: "\"\"\"#")
return multiLineStartCount != multiLineEndCount
/// Matches tokens inside triple-quoted string literals and tags them
/// `.string`.
struct MultiLineStringRule: SyntaxRule {
var tokenType: TokenType { return .string }
/// Returns `true` when the segment's tokens contain an odd number of
/// `"""` markers and the token is not within a string interpolation.
func matches(_ segment: Segment) -> Bool {
// An even number of """ markers never matches.
guard !segment.tokens.count(of: "\"\"\"").isEven else {
return false
// Tokens inside a "\(" interpolation are left to the other rules.
return !segment.isWithinStringInterpolation
/// Matches tokens inside double-quoted string literals and tags them
/// `.string`.
struct SingleLineStringRule: SyntaxRule {
var tokenType: TokenType { return .string }
/// Returns `true` when the current token is judged to be part of a
/// double-quoted string literal.
func matches(_ segment: Segment) -> Bool {
// A token that both starts and ends with a double quote.
if segment.tokens.current.hasPrefix("\"") &&
segment.tokens.current.hasSuffix("\"") {
return true
// Anything else must at least sit between string literal markers.
guard segment.isWithinStringLiteral(withStart: "\"", end: "\"") else {
return false
// Tokens within a string interpolation are excluded.
// NOTE(review): the expression ends on a dangling `&&`; its right-hand
// operand is not visible in this copy - confirm against upstream.
return !segment.isWithinStringInterpolation &&
/// Matches numeric literals, including the "." between two numeric tokens,
/// and tags them `.number`.
struct NumberRule: SyntaxRule {
var tokenType: TokenType { return .number }
/// Returns `true` when the current token is a number (ignoring "_"
/// separators) or a "." that sits between two numeric tokens.
func matches(_ segment: Segment) -> Bool {
// Don't match against index-based closure arguments
if let previous = segment.tokens.previous {
guard !previous.hasSuffix("$") else {
return false
// Integers can be separated using "_", so handle that
if segment.tokens.current.removing("_").isNumber {
return true
// Double and floating point values that contain a "."
guard segment.tokens.current == "." else {
return false
// The "." only counts when both of its neighbours exist and are numbers.
guard let previous = segment.tokens.previous,
let next = segment.tokens.next else {
return false
return previous.isNumber && next.isNumber
/// Matches tokens that look like function or initializer calls and tags
/// them `.call`.
// NOTE(review): closing braces are not visible in this copy, so the extent
// of nested `if`/`guard ... else` bodies described below is inferred - confirm.
struct CallRule: SyntaxRule {
var tokenType: TokenType { return .call }
/// A token directly preceded by one of these is never treated as a call.
private let keywordsToAvoid: Set<String>
/// Keywords that are rejected as calls when followed by "(".
private let callLikeKeywords: Set<String>
/// Tokens that rule out trailing closure syntax when found on the same line.
private let controlFlowTokens = ["if", "&&", "||", "for"]
/// Seeds `keywordsToAvoid` from `keywords` and `callLikeKeywords` from
/// `accessControlKeywords`.
init() {
// NOTE(review): both locals are declared `var` but no mutation is visible
// before they are stored; adjustments may be missing from this copy - confirm.
var keywordsToAvoid = keywords
self.keywordsToAvoid = keywordsToAvoid
var callLikeKeywords = accessControlKeywords
self.callLikeKeywords = callLikeKeywords
/// Returns `true` when the current token is followed by "(" or by a
/// trailing closure, and none of the exclusions below apply.
func matches(_ segment: Segment) -> Bool {
// Strip leading and trailing underscores before inspecting the name.
// NOTE(review): the closing parenthesis of this call is not visible here.
let token = segment.tokens.current.trimmingCharacters(
in: CharacterSet(charactersIn: "_")
guard token.startsWithLetter else {
return false
// There are a few keywords that might look like function calls
if callLikeKeywords.contains(segment.tokens.current) {
if let nextToken = segment.tokens.next {
guard !nextToken.starts(with: "(") else {
return false
if let previousToken = segment.tokens.previous {
// A token directly following one of `keywordsToAvoid` is not a call.
guard !keywordsToAvoid.contains(previousToken) else {
return false
// Don't treat enums with associated values as function calls
// when they appear within a switch statement
if previousToken == "." {
let previousTokens = segment.tokens.onSameLine
if previousTokens.count > 1 {
// The token just before the "." (second to last on the line).
let lastToken = previousTokens[previousTokens.count - 2]
guard lastToken != "case" else {
return false
// Multiple expressions can be matched within a single case
guard !lastToken.hasSuffix(",") else {
return false
// Handle trailing closure syntax
// When the token has trailing whitespace, it only counts as a call if the
// next token is "{" or "{}", it is not a keyword, and no control flow
// token is on the same line.
guard segment.trailingWhitespace == nil else {
guard segment.tokens.next.isAny(of: "{", "{}") else {
return false
guard !keywords.contains(segment.tokens.current) else {
return false
return !segment.tokens.onSameLine.contains(anyOf: controlFlowTokens)
// Without trailing whitespace, the next token must start with "(".
return segment.tokens.next?.starts(with: "(") ?? false
/// Matches Swift keywords and tags them `.keyword`, while skipping
/// identifiers that merely share a keyword's name.
// NOTE(review): closing braces are not visible in this copy, so the extent
// of nested bodies (including the `switch`) is inferred - confirm.
struct KeywordRule: SyntaxRule {
var tokenType: TokenType { return .keyword }
/// Returns `true` when the current token should be highlighted as a
/// keyword.
func matches(_ segment: Segment) -> Bool {
// "prefix" is highlighted when it directly precedes "func".
if segment.tokens.current == "prefix" && segment.tokens.next == "func" {
return true
// A token followed by ":" is only a keyword in two situations: "nil"
// after "case" or ",", and "default".
if segment.tokens.next == ":" {
// Nil pattern matching inside of a switch statement case
if segment.tokens.current == "nil" {
guard let previousToken = segment.tokens.previous else {
return false
return previousToken.isAny(of: "case", ",")
guard segment.tokens.current == "default" else {
return false
if let previousToken = segment.tokens.previous {
// Don't highlight variables with the same name as a keyword
// when used in optional binding, such as if let, guard let:
guard !previousToken.isAny(of: "let", "var") else {
return false
if !declarationKeywords.contains(segment.tokens.current) {
// Highlight the '(set)' part of setter access modifiers
switch segment.tokens.current {
// "(" counts when it directly follows an access control keyword.
case "(":
return accessControlKeywords.contains(previousToken)
// "set" counts when it directly follows "(".
case "set":
if previousToken == "(" {
return true
// ")" counts when it directly follows "set".
case ")":
return previousToken == "set"
// Don't highlight most keywords when used as a parameter label
if !segment.tokens.current.isAny(of: "_", "self", "let", "var", "true", "false", "inout", "nil") {
guard !previousToken.isAny(of: "(", ",", ">(") else {
return false
// A token directly after "func" or a backtick is not a keyword.
guard !segment.tokens.previous.isAny(of: "func", "`") else {
return false
// Finally, the token must be one of the known keywords.
return keywords.contains(segment.tokens.current)
/// Matches capitalized tokens that are used as type names and tags them
/// `.type`.
// NOTE(review): closing braces are not visible in this copy, so the extent
// of the loop and of the nested guards is inferred - confirm.
struct TypeRule: SyntaxRule {
var tokenType: TokenType { return .type }
/// Returns `true` when the current token should be highlighted as a type.
func matches(_ segment: Segment) -> Bool {
// Types should not be highlighted when declared
if let previousToken = segment.tokens.previous {
guard !previousToken.isAny(of: declarationKeywords) else {
return false
// Ignore leading and trailing underscores when checking capitalization.
// NOTE(review): the closing parenthesis of this call is not visible here.
let token = segment.tokens.current.trimmingCharacters(
in: CharacterSet(charactersIn: "_")
guard token.isCapitalized else {
return false
// Tokens reached through dot access are not matched by this rule.
guard !segment.prefixedByDotAccess else {
return false
// The XCTAssert family of functions is a bit of an edge case,
// since they start with capital letters. Since they are so
// commonly used, we'll add a special case for them here:
guard !token.starts(with: "XCTAssert") else {
return false
// In a generic declaration, only highlight constraints
if segment.tokens.previous.isAny(of: "<", ",") {
var foundOpeningBracket = false
// Since the declaration might be on another line, we have to walk
// backwards through all tokens until we've found enough information.
for token in segment.tokens.all.reversed() {
// Highlight return type generics as normal
if token == "->" {
return true
// Remember the first "<" met while walking backwards.
if !foundOpeningBracket && token == "<" {
foundOpeningBracket = true
// Handling generic lists for parameters, rather than declarations
if foundOpeningBracket && token == ":" {
return true
guard !declarationKeywords.contains(token) else {
// If it turns out that we weren't in fact inside of a generic
// declaration, (lacking "<"), then highlight the type as normal.
return !foundOpeningBracket
// Reaching any other keyword ends the search with a match.
guard !keywords.contains(token) else {
return true
// These tokens also end the search with a match.
if token.isAny(of: ">", "=", "==", "(") {
return true
// No exclusion applied, so highlight the token as a type.
return true
/// Matches symbols that directly follow a leading dot (".", "(." or "[.")
/// and tags them `.dotAccess`.
struct DotAccessRule: SyntaxRule {
var tokenType: TokenType { return .dotAccess }
/// Returns `true` when the current token is a valid symbol preceded by
/// ".", "(." or "[.", other than "self" and "init", on a line that does
/// not begin with "import".
func matches(_ segment: Segment) -> Bool {
// There must be at least one token on the same line.
guard !segment.tokens.onSameLine.isEmpty else {
return false
// The token must start with a letter or an underscore.
guard segment.isValidSymbol else {
return false
// The previous token must be one of the dot forms.
guard segment.tokens.previous.isAny(of: ".", "(.", "[.") else {
return false
// "self" and "init" are excluded from this rule.
guard !segment.tokens.current.isAny(of: "self", "init") else {
return false
// Tokens on a line whose first token is "import" are excluded.
return segment.tokens.onSameLine.first != "import"
/// Matches the token that directly follows a `\.` or `(\.` token and tags
/// it `.property`.
struct KeyPathRule: SyntaxRule {
var tokenType: TokenType { return .property }
/// Returns `true` when the previous token is `\.` or `(\.`.
func matches(_ segment: Segment) -> Bool {
return segment.tokens.previous.isAny(of: #"\."#, #"(\."#)
/// Matches symbols accessed through a dot that are not followed by a call
/// and tags them `.property`.
struct PropertyRule: SyntaxRule {
var tokenType: TokenType { return .property }
/// Returns `true` when the current token is a valid symbol preceded by
/// ".", "?.", "()." or ").", and none of the exclusions below apply.
func matches(_ segment: Segment) -> Bool {
// There must be at least one token on the same line.
guard !segment.tokens.onSameLine.isEmpty else {
return false
// The token must start with a letter or an underscore.
guard segment.isValidSymbol else {
return false
// The previous token must end a member access.
guard segment.tokens.previous.isAny(of: ".", "?.", "().", ").") else {
return false
// "self" is excluded from this rule.
guard segment.tokens.current != "self" else {
return false
// Tokens for which `prefixedByDotAccess` holds are excluded.
guard !segment.prefixedByDotAccess else {
return false
// A next token starting with "(" excludes the token from this rule.
if let next = segment.tokens.next {
guard !next.hasPrefix("(") else {
return false
// Tokens on a line whose first token is "import" are excluded.
return segment.tokens.onSameLine.first != "import"
private extension Segment {
/// Returns whether the current token is judged to be inside a string
/// literal delimited by the given markers.
///
/// - Parameter start: The token that opens the literal.
/// - Parameter end: The token that closes the literal (may equal `start`).
/// - Returns: `true` when the current token begins with `start` or ends
///   with `end`, or when the tokens on the same line contain an unequal
///   number of start and end markers.
// NOTE(review): closing braces are not visible in this copy, so the extent
// of the loop body and its branches is inferred - confirm.
func isWithinStringLiteral(withStart start: String, end: String) -> Bool {
// A token carrying a marker itself is part of the literal.
if tokens.current.hasPrefix(start) {
return true
if tokens.current.hasSuffix(end) {
return true
// Count the start and end markers among the tokens on the same line.
var markerCounts = (start: 0, end: 0)
var previousToken: String?
for token in tokens.onSameLine {
// A token beginning with "(", "#(" or a quote that directly follows a
// lone backslash is not counted as a marker.
if token.hasPrefix("(") || token.hasPrefix("#(") || token.hasPrefix("\"") {
guard previousToken != "\\" else {
// NOTE(review): presumably followed by a `continue` that is not visible
// in this copy - confirm.
previousToken = token
if token == start {
// With identical markers, the running balance decides whether this one
// opens or closes the literal.
if start != end || markerCounts.start == markerCounts.end {
markerCounts.start += 1
} else {
markerCounts.end += 1
} else if token == end && start != end {
markerCounts.end += 1
} else {
// Markers may also be attached to the front or back of a longer token.
if token.hasPrefix(start) {
markerCounts.start += 1
if token.hasSuffix(end) {
markerCounts.end += 1
previousToken = token
// Unequal counts mean a literal is still open at the current token.
return markerCounts.start != markerCounts.end
/// Whether the current token is judged to be inside a `\(...)` string
/// interpolation on the same line.
var isWithinStringInterpolation: Bool {
let delimiter = "\\("
// The delimiter itself and the token right after it always count.
if tokens.current == delimiter || tokens.previous == delimiter {
return true
// Split the tokens on the same line at each interpolation delimiter; more
// than one component is required for an interpolation to be present.
let components = tokens.onSameLine.split(separator: delimiter)
guard components.count > 1 else {
return false
// Only the tokens after the last delimiter matter. The force unwrap is
// safe because `components.count > 1` was checked above.
let suffix = components.last!
// Start at 1 for the "(" that belongs to the delimiter, then balance the
// parentheses of every following token.
var paranthesisCount = 1
for component in suffix {
paranthesisCount += component.numberOfOccurrences(of: "(")
paranthesisCount -= component.numberOfOccurrences(of: ")")
// Once the count is no longer positive, the interpolation is treated as
// closed.
guard paranthesisCount > 0 else {
return false
return true
/// Whether the current token is part of a `\#(...)` raw string
/// interpolation, judged from its immediate neighbours.
var isWithinRawStringInterpolation: Bool {
// Quick fix for supporting single expressions within raw string
// interpolation, a proper fix should be developed ASAP.
switch tokens.current {
// A backslash that is not preceded by another backslash and is followed
// by "#".
case "\\":
return tokens.previous != "\\" && tokens.next == "#"
// A "#" between a backslash and "(".
case "#":
return tokens.previous == "\\" && tokens.next == "("
// A "(" whose two preceding tokens on the line are a backslash and "#".
case "(":
return tokens.onSameLine.suffix(2) == ["\\", "#"]
// A ")" where the first three of the last four tokens on the line are a
// backslash, "#" and "(".
case ")":
let suffix = tokens.onSameLine.suffix(4)
return suffix.prefix(3) == ["\\", "#", "("]
// NOTE(review): the two lines below presumably sat under a `default:`
// label that is not visible in this copy - confirm. Any other token counts
// only when the last three tokens on the line are a backslash, "#" and
// "(", and the next token is ")".
let suffix = tokens.onSameLine.suffix(3)
return suffix == ["\\", "#", "("] && tokens.next == ")"
/// Whether the previous token is "(." or the segment's `prefix` ends with
/// a space followed by ".".
var prefixedByDotAccess: Bool {
return tokens.previous == "(." || prefix.hasSuffix(" .")
/// Whether the current token starts with an underscore or a letter.
/// An empty token is never a valid symbol.
var isValidSymbol: Bool {
guard let firstCharacter = tokens.current.first else {
return false
return firstCharacter == "_" || firstCharacter.isLetter