splash/Sources/Splash/Grammar/SwiftGrammar.swift
John Sundell 3e400c0874
Don’t highlight generic parameter types used in initializer declarations (#107)
This change makes Splash stop highlighting generic types that are attached
to an initializer declaration, and instead now only highlights their
constraints, just like within other kinds of generic type lists.
2020-05-25 00:54:25 +02:00

617 lines
20 KiB
Swift

/**
* Splash
* Copyright (c) John Sundell 2018
* MIT license - see LICENSE.md
*/
import Foundation
/// Grammar for the Swift language. Use this implementation when
/// highlighting Swift code. This is the default grammar.
public struct SwiftGrammar: Grammar {
    /// Characters treated as delimiters. Initialised to every character that
    /// is not alphanumeric, minus `_`, `"`, `#`, `@` and `$`.
    public var delimiters: CharacterSet
    /// The syntax rules of this grammar, in the order set up by `init()`.
    public var syntaxRules: [SyntaxRule]

    /// Creates the grammar with its delimiter set and its list of syntax rules.
    public init() {
        // Start from all non-alphanumeric characters, then take out the few
        // symbols that should not act as delimiters.
        var delimiters = CharacterSet.alphanumerics.inverted
        delimiters.remove(charactersIn: "_\"#@$")
        self.delimiters = delimiters

        syntaxRules = [
            PreprocessingRule(),
            CommentRule(),
            RawStringRule(),
            MultiLineStringRule(),
            SingleLineStringRule(),
            AttributeRule(),
            NumberRule(),
            TypeRule(),
            CallRule(),
            KeyPathRule(),
            PropertyRule(),
            DotAccessRule(),
            KeywordRule()
        ]
    }

    /// Returns whether `delimiterA` can be merged with `delimiterB`.
    ///
    /// NOTE(review): the cases below read as "A directly followed by B"
    /// (e.g. `\(`, `//`, `/*`, `*/`) - confirm against the `Grammar` protocol.
    public func isDelimiter(_ delimiterA: Character,
                            mergableWith delimiterB: Character) -> Bool {
        switch (delimiterA, delimiterB) {
        // `\(` plus the comment markers `//`, `/*` and `*/` are kept together.
        case ("\\", "("), ("/", "/"), ("/", "*"), ("*", "/"):
            return true
        // Any other pair involving a backslash, anything after ")" or "/",
        // and a "/" after ".", "{" or "}" is never merged.
        case ("\\", _), (_, "\\"), (")", _), ("/", _),
             (".", "/"), ("{", "/"), ("}", "/"):
            return false
        // An opening parenthesis only merges with a following dot.
        case ("(", let following):
            return following == "."
        default:
            return true
        }
    }
}
private extension SwiftGrammar {
/// Tokens recognised as Swift keywords by the rules in this file. Also
/// contains every member of `accessControlKeywords`.
static let keywords: Set<String> = accessControlKeywords.union([
    // Declarations
    "class", "struct", "enum", "protocol", "extension",
    "func", "typealias", "init", "deinit", "subscript",
    "associatedtype", "import", "let", "var", "case",
    // Declaration modifiers
    "final", "open", "weak", "unowned", "static",
    "mutating", "nonmutating", "override", "required",
    "convenience", "lazy", "dynamic", "indirect", "inout",
    // Control flow
    "guard", "if", "else", "return", "for", "in",
    "switch", "default", "break", "continue", "fallthrough",
    "while", "repeat", "do", "defer", "where",
    // Error handling
    "throw", "throws", "rethrows", "try", "catch",
    // Accessors and observers
    "get", "set", "willSet", "didSet",
    // Expressions and literals
    "as", "is", "super", "self", "true", "false", "nil", "_", "some",
    // Pound-prefixed tokens
    "#selector", "#file", "#line", "#function", "#available"
])

/// The access level modifiers.
static let accessControlKeywords: Set<String> = [
    "public",
    "internal",
    "fileprivate",
    "private"
]

/// Keywords that introduce a declaration. The rules below use this set to
/// treat the token following such a keyword as a declared name.
static let declarationKeywords: Set<String> = [
    "class",
    "struct",
    "enum",
    "func",
    "protocol",
    "typealias",
    "import",
    "associatedtype",
    "subscript",
    "init"
]
/// Rule that assigns `.preprocessing` to compiler directives and to every
/// token on a line that contains a conditional compilation token.
struct PreprocessingRule: SyntaxRule {
    var tokenType: TokenType { return .preprocessing }

    private let controlFlowTokens: Set<String> = ["#if", "#endif", "#elseif", "#else"]
    private let directiveTokens: Set<String> = ["#warning", "#error"]

    /// Matches when the current token is one of the control flow or directive
    /// tokens, or when a control flow token appears in `tokens.onSameLine`.
    func matches(_ segment: Segment) -> Bool {
        let currentToken = segment.tokens.current

        return currentToken.isAny(of: controlFlowTokens)
            || currentToken.isAny(of: directiveTokens)
            || segment.tokens.onSameLine.contains(anyOf: controlFlowTokens)
    }
}
/// Rule that assigns `.comment` to single-line and multi-line comments.
struct CommentRule: SyntaxRule {
    var tokenType: TokenType { return .comment }

    func matches(_ segment: Segment) -> Bool {
        let tokens = segment.tokens
        let currentToken = tokens.current

        // A multi-line comment that opens and closes within one token
        if currentToken.hasPrefix("/*") && currentToken.hasSuffix("*/") {
            return true
        }

        // The token itself starts a single-line comment
        if currentToken.hasPrefix("//") {
            return true
        }

        // A single-line comment marker appears among the tokens on this line
        if tokens.onSameLine.contains(anyOf: "//", "///") {
            return true
        }

        // The token is itself a multi-line comment marker
        if currentToken.isAny(of: "/*", "/**", "*/") {
            return true
        }

        // Otherwise compare how many multi-line openers and closers
        // `tokens.count(of:)` reports; a mismatch counts as a comment.
        let openerCount = tokens.count(of: "/*") + tokens.count(of: "/**")
        let closerCount = tokens.count(of: "*/")
        return openerCount != closerCount
    }
}
/// Rule that assigns `.keyword` to tokens starting with "@".
struct AttributeRule: SyntaxRule {
    var tokenType: TokenType { return .keyword }

    func matches(_ segment: Segment) -> Bool {
        let currentToken = segment.tokens.current
        return currentToken.hasPrefix("@")
    }
}
/// Rule that assigns `.string` to raw string literals (`#"..."#` and
/// `#"""..."""#`), except inside a raw string interpolation.
struct RawStringRule: SyntaxRule {
    var tokenType: TokenType { return .string }

    func matches(_ segment: Segment) -> Bool {
        if segment.isWithinRawStringInterpolation {
            return false
        }

        if segment.isWithinStringLiteral(withStart: "#\"", end: "\"#") {
            return true
        }

        // Multi-line raw strings: a mismatch between the reported number of
        // opening and closing markers counts as being inside a literal.
        let openerCount = segment.tokens.count(of: "#\"\"\"")
        let closerCount = segment.tokens.count(of: "\"\"\"#")
        return openerCount != closerCount
    }
}
/// Rule that assigns `.string` to the contents of `"""` string literals.
struct MultiLineStringRule: SyntaxRule {
    var tokenType: TokenType { return .string }

    func matches(_ segment: Segment) -> Bool {
        // An odd number of `"""` markers means one of them is still open.
        let hasUnclosedMarker = !segment.tokens.count(of: "\"\"\"").isEven
        return hasUnclosedMarker && !segment.isWithinStringInterpolation
    }
}
/// Rule that assigns `.string` to single-line string literals, leaving out
/// tokens that sit inside an interpolation.
struct SingleLineStringRule: SyntaxRule {
    var tokenType: TokenType { return .string }

    func matches(_ segment: Segment) -> Bool {
        let currentToken = segment.tokens.current

        // A token that both starts and ends with a quote is always a string
        let isQuotedToken = currentToken.hasPrefix("\"") && currentToken.hasSuffix("\"")
        if isQuotedToken {
            return true
        }

        if !segment.isWithinStringLiteral(withStart: "\"", end: "\"") {
            return false
        }

        if segment.isWithinStringInterpolation {
            return false
        }

        return !segment.isWithinRawStringInterpolation
    }
}
/// Rule that assigns `.number` to numeric tokens, and to the "." that sits
/// between two numeric tokens.
struct NumberRule: SyntaxRule {
    var tokenType: TokenType { return .number }

    func matches(_ segment: Segment) -> Bool {
        let tokens = segment.tokens

        // Index-based closure arguments (a "$" right before) are not numbers
        if let precedingToken = tokens.previous, precedingToken.hasSuffix("$") {
            return false
        }

        // "_" may be used as a separator within a number, so ignore it
        if tokens.current.removing("_").isNumber {
            return true
        }

        // What remains is the "." of a floating point value, which needs a
        // number on both sides
        guard tokens.current == ".",
              let wholePart = tokens.previous,
              let fractionPart = tokens.next else {
            return false
        }

        return wholePart.isNumber && fractionPart.isNumber
    }
}
/// Rule that assigns `.call` to tokens that look like a function call:
/// a name directly followed by "(", or a name followed by whitespace and
/// a trailing closure.
struct CallRule: SyntaxRule {
    var tokenType: TokenType { return .call }

    /// Keywords after which a token is never treated as a call.
    private let keywordsToAvoid: Set<String>
    /// Keywords that are not calls when the next token starts with "(".
    private let callLikeKeywords: Set<String>
    /// Tokens whose presence on the line rules out the trailing closure form.
    private let controlFlowTokens = ["if", "&&", "||", "for", "switch"]

    init() {
        // A call is still allowed right after these five keywords, so they
        // are left out of the avoid list.
        keywordsToAvoid = keywords.subtracting(["return", "try", "throw", "if", "in"])
        callLikeKeywords = accessControlKeywords.union(["subscript", "init"])
    }

    func matches(_ segment: Segment) -> Bool {
        let tokens = segment.tokens
        let currentToken = tokens.current

        // Underscores around the name are ignored for the letter check
        let name = currentToken.trimmingCharacters(
            in: CharacterSet(charactersIn: "_")
        )

        if !name.startsWithLetter {
            return false
        }

        // A few keywords can be followed by "(" without being a call
        if callLikeKeywords.contains(currentToken),
           let followingToken = tokens.next,
           followingToken.starts(with: "(") {
            return false
        }

        if let precedingToken = tokens.previous {
            if keywordsToAvoid.contains(precedingToken) {
                return false
            }

            // Enum cases with associated values that appear within a
            // switch statement should not be treated as function calls
            if precedingToken == "." {
                let lineTokens = tokens.onSameLine

                if lineTokens.count > 1 {
                    let tokenBeforeDot = lineTokens[lineTokens.count - 2]

                    if tokenBeforeDot == "case" {
                        return false
                    }

                    // A single case can match several comma-separated patterns
                    if tokenBeforeDot.hasSuffix(",") {
                        return false
                    }
                }
            }
        }

        // Without trailing whitespace, a call needs "(" right after the name
        if segment.trailingWhitespace == nil {
            return tokens.next?.starts(with: "(") ?? false
        }

        // With trailing whitespace, only trailing closure syntax qualifies
        guard tokens.next.isAny(of: "{", "{}") else {
            return false
        }

        if keywords.contains(currentToken) {
            return false
        }

        return !tokens.onSameLine.contains(anyOf: controlFlowTokens)
    }
}
/// Rule that assigns `.keyword` to members of `keywords`, with exceptions
/// for places where a keyword is used as an ordinary name or label.
struct KeywordRule: SyntaxRule {
    var tokenType: TokenType { return .keyword }

    func matches(_ segment: Segment) -> Bool {
        let tokens = segment.tokens
        let currentToken = tokens.current

        // "prefix" only counts as a keyword right before "func"
        if currentToken == "prefix" && tokens.next == "func" {
            return true
        }

        // "some" directly after "case" is not treated as a keyword
        if currentToken == "some" && tokens.previous == "case" {
            return false
        }

        if tokens.next == ":" {
            // Pattern matching against nil within a switch statement case
            if currentToken == "nil" {
                if let precedingToken = tokens.previous {
                    return precedingToken.isAny(of: "case", ",")
                }

                return false
            }

            // Apart from that, only "default" may be followed by ":"
            if currentToken != "default" {
                return false
            }
        }

        if let precedingToken = tokens.previous {
            // A name that equals a keyword is not highlighted when it is
            // being bound, as in "if let" and "guard let":
            let canBeBoundName = !tokens.onSameLine.isEmpty && currentToken != "self"

            if canBeBoundName && precedingToken.isAny(of: "let", "var") {
                return false
            }

            if !declarationKeywords.contains(currentToken) {
                // The "(set)" part of a setter access modifier is highlighted
                if currentToken == "(" {
                    return accessControlKeywords.contains(precedingToken)
                }

                if currentToken == ")" {
                    return precedingToken == "set"
                }

                if currentToken == "set" && precedingToken == "(" {
                    return true
                }

                // Most keywords are not highlighted when used as a
                // parameter label
                let isExemptFromLabelCheck = currentToken.isAny(
                    of: "_", "self", "let", "var", "true", "false", "inout", "nil", "try"
                )

                if !isExemptFromLabelCheck && precedingToken.isAny(of: "(", ",", ">(") {
                    return false
                }

                if tokens.previous.isAny(of: "func", "`") {
                    return false
                }
            }
        }

        return keywords.contains(currentToken)
    }
}
/// Syntax rule that assigns `.type` to tokens that look like type names.
///
/// A token is rejected when it directly follows a declaration keyword, when
/// it is not capitalized (per `isCapitalized`) once surrounding underscores
/// are trimmed, when the segment is prefixed by dot access, or when it starts
/// with "XCTAssert". Tokens that directly follow "<" or "," get an extra
/// backwards scan so that, inside a generic declaration, only constraints
/// are highlighted.
struct TypeRule: SyntaxRule {
var tokenType: TokenType { return .type }
/// Returns whether the segment's current token should be highlighted as a type.
func matches(_ segment: Segment) -> Bool {
// Types should not be highlighted when declared
if let previousToken = segment.tokens.previous {
guard !previousToken.isAny(of: declarationKeywords) else {
return false
}
}
// Leading and trailing underscores are ignored for the checks below
let token = segment.tokens.current.trimmingCharacters(
in: CharacterSet(charactersIn: "_")
)
guard token.isCapitalized else {
return false
}
guard !segment.prefixedByDotAccess else {
return false
}
// The XCTAssert family of functions is a bit of an edge case,
// since they start with capital letters. Since they are so
// commonly used, we'll add a special case for them here:
guard !token.starts(with: "XCTAssert") else {
return false
}
// In a generic declaration, only highlight constraints
if segment.tokens.previous.isAny(of: "<", ",") {
var foundOpeningBracket = false
// Since the declaration might be on another line, we have to walk
// backwards through all tokens until we've found enough information.
// NOTE: the loop variable below shadows the trimmed `token` declared above.
for token in segment.tokens.all.reversed() {
// Highlight return type generics as normal
if token == "->" {
return true
}
if !foundOpeningBracket && token == "<" {
foundOpeningBracket = true
}
// Handling generic lists for parameters, rather than declarations
// (only checked once a "<" has been seen during the backwards walk)
if foundOpeningBracket && token == ":" {
return true
}
guard !declarationKeywords.contains(token) else {
// If it turns out that we weren't in fact inside of a generic
// declaration, (lacking "<"), then highlight the type as normal.
return !foundOpeningBracket
}
// Any other keyword ends the scan, and the type is highlighted
guard !keywords.contains(token) else {
return true
}
// These tokens also end the scan, and the type is highlighted
if token.isAny(of: ">", "=", "==", "(") {
return true
}
}
}
// Either the token did not follow "<" or ",", or the scan above ran out
// of tokens without reaching a verdict
return true
}
}
/// Rule that assigns `.dotAccess` to a symbol that directly follows ".",
/// "(." or "[.", except `self` and `init`, and except on lines whose first
/// token is "import".
struct DotAccessRule: SyntaxRule {
    var tokenType: TokenType { return .dotAccess }

    func matches(_ segment: Segment) -> Bool {
        let tokens = segment.tokens

        // The conditions are evaluated in order, stopping at the first failure
        guard !tokens.onSameLine.isEmpty,
              segment.isValidSymbol,
              tokens.previous.isAny(of: ".", "(.", "[."),
              !tokens.current.isAny(of: "self", "init") else {
            return false
        }

        return tokens.onSameLine.first != "import"
    }
}
/// Rule that assigns `.property` to the token directly following `\.`
/// or `(\.`.
struct KeyPathRule: SyntaxRule {
    var tokenType: TokenType { return .property }

    func matches(_ segment: Segment) -> Bool {
        let precedingToken = segment.tokens.previous
        return precedingToken.isAny(of: #"\."#, #"(\."#)
    }
}
/// Rule that assigns `.property` to a symbol accessed through ".", "?.",
/// "()." or ").", as long as it is not `self`, is not prefixed by dot
/// access, is not followed by "(", and the line does not start with "import".
struct PropertyRule: SyntaxRule {
    var tokenType: TokenType { return .property }

    func matches(_ segment: Segment) -> Bool {
        let tokens = segment.tokens

        // The conditions are evaluated in order, stopping at the first failure
        guard !tokens.onSameLine.isEmpty,
              segment.isValidSymbol,
              tokens.previous.isAny(of: ".", "?.", "().", ")."),
              tokens.current != "self",
              !segment.prefixedByDotAccess else {
            return false
        }

        // A following "(" means this is not a property access
        if let followingToken = tokens.next, followingToken.hasPrefix("(") {
            return false
        }

        return tokens.onSameLine.first != "import"
    }
}
}
private extension Segment {
/// Returns whether the current token is considered part of a string literal
/// delimited by `start` and `end`.
///
/// The current token qualifies right away when it begins with `start` or
/// finishes with `end`. Otherwise the start and end markers found among
/// `tokens.onSameLine` are counted, and the result is `true` when the two
/// counts differ.
///
/// - Parameters:
///   - start: The marker that opens the literal.
///   - end: The marker that closes the literal. May equal `start`.
func isWithinStringLiteral(withStart start: String, end: String) -> Bool {
if tokens.current.hasPrefix(start) {
return true
}
if tokens.current.hasSuffix(end) {
return true
}
var markerCounts = (start: 0, end: 0)
var previousToken: String?
for token in tokens.onSameLine {
// A token beginning with "(", "#(" or a quote that directly follows a
// backslash token is skipped without being counted
if token.hasPrefix("(") || token.hasPrefix("#(") || token.hasPrefix("\"") {
guard previousToken != "\\" else {
previousToken = token
continue
}
}
if token == start {
// When start and end are the same marker, it is counted as a start
// while the counts are balanced and as an end otherwise
if start != end || markerCounts.start == markerCounts.end {
markerCounts.start += 1
} else {
markerCounts.end += 1
}
} else if token == end && start != end {
markerCounts.end += 1
} else {
// Markers attached to a longer token; one token can add to both counts
if token.hasPrefix(start) {
markerCounts.start += 1
}
if token.hasSuffix(end) {
markerCounts.end += 1
}
}
previousToken = token
}
// Differing counts mean a marker was left unmatched
return markerCounts.start != markerCounts.end
}
/// Whether the current token is considered part of a `\(` string
/// interpolation, based on the tokens on the same line.
var isWithinStringInterpolation: Bool {
    let opener = "\\("

    if tokens.current == opener || tokens.previous == opener {
        return true
    }

    // Split the line's tokens around each opener; more than one piece is
    // needed for an opener to have tokens on both sides.
    let pieces = tokens.onSameLine.split(separator: opener)

    guard pieces.count > 1, let lastPiece = pieces.last else {
        return false
    }

    // Starts at one for the "(" of the opener itself; once the count drops
    // to zero the interpolation has been closed.
    var openParenthesisCount = 1

    for token in lastPiece {
        openParenthesisCount += token.numberOfOccurrences(of: "(")
        openParenthesisCount -= token.numberOfOccurrences(of: ")")

        if openParenthesisCount <= 0 {
            return false
        }
    }

    return true
}
/// Whether the current token is part of a `\#(...)` raw string interpolation
/// that wraps a single token.
var isWithinRawStringInterpolation: Bool {
    // Quick fix that only supports a single expression within a raw string
    // interpolation; a proper fix should be developed ASAP.
    let currentToken = tokens.current

    if currentToken == "\\" {
        return tokens.previous != "\\" && tokens.next == "#"
    }

    if currentToken == "#" {
        return tokens.previous == "\\" && tokens.next == "("
    }

    if currentToken == "(" {
        return tokens.onSameLine.suffix(2) == ["\\", "#"]
    }

    if currentToken == ")" {
        let lastFour = tokens.onSameLine.suffix(4)
        return lastFour.prefix(3) == ["\\", "#", "("]
    }

    // Any other token: the opening sequence must end the line's tokens and
    // the closing parenthesis must come right after
    let lastThree = tokens.onSameLine.suffix(3)
    return lastThree == ["\\", "#", "("] && tokens.next == ")"
}
/// Whether the previous token is "(." or the segment's prefix ends with " .".
var prefixedByDotAccess: Bool {
    if tokens.previous == "(." {
        return true
    }

    return prefix.hasSuffix(" .")
}
/// Whether the current token starts with "_" or a letter. An empty token is
/// never a valid symbol.
var isValidSymbol: Bool {
    if let leadingCharacter = tokens.current.first {
        return leadingCharacter == "_" || leadingCharacter.isLetter
    }

    return false
}
}