8bc46cbeb0
This change adds SwiftLint to the project. For now, the rules are tweaked to match the current state of the code base — but will over time be changed back to their defaults in several cases. Some smaller changes (mostly related to code style) were applied to the project to remove all warnings.
215 lines
6.3 KiB
Swift
215 lines
6.3 KiB
Swift
/**
 *  Splash
 *  Copyright (c) John Sundell 2018
 *  MIT license - see LICENSE.md
 */
|
|
|
|
import Foundation
|
|
|
|
/// Entry point for splitting a piece of code into a sequence of `Segment` values.
internal struct Tokenizer {
    /// Wraps the tokenization of `code` in a type-erased sequence.
    ///
    /// - Parameters:
    ///   - code: The text to split into segments.
    ///   - delimiters: The characters that are classified as delimiters
    ///     rather than as parts of regular tokens.
    /// - Returns: A sequence that builds a new `Buffer` (backed by a new
    ///   `Iterator` over `code`) every time an iterator is requested from it.
    func segmentsByTokenizing(_ code: String, delimiters: CharacterSet) -> AnySequence<Segment> {
        let makeBuffer: () -> Buffer = {
            let source = Iterator(code: code, delimiters: delimiters)
            return Buffer(iterator: source)
        }

        return AnySequence<Segment>(makeBuffer)
    }
}
|
|
|
|
private extension Tokenizer {
    /// Iterator that stays one segment ahead of the underlying `Iterator`, so
    /// that every segment it returns has `tokens.next` set to the token of the
    /// segment that follows it (or `nil` when there is none).
    struct Buffer: IteratorProtocol {
        private var iterator: Iterator
        /// Segment that was already pulled from `iterator` but not yet returned.
        private var nextSegment: Segment?

        init(iterator: Iterator) {
            self.iterator = iterator
        }

        /// Returns the next segment, or `nil` once the input is exhausted.
        mutating func next() -> Segment? {
            // Reuse the look-ahead stored by the previous call when there is one.
            var segment = nextSegment ?? iterator.next()
            nextSegment = iterator.next()
            segment?.tokens.next = nextSegment?.tokens.current
            return segment
        }
    }

    /// Walks `code` one character at a time and groups the characters into
    /// segments: a token (or a run of delimiters) plus the whitespace and
    /// newlines that directly follow it.
    struct Iterator: IteratorProtocol {
        /// Classification of a single character of the input, carrying that
        /// character as a string.
        enum Component {
            /// Neither whitespace, a newline nor a delimiter.
            case token(String)
            /// Contained in the delimiter set passed to the iterator.
            case delimiter(String)
            /// Matched by this file's `Character.isWhitespace` helper.
            case whitespace(String)
            /// Matched by this file's `Character.isNewline` helper.
            case newline(String)
        }

        private let code: String
        private let delimiters: CharacterSet
        /// Index of the most recently consumed character; `nil` before the first one.
        private var index: String.Index?
        /// How many finished segments had each token text.
        private var tokenCounts = [String: Int]()
        /// Token text of every finished segment, in order.
        private var allTokens = [String]()
        /// Token text of the finished segments on the current line.
        private var lineTokens = [String]()
        /// The segment being built and the one finished most recently.
        private var segments: (current: Segment?, previous: Segment?)

        init(code: String, delimiters: CharacterSet) {
            self.code = code
            self.delimiters = delimiters
            segments = (nil, nil)
        }

        /// Consumes characters until a segment is complete and returns it.
        ///
        /// A segment is complete when a token or delimiter character arrives
        /// that cannot be appended to it (see the second `guard` below), or
        /// when the end of `code` is reached.
        ///
        /// This is a loop rather than a function that calls itself once per
        /// consumed character: Swift does not guarantee tail-call elimination,
        /// so a very long token or whitespace run could otherwise exhaust the
        /// stack.
        ///
        /// - Returns: The completed segment, or `nil` when nothing is left.
        mutating func next() -> Segment? {
            while true {
                let nextIndex = makeNextIndex()

                guard nextIndex != code.endIndex else {
                    // End of input: hand out whatever is still being built.
                    // That last segment is not passed through `finish`, so it
                    // is not recorded in the token bookkeeping.
                    let segment = segments.current
                    segments.current = nil
                    return segment
                }

                index = nextIndex
                let component = makeComponent(at: nextIndex)

                switch component {
                case .token(let token), .delimiter(let token):
                    guard var segment = segments.current else {
                        // Nothing in progress yet; this character starts a segment.
                        segments.current = makeSegment(with: component, at: nextIndex)
                        continue
                    }

                    // The character may only extend the current token when no
                    // whitespace has been collected yet and both are of the
                    // same kind (delimiter vs. non-delimiter).
                    guard segment.trailingWhitespace == nil,
                          component.isDelimiter == segment.currentTokenIsDelimiter else {
                        return finish(segment, with: component, at: nextIndex)
                    }

                    segment.tokens.current.append(token)
                    segments.current = segment
                case .whitespace(let whitespace), .newline(let whitespace):
                    guard var segment = segments.current else {
                        // Whitespace with no segment in progress becomes a
                        // segment with an empty token (`Component.token` is ""
                        // for whitespace and newlines).
                        var leading = makeSegment(with: component, at: nextIndex)
                        leading.trailingWhitespace = whitespace
                        leading.isLastOnLine = component.isNewline
                        segments.current = leading
                        continue
                    }

                    segment.trailingWhitespace = (segment.trailingWhitespace ?? "") + whitespace

                    if component.isNewline {
                        segment.isLastOnLine = true
                    }

                    segments.current = segment
                }
            }
        }

        /// Returns the index of the character to consume next: the start of
        /// `code` on the first call, otherwise the index after `index`.
        private func makeNextIndex() -> String.Index {
            guard let index = index else {
                return code.startIndex
            }

            return code.index(after: index)
        }

        /// Classifies the character at `index`. Whitespace is checked first,
        /// then newlines, then membership in `delimiters`; everything else is
        /// a regular token character.
        private func makeComponent(at index: String.Index) -> Component {
            let character = code[index]
            let substring = String(character)

            if character.isWhitespace {
                return .whitespace(substring)
            }

            if character.isNewline {
                return .newline(substring)
            }

            if delimiters.contains(character) {
                return .delimiter(substring)
            }

            return .token(substring)
        }

        /// Creates a segment that starts with `component` at `index`.
        ///
        /// The new segment receives a snapshot of the token bookkeeping as it
        /// is at this moment, the token of `segments.current` (if any) as its
        /// previous token, and everything before `index` as its prefix.
        private func makeSegment(with component: Component, at index: String.Index) -> Segment {
            let tokens = Segment.Tokens(
                all: allTokens,
                counts: tokenCounts,
                onSameLine: lineTokens,
                previous: segments.current?.tokens.current,
                current: component.token,
                next: nil
            )

            return Segment(
                prefix: code[..<index],
                tokens: tokens,
                trailingWhitespace: nil,
                currentTokenIsDelimiter: component.isDelimiter,
                isLastOnLine: false
            )
        }

        /// Records `segment` in the token bookkeeping, starts a new segment
        /// from `component` at `index`, and returns `segment` unchanged.
        private mutating func finish(_ segment: Segment,
                                     with component: Component,
                                     at index: String.Index) -> Segment {
            let token = segment.tokens.current
            tokenCounts[token, default: 0] += 1
            allTokens.append(token)

            if segment.isLastOnLine {
                // The next segment begins a new line.
                lineTokens = []
            } else {
                lineTokens.append(token)
            }

            segments.previous = segment
            // `makeSegment` reads `segments.current` before this assignment
            // takes effect, so the new segment's previous token is the token
            // of the segment that was just finished.
            segments.current = makeSegment(with: component, at: index)

            return segment
        }
    }
}
|
|
|
|
extension Tokenizer.Iterator.Component {
    /// The text carried by a `.token` or `.delimiter` component; an empty
    /// string for `.whitespace` and `.newline`.
    var token: String {
        switch self {
        case .whitespace, .newline:
            return ""
        case .token(let text), .delimiter(let text):
            return text
        }
    }

    /// `true` only for `.delimiter`.
    var isDelimiter: Bool {
        switch self {
        case .delimiter:
            return true
        case .token, .whitespace, .newline:
            return false
        }
    }

    /// `true` only for `.newline`.
    var isNewline: Bool {
        switch self {
        case .newline:
            return true
        case .token, .whitespace, .delimiter:
            return false
        }
    }
}
|
|
|
|
// NOTE(review): both properties call `CharacterSet.contains(_:)` with a
// `Character`. That overload is not defined in this file, so it is presumably
// a project extension declared elsewhere - confirm.
private extension Character {
    /// Whether `CharacterSet.whitespaces` contains this character.
    var isWhitespace: Bool {
        let whitespaceSet = CharacterSet.whitespaces
        return whitespaceSet.contains(self)
    }

    /// Whether `CharacterSet.newlines` contains this character.
    var isNewline: Bool {
        let newlineSet = CharacterSet.newlines
        return newlineSet.contains(self)
    }
}
|