Gemini/GeminiFormat/GeminiParser.swift

111 lines
4.2 KiB
Swift

//
// GeminiParser.swift
// GeminiFormat
//
// Created by Shadowfacts on 7/12/20.
//
import Foundation
/// Namespace for turning Gemini-format text into a `Document`.
///
/// The initializer is private; the only entry point is the static `parse(text:baseURL:)`.
struct GeminiParser {
    private init() {}

    /// Parses `text` one line at a time and returns the resulting `Document`.
    ///
    /// Each input line is classified by its prefix, checked in this order:
    /// - "```" toggles preformatting on or off; the toggle line itself produces no document line.
    /// - While preformatting is on, every other line becomes `.preformattedText`, carrying the
    ///   alt text of the toggle line that opened the block.
    /// - "=>" is a link line: a URL, optionally followed by whitespace and a label.
    /// - "#", "##", "###" are headings of level 1 to 3.
    /// - "* " is an unordered list item.
    /// - ">" is a quote.
    /// - Anything else is a plain `.text` line.
    ///
    /// - Parameters:
    ///   - text: The document source.
    ///   - baseURL: The URL the document was loaded from. It is stored on the `Document` and used
    ///     to resolve link URLs.
    /// - Returns: A `Document` whose `lines` hold one entry per non-toggle input line.
    static func parse(text: String, baseURL: URL) -> Document {
        var doc = Document(url: baseURL)
        var preformattingState = PreformattingState.off

        text.enumerateLines { (line, _) in
            if line.starts(with: "```") {
                switch preformattingState {
                case .off:
                    // Whatever follows the three backticks is kept as the block's alt text;
                    // nothing after them means no alt text.
                    let alt = line.dropFirst(3)
                    preformattingState = .on(alt.isEmpty ? nil : String(alt))
                case .on:
                    preformattingState = .off
                }
            } else if case let .on(alt) = preformattingState {
                // Inside a preformatted block the line is taken verbatim, whatever its prefix.
                doc.lines.append(.preformattedText(line, alt: alt))
            } else if line.starts(with: "=>") {
                // Link line: "=>", optional whitespace, URL, then optional whitespace and label.
                let urlStart = line.firstNonWhitespaceIndex(after: line.index(line.startIndex, offsetBy: 2))
                let urlEnd = line.firstWhitespaceIndex(after: urlStart)
                let urlString = String(line[urlStart..<urlEnd])

                // URL(string:relativeTo:) returns nil for an empty or malformed URL (a bare "=>"
                // yields an empty string). Such a line is kept as plain text instead of crashing
                // the whole parse.
                guard let url = URL(string: urlString, relativeTo: baseURL)?.absoluteURL else {
                    doc.lines.append(.text(line))
                    return
                }

                let labelStart = line.firstNonWhitespaceIndex(after: urlEnd)
                let label = labelStart < line.endIndex ? String(line[labelStart...]) : nil
                doc.lines.append(.link(url, text: label))
            } else if line.starts(with: "#") {
                // The longest prefix is tested first so that "###" is not mistaken for "#".
                let level: Document.HeadingLevel
                if line.starts(with: "###") {
                    level = .h3
                } else if line.starts(with: "##") {
                    level = .h2
                } else {
                    level = .h1
                }
                // The level's raw value is used as the number of leading "#" characters to skip.
                let headingStart = line.firstNonWhitespaceIndex(after: line.index(line.startIndex, offsetBy: level.rawValue))
                doc.lines.append(.heading(String(line[headingStart...]), level: level))
            } else if line.starts(with: "* ") {
                let listItemStart = line.firstNonWhitespaceIndex(after: line.index(line.startIndex, offsetBy: 2))
                doc.lines.append(.unorderedListItem(String(line[listItemStart...])))
            } else if line.starts(with: ">") {
                let quoteStart = line.firstNonWhitespaceIndex(after: line.index(after: line.startIndex))
                doc.lines.append(.quote(String(line[quoteStart...])))
            } else {
                doc.lines.append(.text(line))
            }
        }

        return doc
    }
}
fileprivate extension GeminiParser {
/// Tracks, while parsing, whether the current line lies inside a preformatted block
/// (a region opened and closed by lines starting with "```").
enum PreformattingState {
/// Not inside a preformatted block.
case off
/// Inside a preformatted block. `alt` is the text that followed the opening "```",
/// or nil when the opening line had nothing after the backticks.
case on(_ alt: String?)
}
}
fileprivate extension String {
    /// Whether the leading unicode scalar of `character` is in `CharacterSet.whitespaces`.
    ///
    /// Only the first scalar is examined; every `Character` has at least one scalar, so the
    /// force unwrap cannot fail. Checking just the first scalar should be fine for whitespace,
    /// since all whitespace characters are single scalars.
    // todo: could the first unicode scalar of a character be whitespace even though the whole character is not?
    private static func beginsWithWhitespaceScalar(_ character: Character) -> Bool {
        return CharacterSet.whitespaces.contains(character.unicodeScalars.first!)
    }

    /// Finds the first non-whitespace character at or after `index`.
    ///
    /// Despite the `after` label, the character at `index` itself is considered.
    ///
    /// - Parameter index: Where to start scanning.
    /// - Returns: The index of the first character from `index` onwards that is not whitespace,
    ///   or `endIndex` if only whitespace (or nothing) remains.
    func firstNonWhitespaceIndex(after index: String.Index) -> String.Index {
        let remainder = self[index...]
        return remainder.firstIndex(where: { !String.beginsWithWhitespaceScalar($0) }) ?? endIndex
    }

    /// Finds the first whitespace character at or after `index`.
    ///
    /// Despite the `after` label, the character at `index` itself is considered.
    ///
    /// - Parameter index: Where to start scanning.
    /// - Returns: The index of the first whitespace character from `index` onwards, or
    ///   `endIndex` if none remains.
    func firstWhitespaceIndex(after index: String.Index) -> String.Index {
        let remainder = self[index...]
        return remainder.firstIndex(where: { String.beginsWithWhitespaceScalar($0) }) ?? endIndex
    }
}