2020-07-12 23:09:22 -04:00
|
|
|
//
|
|
|
|
// GeminiParser.swift
|
|
|
|
// GeminiFormat
|
|
|
|
//
|
|
|
|
// Created by Shadowfacts on 7/12/20.
|
|
|
|
//
|
|
|
|
|
|
|
|
import Foundation
|
|
|
|
|
2020-07-12 23:25:20 -04:00
|
|
|
/// Parser for Gemini-formatted text.
///
/// This type only namespaces `parse(text:baseURL:)`; its initializer is private, so it cannot be instantiated.
public struct GeminiParser {

    private init() {}

    /// Parses Gemini-formatted text into a `Document`, appending one document line per input line.
    ///
    /// Each line is classified by the first matching rule, in this order:
    /// 1. a line starting with three backticks toggles preformatting on or off,
    /// 2. any line inside a preformatted block is kept verbatim,
    /// 3. `=>` starts a link line,
    /// 4. `#` starts a heading (`###`, `##`, or `#`),
    /// 5. `* ` starts an unordered list item,
    /// 6. `>` starts a quote,
    /// 7. everything else is plain text.
    ///
    /// - Parameters:
    ///   - text: The raw source text. It is split into lines with `enumerateLines`.
    ///   - baseURL: The URL passed to the `Document` initializer and used to resolve relative link targets.
    /// - Returns: The document with all parsed lines appended in source order.
    public static func parse(text: String, baseURL: URL) -> Document {
        var doc = Document(url: baseURL)

        var inPreformattingBlock = false

        text.enumerateLines { (line, _) in
            if line.starts(with: "```") {
                if inPreformattingBlock {
                    inPreformattingBlock = false
                    // todo: should the toggle off line be a separate line type?
                    doc.lines.append(.preformattedToggle(alt: nil))
                } else {
                    // Whatever follows the three backticks on an opening toggle is the alt text;
                    // a bare toggle has none.
                    let remainder = line.dropFirst(3)
                    let alt: String? = remainder.isEmpty ? nil : String(remainder)
                    inPreformattingBlock = true
                    doc.lines.append(.preformattedToggle(alt: alt))
                }
            } else if inPreformattingBlock {
                // Inside a preformatted block no other line type is recognized.
                doc.lines.append(.preformattedText(line))
            } else if line.starts(with: "=>") {
                // Link line: "=>" [whitespace] URL [whitespace link-text]
                let urlStart = line.firstNonWhitespaceIndex(after: line.index(line.startIndex, offsetBy: 2))
                let urlEnd = line.firstWhitespaceIndex(after: urlStart)
                let textStart = line.firstNonWhitespaceIndex(after: urlEnd)

                var urlString = String(line[urlStart..<urlEnd])
                // URL(string:relativeTo:) does not handle // meaning the same protocol as the base URL.
                // `scheme` is optional (e.g. for a relative base URL), so only prefix it when present
                // instead of force-unwrapping and crashing on such input.
                if urlString.hasPrefix("//"), let scheme = baseURL.scheme {
                    urlString = scheme + ":" + urlString
                }

                // The link text is everything after the URL and its trailing whitespace, if anything is left.
                let linkText: String? = textStart < line.endIndex ? String(line[textStart..<line.endIndex]) : nil

                if let url = URL(string: urlString, relativeTo: baseURL)?.absoluteURL {
                    doc.lines.append(.link(url, text: linkText))
                } else if let escaped = urlString.addingPercentEncoding(withAllowedCharacters: .URLAllowed),
                          let url = URL(string: escaped, relativeTo: baseURL)?.absoluteURL {
                    // if URL initialization fails because there are unescaped chars in the doc, escape everything and try again.
                    // I'm not certain, but it feels unsafe to always do this escaping
                    doc.lines.append(.link(url, text: linkText))
                } else {
                    // The target could not be turned into a URL at all: degrade to a plain text line
                    // so the content is still visible.
                    let str: String
                    if let linkText = linkText {
                        // todo: localize me?
                        str = "\(linkText): \(urlString)"
                    } else {
                        str = urlString
                    }
                    doc.lines.append(.text(str))
                }
            } else if line.starts(with: "#") {
                // Check the longest prefix first so "###" is not mistaken for a lower level.
                let level: Document.HeadingLevel
                if line.starts(with: "###") {
                    level = .h3
                } else if line.starts(with: "##") {
                    level = .h2
                } else {
                    level = .h1
                }
                // The level's raw value is used as the number of leading "#" characters to skip.
                // NOTE(review): presumably h1/h2/h3 have raw values 1/2/3 — confirm in Document.HeadingLevel.
                let headingStart = line.firstNonWhitespaceIndex(after: line.index(line.startIndex, offsetBy: level.rawValue))
                let headingText = String(line[headingStart...])
                doc.lines.append(.heading(headingText, level: level))
            } else if line.starts(with: "* ") {
                let listItemStart = line.firstNonWhitespaceIndex(after: line.index(line.startIndex, offsetBy: 2))
                let listItemText = String(line[listItemStart...])
                doc.lines.append(.unorderedListItem(listItemText))
            } else if line.starts(with: ">") {
                let quoteStartIndex = line.firstNonWhitespaceIndex(after: line.index(after: line.startIndex))
                let quoteText = String(line[quoteStartIndex...])
                doc.lines.append(.quote(quoteText))
            } else {
                doc.lines.append(.text(line))
            }
        }

        return doc
    }

}
|
|
|
|
|
|
|
|
fileprivate extension String {
    /// Scans forward from `index` and returns the position of the first character that is not whitespace.
    ///
    /// - Parameter index: The position at which scanning starts (this position itself is examined).
    /// - Returns: The index of the first non-whitespace character at or after `index`,
    ///   or `index` unchanged if it is not before `endIndex`, or `endIndex` if only whitespace remains.
    func firstNonWhitespaceIndex(after index: String.Index) -> String.Index {
        var cursor = index
        while cursor < endIndex {
            if !Self.beginsWithWhitespaceScalar(self[cursor]) {
                break
            }
            formIndex(after: &cursor)
        }
        return cursor
    }

    /// Scans forward from `index` and returns the position of the first whitespace character.
    ///
    /// - Parameter index: The position at which scanning starts (this position itself is examined).
    /// - Returns: The index of the first whitespace character at or after `index`,
    ///   or `index` unchanged if it is not before `endIndex`, or `endIndex` if no whitespace remains.
    func firstWhitespaceIndex(after index: String.Index) -> String.Index {
        var cursor = index
        // todo: could the first unicode scalar of a character be whitespace even though the whole character is not?
        while cursor < endIndex {
            if Self.beginsWithWhitespaceScalar(self[cursor]) {
                break
            }
            formIndex(after: &cursor)
        }
        return cursor
    }

    /// Tells whether the first unicode scalar of `character` belongs to `CharacterSet.whitespaces`.
    ///
    /// Only the first scalar is inspected; that should be fine, since all whitespace characters are single scalars.
    private static func beginsWithWhitespaceScalar(_ character: Character) -> Bool {
        // A Character always holds at least one scalar, so the fallback is never taken in practice.
        guard let leadingScalar = character.unicodeScalars.first else {
            return false
        }
        return CharacterSet.whitespaces.contains(leadingScalar)
    }
}
|
2021-08-30 21:03:18 -04:00
|
|
|
|
|
|
|
private extension CharacterSet {
    /// Characters left untouched when a link target is percent-encoded before retrying URL construction:
    /// ASCII letters and digits, the punctuation listed in the literal below, and `%`.
    ///
    /// Since `%` is in the set, percent signs in the input are not re-encoded.
    static let URLAllowed: CharacterSet = .init(charactersIn: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;=%")
}
|