// // GeminiParser.swift // GeminiFormat // // Created by Shadowfacts on 7/12/20. // import Foundation public struct GeminiParser { private init() {} public static func parse(text: String, baseURL: URL) -> Document { var doc = Document(url: baseURL) var inPreformattingBlock = false text.enumerateLines { (line, stop) in if line.starts(with: "```") { if inPreformattingBlock { inPreformattingBlock = false // todo: should the toggle off line be a separate line type? doc.lines.append(.preformattedToggle(alt: nil)) } else { let alt: String? if line.count > 3 { alt = String(line[line.index(line.startIndex, offsetBy: 3)...]) } else { alt = nil } inPreformattingBlock = true doc.lines.append(.preformattedToggle(alt: alt)) } } else if inPreformattingBlock { doc.lines.append(.preformattedText(line)) } else if line.starts(with: "=>") { // Link line let urlStart = line.firstNonWhitespaceIndex(after: line.index(line.startIndex, offsetBy: 2)) let urlEnd = line.firstWhitespaceIndex(after: urlStart) let textStart = line.firstNonWhitespaceIndex(after: urlEnd) let urlString = String(line[urlStart..") { let quoteStartIndex = line.firstNonWhitespaceIndex(after: line.index(after: line.startIndex)) let quoteText = String(line[quoteStartIndex...]) doc.lines.append(.quote(quoteText)) } else { doc.lines.append(.text(line)) } } return doc } } fileprivate extension GeminiParser { enum PreformattingState { case off case on(_ alt: String?) } } fileprivate extension String { func firstNonWhitespaceIndex(after index: String.Index) -> String.Index { var index = index // using .unicodeScalars.first should be fine, since all whitespace characters are single scalars while index < self.endIndex, CharacterSet.whitespaces.contains(self[index].unicodeScalars.first!) 
{
            index = self.index(after: index)
        }
        return index
    }

    /// Locates the first whitespace character at or after `index`.
    ///
    /// Despite the `after` label, the character at `index` itself is examined first.
    /// A character counts as whitespace when its first unicode scalar is a member of
    /// `CharacterSet.whitespaces`; any further scalars of the character are ignored.
    ///
    /// - Parameter index: The position at which scanning starts.
    /// - Returns: The index of the first matching character, or `endIndex` when no
    ///   character from `index` onwards matches.
    func firstWhitespaceIndex(after index: String.Index) -> String.Index {
        var cursor = index
        while cursor < self.endIndex {
            // A Character is never empty, so taking its first scalar cannot fail.
            // todo: could the first unicode scalar of a character be whitespace even though the whole character is not?
            let leadingScalar = self[cursor].unicodeScalars.first!
            if CharacterSet.whitespaces.contains(leadingScalar) {
                break
            }
            cursor = self.index(after: cursor)
        }
        return cursor
    }
}