138 lines
4.1 KiB
Swift
138 lines
4.1 KiB
Swift
//
|
|
// TextConverter.swift
|
|
// HTMLStreamer
|
|
//
|
|
// Created by Shadowfacts on 12/19/23.
|
|
//
|
|
|
|
import Foundation
|
|
|
|
/// Converts an HTML string into plain text by streaming it through a `Tokenizer`.
///
/// Text is accumulated into a "run" which is flushed into the output whenever a
/// block element (`pre`, `blockquote`, `p`, `ol`, `ul`) ends, or whenever an
/// element whose action (as reported by `Callbacks.elementAction`) is not
/// `.default` ends.
public struct TextConverter<Callbacks: HTMLConversionCallbacks> {

    private let configuration: TextConverterConfiguration

    // Implicitly unwrapped because a tokenizer only exists once `convert(html:)` has input to feed it.
    private var tokenizer: Tokenizer<String.UnicodeScalarView.Iterator>!
    /// The output accumulated so far for the conversion in progress.
    private var str = ""

    /// The actions of the elements whose start tag has been seen, innermost last.
    private var actionStack: [ElementAction] = []
    /// Whether the most recent text-affecting event was the end of a non-empty block
    /// element, i.e. whether `currentRun` ends with a block separator that may need
    /// to be trimmed if the document ends here.
    private var previouslyFinishedBlockElement = false
    /// Whether no character data has been seen since the most recent start tag.
    private var currentElementIsEmpty = true
    /// Text collected since the last flush into `str`.
    private var currentRun = ""

    /// The text inserted after a finished, non-empty block element.
    private var blockSeparator: String {
        configuration.insertNewlines ? "\n\n" : " "
    }

    /// Creates a converter that uses `DefaultCallbacks`.
    /// - Parameter configuration: Options controlling the produced text.
    public init(configuration: TextConverterConfiguration = .init()) where Callbacks == DefaultCallbacks {
        self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
    }

    /// Creates a converter with the given configuration.
    /// - Parameters:
    ///   - configuration: Options controlling the produced text.
    ///   - callbacks: Unused at runtime; only lets the caller pin the `Callbacks` generic parameter.
    public init(configuration: TextConverterConfiguration = .init(), callbacks _: Callbacks.Type = Callbacks.self) {
        self.configuration = configuration
    }

    /// Converts `html` to plain text.
    ///
    /// Comments and doctype tokens are ignored. A block separator left at the very
    /// end of the document by the last block element is trimmed.
    /// - Parameter html: The HTML source to convert.
    /// - Returns: The plain-text rendition of `html`.
    public mutating func convert(html: String) -> String {
        tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
        str = ""

        // Reset all per-conversion state. The action stack is included so that
        // unbalanced markup in one call cannot leak actions (e.g. `.skip`) into the next.
        actionStack = []
        previouslyFinishedBlockElement = false
        currentElementIsEmpty = true
        currentRun = ""

        while let token = tokenizer.next() {
            switch token {
            case .character(let scalar):
                currentElementIsEmpty = false
                previouslyFinishedBlockElement = false
                currentRun.unicodeScalars.append(scalar)
            case .characterSequence(let string):
                currentElementIsEmpty = false
                previouslyFinishedBlockElement = false
                currentRun.append(string)
            case .startTag(let name, let selfClosing, let attributes):
                currentElementIsEmpty = true
                previouslyFinishedBlockElement = false
                let action = Callbacks.elementAction(name: name, attributes: attributes)
                // NOTE(review): if the tokenizer emits no end tag for void elements such as
                // `<br>`, their action is never popped and a later end tag pops the wrong
                // entry — confirm against the tokenizer's behavior.
                actionStack.append(action)
                handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
            case .endTag(let name):
                handleEndTag(name)
                // An end tag without a matching start tag has no action to finish or pop;
                // calling `removeLast()` on the empty stack would trap.
                if let action = actionStack.last {
                    // A non-default action applies to the run, so the run ends with the element.
                    // This must happen before popping because `finishRun()` inspects the stack.
                    if action != .default {
                        finishRun()
                    }
                    actionStack.removeLast()
                }
            case .comment, .doctype:
                break
            }
        }

        if previouslyFinishedBlockElement {
            // Trim the separator added after the final block element. The run may already
            // have been flushed or discarded by a non-default action, in which case there is
            // nothing left to trim and `removeLast(_:)` would trap.
            let separator = blockSeparator
            if currentRun.hasSuffix(separator) {
                currentRun.removeLast(separator.count)
            }
        }
        finishRun()

        return str
    }

    /// Handles the side effects of a start tag on the current run.
    ///
    /// Only `br` has an effect: it appends a newline, or a space when newlines are disabled.
    private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
        switch name {
        case "br":
            if configuration.insertNewlines {
                currentRun.append("\n")
            } else {
                currentRun.append(" ")
            }
        default:
            break
        }
    }

    /// Flushes the current run and emits a block separator when a block element ends.
    private mutating func handleEndTag(_ name: String) {
        switch name {
        case "pre", "blockquote", "p", "ol", "ul":
            finishRun()
            finishBlockElement()
        default:
            break
        }
    }

    /// Appends the block separator to the current run, unless the element that just ended was empty.
    private mutating func finishBlockElement() {
        guard !currentElementIsEmpty else {
            return
        }
        previouslyFinishedBlockElement = true
        currentRun.append(blockSeparator)
    }

    /// Applies the active element actions to the current run and flushes it into the output.
    ///
    /// - If any element on the stack has the `.skip` action, the run is discarded.
    /// - Otherwise, if the innermost element has an `.append` action, its string is appended to the run.
    /// - Otherwise, if any element on the stack has a replace action, the first such
    ///   element's replacement is used in place of the run.
    private mutating func finishRun() {
        if actionStack.contains(.skip) {
            currentRun = ""
            return
        } else if case .append(let s) = actionStack.last {
            currentRun.append(s)
        } else if case .replace(let replacement) = actionStack.first(where: \.isReplace) {
            currentRun = replacement
        }

        guard !currentRun.isEmpty else {
            return
        }

        str.append(currentRun)
        currentRun = ""
    }

}
|
|
|
|
/// Options that control how `TextConverter` renders HTML as plain text.
public struct TextConverterConfiguration {
    /// Whether line breaks are rendered as newlines.
    ///
    /// When `true`, `<br>` produces `"\n"` and a finished block element is followed by `"\n\n"`.
    /// When `false`, a single space is used in both places.
    public var insertNewlines: Bool

    /// Creates a configuration.
    /// - Parameter insertNewlines: Whether line breaks are rendered as newlines. Defaults to `true`.
    public init(insertNewlines: Bool = true) {
        self.insertNewlines = insertNewlines
    }
}
|