HTMLStreamer/Sources/HTMLStreamer/TextConverter.swift

175 lines
5.1 KiB
Swift

//
// TextConverter.swift
// HTMLStreamer
//
// Created by Shadowfacts on 12/19/23.
//
import Foundation
/// Converts an HTML string into plain text by streaming it through a `Tokenizer`.
///
/// Block-level elements (`pre`, `blockquote`, `p`, `ol`, `ul`) are separated from
/// neighbouring content by a blank line, and `<br>` becomes a newline. When
/// `TextConverterConfiguration.insertNewlines` is `false`, both are rendered as a
/// single space instead.
///
/// For every start tag, `Callbacks.elementAction(name:attributes:)` is consulted and
/// the resulting action is pushed onto a stack; the actions on that stack decide
/// whether accumulated text is kept, skipped, extended, or replaced when it is
/// flushed to the output.
public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
    private let configuration: TextConverterConfiguration

    // Both are (re)assigned at the top of `convert(html:)` before any use.
    private var tokenizer: Tokenizer<String.UnicodeScalarView.Iterator>!
    private var str: String!

    /// Actions for the currently open elements, innermost last.
    private var actionStack: [ElementAction] = []
    private var blockState = BlockState.unstarted
    /// `true` while no character data has been seen since the most recent start tag.
    private var currentElementIsEmpty = true
    /// Text accumulated since the last flush into `str`.
    private var currentRun = ""

    /// The text inserted between two block-level elements.
    private var blockSeparator: String {
        configuration.insertNewlines ? "\n\n" : " "
    }

    /// The text inserted for a `<br>` element.
    private var lineBreak: String {
        configuration.insertNewlines ? "\n" : " "
    }

    /// Creates a converter that uses `DefaultCallbacks`.
    /// - Parameter configuration: Output options; defaults to `TextConverterConfiguration()`.
    public init(configuration: TextConverterConfiguration = .init()) where Callbacks == DefaultCallbacks {
        self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
    }

    /// Creates a converter whose callbacks type is `Callbacks`.
    /// - Parameters:
    ///   - configuration: Output options; defaults to `TextConverterConfiguration()`.
    ///   - callbacks: Unused at runtime; present so the callbacks type can be spelled at the call site.
    public init(configuration: TextConverterConfiguration = .init(), callbacks _: Callbacks.Type = Callbacks.self) {
        self.configuration = configuration
    }

    /// Converts `html` to plain text.
    ///
    /// All per-conversion state is reset first, so the same converter value can be
    /// reused for multiple inputs.
    /// - Parameter html: The HTML source to convert.
    /// - Returns: The plain-text rendering of `html`.
    public mutating func convert(html: String) -> String {
        tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
        str = ""
        // Unclosed tags in a previous input would otherwise leave stale actions
        // (e.g. a `.skip`) that affect this conversion.
        actionStack.removeAll(keepingCapacity: true)
        blockState = .unstarted
        currentElementIsEmpty = true
        currentRun = ""

        while let token = tokenizer.next() {
            switch token {
            case .character(let scalar):
                currentElementIsEmpty = false
                continueBlock()
                currentRun.unicodeScalars.append(scalar)
            case .characterSequence(let string):
                currentElementIsEmpty = false
                continueBlock()
                currentRun.append(string)
            case .startTag(let name, let selfClosing, let attributes):
                // NOTE(review): every start tag pushes an action, including self-closing
                // ones; whether the tokenizer emits a matching end tag for those is not
                // visible from this file — confirm the stack stays balanced.
                currentElementIsEmpty = true
                let action = Callbacks.elementAction(name: name, attributes: attributes)
                actionStack.append(action)
                handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
            case .endTag(let name):
                handleEndTag(name)
                // Flush before popping: `finishRun()` inspects the action of the
                // element that is being closed.
                if actionStack.last != .default {
                    finishRun()
                }
                // `popLast()` instead of `removeLast()` so that an end tag without a
                // matching start tag (e.g. the input "</p>") does not trap.
                _ = actionStack.popLast()
            case .comment, .doctype:
                break
            }
        }

        finishRun()
        return str
    }

    /// Applies the textual effect of an opening tag: a line break for `br`, and a
    /// block boundary for block-level elements. Other elements have no effect here.
    private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
        switch name {
        case "br":
            currentRun.append(lineBreak)
        case "pre", "blockquote", "p", "ol", "ul":
            startBlockIfNecessary()
        default:
            break
        }
    }

    /// Closes a block-level element and flushes the current run; other elements
    /// have no effect here.
    private mutating func handleEndTag(_ name: String) {
        switch name {
        case "pre", "blockquote", "p", "ol", "ul":
            finishBlockElement()
            finishRun()
        default:
            break
        }
    }

    /// Transitions into a new block, inserting a separator when preceding content
    /// exists. The associated value of `.started` records whether a separator was
    /// inserted, so it can be taken back if the block turns out to be empty.
    private mutating func startBlockIfNecessary() {
        switch blockState {
        case .unstarted:
            blockState = .started(false)
        case .started:
            break
        case .ongoing:
            currentRun.append(blockSeparator)
            blockState = .started(true)
        case .finished(let nonEmpty):
            if nonEmpty {
                currentRun.append(blockSeparator)
            }
            blockState = .started(nonEmpty)
        }
    }

    /// Marks the current block as containing text. If text follows a finished,
    /// non-empty block directly, a separator is inserted first.
    private mutating func continueBlock() {
        switch blockState {
        case .unstarted, .started:
            blockState = .ongoing
        case .ongoing:
            break
        case .finished(let nonEmpty):
            if nonEmpty {
                currentRun.append(blockSeparator)
            }
            blockState = .ongoing
        }
    }

    /// Ends the current block, taking back the separator that was inserted for it
    /// if the block received no text.
    private mutating func finishBlockElement() {
        if blockState == .started(true) && currentElementIsEmpty {
            // The separator may no longer be at the end of `currentRun`: a nested
            // element with a non-default action can have flushed or discarded the
            // run in the meantime. Only trim what is actually there, rather than
            // trapping in `removeLast(_:)` on a too-short string.
            let separator = blockSeparator
            if currentRun.hasSuffix(separator) {
                currentRun.removeLast(separator.count)
            }
        }
        blockState = .finished(blockState == .ongoing)
    }

    /// Flushes `currentRun` into the output, honouring the open elements' actions:
    /// any `.skip` discards the run, an innermost `.append` adds its text to the
    /// run, and otherwise the outermost `.replace` substitutes the whole run.
    private mutating func finishRun() {
        if actionStack.contains(.skip) {
            currentRun = ""
            return
        } else if case .append(let s) = actionStack.last {
            currentRun.append(s)
        } else if case .replace(let replacement) = actionStack.first(where: \.isReplace) {
            currentRun = replacement
        }
        guard !currentRun.isEmpty else {
            return
        }
        str.append(currentRun)
        currentRun = ""
    }
}
/// Options that control how `TextConverter` renders structural HTML as plain text.
public struct TextConverterConfiguration {
    /// When `true`, `<br>` is rendered as a newline and block-level elements are
    /// separated by a blank line; when `false`, a single space is used in both cases.
    public var insertNewlines: Bool

    /// Creates a configuration.
    /// - Parameter insertNewlines: Whether line breaks and block boundaries are
    ///   rendered as newlines rather than spaces. Defaults to `true`.
    public init(insertNewlines: Bool = true) {
        self.insertNewlines = insertNewlines
    }
}