HTMLStreamer/Sources/HTMLStreamer/TextConverter.swift

132 lines
3.9 KiB
Swift

//
// TextConverter.swift
// HTMLStreamer
//
// Created by Shadowfacts on 12/19/23.
//
import Foundation
/// Converts an HTML string to plain text by consuming tokens from a `Tokenizer`.
///
/// Character data is collected into a pending "run". The run is flushed to the output when a
/// block element (`pre`, `blockquote`, `p`, `ol`, `ul`) ends, when an element whose
/// `ElementAction` is not `.default` ends, and once more at the end of the input.
/// `Callbacks.elementAction(name:attributes:)` is asked for an action on every start tag.
public struct TextConverter<Callbacks: HTMLConversionCallbacks> {

    private let configuration: TextConverterConfiguration

    /// Output produced so far by the conversion in progress.
    private var str = ""
    /// Actions of the elements that have been opened and not yet closed, innermost last.
    private var actionStack: [ElementAction] = []
    /// `true` while the most recent structural event was the end of a non-empty block element,
    /// i.e. while the pending run holds a block separator that may turn out to be trailing.
    private var previouslyFinishedBlockElement = false
    /// `true` until character data is seen after the most recent start tag.
    private var currentElementIsEmpty = true
    /// Text collected since the last flush.
    private var currentRun = ""

    /// Separator emitted after a non-empty block element.
    private var blockSeparator: String {
        configuration.insertNewlines ? "\n\n" : " "
    }

    /// Text emitted in place of a `br` element.
    private var lineBreak: String {
        configuration.insertNewlines ? "\n" : " "
    }

    /// Creates a converter that uses `DefaultCallbacks`.
    /// - Parameter configuration: Options controlling the produced text.
    public init(configuration: TextConverterConfiguration = .init()) where Callbacks == DefaultCallbacks {
        self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
    }

    /// Creates a converter for the given callbacks type.
    /// - Parameters:
    ///   - configuration: Options controlling the produced text.
    ///   - callbacks: Only used to select the `Callbacks` generic parameter; the value is ignored.
    public init(configuration: TextConverterConfiguration = .init(), callbacks _: Callbacks.Type = Callbacks.self) {
        self.configuration = configuration
    }

    /// Converts `html` to plain text.
    ///
    /// Every call starts from a clean state, so a converter can be reused for several documents.
    /// - Parameter html: The HTML source to convert.
    /// - Returns: The text content of `html`, with a trailing block separator removed.
    public mutating func convert(html: String) -> String {
        // Reset everything a previous conversion may have left behind. Without this, unclosed
        // elements and a stale "finished block" flag would leak into this conversion; the latter
        // made the trailing-separator trim below trap on an empty run.
        str = ""
        currentRun = ""
        actionStack = []
        previouslyFinishedBlockElement = false
        currentElementIsEmpty = true

        // The tokenizer is only needed for the duration of this call, so it is a local rather
        // than an implicitly unwrapped stored property.
        var tokenizer: Tokenizer<String.UnicodeScalarView.Iterator> = Tokenizer(chars: html.unicodeScalars.makeIterator())

        while let token = tokenizer.next() {
            switch token {
            case .character(let scalar):
                currentElementIsEmpty = false
                previouslyFinishedBlockElement = false
                currentRun.unicodeScalars.append(scalar)
            case .characterSequence(let string):
                currentElementIsEmpty = false
                previouslyFinishedBlockElement = false
                currentRun.append(string)
            case .startTag(let name, _, let attributes):
                currentElementIsEmpty = true
                previouslyFinishedBlockElement = false
                actionStack.append(Callbacks.elementAction(name: name, attributes: attributes))
                handleStartTag(name)
            case .endTag(let name):
                handleEndTag(name)
                // An element with a non-default action ends its run here so that the action is
                // applied while the element is still on the stack.
                if actionStack.last != .default {
                    finishRun()
                }
                // An end tag without a matching start tag must not trap on an empty stack.
                if !actionStack.isEmpty {
                    actionStack.removeLast()
                }
            case .comment, .doctype:
                break
            }
        }

        if previouslyFinishedBlockElement {
            // Drop the separator that followed the final block element. It is only removed when
            // it is actually still pending: if the closing element had a non-default action the
            // run has already been flushed or discarded and there is nothing left to trim.
            let separator = blockSeparator
            if currentRun.hasSuffix(separator) {
                currentRun.removeLast(separator.count)
            }
        }
        finishRun()
        return str
    }

    /// Emits the text for elements that produce output as soon as they open (currently only `br`).
    private mutating func handleStartTag(_ name: String) {
        if name == "br" {
            currentRun.append(lineBreak)
        }
    }

    /// Flushes the pending run and appends a block separator when a block element ends.
    private mutating func handleEndTag(_ name: String) {
        switch name {
        case "pre", "blockquote", "p", "ol", "ul":
            finishRun()
            finishBlockElement()
        default:
            break
        }
    }

    /// Appends the block separator to the pending run, unless the element had no character data.
    private mutating func finishBlockElement() {
        guard !currentElementIsEmpty else {
            return
        }
        previouslyFinishedBlockElement = true
        currentRun.append(blockSeparator)
    }

    /// Moves the pending run into the output, honoring the actions of the open elements.
    ///
    /// If any open element has the `.skip` action the run is discarded. Otherwise, if any open
    /// element has a `.replace` action, the replacement of the first such element on the stack
    /// is emitted instead of the run.
    private mutating func finishRun() {
        guard !currentRun.isEmpty else {
            return
        }
        if actionStack.contains(.skip) {
            currentRun = ""
            return
        } else if case .replace(let replacement) = actionStack.first(where: \.isReplace) {
            currentRun = replacement
        }
        str.append(currentRun)
        currentRun = ""
    }
}
/// Options that control the text produced by `TextConverter`.
public struct TextConverterConfiguration {

    /// Whether line structure is rendered with newline characters.
    ///
    /// When `true`, `TextConverter` emits `"\n"` for `br` and `"\n\n"` after a block element;
    /// when `false`, it emits a single space in both places.
    public var insertNewlines: Bool

    /// Creates a configuration.
    /// - Parameter insertNewlines: Whether to render line structure with newlines. Defaults to `true`.
    public init(insertNewlines: Bool = true) {
        let shouldInsertNewlines = insertNewlines
        self.insertNewlines = shouldInsertNewlines
    }
}