diff --git a/Sources/HTMLStreamer/AttributedStringConverter.swift b/Sources/HTMLStreamer/AttributedStringConverter.swift
index 569776c..29b0626 100644
--- a/Sources/HTMLStreamer/AttributedStringConverter.swift
+++ b/Sources/HTMLStreamer/AttributedStringConverter.swift
@@ -17,7 +17,7 @@ private typealias PlatformFont = UIFont
private typealias PlatformFont = NSFont
#endif
-public struct AttributedStringConverter {
+public struct AttributedStringConverter {
private let configuration: AttributedStringConverterConfiguration
private var fontCache: [FontTrait: PlatformFont] = [:]
@@ -77,7 +77,7 @@ public struct AttributedStringConverter {
return str
}
- private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [HTMLStreamer.Attribute]) {
+ private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
if name == "br" {
currentRun.append("\n")
return
@@ -313,37 +313,6 @@ public struct AttributedStringConverter {
}
}
-public protocol AttributedStringCallbacks {
- static func makeURL(string: String) -> URL?
- static func elementAction(name: String, attributes: [Attribute]) -> ElementAction
-}
-
-public enum ElementAction: Equatable {
- case `default`
- case skip
- case replace(String)
-
- var isReplace: Bool {
- if case .replace(_) = self {
- true
- } else {
- false
- }
- }
-}
-
-public extension AttributedStringCallbacks {
- static func makeURL(string: String) -> URL? {
- URL(string: string)
- }
- static func elementAction(name: String, attributes: [Attribute]) -> ElementAction {
- .default
- }
-}
-
-public struct DefaultCallbacks: AttributedStringCallbacks {
-}
-
public struct AttributedStringConverterConfiguration {
#if os(iOS)
public var font: UIFont
diff --git a/Sources/HTMLStreamer/HTMLConversionCallbacks.swift b/Sources/HTMLStreamer/HTMLConversionCallbacks.swift
new file mode 100644
index 0000000..44d40c1
--- /dev/null
+++ b/Sources/HTMLStreamer/HTMLConversionCallbacks.swift
@@ -0,0 +1,39 @@
+//
+// HTMLConversionCallbacks.swift
+// HTMLStreamer
+//
+// Created by Shadowfacts on 12/22/23.
+//
+
+import Foundation
+
+/// Static hooks that let a client customize how HTML is converted.
+///
+/// Both requirements have default implementations, so a conforming type only
+/// needs to provide the ones it wants to change.
+public protocol HTMLConversionCallbacks {
+    /// Builds a `URL` from a string; returns `nil` if the string cannot be used.
+    ///
+    /// The default implementation forwards to `URL(string:)`.
+    static func makeURL(string: String) -> URL?
+
+    /// Decides how an element should be treated.
+    ///
+    /// `TextConverter` calls this once for every start tag it encounters.
+    ///
+    /// - Parameters:
+    ///   - name: The tag name of the element.
+    ///   - attributes: The attributes present on the start tag.
+    /// - Returns: The action to apply to the element. The default implementation
+    ///   returns `.default`.
+    static func elementAction(name: String, attributes: [Attribute]) -> ElementAction
+}
+
+/// What a converter should do with an element, as chosen by
+/// `HTMLConversionCallbacks.elementAction(name:attributes:)`.
+public enum ElementAction: Equatable {
+    /// Handle the element normally.
+    case `default`
+    /// Discard the text collected inside the element.
+    case skip
+    /// Emit the associated string in place of the text collected inside the element.
+    case replace(String)
+
+    /// `true` only for `.replace`, regardless of the replacement text.
+    var isReplace: Bool {
+        switch self {
+        case .replace:
+            return true
+        case .default, .skip:
+            return false
+        }
+    }
+}
+
+/// Default implementations, so conforming types can override selectively.
+public extension HTMLConversionCallbacks {
+    /// Defers to Foundation's failable `URL(string:)` initializer.
+    static func makeURL(string: String) -> URL? {
+        return URL(string: string)
+    }
+
+    /// Applies no special treatment: every element gets `.default`.
+    static func elementAction(name: String, attributes: [Attribute]) -> ElementAction {
+        return ElementAction.default
+    }
+}
+
+/// Callbacks type that overrides nothing and so uses every default
+/// implementation of `HTMLConversionCallbacks`.
+public struct DefaultCallbacks: HTMLConversionCallbacks {}
diff --git a/Sources/HTMLStreamer/TextConverter.swift b/Sources/HTMLStreamer/TextConverter.swift
new file mode 100644
index 0000000..870ef34
--- /dev/null
+++ b/Sources/HTMLStreamer/TextConverter.swift
@@ -0,0 +1,135 @@
+//
+// TextConverter.swift
+// HTMLStreamer
+//
+// Created by Shadowfacts on 12/19/23.
+//
+
+import Foundation
+
+/// Converts an HTML string into plain text, applying the per-element actions
+/// supplied by `Callbacks`.
+///
+/// Character tokens are accumulated into a "run". A run is flushed into the
+/// output at block-element boundaries, around elements whose action is not
+/// `.default`, and at the end of input. A run flushed while a `.skip` element
+/// is open is dropped; one flushed while a `.replace` element is open is
+/// substituted with the replacement text.
+public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
+
+    private let configuration: TextConverterConfiguration
+
+    /// Output accumulated by the conversion in progress.
+    private var str = ""
+    /// Actions of the elements that are currently open, innermost last.
+    private var actionStack: [ElementAction] = []
+    /// Set when a block element has closed and the separator before the next
+    /// run has not been emitted yet.
+    private var previouslyFinishedBlockElement = false
+    /// Text collected since the last flush.
+    private var currentRun = ""
+
+    /// Text placed between block-level elements.
+    private var blockSeparator: String {
+        configuration.insertNewlines ? "\n\n" : " "
+    }
+
+    /// Creates a converter that uses `DefaultCallbacks`.
+    public init(configuration: TextConverterConfiguration = .init()) where Callbacks == DefaultCallbacks {
+        self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
+    }
+
+    /// Creates a converter for an explicit callbacks type.
+    ///
+    /// - Parameters:
+    ///   - configuration: Controls how line breaks and block boundaries are rendered.
+    ///   - callbacks: Used only to select the `Callbacks` type; the value is ignored.
+    public init(configuration: TextConverterConfiguration = .init(), callbacks _: Callbacks.Type = Callbacks.self) {
+        self.configuration = configuration
+    }
+
+    /// Converts `html` to plain text.
+    ///
+    /// - Parameter html: The HTML source to convert.
+    /// - Returns: The text content, with `<br>` and block boundaries rendered
+    ///   according to the configuration.
+    public mutating func convert(html: String) -> String {
+        // Start from a clean slate so a converter can be reused: unclosed
+        // elements or a trailing block element in an earlier input must not
+        // influence this conversion.
+        str = ""
+        currentRun = ""
+        actionStack.removeAll()
+        previouslyFinishedBlockElement = false
+
+        // NOTE(review): a start tag that never receives an end-tag token leaves
+        // its action on the stack for the rest of the conversion — confirm how
+        // Tokenizer reports void/self-closing elements.
+        var tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
+        while let token = tokenizer.next() {
+            switch token {
+            case .character(let scalar):
+                currentRun.unicodeScalars.append(scalar)
+            case .characterSequence(let string):
+                currentRun.append(string)
+            case .startTag(let name, let selfClosing, let attributes):
+                let action = Callbacks.elementAction(name: name, attributes: attributes)
+                // Text that precedes a skipped/replaced element must not share
+                // its run, or it would be dropped/replaced along with it. When
+                // an enclosing element already overrides the run there is
+                // nothing to protect, so the run is left intact.
+                if action != .default, !actionStack.contains(where: { $0 != .default }) {
+                    finishRun()
+                }
+                actionStack.append(action)
+                handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
+            case .endTag(let name):
+                handleEndTag(name)
+                // An end tag without a matching start tag finds the stack
+                // empty; there is nothing to pop in that case.
+                if let action = actionStack.last {
+                    // A non-default action applies only to this element's
+                    // content, so its run ends here.
+                    if action != .default {
+                        finishRun()
+                    }
+                    actionStack.removeLast()
+                }
+            case .comment, .doctype:
+                break
+            }
+        }
+
+        finishRun()
+
+        return str
+    }
+
+    /// Applies the textual effect of an opening tag: a line break for `br`,
+    /// a block boundary for block-level elements.
+    private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
+        switch name {
+        case "br":
+            currentRun.append(configuration.insertNewlines ? "\n" : " ")
+        case "pre", "blockquote", "p", "ol", "ul":
+            startBlockElement()
+            finishRun()
+        default:
+            break
+        }
+    }
+
+    /// Emits the block separator unless nothing has been output yet.
+    private mutating func startBlockElement() {
+        guard !str.isEmpty else {
+            return
+        }
+        // The separator is written here, so the one owed by a previously
+        // closed block must not be added again.
+        previouslyFinishedBlockElement = false
+        currentRun.append(blockSeparator)
+    }
+
+    /// Flushes the run and records the block boundary when a block-level element closes.
+    private mutating func handleEndTag(_ name: String) {
+        switch name {
+        case "pre", "blockquote", "p", "ol", "ul":
+            finishRun()
+            finishBlockElement()
+        default:
+            break
+        }
+    }
+
+    /// Remembers that a separator is owed before the next run, if any output exists.
+    private mutating func finishBlockElement() {
+        if !str.isEmpty {
+            previouslyFinishedBlockElement = true
+        }
+    }
+
+    /// Moves the current run into the output, honoring open `.skip` and
+    /// `.replace` elements and any pending block separator.
+    private mutating func finishRun() {
+        guard !currentRun.isEmpty else {
+            return
+        }
+
+        // `.skip` anywhere in the stack wins over `.replace`.
+        if actionStack.contains(.skip) {
+            currentRun = ""
+            return
+        } else if case .replace(let replacement) = actionStack.first(where: \.isReplace) {
+            currentRun = replacement
+        }
+
+        if previouslyFinishedBlockElement {
+            previouslyFinishedBlockElement = false
+            currentRun.insert(contentsOf: blockSeparator, at: currentRun.startIndex)
+        }
+
+        str.append(currentRun)
+        currentRun = ""
+    }
+
+}
+
+/// Options for `TextConverter`.
+public struct TextConverterConfiguration {
+    /// When `true`, `TextConverter` renders `<br>` as `"\n"` and block
+    /// boundaries as `"\n\n"`; when `false`, it uses a single space for both.
+    public var insertNewlines: Bool
+
+    /// Creates a configuration.
+    ///
+    /// - Parameter insertWhitespace: Stored as `insertNewlines`. Defaults to `true`.
+    public init(insertWhitespace: Bool = true) {
+        insertNewlines = insertWhitespace
+    }
+}
diff --git a/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift b/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift
index 307ec85..53e00e0 100644
--- a/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift
+++ b/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift
@@ -41,7 +41,7 @@ final class AttributedStringConverterTests: XCTestCase {
convert(html, callbacks: DefaultCallbacks.self)
}
- private func convert(_ html: String, callbacks _: Callbacks.Type = Callbacks.self) -> NSAttributedString {
+ private func convert(_ html: String, callbacks _: Callbacks.Type = Callbacks.self) -> NSAttributedString {
let config = AttributedStringConverterConfiguration(
font: font,
monospaceFont: monospaceFont,
@@ -212,7 +212,7 @@ final class AttributedStringConverterTests: XCTestCase {
}
func testMakeURLCallback() {
- struct Callbacks: AttributedStringCallbacks {
+ struct Callbacks: HTMLConversionCallbacks {
static func makeURL(string: String) -> URL? {
URL(string: "https://apple.com")
}
@@ -226,7 +226,7 @@ final class AttributedStringConverterTests: XCTestCase {
}
func testElementActionCallback() {
- struct Callbacks: AttributedStringCallbacks {
+ struct Callbacks: HTMLConversionCallbacks {
static func elementAction(name: String, attributes: [Attribute]) -> ElementAction {
let clazz = attributes.attributeValue(for: "class")
if clazz == "invisible" {
@@ -240,8 +240,8 @@ final class AttributedStringConverterTests: XCTestCase {
}
let skipped = convert("test", callbacks: Callbacks.self)
XCTAssertEqual(skipped, NSAttributedString())
- let skipNestped = convert("test", callbacks: Callbacks.self)
- XCTAssertEqual(skipNestped, NSAttributedString())
+ let skipNested = convert("test", callbacks: Callbacks.self)
+ XCTAssertEqual(skipNested, NSAttributedString())
let skipNestped2 = convert("test", callbacks: Callbacks.self)
XCTAssertEqual(skipNestped2, NSAttributedString())
let replaced = convert("test", callbacks: Callbacks.self)
diff --git a/Tests/HTMLStreamerTests/TextConverterTests.swift b/Tests/HTMLStreamerTests/TextConverterTests.swift
new file mode 100644
index 0000000..aa970c9
--- /dev/null
+++ b/Tests/HTMLStreamerTests/TextConverterTests.swift
@@ -0,0 +1,66 @@
+//
+// TextConverterTests.swift
+//
+//
+// Created by Shadowfacts on 12/22/23.
+//
+
+import XCTest
+@testable import HTMLStreamer
+
+/// Tests for `TextConverter`'s HTML-to-plain-text conversion.
+final class TextConverterTests: XCTestCase {
+
+    /// Converts `html` using `DefaultCallbacks`.
+    private func convert(_ html: String, configuration: TextConverterConfiguration = .init()) -> String {
+        convert(html, configuration: configuration, callbacks: DefaultCallbacks.self)
+    }
+
+    /// Converts `html` using the given callbacks type.
+    private func convert<Callbacks: HTMLConversionCallbacks>(_ html: String, configuration: TextConverterConfiguration = .init(), callbacks: Callbacks.Type = Callbacks.self) -> String {
+        // Pass the callbacks type through so the converter actually uses it.
+        var converter = TextConverter(configuration: configuration, callbacks: callbacks)
+        return converter.convert(html: html)
+    }
+
+    func testConvertBR() {
+        XCTAssertEqual(convert("a<br>b"), "a\nb")
+        XCTAssertEqual(convert("a<br />b"), "a\nb")
+    }
+
+    func testConvertA() {
+        XCTAssertEqual(convert("<a href=\"https://example.com\">link</a>"), "link")
+    }
+
+    func testIncorrectNesting() {
+        XCTAssertEqual(convert("<strong>bold <em>both</strong> italic</em>"), "bold both italic")
+    }
+
+    func testTextAfterBlockElement() {
+        XCTAssertEqual(convert("<blockquote>wee</blockquote>after"), "wee\n\nafter")
+        XCTAssertEqual(convert("<blockquote>wee</blockquote>after", configuration: .init(insertWhitespace: false)), "wee after")
+    }
+
+    func testMultipleBlockElements() {
+        XCTAssertEqual(convert("<blockquote>a</blockquote><blockquote>b</blockquote>"), "a\n\nb")
+        XCTAssertEqual(convert("<blockquote>a</blockquote><blockquote>b</blockquote>", configuration: .init(insertWhitespace: false)), "a b")
+    }
+
+    func testElementActionCallback() {
+        struct Callbacks: HTMLConversionCallbacks {
+            static func elementAction(name: String, attributes: [Attribute]) -> ElementAction {
+                let clazz = attributes.attributeValue(for: "class")
+                if clazz == "invisible" {
+                    return .skip
+                } else if clazz == "ellipsis" {
+                    return .replace("…")
+                } else {
+                    return .default
+                }
+            }
+        }
+        let skipped = convert("<span class=\"invisible\">test</span>", callbacks: Callbacks.self)
+        XCTAssertEqual(skipped, "")
+        let skipNested = convert("<span class=\"invisible\"><b>test</b></span>", callbacks: Callbacks.self)
+        XCTAssertEqual(skipNested, "")
+        let replaced = convert("<span class=\"ellipsis\">test</span>", callbacks: Callbacks.self)
+        XCTAssertEqual(replaced, "…")
+    }
+
+}