Attributed string conversion
This commit is contained in:
parent
a4d791a995
commit
38d57b3f79
|
@ -5,6 +5,10 @@ import PackageDescription
|
|||
|
||||
let package = Package(
|
||||
name: "HTMLStreamer",
|
||||
platforms: [
|
||||
.iOS(.v13),
|
||||
.macOS(.v10_15),
|
||||
],
|
||||
products: [
|
||||
// Products define the executables and libraries a package produces, making them visible to other packages.
|
||||
.library(
|
||||
|
|
|
@ -0,0 +1,320 @@
|
|||
//
|
||||
// AttributedStringConverter.swift
|
||||
// HTMLStreamer
|
||||
//
|
||||
// Created by Shadowfacts on 11/24/23.
|
||||
//
|
||||
|
||||
#if os(iOS)
|
||||
import UIKit
|
||||
#elseif os(macOS)
|
||||
import AppKit
|
||||
#endif
|
||||
|
||||
/// Converts an HTML string into an `NSAttributedString` by walking the token
/// stream produced by `Tokenizer` and accumulating text into runs that share
/// the same set of styles.
///
/// `Callbacks` lets callers customize URL creation for links and decide, per
/// element, whether its contents are rendered normally, skipped or replaced.
struct AttributedStringConverter<Callbacks: AttributedStringCallbacks> {
    private let configuration: AttributedStringConverterConfiguration
    private var tokenizer: Tokenizer<String.Iterator>
    // The output being built; every finished run is appended to it.
    private let str = NSMutableAttributedString()

    // One action per currently open (non-void, non-self-closing) element.
    private var actionStack: InlineArray3<ElementAction> = []
    // The styles that apply to the text currently being accumulated.
    private var styleStack: InlineArray3<Style> = []
    // The current run of text w/o styles changing
    private var currentRun: String = ""

    /// Creates a converter that uses `DefaultCallbacks`.
    init(html: String, configuration: AttributedStringConverterConfiguration) where Callbacks == DefaultCallbacks {
        self.init(html: html, configuration: configuration, callbacks: DefaultCallbacks.self)
    }

    /// Creates a converter for `html`.
    ///
    /// - Parameters:
    ///   - html: The HTML source to tokenize.
    ///   - configuration: Fonts and paragraph style applied to the output.
    ///   - callbacks: Only used to pin the `Callbacks` generic parameter.
    init(html: String, configuration: AttributedStringConverterConfiguration, callbacks _: Callbacks.Type = Callbacks.self) {
        self.configuration = configuration
        self.tokenizer = Tokenizer(chars: html.makeIterator())
    }

    /// Consumes all remaining tokens and returns the attributed string built from them.
    mutating func convert() -> NSAttributedString {
        while let token = tokenizer.next() {
            switch token {
            case .character(let c):
                currentRun.append(c)
            case .comment, .doctype:
                // ignored
                continue
            case .startTag(let name, let selfClosing, let attributes):
                // <br> is a void element: it only contributes a line break (whether
                // written as <br> or <br />) and never gets an end tag, so it must
                // not push an action that nothing would ever pop.
                if name == "br" {
                    currentRun.append("\n")
                    continue
                }
                // self closing tags are ignored since they have no content; they also
                // have no end tag, so no action is pushed for them
                guard !selfClosing else {
                    continue
                }
                let action = Callbacks.elementAction(name: name, attributes: attributes)
                // a non-default action only applies to the element's own contents, so
                // the text accumulated before the element has to be emitted first
                if action != .default {
                    finishRun()
                }
                actionStack.append(action)
                handleStartTag(name, attributes: attributes)
            case .endTag(let name):
                // <br> never pushes an action, so a stray </br> must not pop one
                guard name != "br" else {
                    continue
                }
                handleEndTag(name)
                // an unmatched end tag has no action to pop
                if !actionStack.isEmpty {
                    // if we have a non-default action for the current element, the run finishes here
                    if actionStack.last != .default {
                        finishRun()
                    }
                    actionStack.removeLast()
                }
            }
        }

        finishRun()

        return str
    }

    /// Pushes the style (and inserts the block separator) associated with the start tag `name`.
    private mutating func handleStartTag(_ name: String, attributes: InlineArray3<HTMLStreamer.Attribute>) {
        switch name {
        case "a":
            // we need to always insert an attribute, because we need to always have one
            // to remove from the stack in handleEndTag
            // but we only need to finish the run if we have a URL, since otherwise
            // the final attribute run won't be affected
            let url = attributes.attributeValue(for: "href").flatMap(Callbacks.makeURL(string:))
            if url != nil {
                finishRun()
            }
            styleStack.append(.link(url))
        case "em", "i":
            finishRun()
            styleStack.append(.italic)
        case "strong", "b":
            finishRun()
            styleStack.append(.bold)
        case "del":
            finishRun()
            styleStack.append(.strikethrough)
        case "code":
            finishRun()
            styleStack.append(.monospace)
        case "pre":
            startBlockElement()
            finishRun()
            styleStack.append(.monospace)
        case "blockquote":
            startBlockElement()
            finishRun()
            styleStack.append(.blockquote)
        case "p":
            startBlockElement()
        default:
            break
        }
    }

    /// Separates a block element from preceding content with a blank line,
    /// unless nothing has been produced yet.
    private mutating func startBlockElement() {
        if str.length != 0 || !currentRun.isEmpty {
            currentRun.append("\n\n")
        }
    }

    /// Finishes the current run and pops the style associated with the end tag `name`.
    private mutating func handleEndTag(_ name: String) {
        switch name {
        case "a":
            // mirrors handleStartTag: a link without a URL doesn't affect the run
            if case .link(.some(_)) = styleStack.last {
                finishRun()
            }
            removeLastStyle(.link)
        case "em", "i":
            finishRun()
            removeLastStyle(.italic)
        case "strong", "b":
            finishRun()
            removeLastStyle(.bold)
        case "del":
            finishRun()
            removeLastStyle(.strikethrough)
        case "code":
            finishRun()
            removeLastStyle(.monospace)
        case "pre":
            finishRun()
            removeLastStyle(.monospace)
        case "blockquote":
            finishRun()
            removeLastStyle(.blockquote)
        default:
            break
        }
    }

    // needed to correctly handle mis-nested tags
    /// Removes the most recently pushed style of the given type, if there is one.
    private mutating func removeLastStyle(_ type: Style.StyleType) {
        // Walk backwards from endIndex, stepping *before* reading, so the index
        // never moves past startIndex and an empty stack is a no-op.
        var i = styleStack.endIndex
        while i > styleStack.startIndex {
            styleStack.formIndex(before: &i)
            if styleStack[i].type == type {
                styleStack.remove(at: i)
                return
            }
        }
    }

    // The configured paragraph style with an added 32pt indent, used for blockquotes.
    private lazy var blockquoteParagraphStyle: NSParagraphStyle = {
        let style = configuration.paragraphStyle.mutableCopy() as! NSMutableParagraphStyle
        style.headIndent = 32
        style.firstLineHeadIndent = 32
        return style
    }()

    /// Appends the accumulated text to the output using the attributes derived
    /// from the current style stack, then resets the run.
    ///
    /// The run is dropped if any open element has the `.skip` action, and its
    /// text is substituted if an open element has a `.replace` action.
    private mutating func finishRun() {
        guard !currentRun.isEmpty else {
            return
        }

        if actionStack.contains(.skip) {
            currentRun = ""
            return
        } else if case .replace(let replacement) = actionStack.first(where: \.isReplace) {
            currentRun = replacement
        }

        var attributes = [NSAttributedString.Key: Any]()
        var currentFontTraits = Set<FontTrait>()
        for style in styleStack {
            switch style {
            case .bold:
                currentFontTraits.insert(.bold)
            case .italic:
                currentFontTraits.insert(.italic)
            case .monospace:
                currentFontTraits.insert(.monospace)
            case .link(let url):
                if let url {
                    attributes[.link] = url
                }
            case .strikethrough:
                attributes[.strikethroughStyle] = NSUnderlineStyle.single.rawValue
            case .blockquote:
                attributes[.paragraphStyle] = blockquoteParagraphStyle
                currentFontTraits.insert(.italic)
            }
        }

        let baseFont = currentFontTraits.contains(.monospace) ? configuration.monospaceFont : configuration.font
        var descriptor = baseFont.fontDescriptor
        // If a trait combination isn't available, fall through to the next best option.
        if currentFontTraits.contains(.bold) && currentFontTraits.contains(.italic),
           let boldItalic = descriptor.withSymbolicTraits([.traitBold, .traitItalic]) {
            descriptor = boldItalic
        } else if currentFontTraits.contains(.bold),
                  let bold = descriptor.withSymbolicTraits(.traitBold) {
            descriptor = bold
        } else if currentFontTraits.contains(.italic),
                  let italic = descriptor.withSymbolicTraits(.traitItalic) {
            descriptor = italic
        }
        // size 0 keeps the point size stored in the descriptor
        #if os(iOS)
        attributes[.font] = UIFont(descriptor: descriptor, size: 0)
        #elseif os(macOS)
        attributes[.font] = NSFont(descriptor: descriptor, size: 0)
        #endif

        if !attributes.keys.contains(.paragraphStyle) {
            attributes[.paragraphStyle] = configuration.paragraphStyle
        }

        str.append(NSAttributedString(string: currentRun, attributes: attributes))
        currentRun = ""
    }
}
|
||||
|
||||
/// Customization points consulted by `AttributedStringConverter` while it
/// converts HTML.
protocol AttributedStringCallbacks {
    /// Turns the value of an `href` attribute into the URL stored on the link.
    /// Returning `nil` leaves the link text without a `.link` attribute.
    static func makeURL(string: String) -> URL?

    /// Decides how the contents of the element with the given tag name and
    /// attributes are treated.
    static func elementAction(name: String, attributes: InlineArray3<Attribute>) -> ElementAction
}
|
||||
|
||||
/// How the converter treats the text inside an element.
enum ElementAction: Equatable {
    /// Render the contents normally.
    case `default`
    /// Drop the contents.
    case skip
    /// Emit the associated string in place of the contents.
    case replace(String)

    /// Whether this action is a `.replace`, regardless of its replacement text.
    var isReplace: Bool {
        switch self {
        case .replace:
            return true
        case .default, .skip:
            return false
        }
    }
}
|
||||
|
||||
// Default implementations, so conforming types only override what they need.
extension AttributedStringCallbacks {
    /// Parses the string with `URL(string:)`.
    static func makeURL(string: String) -> URL? {
        return URL(string: string)
    }

    /// Every element gets the default treatment.
    static func elementAction(name: String, attributes: InlineArray3<Attribute>) -> ElementAction {
        return .default
    }
}
|
||||
|
||||
/// Callbacks that rely entirely on the default implementations provided by
/// the `AttributedStringCallbacks` extension.
struct DefaultCallbacks: AttributedStringCallbacks {}
|
||||
|
||||
/// Platform-specific fonts, color and paragraph style handed to
/// `AttributedStringConverter`.
///
/// Member order is significant: it defines the parameter order of the
/// synthesized memberwise initializer.
struct AttributedStringConverterConfiguration {
    #if os(iOS)
    /// Base font for regular text.
    var font: UIFont
    /// Base font for monospaced (`code` / `pre`) text.
    var monospaceFont: UIFont
    // NOTE(review): not read by AttributedStringConverter in this file — confirm intended use.
    var color: UIColor
    #elseif os(macOS)
    /// Base font for regular text.
    var font: NSFont
    /// Base font for monospaced (`code` / `pre`) text.
    var monospaceFont: NSFont
    // NOTE(review): not read by AttributedStringConverter in this file — confirm intended use.
    var color: NSColor
    #endif
    /// Paragraph style applied to every run that doesn't set its own.
    var paragraphStyle: NSParagraphStyle
}
|
||||
|
||||
#if os(macOS)
|
||||
private extension NSFontDescriptor {
    /// Optional-returning overload, so call sites can use `if let` on both
    /// UIKit and AppKit descriptors.
    func withSymbolicTraits(_ traits: SymbolicTraits) -> NSFontDescriptor? {
        // The explicit non-optional type annotation selects AppKit's own
        // method; without it this call would resolve to this overload and recurse.
        let appKitResult: NSFontDescriptor = self.withSymbolicTraits(traits)
        return appKitResult
    }
}
|
||||
// UIKit-style spellings of the AppKit trait names, so the converter can use
// the same trait identifiers on both platforms.
private extension NSFontDescriptor.SymbolicTraits {
    static var traitBold: Self {
        return .bold
    }

    static var traitItalic: Self {
        return .italic
    }
}
|
||||
#endif
|
||||
|
||||
/// The font variations that the active styles can request for a run.
private enum FontTrait {
    case bold, italic, monospace
}
|
||||
|
||||
/// A style pushed onto the converter's style stack while an element is open.
private enum Style {
    case bold
    case italic
    case monospace
    /// A link; the URL is `nil` when the element had no usable `href`.
    case link(URL?)
    case strikethrough
    case blockquote

    /// The payload-free kind of this style, used to find a style on the
    /// stack without comparing associated values.
    var type: StyleType {
        switch self {
        case .bold: .bold
        case .italic: .italic
        case .monospace: .monospace
        case .link: .link
        case .strikethrough: .strikethrough
        case .blockquote: .blockquote
        }
    }

    /// `Style` with the associated values stripped.
    enum StyleType {
        case bold
        case italic
        case monospace
        case link
        case strikethrough
        case blockquote
    }
}
|
||||
|
||||
extension Collection where Element == Attribute {
    /// Returns the value of the first attribute whose name equals `name`,
    /// or `nil` when there is no such attribute.
    func attributeValue(for name: String) -> String? {
        for attribute in self where attribute.name == name {
            return attribute.value
        }
        return nil
    }
}
|
|
@ -0,0 +1,195 @@
|
|||
//
|
||||
// AttributedStringConverterTests.swift
|
||||
//
|
||||
//
|
||||
// Created by Shadowfacts on 11/24/23.
|
||||
//
|
||||
|
||||
import XCTest
|
||||
@testable import HTMLStreamer
|
||||
|
||||
/// Tests for `AttributedStringConverter`, comparing the converted output
/// against hand-built attributed strings.
final class AttributedStringConverterTests: XCTestCase {

    // The expected fonts are declared for both platforms so the tests compile
    // on every platform the package supports.
    #if os(iOS)
    private let font = UIFont.systemFont(ofSize: 13)
    private lazy var italicFont = UIFont(descriptor: font.fontDescriptor.withSymbolicTraits(.traitItalic)!, size: 13)
    private lazy var boldFont = UIFont(descriptor: font.fontDescriptor.withSymbolicTraits(.traitBold)!, size: 13)
    private lazy var boldItalicFont = UIFont(descriptor: font.fontDescriptor.withSymbolicTraits([.traitBold, .traitItalic])!, size: 13)
    private let monospaceFont = UIFont.monospacedSystemFont(ofSize: 13, weight: .regular)
    #elseif os(macOS)
    private let font = NSFont.systemFont(ofSize: 13)
    private lazy var italicFont = NSFont(descriptor: font.fontDescriptor.withSymbolicTraits(.italic), size: 13)!
    private lazy var boldFont = NSFont(descriptor: font.fontDescriptor.withSymbolicTraits(.bold), size: 13)!
    private lazy var boldItalicFont = NSFont(descriptor: font.fontDescriptor.withSymbolicTraits([.bold, .italic]), size: 13)!
    private let monospaceFont = NSFont.monospacedSystemFont(ofSize: 13, weight: .regular)
    #endif
    // Mirrors the indent the converter applies to blockquotes.
    private let blockquoteParagraphStyle: NSParagraphStyle = {
        let style = NSParagraphStyle.default.mutableCopy() as! NSMutableParagraphStyle
        style.headIndent = 32
        style.firstLineHeadIndent = 32
        return style
    }()

    /// Converts `html` using `DefaultCallbacks`.
    private func convert(_ html: String) -> NSAttributedString {
        convert(html, callbacks: DefaultCallbacks.self)
    }

    /// Converts `html` with the test fonts and the default paragraph style,
    /// using the given callbacks type.
    private func convert<Callbacks: AttributedStringCallbacks>(_ html: String, callbacks _: Callbacks.Type = Callbacks.self) -> NSAttributedString {
        let config = AttributedStringConverterConfiguration(
            font: font,
            monospaceFont: monospaceFont,
            color: .black,
            paragraphStyle: .default
        )
        var converter = AttributedStringConverter<Callbacks>(html: html, configuration: config)
        return converter.convert()
    }

    func testConvertBR() {
        XCTAssertEqual(convert("a<br>b"), NSAttributedString(string: "a\nb", attributes: [
            .font: font,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
    }

    func testConvertA() {
        XCTAssertEqual(convert("<a href='https://example.com'>link</a>"), NSAttributedString(string: "link", attributes: [
            .font: font,
            .paragraphStyle: NSParagraphStyle.default,
            .link: URL(string: "https://example.com")!,
        ]))
        // without an href there is no URL, so no link attribute is expected
        XCTAssertEqual(convert("<a>link</a>"), NSAttributedString(string: "link", attributes: [
            .font: font,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
    }

    func testConvertP() {
        XCTAssertEqual(convert("<p>a</p><p>b</p>"), NSAttributedString(string: "a\n\nb", attributes: [
            .font: font,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
    }

    func testConvertEm() {
        XCTAssertEqual(convert("<em>hello</em>"), NSAttributedString(string: "hello", attributes: [
            .font: italicFont,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
        XCTAssertEqual(convert("<i>hello</i>"), NSAttributedString(string: "hello", attributes: [
            .font: italicFont,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
    }

    func testConvertStrong() {
        XCTAssertEqual(convert("<strong>hello</strong>"), NSAttributedString(string: "hello", attributes: [
            .font: boldFont,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
        XCTAssertEqual(convert("<b>hello</b>"), NSAttributedString(string: "hello", attributes: [
            .font: boldFont,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
    }

    func testConvertBoldItalic() {
        XCTAssertEqual(convert("<strong><em>hello</em></strong>"), NSAttributedString(string: "hello", attributes: [
            .font: boldItalicFont,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
    }

    func testIncorrectNesting() {
        let result = NSMutableAttributedString()
        result.append(NSAttributedString(string: "bold ", attributes: [
            .font: boldFont,
        ]))
        result.append(NSAttributedString(string: "both", attributes: [
            .font: boldItalicFont,
        ]))
        result.append(NSAttributedString(string: " italic", attributes: [
            .font: italicFont,
        ]))
        result.addAttribute(.paragraphStyle, value: NSParagraphStyle.default, range: NSRange(location: 0, length: result.length))
        XCTAssertEqual(convert("<strong>bold <em>both</strong> italic</em>"), result)
    }

    func testDel() {
        XCTAssertEqual(convert("<del>blah</del>"), NSAttributedString(string: "blah", attributes: [
            .font: font,
            .paragraphStyle: NSParagraphStyle.default,
            .strikethroughStyle: NSUnderlineStyle.single.rawValue,
        ]))
    }

    func testCode() {
        XCTAssertEqual(convert("<code>wee</code>"), NSAttributedString(string: "wee", attributes: [
            .font: monospaceFont,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
    }

    func testPre() {
        XCTAssertEqual(convert("<pre>wee</pre>"), NSAttributedString(string: "wee", attributes: [
            .font: monospaceFont,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
    }

    func testBlockquote() {
        XCTAssertEqual(convert("<blockquote>hello</blockquote>"), NSAttributedString(string: "hello", attributes: [
            .font: italicFont,
            .paragraphStyle: blockquoteParagraphStyle,
        ]))
        XCTAssertEqual(convert("<blockquote><b>hello</b></blockquote>"), NSAttributedString(string: "hello", attributes: [
            .font: boldItalicFont,
            .paragraphStyle: blockquoteParagraphStyle,
        ]))
    }

    func testSelfClosing() {
        XCTAssertEqual(convert("<b />asdf"), NSAttributedString(string: "asdf", attributes: [
            .font: font,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
    }

    func testMakeURLCallback() {
        struct Callbacks: AttributedStringCallbacks {
            static func makeURL(string: String) -> URL? {
                URL(string: "https://apple.com")
            }
        }
        let result = convert("<a href='https://example.com'>test</a>", callbacks: Callbacks.self)
        XCTAssertEqual(result, NSAttributedString(string: "test", attributes: [
            .font: font,
            .paragraphStyle: NSParagraphStyle.default,
            .link: URL(string: "https://apple.com")!,
        ]))
    }

    func testElementActionCallback() {
        struct Callbacks: AttributedStringCallbacks {
            static func elementAction(name: String, attributes: InlineArray3<Attribute>) -> ElementAction {
                let clazz = attributes.attributeValue(for: "class")
                if clazz == "invisible" {
                    return .skip
                } else if clazz == "ellipsis" {
                    return .replace("…")
                } else {
                    return .default
                }
            }
        }
        let skipped = convert("<span class='invisible'>test</span>", callbacks: Callbacks.self)
        XCTAssertEqual(skipped, NSAttributedString())
        let skippedNestedInside = convert("<span class='invisible'><b>test</b></span>", callbacks: Callbacks.self)
        XCTAssertEqual(skippedNestedInside, NSAttributedString())
        let skippedNestedOutside = convert("<b><span class='invisible'>test</span></b>", callbacks: Callbacks.self)
        XCTAssertEqual(skippedNestedOutside, NSAttributedString())
        let replaced = convert("<span class='ellipsis'>test</span>", callbacks: Callbacks.self)
        XCTAssertEqual(replaced, NSAttributedString(string: "…", attributes: [
            .font: font,
            .paragraphStyle: NSParagraphStyle.default,
        ]))
    }

}
|
Loading…
Reference in New Issue