//
//  AttributedStringConverter.swift
//  HTMLStreamer
//
//  Created by Shadowfacts on 11/24/23.
//

#if os(iOS) || os(visionOS)
import UIKit
#elseif os(macOS)
import AppKit
#endif

#if os(iOS) || os(visionOS)
private typealias PlatformFont = UIFont
#elseif os(macOS)
private typealias PlatformFont = NSFont
#endif

/// Converts an HTML string into an `NSAttributedString` by consuming tokens from a `Tokenizer`
/// and translating a fixed set of elements (links, emphasis, code, block quotes, lists, ...)
/// into string attributes.
///
/// The converter is a mutable value: `convert(html:)` resets all per-conversion state, so one
/// instance can be reused for several conversions and keeps its font cache between them.
///
/// NOTE(review): `Callbacks` is used below like a generic parameter and `Tokenizer` is used
/// without generic arguments, but neither declaration is visible in this text. Presumably the
/// angle-bracket clauses were lost; confirm against the original file.
public struct AttributedStringConverter {
    private let configuration: AttributedStringConverterConfiguration
    // Fonts already derived for a given trait combination.
    private var fontCache: [FontTrait: PlatformFont] = [:]

    private var tokenizer: Tokenizer!
    // The output being built; assigned at the start of every `convert(html:)` call.
    private var str: NSMutableAttributedString!

    // One entry per start tag seen, popped when an end tag is seen.
    private var actionStack: [ElementAction] = []
    // The styles of the currently open elements, in the order they were opened.
    private var styleStack: [Style] = []
    private var blockState = BlockState.unstarted
    // True from a start tag until the first character token that follows it.
    private var currentElementIsEmpty = true
    private var previouslyFinishedListItem = false
    // The current run of text w/o styles changing
    private var currentRun: String = ""

    /// Creates a converter that uses `DefaultCallbacks`.
    ///
    /// - Parameter configuration: The fonts, color and paragraph style applied to the output.
    public init(configuration: AttributedStringConverterConfiguration) where Callbacks == DefaultCallbacks {
        self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
    }

    /// Creates a converter.
    ///
    /// - Parameters:
    ///   - configuration: The fonts, color and paragraph style applied to the output.
    ///   - callbacks: The callbacks type. The value itself is unused; it only selects the type.
    public init(configuration: AttributedStringConverterConfiguration, callbacks _: Callbacks.Type = Callbacks.self) {
        self.configuration = configuration
    }

    /// Converts `html` into an attributed string.
    ///
    /// - Parameter html: The HTML source to convert.
    /// - Returns: The attributed string built from the text and supported elements of `html`.
    public mutating func convert(html: String) -> NSAttributedString {
        tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
        str = NSMutableAttributedString()

        // Reset everything left over from a previous conversion.
        actionStack = []
        styleStack = []
        blockState = .unstarted
        currentElementIsEmpty = true
        previouslyFinishedListItem = false
        currentRun = ""

        while let token = tokenizer.next() {
            switch token {
            case .character(let c):
                currentElementIsEmpty = false
                continueBlock()
                currentRun.unicodeScalars.append(c)
            case .characterSequence(let s):
                currentElementIsEmpty = false
                continueBlock()
                currentRun.append(s)
            case .comment:
                // ignored
                continue
            case .startTag(let name, let selfClosing, let attributes):
                currentElementIsEmpty = true
                let action = Callbacks.elementAction(name: name, attributes: attributes)
                // NOTE(review): an action is pushed for every start tag but only popped on an
                // end tag, so elements that never get an end tag (e.g. `<br>`) would presumably
                // leave their action on the stack. Confirm how Tokenizer reports such elements.
                actionStack.append(action)
                handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
            case .endTag(let name):
                handleEndTag(name)
                // An end tag without a matching start tag leaves nothing to pop; calling
                // removeLast() on the empty stack would trap, so only pop when there is an entry.
                if let action = actionStack.last {
                    // if we have a non-default action for the current element, the run finishes here
                    if action != .default {
                        finishRun()
                    }
                    actionStack.removeLast()
                }
            case .doctype:
                // ignored
                continue
            }
        }

        finishRun()

        return str
    }

    /// Updates the style stack, block state and current run for a start tag.
    private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
        if name == "br" {
            currentRun.append("\n")
            return
        }
        // self closing tags are ignored since they have no content
        guard !selfClosing else {
            return
        }

        switch name {
        case "a":
            // we need to always insert in attribute, because we need to always have one
            // to remove from the stack in handleEndTag
            // but we only need to finish the run if we have a URL, since otherwise
            // the final attribute run won't be affected
            let url = attributes.attributeValue(for: "href").flatMap(Callbacks.makeURL(string:))
            if url != nil {
                finishRun()
            }
            styleStack.append(.link(url))
        case "em", "i":
            finishRun()
            styleStack.append(.italic)
        case "strong", "b":
            finishRun()
            styleStack.append(.bold)
        case "del":
            finishRun()
            styleStack.append(.strikethrough)
        case "code":
            finishRun()
            styleStack.append(.monospace)
        case "pre":
            startBlockIfNecessary()
            finishRun()
            styleStack.append(.monospace)
        case "blockquote":
            startBlockIfNecessary()
            finishRun()
            styleStack.append(.blockquote)
        case "p":
            startBlockIfNecessary()
        case "ol":
            startBlockIfNecessary()
            finishRun()
            styleStack.append(.orderedList(nextElementOrdinal: 1))
        case "ul":
            startBlockIfNecessary()
            finishRun()
            styleStack.append(.unorderedList)
        case "li":
            if previouslyFinishedListItem {
                currentRun.append("\n")
            }
            let marker: String
            if case .orderedList(let nextElementOrdinal) = styleStack.last {
                marker = orderedTextList.marker(forItemNumber: nextElementOrdinal)
                // Advance the ordinal stored on the list so the next item gets the next number.
                styleStack[styleStack.count - 1] = .orderedList(nextElementOrdinal: nextElementOrdinal + 1)
            } else if case .unorderedList = styleStack.last {
                marker = unorderedTextList.marker(forItemNumber: 0)
            } else {
                // The innermost open style is not a list, so no marker is emitted.
                break
            }
            currentRun.append("\t\(marker)\t")
        default:
            break
        }
    }

    /// Updates the style stack, block state and current run for an end tag.
    private mutating func handleEndTag(_ name: String) {
        switch name {
        case "a":
            // Mirrors handleStartTag: the run only needs finishing if the link had a URL.
            if case .link(.some(_)) = lastStyle(.link) {
                finishRun()
            }
            removeLastStyle(.link)
        case "em", "i":
            finishRun()
            removeLastStyle(.italic)
        case "strong", "b":
            finishRun()
            removeLastStyle(.bold)
        case "del":
            finishRun()
            removeLastStyle(.strikethrough)
        case "code":
            finishRun()
            removeLastStyle(.monospace)
        case "pre":
            finishRun()
            removeLastStyle(.monospace)
            finishBlockElement()
        case "blockquote":
            finishRun()
            removeLastStyle(.blockquote)
            finishBlockElement()
        case "p":
            finishBlockElement()
        case "ol":
            finishRun()
            removeLastStyle(.orderedList)
            finishBlockElement()
            previouslyFinishedListItem = false
        case "ul":
            finishRun()
            removeLastStyle(.unorderedList)
            finishBlockElement()
            previouslyFinishedListItem = false
        case "li":
            finishRun()
            previouslyFinishedListItem = true
        default:
            break
        }
    }

    /// Records that a block element starts, appending a "\n\n" separator to the current run
    /// when content precedes it.
    private mutating func startBlockIfNecessary() {
        switch blockState {
        case .unstarted:
            blockState = .started(false)
        case .started:
            break
        case .ongoing:
            currentRun.append("\n\n")
            blockState = .started(true)
        case .finished(let nonEmpty):
            if nonEmpty {
                currentRun.append("\n\n")
            }
            blockState = .started(nonEmpty)
        }
    }

    /// Records that text is being added, appending a "\n\n" separator first when the text
    /// follows a finished, non-empty block.
    private mutating func continueBlock() {
        switch blockState {
        case .unstarted, .started(_):
            blockState = .ongoing
        case .ongoing:
            break
        case .finished(let nonEmpty):
            if nonEmpty {
                currentRun.append("\n\n")
            }
            blockState = .ongoing
        }
    }

    /// Records that a block element ended. If the block turned out to be empty, the "\n\n"
    /// separator that was appended when it started is taken back out.
    private mutating func finishBlockElement() {
        if blockState == .started(true) && currentElementIsEmpty {
            if currentRun.hasSuffix("\n\n") {
                currentRun.removeLast(2)
            } else if currentRun.isEmpty, str.mutableString.hasSuffix("\n\n") {
                // The separator has already been flushed into `str` by finishRun()
                // (e.g. `<p>a</p><blockquote></blockquote>`), so remove it from there.
                // Removing from the empty run instead would trap.
                str.deleteCharacters(in: NSRange(location: str.length - 2, length: 2))
            }
            // Otherwise the run ends in something other than the separator (e.g. a list
            // marker), which must not be truncated.
        }
        blockState = .finished(blockState == .ongoing)
    }

    /// Removes the last currently-open style of the given type, if there is one.
    ///
    /// We can't just remove the last style because we need to handle mis-nested tags.
    private mutating func removeLastStyle(_ type: Style.StyleType) {
        if let index = styleStack.lastIndex(where: { $0.type == type }) {
            styleStack.remove(at: index)
        }
    }

    /// Finds the last currently-open style of the given type.
    private func lastStyle(_ type: Style.StyleType) -> Style? {
        styleStack.last { $0.type == type }
    }

    // The configured paragraph style, indented by 32 points.
    private lazy var blockquoteParagraphStyle: NSParagraphStyle = {
        let style = configuration.paragraphStyle.mutableCopy() as! NSMutableParagraphStyle
        style.headIndent = 32
        style.firstLineHeadIndent = 32
        return style
    }()

    private lazy var listParagraphStyle: NSParagraphStyle = {
        let style = configuration.paragraphStyle.mutableCopy() as! NSMutableParagraphStyle
        // I don't like that I can't just use paragraphStyle.textLists, because it makes the list markers
        // not use the monospace digit font (it seems to just use whatever font attribute is set for the whole thing),
        // and it doesn't right align the list markers.
        // Unfortunately, doing it manually means the list markers are included in the selectable text.
        style.headIndent = 32
        style.firstLineHeadIndent = 0
        // Use 2 tab stops, one for the list marker, the second for the content.
        style.tabStops = [NSTextTab(textAlignment: .right, location: 28), NSTextTab(textAlignment: .natural, location: 32)]
        return style
    }()

    /// Appends the current run to the output with the attributes implied by the open styles,
    /// then clears the run.
    private mutating func finishRun() {
        if actionStack.contains(.skip) {
            // Text inside a skipped element is dropped.
            currentRun = ""
            return
        } else if case .append(let s) = actionStack.last {
            currentRun.append(s)
        } else if case .replace(let replacement) = actionStack.first(where: \.isReplace) {
            currentRun = replacement
        }

        guard !currentRun.isEmpty else {
            return
        }

        var attributes = [NSAttributedString.Key: Any]()
        var paragraphStyle = configuration.paragraphStyle
        var currentFontTraits: FontTrait = []
        // Later entries win where styles conflict (e.g. the paragraph style).
        for style in styleStack {
            switch style {
            case .bold:
                currentFontTraits.insert(.bold)
            case .italic:
                currentFontTraits.insert(.italic)
            case .monospace:
                currentFontTraits.insert(.monospace)
            case .link(let url):
                if let url {
                    attributes[.link] = url
                }
            case .strikethrough:
                attributes[.strikethroughStyle] = NSUnderlineStyle.single.rawValue
            case .blockquote:
                paragraphStyle = blockquoteParagraphStyle
                currentFontTraits.insert(.italic)
            case .orderedList, .unorderedList:
                paragraphStyle = listParagraphStyle
            }
        }

        attributes[.font] = getFont(traits: currentFontTraits)
        attributes[.foregroundColor] = configuration.color
        attributes[.paragraphStyle] = paragraphStyle

        str.append(NSAttributedString(string: currentRun, attributes: attributes))
        currentRun = ""
    }

    /// Returns the font for the given traits, deriving it from the configured fonts on first
    /// use and serving it from `fontCache` afterwards.
    private mutating func getFont(traits: FontTrait) -> PlatformFont? {
        if let cached = fontCache[traits] {
            return cached
        }

        let baseFont = traits.contains(.monospace) ? configuration.monospaceFont : configuration.font
        var descriptor = baseFont.fontDescriptor
        // If the requested variant is unavailable, fall through to the next best one.
        if traits.contains(.bold) && traits.contains(.italic),
           let boldItalic = descriptor.withSymbolicTraits([.traitBold, .traitItalic]) {
            descriptor = boldItalic
        } else if traits.contains(.bold),
                  let bold = descriptor.withSymbolicTraits(.traitBold) {
            descriptor = bold
        } else if traits.contains(.italic),
                  let italic = descriptor.withSymbolicTraits(.traitItalic) {
            descriptor = italic
        }
        // NOTE(review): size 0 is presumably meant to keep the size carried by the descriptor;
        // see the `init(descriptor:size:)` documentation.
        let font = PlatformFont(descriptor: descriptor, size: 0)
        fontCache[traits] = font
        return font
    }
}

/// The fonts, text color and base paragraph style used by `AttributedStringConverter`.
public struct AttributedStringConverterConfiguration {
    #if os(iOS) || os(visionOS)
    public var font: UIFont
    public var monospaceFont: UIFont
    public var color: UIColor
    #elseif os(macOS)
    public var font: NSFont
    public var monospaceFont: NSFont
    public var color: NSColor
    #endif
    public var paragraphStyle: NSParagraphStyle

    #if os(iOS) || os(visionOS)
    public init(font: UIFont, monospaceFont: UIFont, color: UIColor, paragraphStyle: NSParagraphStyle) {
        self.font = font
        self.monospaceFont = monospaceFont
        self.color = color
        self.paragraphStyle = paragraphStyle
    }
    #elseif os(macOS)
    public init(font: NSFont, monospaceFont: NSFont, color: NSColor, paragraphStyle: NSParagraphStyle) {
        self.font = font
        self.monospaceFont = monospaceFont
        self.color = color
        self.paragraphStyle = paragraphStyle
    }
    #endif
}

#if os(macOS)
// Shims so getFont(traits:) can use the same spelling on macOS as on iOS.
private extension NSFontDescriptor {
    func withSymbolicTraits(_ traits: SymbolicTraits) -> NSFontDescriptor? {
        // The explicit non-optional annotation selects AppKit's own method rather than
        // recursing into this one.
        let descriptor: NSFontDescriptor = self.withSymbolicTraits(traits)
        return descriptor
    }
}
private extension NSFontDescriptor.SymbolicTraits {
    static var traitBold: Self { .bold }
    static var traitItalic: Self { .italic }
}
#endif

// The font variations the converter distinguishes; used as the key of the font cache.
private struct FontTrait: OptionSet, Hashable {
    static let bold = FontTrait(rawValue: 1 << 0)
    static let italic = FontTrait(rawValue: 1 << 1)
    static let monospace = FontTrait(rawValue: 1 << 2)

    let rawValue: Int

    init(rawValue: Int) {
        self.rawValue = rawValue
    }
}

// A style contributed by an open element.
private enum Style {
    case bold
    case italic
    case monospace
    case link(URL?)
    case strikethrough
    case blockquote
    case orderedList(nextElementOrdinal: Int)
    case unorderedList

    // The style's kind with any associated value dropped, so styles can be matched by kind.
    var type: StyleType {
        switch self {
        case .bold:
            return .bold
        case .italic:
            return .italic
        case .monospace:
            return .monospace
        case .link(_):
            return .link
        case .strikethrough:
            return .strikethrough
        case .blockquote:
            return .blockquote
        case .orderedList(nextElementOrdinal: _):
            return .orderedList
        case .unorderedList:
            return .unorderedList
        }
    }

    enum StyleType: Equatable {
        case bold
        case italic
        case monospace
        case link
        case strikethrough
        case blockquote
        case orderedList
        case unorderedList
    }
}

// Tracks where the converter is relative to block-level elements.
enum BlockState: Equatable {
    // No block element or text has been seen yet.
    case unstarted
    // A block element was opened; the flag records whether a "\n\n" separator was appended for it.
    case started(Bool)
    // Text has been added since the last block boundary.
    case ongoing
    // A block element was closed; the flag records whether the state was `.ongoing` at that point.
    case finished(Bool)
}

extension Collection where Element == Attribute {
    /// Returns the value of the first attribute whose name equals `name`, or `nil` if there is none.
    public func attributeValue(for name: String) -> String? {
        first(where: { $0.name == name })?.value
    }
}

private let orderedTextList = OrderedNumberTextList(markerFormat: .decimal, options: 0)
private let unorderedTextList = NSTextList(markerFormat: .disc, options: 0)

// A text list whose markers are the superclass's markers followed by a period.
private class OrderedNumberTextList: NSTextList {
    override func marker(forItemNumber itemNumber: Int) -> String {
        "\(super.marker(forItemNumber: itemNumber))."
    }
}