HTMLStreamer/Sources/HTMLStreamer/AttributedStringConverter.s...

476 lines
16 KiB
Swift

//
// AttributedStringConverter.swift
// HTMLStreamer
//
// Created by Shadowfacts on 11/24/23.
//
#if os(iOS) || os(visionOS)
import UIKit
#elseif os(macOS)
import AppKit
#endif
// Platform font class used internally by the converter:
// `UIFont` on iOS/visionOS and `NSFont` on macOS.
#if os(iOS) || os(visionOS)
private typealias PlatformFont = UIFont
#elseif os(macOS)
private typealias PlatformFont = NSFont
#endif
/// Converts an HTML string into an `NSAttributedString` by tokenizing it and applying
/// fonts, colors and paragraph styles from an `AttributedStringConverterConfiguration`.
///
/// `Callbacks` decides, per element, which `ElementAction` applies and how `href` strings
/// are turned into URLs.
public class AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
// Fonts, text color and base paragraph style applied to the output.
private let configuration: AttributedStringConverterConfiguration
// Fonts already built by `getFont(traits:)`, keyed by the requested trait set.
private var fontCache: [FontTrait: PlatformFont] = [:]
// Token source for the HTML being converted; recreated by every `convert(html:)` call.
private var tokenizer: Tokenizer<String.UnicodeScalarView.Iterator>!
// Output under construction; recreated by every `convert(html:)` call.
private var str: NSMutableAttributedString!
// Element actions returned by `Callbacks.elementAction`: `convert(html:)` pushes one per
// start tag and pops the top one per end tag (when the stack is not empty).
private var actionStack: [ElementAction] = [] {
didSet {
// Recompute the cached flag on every change so that the per-character path in
// `convert(html:)` does not have to scan the stack.
hasSkipOrReplaceElementAction = actionStack.contains(where: {
switch $0 {
case .skip, .replace(_):
true
default:
false
}
})
}
}
// Whether any action on `actionStack` is `.skip` or `.replace`. While this is true,
// `convert(html:)` does not add text characters to `currentRun`.
private var hasSkipOrReplaceElementAction = false
// Styles opened by start tags that have not been closed yet, in the order they were opened.
private var styleStack: [Style] = []
// Placeholder with empty separators and no-op callbacks; `convert(html:)` replaces it with
// an instance whose callbacks write into this converter.
private var blockStateMachine = BlockStateMachine(blockBreak: "", lineBreak: "", listIndentForContentOutsideItem: "", append: { _ in }, removeChar: {})
// Set to true at each start tag and to false when text is seen.
// NOTE(review): written but not read by the code visible here — confirm it is still needed.
private var currentElementIsEmpty = true
// Set to true when an `li` ends and back to false when an `ol`/`ul` ends.
// NOTE(review): written but not read by the code visible here — confirm it is still needed.
private var previouslyFinishedListItem = false
// The current run of text w/o styles changing
private var currentRun: String = ""
/// Creates a converter that uses `DefaultCallbacks`.
/// - Parameter configuration: Fonts, color and paragraph style applied to the output.
public convenience init(configuration: AttributedStringConverterConfiguration) where Callbacks == DefaultCallbacks {
self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
}
/// Creates a converter with the given configuration.
/// - Parameters:
///   - configuration: Fonts, color and paragraph style applied to the output.
///   - callbacks: The callbacks type. The argument value itself is ignored; it only lets the
///     caller spell out the generic parameter.
public init(configuration: AttributedStringConverterConfiguration, callbacks _: Callbacks.Type = Callbacks.self) {
self.configuration = configuration
}
/// Converts an HTML string into an attributed string.
///
/// All per-conversion state is reset first, so the same converter can be used for several
/// conversions one after another.
///
/// - Parameter html: The HTML source to convert.
/// - Returns: The attributed string built from the tokens of `html`.
public func convert(html: String) -> NSAttributedString {
    // Start from a clean slate.
    tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
    str = NSMutableAttributedString()
    actionStack = []
    styleStack = []
    blockStateMachine = BlockStateMachine(
        blockBreak: "\n\n",
        lineBreak: "\n",
        listIndentForContentOutsideItem: "\t\t",
        append: { [unowned self] text in
            self.append(text)
        },
        removeChar: { [unowned self] in
            self.removeChar()
        }
    )
    currentElementIsEmpty = true
    previouslyFinishedListItem = false
    currentRun = ""

    // The block state machine is consulted for every scalar, but the scalar is only added to
    // the current run when the state machine accepts it and no enclosing element is being
    // skipped or replaced.
    func consume(_ scalar: Unicode.Scalar) {
        let accepted = blockStateMachine.continueBlock(char: scalar)
        if accepted && !hasSkipOrReplaceElementAction {
            currentRun.unicodeScalars.append(scalar)
        }
    }

    while let token = tokenizer.next() {
        switch token {
        case .comment, .doctype:
            // Neither contributes anything to the output.
            continue
        case .character(let scalar):
            currentElementIsEmpty = false
            consume(scalar)
        case .characterSequence(let text):
            currentElementIsEmpty = false
            for scalar in text.unicodeScalars {
                consume(scalar)
            }
        case .startTag(let name, let selfClosing, let attributes):
            currentElementIsEmpty = true
            let action = Callbacks.elementAction(name: name, attributes: attributes)
            // A non-default action changes how the following text is treated, so the text
            // collected so far has to be committed first.
            if action != .default {
                finishRun()
            }
            // NOTE(review): an action is pushed for every start tag, including `br` and
            // self-closing tags. If the tokenizer emits no matching end tag for those, their
            // entries are never popped by their own element — confirm against Tokenizer.
            actionStack.append(action)
            handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
        case .endTag(let name):
            handleEndTag(name)
            // If the innermost action is non-default, the run finishes here. The run must be
            // finished before popping, because finishRun() looks at the top of the stack.
            if let innermost = actionStack.last {
                if innermost != .default {
                    finishRun()
                }
                actionStack.removeLast()
            }
        }
    }

    blockStateMachine.endBlocks()
    finishRun()
    return str
}
/// Applies the effect of an opening tag: notifies the block state machine for block-level
/// elements and pushes the matching style for elements that change text appearance.
///
/// - Parameters:
///   - name: The tag name.
///   - selfClosing: Whether the tag was written in self-closing form.
///   - attributes: The attributes of the tag; only `href` on `a` is read here.
private func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
    // Commits the text collected so far and opens `style` for the text that follows.
    func openStyle(_ style: Style) {
        finishRun()
        styleStack.append(style)
    }

    // `br` is handled before the self-closing check, so both `<br>` and `<br/>` break the line.
    if name == "br" {
        blockStateMachine.breakTag()
        return
    }
    // Any other self-closing tag has no content, so there is nothing to style.
    if selfClosing {
        return
    }

    switch name {
    case "em", "i":
        openStyle(.italic)
    case "strong", "b":
        openStyle(.bold)
    case "del":
        openStyle(.strikethrough)
    case "code":
        openStyle(.monospace)
    case "a":
        // A link style is always pushed, so that handleEndTag always has one to remove.
        // The run only needs to be finished when there is a URL; without one the
        // attributes of the surrounding text do not change.
        let destination = attributes.attributeValue(for: "href").flatMap(Callbacks.makeURL(string:))
        if destination != nil {
            finishRun()
        }
        styleStack.append(.link(destination))
    case "p":
        blockStateMachine.startOrEndBlock()
    case "pre":
        blockStateMachine.startOrEndBlock()
        blockStateMachine.startPreformatted()
        openStyle(.monospace)
    case "blockquote":
        blockStateMachine.startOrEndBlock()
        openStyle(.blockquote)
    case "ol":
        blockStateMachine.startOrEndBlock()
        openStyle(.orderedList(nextElementOrdinal: 1))
    case "ul":
        blockStateMachine.startOrEndBlock()
        openStyle(.unorderedList)
    case "li":
        let marker: String
        switch styleStack.last {
        case .orderedList(let ordinal)?:
            marker = orderedTextList.marker(forItemNumber: ordinal)
            // Store the ordinal for the next item of the same list.
            styleStack[styleStack.count - 1] = .orderedList(nextElementOrdinal: ordinal + 1)
        case .unorderedList?:
            marker = unorderedTextList.marker(forItemNumber: 0)
        default:
            // The innermost open style is not a list: the item gets no marker.
            return
        }
        blockStateMachine.startListItem()
        // Tab, marker, tab: one tab stop for the marker and one for the item content.
        currentRun.append("\t\(marker)\t")
    default:
        break
    }
}
/// Applies the effect of a closing tag: commits the text collected so far, removes the style
/// the element had opened and notifies the block state machine for block-level elements.
///
/// - Parameter name: The tag name.
private func handleEndTag(_ name: String) {
    // Commits the text collected so far and closes the most recently opened style of `type`.
    func closeStyle(_ type: Style.StyleType) {
        finishRun()
        removeLastStyle(type)
    }

    switch name {
    case "em", "i":
        closeStyle(.italic)
    case "strong", "b":
        closeStyle(.bold)
    case "del":
        closeStyle(.strikethrough)
    case "code":
        closeStyle(.monospace)
    case "a":
        // Only a link that carried a URL changed the text attributes, so only then does the
        // run have to end here. The style itself is removed either way.
        if let linkStyle = lastStyle(.link), case .link(let url) = linkStyle, url != nil {
            finishRun()
        }
        removeLastStyle(.link)
    case "p":
        blockStateMachine.startOrEndBlock()
    case "pre":
        closeStyle(.monospace)
        blockStateMachine.startOrEndBlock()
        blockStateMachine.endPreformatted()
    case "blockquote":
        closeStyle(.blockquote)
        blockStateMachine.startOrEndBlock()
    case "ol", "ul":
        let listType: Style.StyleType = name == "ol" ? .orderedList : .unorderedList
        closeStyle(listType)
        blockStateMachine.startOrEndBlock()
        previouslyFinishedListItem = false
    case "li":
        finishRun()
        previouslyFinishedListItem = true
        blockStateMachine.endListItem()
    default:
        break
    }
}
/// Two newlines; the same value `convert(html:)` passes to the block state machine as `blockBreak`.
var blockBreak: String {
    return "\n\n"
}

/// One newline; the same value `convert(html:)` passes to the block state machine as `lineBreak`.
var lineBreak: String {
    return "\n"
}

/// Two tabs; the same value `convert(html:)` passes to the block state machine as
/// `listIndentForContentOutsideItem`.
var listIndentForContentOutsideItem: String {
    return "\t\t"
}
/// Adds `s` to the end of the pending run. Used as the `append` callback of the block state machine.
/// - Parameter s: The text to add.
func append(_ s: String) {
    currentRun += s
}
/// Removes the most recently emitted character. Used as the `removeChar` callback of the
/// block state machine.
///
/// The character is taken from the pending run when that has content, and otherwise from the
/// end of the already committed attributed string. When nothing has been emitted at all, the
/// call does nothing.
func removeChar() {
    guard currentRun.isEmpty else {
        currentRun.removeLast()
        return
    }
    // With an empty string, `length - 1` is -1 and deleting that range would raise an
    // NSRangeException, so there is nothing to remove in that case.
    guard str.length > 0 else {
        return
    }
    str.deleteCharacters(in: NSRange(location: str.length - 1, length: 1))
}
// Removes the last currently-open style of the given type, if there is one.
// We can't just pop the top of the stack because we need to handle mis-nested tags.
private func removeLastStyle(_ type: Style.StyleType) {
    // lastIndex(where:) searches from the end and returns nil for an empty stack or when
    // nothing matches, so no out-of-range index is ever formed.
    guard let index = styleStack.lastIndex(where: { $0.type == type }) else {
        return
    }
    styleStack.remove(at: index)
}
/// Returns the most recently opened style of the given type that is still open, or `nil` when
/// there is none.
private func lastStyle(_ type: Style.StyleType) -> Style? {
    return styleStack.last(where: { candidate in
        candidate.type == type
    })
}
// Paragraph style for text inside a blockquote: a copy of the configured paragraph style
// with every line (first and following) indented by 32.
private lazy var blockquoteParagraphStyle: NSParagraphStyle = {
let style = configuration.paragraphStyle.mutableCopy() as! NSMutableParagraphStyle
style.headIndent = 32
style.firstLineHeadIndent = 32
return style
}()
// Paragraph style for text inside a list: a copy of the configured paragraph style in which
// the first line starts at 0 (so the marker can sit in the margin) and following lines are
// indented by 32.
private lazy var listParagraphStyle: NSParagraphStyle = {
let style = configuration.paragraphStyle.mutableCopy() as! NSMutableParagraphStyle
// I don't like that I can't just use paragraphStyle.textLists, because it makes the list markers
// not use the monospace digit font (it seems to just use whatever font attribute is set for the whole thing),
// and it doesn't right align the list markers.
// Unfortunately, doing it manually means the list markers are included in the selectable text.
style.headIndent = 32
style.firstLineHeadIndent = 0
// Use 2 tab stops, one for the list marker, the second for the content.
style.tabStops = [NSTextTab(textAlignment: .right, location: 28), NSTextTab(textAlignment: .natural, location: 32)]
return style
}()
/// Commits the pending run of text to the output, using attributes derived from the styles
/// that are currently open, and then clears the pending run.
///
/// If the innermost element action is `.append` or `.replace`, its payload is added to the
/// end of the pending run first. Nothing is committed when the run is empty after that.
private func finishRun() {
    switch actionStack.last {
    case .append(let suffix)?:
        currentRun.append(suffix)
    case .replace(let replacement)?:
        currentRun.append(replacement)
    default:
        break
    }

    if currentRun.isEmpty {
        return
    }

    var traits: FontTrait = []
    var paragraph = configuration.paragraphStyle
    var runAttributes: [NSAttributedString.Key: Any] = [:]

    // Walk the open styles in the order they were opened; for the paragraph style the
    // style opened last wins.
    for openStyle in styleStack {
        switch openStyle {
        case .bold:
            traits.insert(.bold)
        case .italic:
            traits.insert(.italic)
        case .monospace:
            traits.insert(.monospace)
        case .strikethrough:
            runAttributes[.strikethroughStyle] = NSUnderlineStyle.single.rawValue
        case .link(let url):
            // A link without a URL leaves the text untouched.
            if let url {
                runAttributes[.link] = url
            }
        case .blockquote:
            // Blockquotes are indented and rendered in italics.
            paragraph = blockquoteParagraphStyle
            traits.insert(.italic)
        case .orderedList, .unorderedList:
            paragraph = listParagraphStyle
        }
    }

    runAttributes[.paragraphStyle] = paragraph
    runAttributes[.foregroundColor] = configuration.color
    runAttributes[.font] = getFont(traits: traits)

    str.append(NSAttributedString(string: currentRun, attributes: runAttributes))
    currentRun = ""
}
/// Returns the font for the given combination of traits, building and caching it on first use.
///
/// The base font is the configured monospace font when `.monospace` is requested and the
/// regular configured font otherwise. Bold and italic are applied through the font descriptor.
/// When a descriptor for bold + italic cannot be made, bold alone is tried, then italic alone,
/// and finally the base descriptor is used unchanged.
///
/// - Parameter traits: The requested traits.
/// - Returns: The matching font, or `nil` if the platform cannot create one from the descriptor.
private func getFont(traits: FontTrait) -> PlatformFont? {
    if let hit = fontCache[traits] {
        return hit
    }

    let wantsBold = traits.contains(.bold)
    let wantsItalic = traits.contains(.italic)

    let base: PlatformFont
    if traits.contains(.monospace) {
        base = configuration.monospaceFont
    } else {
        base = configuration.font
    }

    var descriptor = base.fontDescriptor
    if wantsBold && wantsItalic,
       let both = descriptor.withSymbolicTraits([.traitBold, .traitItalic]) {
        descriptor = both
    } else if wantsBold,
              let boldOnly = descriptor.withSymbolicTraits(.traitBold) {
        descriptor = boldOnly
    } else if wantsItalic,
              let italicOnly = descriptor.withSymbolicTraits(.traitItalic) {
        descriptor = italicOnly
    }

    // Size 0: per the UIFont/NSFont documentation the point size then comes from the descriptor.
    let resolved = PlatformFont(descriptor: descriptor, size: 0)
    fontCache[traits] = resolved
    return resolved
}
}
/// Appearance settings used by `AttributedStringConverter`.
///
/// The font and color properties use the UIKit types on iOS/visionOS and the AppKit types on macOS.
public struct AttributedStringConverterConfiguration {
#if os(iOS) || os(visionOS)
/// Base font for regular text.
public var font: UIFont
/// Base font for text styled as monospace (`code` and `pre` elements).
public var monospaceFont: UIFont
/// Foreground color applied to all text.
public var color: UIColor
#elseif os(macOS)
/// Base font for regular text.
public var font: NSFont
/// Base font for text styled as monospace (`code` and `pre` elements).
public var monospaceFont: NSFont
/// Foreground color applied to all text.
public var color: NSColor
#endif
/// Paragraph style for regular text; also the starting point for the blockquote and list paragraph styles.
public var paragraphStyle: NSParagraphStyle
#if os(iOS) || os(visionOS)
/// Creates a configuration from its four settings.
public init(font: UIFont, monospaceFont: UIFont, color: UIColor, paragraphStyle: NSParagraphStyle) {
self.font = font
self.monospaceFont = monospaceFont
self.color = color
self.paragraphStyle = paragraphStyle
}
#elseif os(macOS)
/// Creates a configuration from its four settings.
public init(font: NSFont, monospaceFont: NSFont, color: NSColor, paragraphStyle: NSParagraphStyle) {
self.font = font
self.monospaceFont = monospaceFont
self.color = color
self.paragraphStyle = paragraphStyle
}
#endif
}
// macOS-only shims that let `getFont(traits:)` use the same spelling on every platform:
// an optional-returning `withSymbolicTraits` and the `traitBold` / `traitItalic` names.
#if os(macOS)
private extension NSFontDescriptor {
// Optional-returning wrapper, so callers can use it in `if let` like on the other platforms.
func withSymbolicTraits(_ traits: SymbolicTraits) -> NSFontDescriptor? {
// The explicit non-optional type annotation is what selects the other `withSymbolicTraits`
// overload (presumably AppKit's own) instead of calling this wrapper recursively.
let descriptor: NSFontDescriptor = self.withSymbolicTraits(traits)
return descriptor
}
}
private extension NSFontDescriptor.SymbolicTraits {
// Aliases for `.bold` and `.italic` under the `trait…` names used in `getFont(traits:)`.
static var traitBold: Self { .bold }
static var traitItalic: Self { .italic }
}
#endif
/// The font variations the converter can request. A set of these is the key under which
/// built fonts are cached.
private struct FontTrait: OptionSet, Hashable {
    /// Bit mask of the contained traits.
    let rawValue: Int

    init(rawValue: Int) {
        self.rawValue = rawValue
    }

    static let bold = FontTrait(rawValue: 0b001)
    static let italic = FontTrait(rawValue: 0b010)
    static let monospace = FontTrait(rawValue: 0b100)
}
/// A style opened by an element that has not been closed yet.
private enum Style {
    case bold
    case italic
    case monospace
    /// A link; the URL is `nil` when the element had no usable `href`.
    case link(URL?)
    case strikethrough
    case blockquote
    /// An ordered list, together with the number the next item will get.
    case orderedList(nextElementOrdinal: Int)
    case unorderedList

    /// The kind of a `Style` without its payload, so that styles can be compared by kind.
    enum StyleType: Equatable {
        case bold
        case italic
        case monospace
        case link
        case strikethrough
        case blockquote
        case orderedList
        case unorderedList
    }

    /// The kind of this style, ignoring any associated value.
    var type: StyleType {
        switch self {
        case .bold: return .bold
        case .italic: return .italic
        case .monospace: return .monospace
        case .link: return .link
        case .strikethrough: return .strikethrough
        case .blockquote: return .blockquote
        case .orderedList: return .orderedList
        case .unorderedList: return .unorderedList
        }
    }
}
extension Collection where Element == Attribute {
    /// Returns the value of the first attribute whose name equals `name`, or `nil` when no
    /// attribute has that name.
    /// - Parameter name: The attribute name to look for (compared with `==`).
    public func attributeValue(for name: String) -> String? {
        for attribute in self where attribute.name == name {
            return attribute.value
        }
        return nil
    }
}
// Shared text lists, used only to produce the marker strings for list items:
// decimal numbers followed by a period for `ol`, discs for `ul`.
private let orderedTextList = OrderedNumberTextList(markerFormat: .decimal, options: 0)
private let unorderedTextList = NSTextList(markerFormat: .disc, options: 0)
/// A text list whose markers are the superclass's markers followed by a period.
private class OrderedNumberTextList: NSTextList {
    override func marker(forItemNumber itemNumber: Int) -> String {
        let plainMarker = super.marker(forItemNumber: itemNumber)
        return plainMarker + "."
    }
}