Handle block elements better (again)

This commit is contained in:
Shadowfacts 2024-01-17 15:28:06 -05:00
parent e709543568
commit aa8f99bb96
3 changed files with 108 additions and 28 deletions

View File

@ -26,7 +26,7 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
private var actionStack: [ElementAction] = []
private var styleStack: [Style] = []
private var previouslyFinishedBlockElement = false
private var blockState = BlockState.unstarted
private var currentElementIsEmpty = true
private var previouslyFinishedListItem = false
// The current run of text w/o styles changing
@ -46,26 +46,26 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
actionStack = []
styleStack = []
previouslyFinishedBlockElement = false
blockState = .unstarted
currentElementIsEmpty = true
previouslyFinishedListItem = false
currentRun = ""
while let token = tokenizer.next() {
switch token {
case .character(let c):
currentElementIsEmpty = false
previouslyFinishedBlockElement = false
continueBlock()
currentRun.unicodeScalars.append(c)
case .characterSequence(let s):
currentElementIsEmpty = false
previouslyFinishedBlockElement = false
continueBlock()
currentRun.append(s)
case .comment:
// ignored
continue
case .startTag(let name, let selfClosing, let attributes):
currentElementIsEmpty = true
previouslyFinishedBlockElement = false
let action = Callbacks.elementAction(name: name, attributes: attributes)
actionStack.append(action)
handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
@ -82,9 +82,6 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
}
}
if previouslyFinishedBlockElement {
currentRun.removeLast(2)
}
finishRun()
return str
@ -124,15 +121,21 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
finishRun()
styleStack.append(.monospace)
case "pre":
startBlockIfNecessary()
finishRun()
styleStack.append(.monospace)
case "blockquote":
startBlockIfNecessary()
finishRun()
styleStack.append(.blockquote)
case "p":
startBlockIfNecessary()
case "ol":
startBlockIfNecessary()
finishRun()
styleStack.append(.orderedList(nextElementOrdinal: 1))
case "ul":
startBlockIfNecessary()
finishRun()
styleStack.append(.unorderedList)
case "li":
@ -201,13 +204,44 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
}
}
private mutating func finishBlockElement() {
if !currentElementIsEmpty {
previouslyFinishedBlockElement = true
/// Records that a block-level element is opening and, when earlier content
/// exists, appends the blank-line separator ("\n\n") to the current run.
///
/// Does nothing if a block has already been opened and no text has arrived
/// since, so directly nested block elements do not stack separators.
private mutating func startBlockIfNecessary() {
    let owesSeparator: Bool
    switch blockState {
    case .started:
        // A block was already opened without any text since; leave the state alone.
        return
    case .unstarted:
        // Nothing has been emitted yet, so there is nothing to separate from.
        owesSeparator = false
    case .ongoing:
        // Text is currently being emitted; the new block must be set apart from it.
        owesSeparator = true
    case .finished(let hadContent):
        // Only separate from the previous block if it actually produced text.
        owesSeparator = hadContent
    }
    if owesSeparator {
        currentRun.append("\n\n")
    }
    // Remember whether a separator was added so it can be taken back if this
    // block turns out to be empty.
    blockState = .started(owesSeparator)
}
/// Notes that text content is being emitted and moves the state to `.ongoing`.
///
/// If the previous block finished with content, the blank-line separator
/// ("\n\n") is appended first so the new text does not run into it.
private mutating func continueBlock() {
    if blockState == .finished(true) {
        currentRun.append("\n\n")
    }
    // Every state ends up as `.ongoing` once text arrives.
    blockState = .ongoing
}
/// Records that a block-level element has closed.
///
/// If the block was opened with a separator (`.started(true)`) and the element
/// turned out to be empty, the separator is taken back out of the current run.
/// The new state is `.finished(true)` when the block was `.ongoing` (text was
/// emitted), otherwise `.finished(false)`.
private mutating func finishBlockElement() {
    let separator = "\n\n"
    // Capture this before touching anything so the result does not depend on statement order.
    let hadContent = blockState == .ongoing
    // Only retract the separator if it is still at the end of the run. Start-tag
    // handling may call finishRun() right after startBlockIfNecessary(), so the
    // separator may no longer be in `currentRun`; an unconditional removeLast(2)
    // would then trap on a short string or delete unrelated characters.
    // NOTE(review): when the separator has already left `currentRun` it is not
    // retracted here — confirm whether that case needs handling elsewhere.
    if blockState == .started(true), currentElementIsEmpty, currentRun.hasSuffix(separator) {
        currentRun.removeLast(separator.count)
    }
    blockState = .finished(hadContent)
}
// Finds the last currently-open style of the given type.
// We can't just use the last one because we need to handle mis-nested tags.
private mutating func removeLastStyle(_ type: Style.StyleType) {
@ -413,6 +447,13 @@ private enum Style {
}
}
/// Tracks where the converter is relative to block-level elements, so that
/// separators between blocks are only emitted when there is content on both sides.
enum BlockState: Equatable {
/// Initial state at the beginning of a conversion; no block or text seen yet.
case unstarted
/// A block element has been opened and no text has been emitted since.
/// The payload is `true` when a separator was appended to the current run on opening.
case started(Bool)
/// Text content is being emitted.
case ongoing
/// A block element has been closed.
/// The payload is `true` when the state was `.ongoing` at the time it closed.
case finished(Bool)
}
extension Collection where Element == Attribute {
public func attributeValue(for name: String) -> String? {
first(where: { $0.name == name })?.value

View File

@ -15,7 +15,7 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
private var str: String!
private var actionStack: [ElementAction] = []
private var previouslyFinishedBlockElement = false
private var blockState = BlockState.unstarted
private var currentElementIsEmpty = true
private var currentRun = ""
@ -31,7 +31,7 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
str = ""
previouslyFinishedBlockElement = false
blockState = .unstarted
currentElementIsEmpty = true
currentRun = ""
@ -39,15 +39,14 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
switch token {
case .character(let scalar):
currentElementIsEmpty = false
previouslyFinishedBlockElement = false
continueBlock()
currentRun.unicodeScalars.append(scalar)
case .characterSequence(let string):
currentElementIsEmpty = false
previouslyFinishedBlockElement = false
continueBlock()
currentRun.append(string)
case .startTag(let name, let selfClosing, let attributes):
currentElementIsEmpty = true
previouslyFinishedBlockElement = false
let action = Callbacks.elementAction(name: name, attributes: attributes)
actionStack.append(action)
handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
@ -62,13 +61,6 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
}
}
if previouslyFinishedBlockElement {
if configuration.insertNewlines {
currentRun.removeLast(2)
} else {
currentRun.removeLast(1)
}
}
finishRun()
return str
@ -82,6 +74,8 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
} else {
currentRun.append(" ")
}
case "pre", "blockquote", "p", "ol", "ul":
startBlockIfNecessary()
default:
break
}
@ -90,24 +84,67 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
/// Handles an end tag by name.
///
/// For the block-level tags listed below, it calls `finishRun()` and
/// `finishBlockElement()`; every other tag is ignored here.
///
/// - Parameter name: The tag name of the element being closed.
private mutating func handleEndTag(_ name: String) {
switch name {
case "pre", "blockquote", "p", "ol", "ul":
// NOTE(review): finishRun() is invoked both before and after
// finishBlockElement(); confirm that both calls are intended.
finishRun()
finishBlockElement()
finishRun()
default:
break
}
}
private mutating func finishBlockElement() {
if !currentElementIsEmpty {
previouslyFinishedBlockElement = true
/// Records that a block-level element is opening and, when earlier content
/// exists, appends a separator to the current run: "\n\n" when
/// `configuration.insertNewlines` is set, otherwise a single space.
///
/// Does nothing if a block has already been opened and no text has arrived since.
private mutating func startBlockIfNecessary() {
    // An already-open block with no text since needs no further bookkeeping.
    if case .started = blockState {
        return
    }

    // A separator is owed only when there is earlier content to separate from.
    let owesSeparator: Bool
    switch blockState {
    case .ongoing:
        owesSeparator = true
    case .finished(let hadContent):
        owesSeparator = hadContent
    case .unstarted, .started:
        owesSeparator = false
    }

    if owesSeparator {
        currentRun.append(configuration.insertNewlines ? "\n\n" : " ")
    }
    // Remember whether a separator was added so an empty block can retract it.
    blockState = .started(owesSeparator)
}
/// Notes that text content is being emitted and moves the state to `.ongoing`.
///
/// If the previous block finished with content, a separator is appended first:
/// "\n\n" when `configuration.insertNewlines` is set, otherwise a single space.
private mutating func continueBlock() {
    if case .finished(let hadContent) = blockState, hadContent {
        let separator = configuration.insertNewlines ? "\n\n" : " "
        currentRun.append(separator)
    }
    // Regardless of the previous state, text is now being emitted.
    blockState = .ongoing
}
/// Records that a block-level element has closed.
///
/// If the block was opened with a separator (`.started(true)`) and the element
/// turned out to be empty, the separator is taken back out of the current run.
/// The separator is "\n\n" when `configuration.insertNewlines` is set, otherwise
/// a single space. The new state is `.finished(true)` when the block was
/// `.ongoing` (text was emitted), otherwise `.finished(false)`.
private mutating func finishBlockElement() {
    // Capture this before touching anything so the result does not depend on statement order.
    let hadContent = blockState == .ongoing
    if blockState == .started(true) && currentElementIsEmpty {
        let separator = configuration.insertNewlines ? "\n\n" : " "
        // Only retract the separator if it is really the tail of the run. An
        // unconditional removeLast would trap on a shorter string, or eat
        // unrelated characters, if the run was flushed or changed since the
        // block was opened.
        if currentRun.hasSuffix(separator) {
            currentRun.removeLast(separator.count)
        }
    }
    blockState = .finished(hadContent)
}
private mutating func finishRun() {
if actionStack.contains(.skip) {
currentRun = ""

View File

@ -319,7 +319,7 @@ final class AttributedStringConverterTests: XCTestCase {
XCTAssertEqual(convert("<p></p><blockquote><span>inside<br>quote</span></blockquote><span>after</span><p></p>"), result)
}
func testParagraphFollowedByList() {
func testFollowedByList() {
let result = NSMutableAttributedString()
result.append(NSAttributedString(string: "a\n\n", attributes: [
.font: font,
@ -332,6 +332,8 @@ final class AttributedStringConverterTests: XCTestCase {
.foregroundColor: color,
]))
XCTAssertEqual(convert("<p>a</p><ol><li>b</li><li>c</li></ol>"), result)
XCTAssertEqual(convert("<span>a</span><ol><li>b</li><li>c</li></ol>"), result)
XCTAssertEqual(convert("a<ol><li>b</li><li>c</li></ol>"), result)
}
}