Handle block elements better (again)

This commit is contained in:
Shadowfacts 2024-01-17 15:28:06 -05:00
parent e709543568
commit aa8f99bb96
3 changed files with 108 additions and 28 deletions

View File

@ -26,7 +26,7 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
private var actionStack: [ElementAction] = [] private var actionStack: [ElementAction] = []
private var styleStack: [Style] = [] private var styleStack: [Style] = []
private var previouslyFinishedBlockElement = false private var blockState = BlockState.unstarted
private var currentElementIsEmpty = true private var currentElementIsEmpty = true
private var previouslyFinishedListItem = false private var previouslyFinishedListItem = false
// The current run of text w/o styles changing // The current run of text w/o styles changing
@ -46,26 +46,26 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
actionStack = [] actionStack = []
styleStack = [] styleStack = []
previouslyFinishedBlockElement = false blockState = .unstarted
currentElementIsEmpty = true currentElementIsEmpty = true
previouslyFinishedListItem = false
currentRun = "" currentRun = ""
while let token = tokenizer.next() { while let token = tokenizer.next() {
switch token { switch token {
case .character(let c): case .character(let c):
currentElementIsEmpty = false currentElementIsEmpty = false
previouslyFinishedBlockElement = false continueBlock()
currentRun.unicodeScalars.append(c) currentRun.unicodeScalars.append(c)
case .characterSequence(let s): case .characterSequence(let s):
currentElementIsEmpty = false currentElementIsEmpty = false
previouslyFinishedBlockElement = false continueBlock()
currentRun.append(s) currentRun.append(s)
case .comment: case .comment:
// ignored // ignored
continue continue
case .startTag(let name, let selfClosing, let attributes): case .startTag(let name, let selfClosing, let attributes):
currentElementIsEmpty = true currentElementIsEmpty = true
previouslyFinishedBlockElement = false
let action = Callbacks.elementAction(name: name, attributes: attributes) let action = Callbacks.elementAction(name: name, attributes: attributes)
actionStack.append(action) actionStack.append(action)
handleStartTag(name, selfClosing: selfClosing, attributes: attributes) handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
@ -82,9 +82,6 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
} }
} }
if previouslyFinishedBlockElement {
currentRun.removeLast(2)
}
finishRun() finishRun()
return str return str
@ -124,15 +121,21 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
finishRun() finishRun()
styleStack.append(.monospace) styleStack.append(.monospace)
case "pre": case "pre":
startBlockIfNecessary()
finishRun() finishRun()
styleStack.append(.monospace) styleStack.append(.monospace)
case "blockquote": case "blockquote":
startBlockIfNecessary()
finishRun() finishRun()
styleStack.append(.blockquote) styleStack.append(.blockquote)
case "p":
startBlockIfNecessary()
case "ol": case "ol":
startBlockIfNecessary()
finishRun() finishRun()
styleStack.append(.orderedList(nextElementOrdinal: 1)) styleStack.append(.orderedList(nextElementOrdinal: 1))
case "ul": case "ul":
startBlockIfNecessary()
finishRun() finishRun()
styleStack.append(.unorderedList) styleStack.append(.unorderedList)
case "li": case "li":
@ -201,13 +204,44 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
} }
} }
// Two-column diff rendering: where a line holds two statements, the left one is the removed
// code and the right one is the added code.
// Left column: the previous finishBlockElement(), which appended "\n\n" after every non-empty
// block and set previouslyFinishedBlockElement.
// Right column: startBlockIfNecessary(), which writes the "\n\n" separator when a block opens:
//   - .unstarted: no separator is written                        -> .started(false)
//   - .started:   state is left untouched
//   - .ongoing:   text precedes this block, so separate from it  -> .started(true)
//   - .finished(nonEmpty): separate only if the flag is true; the
//     flag is carried over                                       -> .started(nonEmpty)
private mutating func finishBlockElement() { private mutating func startBlockIfNecessary() {
if !currentElementIsEmpty { switch blockState {
previouslyFinishedBlockElement = true case .unstarted:
blockState = .started(false)
case .started:
break
case .ongoing:
currentRun.append("\n\n") currentRun.append("\n\n")
blockState = .started(true)
case .finished(let nonEmpty):
if nonEmpty {
currentRun.append("\n\n")
}
blockState = .started(nonEmpty)
} }
} }
/// Called when character data arrives, before the caller appends it to `currentRun`.
///
/// If the previous block finished with content (`.finished(true)`), the "\n\n" separator
/// is written first. Whatever the prior state, the converter ends up in `.ongoing`.
private mutating func continueBlock() {
    // Only a finished block that actually had content owes a separator.
    if case .finished(true) = blockState {
        currentRun.append("\n\n")
    }
    blockState = .ongoing
}
/// Closes the current block element and records whether the next block or text run
/// still owes a "\n\n" separator (`.finished(true)`) or not (`.finished(false)`).
///
/// Behaviour by state:
/// - `.started(true)` with an empty element: the separator written when the block opened
///   is taken back, but stays owed, so `<p>a</p><p></p><p>b</p>` still separates `a` from `b`.
///   The separator is only removed when it is still the tail of `currentRun`;
///   `removeLast(2)` traps on a shorter string.
/// - `.ongoing`: the block had content, so a separator is owed.
/// - `.finished`: an inner block already finished; its result is kept, so
///   `<blockquote><p>a</p></blockquote><p>b</p>` does not lose its separator.
/// - `.unstarted` / other `.started`: nothing was written, nothing is owed.
private mutating func finishBlockElement() {
    let separator = "\n\n"
    switch blockState {
    case .started(true) where currentElementIsEmpty:
        if currentRun.hasSuffix(separator) {
            currentRun.removeLast(separator.count)
            // Content precedes this empty block, so the separator is owed again.
            blockState = .finished(true)
        } else {
            // The separator is no longer at the end of currentRun (presumably it was
            // flushed by finishRun() -- TODO confirm). Leave it in place and do not
            // request a second one.
            blockState = .finished(false)
        }
    case .unstarted, .started:
        blockState = .finished(false)
    case .ongoing:
        blockState = .finished(true)
    case .finished:
        // Closing an outer block: keep what the inner block recorded.
        break
    }
}
// Finds the last currently-open style of the given type. // Finds the last currently-open style of the given type.
// We can't just use the last one because we need to handle mis-nested tags. // We can't just use the last one because we need to handle mis-nested tags.
private mutating func removeLastStyle(_ type: Style.StyleType) { private mutating func removeLastStyle(_ type: Style.StyleType) {
@ -413,6 +447,13 @@ private enum Style {
} }
} }
/// Tracks where the converter is relative to block-level elements, so that separators
/// are written between blocks but not before the first or after the last one.
enum BlockState {
    /// Initial state: no block has been opened and no text has been seen.
    case unstarted
    /// A block was opened and no text has arrived since. The flag is `true` when a
    /// separator was written while opening it.
    case started(Bool)
    /// Character data has been received since the last block boundary.
    case ongoing
    /// A block was closed. When the flag is `true`, the next block or text run
    /// writes a separator before its own content.
    case finished(Bool)
}

// Synthesized conformance; the converters compare states with `==`.
extension BlockState: Equatable {}
extension Collection where Element == Attribute { extension Collection where Element == Attribute {
public func attributeValue(for name: String) -> String? { public func attributeValue(for name: String) -> String? {
first(where: { $0.name == name })?.value first(where: { $0.name == name })?.value

View File

@ -15,7 +15,7 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
private var str: String! private var str: String!
private var actionStack: [ElementAction] = [] private var actionStack: [ElementAction] = []
private var previouslyFinishedBlockElement = false private var blockState = BlockState.unstarted
private var currentElementIsEmpty = true private var currentElementIsEmpty = true
private var currentRun = "" private var currentRun = ""
@ -31,7 +31,7 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator()) tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
str = "" str = ""
previouslyFinishedBlockElement = false blockState = .unstarted
currentElementIsEmpty = true currentElementIsEmpty = true
currentRun = "" currentRun = ""
@ -39,15 +39,14 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
switch token { switch token {
case .character(let scalar): case .character(let scalar):
currentElementIsEmpty = false currentElementIsEmpty = false
previouslyFinishedBlockElement = false continueBlock()
currentRun.unicodeScalars.append(scalar) currentRun.unicodeScalars.append(scalar)
case .characterSequence(let string): case .characterSequence(let string):
currentElementIsEmpty = false currentElementIsEmpty = false
previouslyFinishedBlockElement = false continueBlock()
currentRun.append(string) currentRun.append(string)
case .startTag(let name, let selfClosing, let attributes): case .startTag(let name, let selfClosing, let attributes):
currentElementIsEmpty = true currentElementIsEmpty = true
previouslyFinishedBlockElement = false
let action = Callbacks.elementAction(name: name, attributes: attributes) let action = Callbacks.elementAction(name: name, attributes: attributes)
actionStack.append(action) actionStack.append(action)
handleStartTag(name, selfClosing: selfClosing, attributes: attributes) handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
@ -62,13 +61,6 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
} }
} }
if previouslyFinishedBlockElement {
if configuration.insertNewlines {
currentRun.removeLast(2)
} else {
currentRun.removeLast(1)
}
}
finishRun() finishRun()
return str return str
@ -82,6 +74,8 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
} else { } else {
currentRun.append(" ") currentRun.append(" ")
} }
case "pre", "blockquote", "p", "ol", "ul":
startBlockIfNecessary()
default: default:
break break
} }
@ -90,24 +84,67 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
// Two-column diff rendering: where a line holds two statements, the left one is the removed
// code and the right one is the added code.
// handleEndTag(_:) reacts to the end tags of "pre", "blockquote", "p", "ol" and "ul";
// every other tag name falls through to `default` and does nothing.
// The change swaps the call order: previously finishRun() ran before finishBlockElement();
// now finishBlockElement() runs first, so its edits to currentRun happen before finishRun().
private mutating func handleEndTag(_ name: String) { private mutating func handleEndTag(_ name: String) {
switch name { switch name {
case "pre", "blockquote", "p", "ol", "ul": case "pre", "blockquote", "p", "ol", "ul":
finishRun()
finishBlockElement() finishBlockElement()
finishRun()
default: default:
break break
} }
} }
// Two-column diff rendering: where a line holds two statements, the left one is the removed
// code and the right one is the added code.
// Left column: the previous finishBlockElement(), which appended a separator after every
// non-empty block and set previouslyFinishedBlockElement.
// Right column: startBlockIfNecessary(), which writes the separator when a block opens.
// The separator is "\n\n" when configuration.insertNewlines is true, otherwise a single space.
//   - .unstarted: no separator is written                        -> .started(false)
//   - .started:   state is left untouched
//   - .ongoing:   text precedes this block, so separate from it  -> .started(true)
//   - .finished(nonEmpty): separate only if the flag is true; the
//     flag is carried over                                       -> .started(nonEmpty)
private mutating func finishBlockElement() { private mutating func startBlockIfNecessary() {
if !currentElementIsEmpty { switch blockState {
previouslyFinishedBlockElement = true case .unstarted:
blockState = .started(false)
case .started:
break
case .ongoing:
if configuration.insertNewlines { if configuration.insertNewlines {
currentRun.append("\n\n") currentRun.append("\n\n")
} else { } else {
currentRun.append(" ") currentRun.append(" ")
} }
blockState = .started(true)
case .finished(let nonEmpty):
if nonEmpty {
if configuration.insertNewlines {
currentRun.append("\n\n")
} else {
currentRun.append(" ")
}
}
blockState = .started(nonEmpty)
} }
} }
/// Called when character data arrives, before the caller appends it to `currentRun`.
///
/// If the previous block finished with content (`.finished(true)`), a separator is written
/// first: "\n\n" when `configuration.insertNewlines` is set, otherwise a single space.
/// Whatever the prior state, the converter ends up in `.ongoing`.
private mutating func continueBlock() {
    // Only a finished block that actually had content owes a separator.
    if case .finished(true) = blockState {
        let separator = configuration.insertNewlines ? "\n\n" : " "
        currentRun.append(separator)
    }
    blockState = .ongoing
}
/// Closes the current block element and records whether the next block or text run
/// still owes a separator (`.finished(true)`) or not (`.finished(false)`).
/// The separator is "\n\n" when `configuration.insertNewlines` is set, otherwise " ".
///
/// Behaviour by state:
/// - `.started(true)` with an empty element: the separator written when the block opened
///   is taken back, but stays owed, so `<p>a</p><p></p><p>b</p>` still separates `a` from `b`.
///   The separator is only removed when it is still the tail of `currentRun`;
///   `removeLast(_:)` traps when asked to remove more characters than the string holds.
/// - `.ongoing`: the block had content, so a separator is owed.
/// - `.finished`: an inner block already finished; its result is kept, so
///   `<blockquote><p>a</p></blockquote><p>b</p>` does not lose its separator.
/// - `.unstarted` / other `.started`: nothing was written, nothing is owed.
private mutating func finishBlockElement() {
    switch blockState {
    case .started(true) where currentElementIsEmpty:
        let separator = configuration.insertNewlines ? "\n\n" : " "
        if currentRun.hasSuffix(separator) {
            currentRun.removeLast(separator.count)
            // Content precedes this empty block, so the separator is owed again.
            blockState = .finished(true)
        } else {
            // The separator is no longer at the end of currentRun (presumably it was
            // flushed by finishRun() -- TODO confirm). Leave it in place and do not
            // request a second one.
            blockState = .finished(false)
        }
    case .unstarted, .started:
        blockState = .finished(false)
    case .ongoing:
        blockState = .finished(true)
    case .finished:
        // Closing an outer block: keep what the inner block recorded.
        break
    }
}
private mutating func finishRun() { private mutating func finishRun() {
if actionStack.contains(.skip) { if actionStack.contains(.skip) {
currentRun = "" currentRun = ""

View File

@ -319,7 +319,7 @@ final class AttributedStringConverterTests: XCTestCase {
XCTAssertEqual(convert("<p></p><blockquote><span>inside<br>quote</span></blockquote><span>after</span><p></p>"), result) XCTAssertEqual(convert("<p></p><blockquote><span>inside<br>quote</span></blockquote><span>after</span><p></p>"), result)
} }
func testParagraphFollowedByList() { func testFollowedByList() {
let result = NSMutableAttributedString() let result = NSMutableAttributedString()
result.append(NSAttributedString(string: "a\n\n", attributes: [ result.append(NSAttributedString(string: "a\n\n", attributes: [
.font: font, .font: font,
@ -332,6 +332,8 @@ final class AttributedStringConverterTests: XCTestCase {
.foregroundColor: color, .foregroundColor: color,
])) ]))
XCTAssertEqual(convert("<p>a</p><ol><li>b</li><li>c</li></ol>"), result) XCTAssertEqual(convert("<p>a</p><ol><li>b</li><li>c</li></ol>"), result)
XCTAssertEqual(convert("<span>a</span><ol><li>b</li><li>c</li></ol>"), result)
XCTAssertEqual(convert("a<ol><li>b</li><li>c</li></ol>"), result)
} }
} }