Handle block elements better (again)
This commit is contained in:
parent
e709543568
commit
aa8f99bb96
|
@ -26,7 +26,7 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
|
|
||||||
private var actionStack: [ElementAction] = []
|
private var actionStack: [ElementAction] = []
|
||||||
private var styleStack: [Style] = []
|
private var styleStack: [Style] = []
|
||||||
private var previouslyFinishedBlockElement = false
|
private var blockState = BlockState.unstarted
|
||||||
private var currentElementIsEmpty = true
|
private var currentElementIsEmpty = true
|
||||||
private var previouslyFinishedListItem = false
|
private var previouslyFinishedListItem = false
|
||||||
// The current run of text w/o styles changing
|
// The current run of text w/o styles changing
|
||||||
|
@ -46,26 +46,26 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
|
|
||||||
actionStack = []
|
actionStack = []
|
||||||
styleStack = []
|
styleStack = []
|
||||||
previouslyFinishedBlockElement = false
|
blockState = .unstarted
|
||||||
currentElementIsEmpty = true
|
currentElementIsEmpty = true
|
||||||
|
previouslyFinishedListItem = false
|
||||||
currentRun = ""
|
currentRun = ""
|
||||||
|
|
||||||
while let token = tokenizer.next() {
|
while let token = tokenizer.next() {
|
||||||
switch token {
|
switch token {
|
||||||
case .character(let c):
|
case .character(let c):
|
||||||
currentElementIsEmpty = false
|
currentElementIsEmpty = false
|
||||||
previouslyFinishedBlockElement = false
|
continueBlock()
|
||||||
currentRun.unicodeScalars.append(c)
|
currentRun.unicodeScalars.append(c)
|
||||||
case .characterSequence(let s):
|
case .characterSequence(let s):
|
||||||
currentElementIsEmpty = false
|
currentElementIsEmpty = false
|
||||||
previouslyFinishedBlockElement = false
|
continueBlock()
|
||||||
currentRun.append(s)
|
currentRun.append(s)
|
||||||
case .comment:
|
case .comment:
|
||||||
// ignored
|
// ignored
|
||||||
continue
|
continue
|
||||||
case .startTag(let name, let selfClosing, let attributes):
|
case .startTag(let name, let selfClosing, let attributes):
|
||||||
currentElementIsEmpty = true
|
currentElementIsEmpty = true
|
||||||
previouslyFinishedBlockElement = false
|
|
||||||
let action = Callbacks.elementAction(name: name, attributes: attributes)
|
let action = Callbacks.elementAction(name: name, attributes: attributes)
|
||||||
actionStack.append(action)
|
actionStack.append(action)
|
||||||
handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
|
handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
|
||||||
|
@ -82,9 +82,6 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if previouslyFinishedBlockElement {
|
|
||||||
currentRun.removeLast(2)
|
|
||||||
}
|
|
||||||
finishRun()
|
finishRun()
|
||||||
|
|
||||||
return str
|
return str
|
||||||
|
@ -124,15 +121,21 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
finishRun()
|
finishRun()
|
||||||
styleStack.append(.monospace)
|
styleStack.append(.monospace)
|
||||||
case "pre":
|
case "pre":
|
||||||
|
startBlockIfNecessary()
|
||||||
finishRun()
|
finishRun()
|
||||||
styleStack.append(.monospace)
|
styleStack.append(.monospace)
|
||||||
case "blockquote":
|
case "blockquote":
|
||||||
|
startBlockIfNecessary()
|
||||||
finishRun()
|
finishRun()
|
||||||
styleStack.append(.blockquote)
|
styleStack.append(.blockquote)
|
||||||
|
case "p":
|
||||||
|
startBlockIfNecessary()
|
||||||
case "ol":
|
case "ol":
|
||||||
|
startBlockIfNecessary()
|
||||||
finishRun()
|
finishRun()
|
||||||
styleStack.append(.orderedList(nextElementOrdinal: 1))
|
styleStack.append(.orderedList(nextElementOrdinal: 1))
|
||||||
case "ul":
|
case "ul":
|
||||||
|
startBlockIfNecessary()
|
||||||
finishRun()
|
finishRun()
|
||||||
styleStack.append(.unorderedList)
|
styleStack.append(.unorderedList)
|
||||||
case "li":
|
case "li":
|
||||||
|
@ -201,13 +204,44 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private mutating func finishBlockElement() {
|
/// Prepares `currentRun` and `blockState` for a block-level element that is about to open.
///
/// Called from the `pre`, `blockquote`, `p`, `ol` and `ul` start-tag cases. By current state:
/// - `.unstarted`: records `.started(false)`; no separator is written.
/// - `.started`: does nothing, so a block opened before any text has followed the previous
///   block start does not add a second separator.
/// - `.ongoing`: appends "\n\n" to `currentRun` and records `.started(true)`.
/// - `.finished(nonEmpty)`: appends "\n\n" only when `nonEmpty` is true, then records
///   `.started(nonEmpty)`.
///
/// The Bool stored in `.started` is therefore true exactly when this call appended a
/// separator; `finishBlockElement()` checks it to undo the separator for empty elements.
private mutating func startBlockIfNecessary() {
|
||||||
if !currentElementIsEmpty {
|
switch blockState {
|
||||||
previouslyFinishedBlockElement = true
|
case .unstarted:
|
||||||
|
blockState = .started(false)
|
||||||
|
case .started:
|
||||||
|
break
|
||||||
|
case .ongoing:
|
||||||
currentRun.append("\n\n")
|
currentRun.append("\n\n")
|
||||||
|
blockState = .started(true)
|
||||||
|
case .finished(let nonEmpty):
|
||||||
|
if nonEmpty {
|
||||||
|
currentRun.append("\n\n")
|
||||||
|
}
|
||||||
|
blockState = .started(nonEmpty)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Notes that text is about to be added to `currentRun` and moves `blockState` to `.ongoing`.
///
/// The token loop calls this for every `.character` / `.characterSequence` token, before the
/// text itself is appended. If the previous block finished with content (`.finished(true)`),
/// the "\n\n" separator is appended first so the new text is set apart from that block.
/// From `.unstarted`, `.started` or `.finished(false)` nothing is written to `currentRun`.
private mutating func continueBlock() {
|
||||||
|
switch blockState {
|
||||||
|
case .unstarted, .started(_):
|
||||||
|
blockState = .ongoing
|
||||||
|
case .ongoing:
|
||||||
|
break
|
||||||
|
case .finished(let nonEmpty):
|
||||||
|
if nonEmpty {
|
||||||
|
currentRun.append("\n\n")
|
||||||
|
}
|
||||||
|
blockState = .ongoing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Records that a block-level element has ended.
///
/// If the block appended a separator when it opened (`.started(true)`) and the element is
/// still empty, the two-character "\n\n" separator is removed from `currentRun` again.
/// The state then becomes `.finished(true)` only if it was `.ongoing`, i.e. text arrived
/// since the last state change; otherwise it becomes `.finished(false)`.
///
/// NOTE(review): `removeLast(2)` assumes the separator is still the tail of `currentRun`.
/// The `pre`/`blockquote`/`ol`/`ul` start-tag cases call `finishRun()` right after
/// `startBlockIfNecessary()`; if `finishRun()` empties `currentRun`, this would trap for an
/// empty element such as `a<pre></pre>` — confirm against `finishRun()` and the end-tag path.
/// NOTE(review): an empty block that follows a non-empty one leaves `.finished(false)`, so
/// text after it gets no separator from `continueBlock()` — confirm this is intended.
private mutating func finishBlockElement() {
|
||||||
|
if blockState == .started(true) && currentElementIsEmpty {
|
||||||
|
currentRun.removeLast(2)
|
||||||
|
}
|
||||||
|
// Evaluated against the state before this assignment: true only when the state was `.ongoing`.
blockState = .finished(blockState == .ongoing)
|
||||||
|
}
|
||||||
|
|
||||||
// Finds the last currently-open style of the given type.
|
// Finds the last currently-open style of the given type.
|
||||||
// We can't just use the last one because we need to handle mis-nested tags.
|
// We can't just use the last one because we need to handle mis-nested tags.
|
||||||
private mutating func removeLastStyle(_ type: Style.StyleType) {
|
private mutating func removeLastStyle(_ type: Style.StyleType) {
|
||||||
|
@ -413,6 +447,13 @@ private enum Style {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tracks where a converter is relative to block-level elements so that separators between
/// blocks are only written when there is content on both sides.
///
/// Transitions are driven by `startBlockIfNecessary()`, `continueBlock()` and
/// `finishBlockElement()` in the converters.
enum BlockState: Equatable {
|
||||||
|
/// Initial state: the value the converters assign when a conversion begins.
case unstarted
|
||||||
|
/// A block start tag was handled and no text has followed yet. The payload is true when a
/// separator was appended to `currentRun` on entering the block.
case started(Bool)
|
||||||
|
/// Text has been appended since the last state change.
case ongoing
|
||||||
|
/// A block end tag was handled. The payload is true when the state was `.ongoing` at that
/// point; only then is a separator written before whatever comes next.
case finished(Bool)
|
||||||
|
}
|
||||||
|
|
||||||
extension Collection where Element == Attribute {
|
extension Collection where Element == Attribute {
|
||||||
public func attributeValue(for name: String) -> String? {
|
public func attributeValue(for name: String) -> String? {
|
||||||
first(where: { $0.name == name })?.value
|
first(where: { $0.name == name })?.value
|
||||||
|
|
|
@ -15,7 +15,7 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
private var str: String!
|
private var str: String!
|
||||||
|
|
||||||
private var actionStack: [ElementAction] = []
|
private var actionStack: [ElementAction] = []
|
||||||
private var previouslyFinishedBlockElement = false
|
private var blockState = BlockState.unstarted
|
||||||
private var currentElementIsEmpty = true
|
private var currentElementIsEmpty = true
|
||||||
private var currentRun = ""
|
private var currentRun = ""
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
|
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
|
||||||
str = ""
|
str = ""
|
||||||
|
|
||||||
previouslyFinishedBlockElement = false
|
blockState = .unstarted
|
||||||
currentElementIsEmpty = true
|
currentElementIsEmpty = true
|
||||||
currentRun = ""
|
currentRun = ""
|
||||||
|
|
||||||
|
@ -39,15 +39,14 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
switch token {
|
switch token {
|
||||||
case .character(let scalar):
|
case .character(let scalar):
|
||||||
currentElementIsEmpty = false
|
currentElementIsEmpty = false
|
||||||
previouslyFinishedBlockElement = false
|
continueBlock()
|
||||||
currentRun.unicodeScalars.append(scalar)
|
currentRun.unicodeScalars.append(scalar)
|
||||||
case .characterSequence(let string):
|
case .characterSequence(let string):
|
||||||
currentElementIsEmpty = false
|
currentElementIsEmpty = false
|
||||||
previouslyFinishedBlockElement = false
|
continueBlock()
|
||||||
currentRun.append(string)
|
currentRun.append(string)
|
||||||
case .startTag(let name, let selfClosing, let attributes):
|
case .startTag(let name, let selfClosing, let attributes):
|
||||||
currentElementIsEmpty = true
|
currentElementIsEmpty = true
|
||||||
previouslyFinishedBlockElement = false
|
|
||||||
let action = Callbacks.elementAction(name: name, attributes: attributes)
|
let action = Callbacks.elementAction(name: name, attributes: attributes)
|
||||||
actionStack.append(action)
|
actionStack.append(action)
|
||||||
handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
|
handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
|
||||||
|
@ -62,13 +61,6 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if previouslyFinishedBlockElement {
|
|
||||||
if configuration.insertNewlines {
|
|
||||||
currentRun.removeLast(2)
|
|
||||||
} else {
|
|
||||||
currentRun.removeLast(1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
finishRun()
|
finishRun()
|
||||||
|
|
||||||
return str
|
return str
|
||||||
|
@ -82,6 +74,8 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
} else {
|
} else {
|
||||||
currentRun.append(" ")
|
currentRun.append(" ")
|
||||||
}
|
}
|
||||||
|
case "pre", "blockquote", "p", "ol", "ul":
|
||||||
|
startBlockIfNecessary()
|
||||||
default:
|
default:
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -90,24 +84,67 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
private mutating func handleEndTag(_ name: String) {
|
private mutating func handleEndTag(_ name: String) {
|
||||||
switch name {
|
switch name {
|
||||||
case "pre", "blockquote", "p", "ol", "ul":
|
case "pre", "blockquote", "p", "ol", "ul":
|
||||||
finishRun()
|
|
||||||
finishBlockElement()
|
finishBlockElement()
|
||||||
|
finishRun()
|
||||||
default:
|
default:
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private mutating func finishBlockElement() {
|
/// Prepares `currentRun` and `blockState` for a block-level element that is about to open.
///
/// Called from the `pre`, `blockquote`, `p`, `ol`, `ul` start-tag case. The separator is
/// "\n\n" when `configuration.insertNewlines` is set and a single space otherwise.
/// By current state:
/// - `.unstarted`: records `.started(false)`; no separator is written.
/// - `.started`: does nothing, so no second separator is added before any text has arrived.
/// - `.ongoing`: appends the separator and records `.started(true)`.
/// - `.finished(nonEmpty)`: appends the separator only when `nonEmpty` is true, then records
///   `.started(nonEmpty)`.
///
/// The Bool stored in `.started` is therefore true exactly when this call appended a
/// separator; `finishBlockElement()` checks it to undo the separator for empty elements.
private mutating func startBlockIfNecessary() {
|
||||||
if !currentElementIsEmpty {
|
switch blockState {
|
||||||
previouslyFinishedBlockElement = true
|
case .unstarted:
|
||||||
|
blockState = .started(false)
|
||||||
|
case .started:
|
||||||
|
break
|
||||||
|
case .ongoing:
|
||||||
if configuration.insertNewlines {
|
if configuration.insertNewlines {
|
||||||
currentRun.append("\n\n")
|
currentRun.append("\n\n")
|
||||||
} else {
|
} else {
|
||||||
currentRun.append(" ")
|
currentRun.append(" ")
|
||||||
}
|
}
|
||||||
|
blockState = .started(true)
|
||||||
|
case .finished(let nonEmpty):
|
||||||
|
if nonEmpty {
|
||||||
|
if configuration.insertNewlines {
|
||||||
|
currentRun.append("\n\n")
|
||||||
|
} else {
|
||||||
|
currentRun.append(" ")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
blockState = .started(nonEmpty)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Notes that text is about to be added to `currentRun` and moves `blockState` to `.ongoing`.
///
/// The token loop calls this for every `.character` / `.characterSequence` token, before the
/// text itself is appended. If the previous block finished with content (`.finished(true)`),
/// a separator is appended first: "\n\n" when `configuration.insertNewlines` is set, a single
/// space otherwise. From `.unstarted`, `.started` or `.finished(false)` nothing is written.
private mutating func continueBlock() {
|
||||||
|
switch blockState {
|
||||||
|
case .unstarted, .started(_):
|
||||||
|
blockState = .ongoing
|
||||||
|
case .ongoing:
|
||||||
|
break
|
||||||
|
case .finished(let nonEmpty):
|
||||||
|
if nonEmpty {
|
||||||
|
if configuration.insertNewlines {
|
||||||
|
currentRun.append("\n\n")
|
||||||
|
} else {
|
||||||
|
currentRun.append(" ")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
blockState = .ongoing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Records that a block-level element has ended.
///
/// If the block appended a separator when it opened (`.started(true)`) and the element is
/// still empty, that separator is removed from `currentRun` again: two characters when
/// `configuration.insertNewlines` is set ("\n\n"), one otherwise (" ").
/// The state then becomes `.finished(true)` only if it was `.ongoing`, i.e. text arrived
/// since the last state change; otherwise it becomes `.finished(false)`.
///
/// NOTE(review): the `removeLast` calls assume the separator is still the tail of
/// `currentRun`, i.e. nothing flushed or appended to it since `startBlockIfNecessary()` —
/// verify against `finishRun()` and the other tag handlers.
/// NOTE(review): an empty block that follows a non-empty one leaves `.finished(false)`, so
/// text after it gets no separator from `continueBlock()` — confirm this is intended.
private mutating func finishBlockElement() {
|
||||||
|
if blockState == .started(true) && currentElementIsEmpty {
|
||||||
|
if configuration.insertNewlines {
|
||||||
|
currentRun.removeLast(2)
|
||||||
|
} else {
|
||||||
|
currentRun.removeLast(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Evaluated against the state before this assignment: true only when the state was `.ongoing`.
blockState = .finished(blockState == .ongoing)
|
||||||
|
}
|
||||||
|
|
||||||
private mutating func finishRun() {
|
private mutating func finishRun() {
|
||||||
if actionStack.contains(.skip) {
|
if actionStack.contains(.skip) {
|
||||||
currentRun = ""
|
currentRun = ""
|
||||||
|
|
|
@ -319,7 +319,7 @@ final class AttributedStringConverterTests: XCTestCase {
|
||||||
XCTAssertEqual(convert("<p></p><blockquote><span>inside<br>quote</span></blockquote><span>after</span><p></p>"), result)
|
XCTAssertEqual(convert("<p></p><blockquote><span>inside<br>quote</span></blockquote><span>after</span><p></p>"), result)
|
||||||
}
|
}
|
||||||
|
|
||||||
func testParagraphFollowedByList() {
|
func testFollowedByList() {
|
||||||
let result = NSMutableAttributedString()
|
let result = NSMutableAttributedString()
|
||||||
result.append(NSAttributedString(string: "a\n\n", attributes: [
|
result.append(NSAttributedString(string: "a\n\n", attributes: [
|
||||||
.font: font,
|
.font: font,
|
||||||
|
@ -332,6 +332,8 @@ final class AttributedStringConverterTests: XCTestCase {
|
||||||
.foregroundColor: color,
|
.foregroundColor: color,
|
||||||
]))
|
]))
|
||||||
XCTAssertEqual(convert("<p>a</p><ol><li>b</li><li>c</li></ol>"), result)
|
XCTAssertEqual(convert("<p>a</p><ol><li>b</li><li>c</li></ol>"), result)
|
||||||
|
XCTAssertEqual(convert("<span>a</span><ol><li>b</li><li>c</li></ol>"), result)
|
||||||
|
XCTAssertEqual(convert("a<ol><li>b</li><li>c</li></ol>"), result)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue