diff --git a/Sources/HTMLStreamer/AttributedStringConverter.swift b/Sources/HTMLStreamer/AttributedStringConverter.swift
index 594db7a..bfff1d2 100644
--- a/Sources/HTMLStreamer/AttributedStringConverter.swift
+++ b/Sources/HTMLStreamer/AttributedStringConverter.swift
@@ -26,7 +26,7 @@ public struct AttributedStringConverter {
private var actionStack: [ElementAction] = []
private var styleStack: [Style] = []
- private var previouslyFinishedBlockElement = false
+ private var blockState = BlockState.unstarted
private var currentElementIsEmpty = true
private var previouslyFinishedListItem = false
// The current run of text w/o styles changing
@@ -46,26 +46,26 @@ public struct AttributedStringConverter {
actionStack = []
styleStack = []
- previouslyFinishedBlockElement = false
+ blockState = .unstarted
currentElementIsEmpty = true
+ previouslyFinishedListItem = false
currentRun = ""
while let token = tokenizer.next() {
switch token {
case .character(let c):
currentElementIsEmpty = false
- previouslyFinishedBlockElement = false
+ continueBlock()
currentRun.unicodeScalars.append(c)
case .characterSequence(let s):
currentElementIsEmpty = false
- previouslyFinishedBlockElement = false
+ continueBlock()
currentRun.append(s)
case .comment:
// ignored
continue
case .startTag(let name, let selfClosing, let attributes):
currentElementIsEmpty = true
- previouslyFinishedBlockElement = false
let action = Callbacks.elementAction(name: name, attributes: attributes)
actionStack.append(action)
handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
@@ -82,9 +82,6 @@ public struct AttributedStringConverter {
}
}
- if previouslyFinishedBlockElement {
- currentRun.removeLast(2)
- }
finishRun()
return str
@@ -124,15 +121,21 @@ public struct AttributedStringConverter {
finishRun()
styleStack.append(.monospace)
case "pre":
+ startBlockIfNecessary()
finishRun()
styleStack.append(.monospace)
case "blockquote":
+ startBlockIfNecessary()
finishRun()
styleStack.append(.blockquote)
+ case "p":
+ startBlockIfNecessary()
case "ol":
+ startBlockIfNecessary()
finishRun()
styleStack.append(.orderedList(nextElementOrdinal: 1))
case "ul":
+ startBlockIfNecessary()
finishRun()
styleStack.append(.unorderedList)
case "li":
@@ -201,13 +204,44 @@ public struct AttributedStringConverter {
}
}
- private mutating func finishBlockElement() {
- if !currentElementIsEmpty {
- previouslyFinishedBlockElement = true
+ private mutating func startBlockIfNecessary() { // Marks the start of a block element, appending a "\n\n" separator to currentRun when earlier text precedes it.
+ switch blockState {
+ case .unstarted:
+ blockState = .started(false) // no text handled before this block, so no separator was appended
+ case .started:
+ break // another block start is already pending with no text in between; keep its state
+ case .ongoing:
currentRun.append("\n\n")
+ blockState = .started(true) // true: a separator was appended, which finishBlockElement() removes again if the element stays empty
+ case .finished(let nonEmpty):
+ if nonEmpty { // the previously finished block had text, so separate it from this one
+ currentRun.append("\n\n")
+ }
+ blockState = .started(nonEmpty) // payload mirrors whether a separator was appended just above
}
}
+ private mutating func continueBlock() { // Updates blockState for incoming text; the token loop calls this before appending the text to currentRun.
+ switch blockState {
+ case .unstarted, .started(_):
+ blockState = .ongoing // any separator needed was already handled by startBlockIfNecessary()
+ case .ongoing:
+ break
+ case .finished(let nonEmpty):
+ if nonEmpty { // the finished block had text, so separate it from the text that follows
+ currentRun.append("\n\n")
+ }
+ blockState = .ongoing
+ }
+ }
+
+ private mutating func finishBlockElement() { // Records the end of a block element in blockState, undoing an unused separator.
+ if blockState == .started(true) && currentElementIsEmpty { // a separator was appended but no text followed the last start tag
+ currentRun.removeLast(2) // drops the "\n\n" separator; NOTE(review): removeLast traps if currentRun has fewer than 2 characters — the start-tag cases call finishRun() right after startBlockIfNecessary(), confirm the separator is still in currentRun here
+ }
+ blockState = .finished(blockState == .ongoing) // payload: true only if text was seen, i.e. a separator is owed before later content
+ }
+
// Finds the last currently-open style of the given type.
// We can't just use the last one because we need to handle mis-nested tags.
private mutating func removeLastStyle(_ type: Style.StyleType) {
@@ -413,6 +447,13 @@ private enum Style {
}
}
+enum BlockState: Equatable { // Where a converter currently is relative to block-level elements; drives when separators are appended or removed.
+ case unstarted // initial state after reset: no block start tag or text handled yet
+ case started(Bool) // a block start tag was handled; true when a separator was appended to currentRun for it
+ case ongoing // text has been handled since the last block boundary
+ case finished(Bool) // a block end tag was handled; true when the state was .ongoing (text was seen) at that point
+}
+
extension Collection where Element == Attribute {
public func attributeValue(for name: String) -> String? {
first(where: { $0.name == name })?.value
diff --git a/Sources/HTMLStreamer/TextConverter.swift b/Sources/HTMLStreamer/TextConverter.swift
index 028024e..78d398b 100644
--- a/Sources/HTMLStreamer/TextConverter.swift
+++ b/Sources/HTMLStreamer/TextConverter.swift
@@ -15,7 +15,7 @@ public struct TextConverter {
private var str: String!
private var actionStack: [ElementAction] = []
- private var previouslyFinishedBlockElement = false
+ private var blockState = BlockState.unstarted
private var currentElementIsEmpty = true
private var currentRun = ""
@@ -31,7 +31,7 @@ public struct TextConverter {
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
str = ""
- previouslyFinishedBlockElement = false
+ blockState = .unstarted
currentElementIsEmpty = true
currentRun = ""
@@ -39,15 +39,14 @@ public struct TextConverter {
switch token {
case .character(let scalar):
currentElementIsEmpty = false
- previouslyFinishedBlockElement = false
+ continueBlock()
currentRun.unicodeScalars.append(scalar)
case .characterSequence(let string):
currentElementIsEmpty = false
- previouslyFinishedBlockElement = false
+ continueBlock()
currentRun.append(string)
case .startTag(let name, let selfClosing, let attributes):
currentElementIsEmpty = true
- previouslyFinishedBlockElement = false
let action = Callbacks.elementAction(name: name, attributes: attributes)
actionStack.append(action)
handleStartTag(name, selfClosing: selfClosing, attributes: attributes)
@@ -62,13 +61,6 @@ public struct TextConverter {
}
}
- if previouslyFinishedBlockElement {
- if configuration.insertNewlines {
- currentRun.removeLast(2)
- } else {
- currentRun.removeLast(1)
- }
- }
finishRun()
return str
@@ -82,6 +74,8 @@ public struct TextConverter {
} else {
currentRun.append(" ")
}
+ case "pre", "blockquote", "p", "ol", "ul":
+ startBlockIfNecessary()
default:
break
}
@@ -90,24 +84,67 @@ public struct TextConverter {
private mutating func handleEndTag(_ name: String) {
switch name {
case "pre", "blockquote", "p", "ol", "ul":
- finishRun()
finishBlockElement()
+ finishRun() // runs after finishBlockElement() so that call can still trim the separator from currentRun — presumably finishRun() flushes currentRun; confirm
default:
break
}
}
- private mutating func finishBlockElement() {
- if !currentElementIsEmpty {
- previouslyFinishedBlockElement = true
+ private mutating func startBlockIfNecessary() { // Marks the start of a block element, appending a separator ("\n\n" or " ", per configuration.insertNewlines) when earlier text precedes it.
+ switch blockState {
+ case .unstarted:
+ blockState = .started(false) // no text handled before this block, so no separator was appended
+ case .started:
+ break // another block start is already pending with no text in between; keep its state
+ case .ongoing:
if configuration.insertNewlines {
currentRun.append("\n\n")
} else {
currentRun.append(" ")
}
+ blockState = .started(true) // true: a separator was appended, which finishBlockElement() removes again if the element stays empty
+ case .finished(let nonEmpty):
+ if nonEmpty { // the previously finished block had text, so separate it from this one
+ if configuration.insertNewlines {
+ currentRun.append("\n\n")
+ } else {
+ currentRun.append(" ")
+ }
+ }
+ blockState = .started(nonEmpty) // payload mirrors whether a separator was appended just above
}
}
+ private mutating func continueBlock() { // Updates blockState for incoming text; the token loop calls this before appending the text to currentRun.
+ switch blockState {
+ case .unstarted, .started(_):
+ blockState = .ongoing // any separator needed was already handled by startBlockIfNecessary()
+ case .ongoing:
+ break
+ case .finished(let nonEmpty):
+ if nonEmpty { // the finished block had text, so separate it from the text that follows
+ if configuration.insertNewlines {
+ currentRun.append("\n\n")
+ } else {
+ currentRun.append(" ")
+ }
+ }
+ blockState = .ongoing
+ }
+ }
+
+ private mutating func finishBlockElement() { // Records the end of a block element in blockState, undoing an unused separator.
+ if blockState == .started(true) && currentElementIsEmpty { // a separator was appended but no text followed the last start tag
+ if configuration.insertNewlines {
+ currentRun.removeLast(2) // length of the "\n\n" separator appended by startBlockIfNecessary()
+ } else {
+ currentRun.removeLast(1) // length of the " " separator appended by startBlockIfNecessary()
+ }
+ }
+ blockState = .finished(blockState == .ongoing) // payload: true only if text was seen, i.e. a separator is owed before later content
+ }
+
private mutating func finishRun() {
if actionStack.contains(.skip) {
currentRun = ""
diff --git a/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift b/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift
index f3a520d..8a8f5b7 100644
--- a/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift
+++ b/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift
@@ -319,7 +319,7 @@ final class AttributedStringConverterTests: XCTestCase {
XCTAssertEqual(convert("inside
quote
after"), result)
}
- func testParagraphFollowedByList() {
+ func testFollowedByList() {
let result = NSMutableAttributedString()
result.append(NSAttributedString(string: "a\n\n", attributes: [
.font: font,
@@ -332,6 +332,8 @@ final class AttributedStringConverterTests: XCTestCase {
.foregroundColor: color,
]))
XCTAssertEqual(convert("a
- b
- c
"), result)
+ XCTAssertEqual(convert("a- b
- c
"), result)
+ XCTAssertEqual(convert("a- b
- c
"), result)
}
}