From aa8f99bb962ff946397cb6e1968741f3885e8d15 Mon Sep 17 00:00:00 2001 From: Shadowfacts Date: Wed, 17 Jan 2024 15:28:06 -0500 Subject: [PATCH] Handle block elements better (again) --- .../AttributedStringConverter.swift | 63 ++++++++++++++--- Sources/HTMLStreamer/TextConverter.swift | 69 ++++++++++++++----- .../AttributedStringConverterTests.swift | 4 +- 3 files changed, 108 insertions(+), 28 deletions(-) diff --git a/Sources/HTMLStreamer/AttributedStringConverter.swift b/Sources/HTMLStreamer/AttributedStringConverter.swift index 594db7a..bfff1d2 100644 --- a/Sources/HTMLStreamer/AttributedStringConverter.swift +++ b/Sources/HTMLStreamer/AttributedStringConverter.swift @@ -26,7 +26,7 @@ public struct AttributedStringConverter { private var actionStack: [ElementAction] = [] private var styleStack: [Style] = [] - private var previouslyFinishedBlockElement = false + private var blockState = BlockState.unstarted private var currentElementIsEmpty = true private var previouslyFinishedListItem = false // The current run of text w/o styles changing @@ -46,26 +46,26 @@ public struct AttributedStringConverter { actionStack = [] styleStack = [] - previouslyFinishedBlockElement = false + blockState = .unstarted currentElementIsEmpty = true + previouslyFinishedListItem = false currentRun = "" while let token = tokenizer.next() { switch token { case .character(let c): currentElementIsEmpty = false - previouslyFinishedBlockElement = false + continueBlock() currentRun.unicodeScalars.append(c) case .characterSequence(let s): currentElementIsEmpty = false - previouslyFinishedBlockElement = false + continueBlock() currentRun.append(s) case .comment: // ignored continue case .startTag(let name, let selfClosing, let attributes): currentElementIsEmpty = true - previouslyFinishedBlockElement = false let action = Callbacks.elementAction(name: name, attributes: attributes) actionStack.append(action) handleStartTag(name, selfClosing: selfClosing, attributes: attributes) @@ -82,9 +82,6 @@ public struct AttributedStringConverter { } } - if previouslyFinishedBlockElement { - currentRun.removeLast(2) - } finishRun() return str @@ -124,15 +121,21 @@ public struct AttributedStringConverter { finishRun() styleStack.append(.monospace) case "pre": + startBlockIfNecessary() finishRun() styleStack.append(.monospace) case "blockquote": + startBlockIfNecessary() finishRun() styleStack.append(.blockquote) + case "p": + startBlockIfNecessary() case "ol": + startBlockIfNecessary() finishRun() styleStack.append(.orderedList(nextElementOrdinal: 1)) case "ul": + startBlockIfNecessary() finishRun() styleStack.append(.unorderedList) case "li": @@ -201,13 +204,44 @@ public struct AttributedStringConverter { } } - private mutating func finishBlockElement() { - if !currentElementIsEmpty { - previouslyFinishedBlockElement = true + private mutating func startBlockIfNecessary() { + switch blockState { + case .unstarted: + blockState = .started(false) + case .started: + break + case .ongoing: currentRun.append("\n\n") + blockState = .started(true) + case .finished(let nonEmpty): + if nonEmpty { + currentRun.append("\n\n") + } + blockState = .started(nonEmpty) } } + private mutating func continueBlock() { + switch blockState { + case .unstarted, .started(_): + blockState = .ongoing + case .ongoing: + break + case .finished(let nonEmpty): + if nonEmpty { + currentRun.append("\n\n") + } + blockState = .ongoing + } + } + + private mutating func finishBlockElement() { + if blockState == .started(true) && currentElementIsEmpty { + currentRun.removeLast(2) + } + blockState = .finished(blockState == .ongoing) + } + // Finds the last currently-open style of the given type. // We can't just use the last one because we need to handle mis-nested tags. private mutating func removeLastStyle(_ type: Style.StyleType) { @@ -413,6 +447,13 @@ private enum Style { } } +enum BlockState: Equatable { + case unstarted + case started(Bool) + case ongoing + case finished(Bool) +} + extension Collection where Element == Attribute { public func attributeValue(for name: String) -> String? { first(where: { $0.name == name })?.value diff --git a/Sources/HTMLStreamer/TextConverter.swift b/Sources/HTMLStreamer/TextConverter.swift index 028024e..78d398b 100644 --- a/Sources/HTMLStreamer/TextConverter.swift +++ b/Sources/HTMLStreamer/TextConverter.swift @@ -15,7 +15,7 @@ public struct TextConverter { private var str: String! private var actionStack: [ElementAction] = [] - private var previouslyFinishedBlockElement = false + private var blockState = BlockState.unstarted private var currentElementIsEmpty = true private var currentRun = "" @@ -31,7 +31,7 @@ public struct TextConverter { tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator()) str = "" - previouslyFinishedBlockElement = false + blockState = .unstarted currentElementIsEmpty = true currentRun = "" @@ -39,15 +39,14 @@ public struct TextConverter { switch token { case .character(let scalar): currentElementIsEmpty = false - previouslyFinishedBlockElement = false + continueBlock() currentRun.unicodeScalars.append(scalar) case .characterSequence(let string): currentElementIsEmpty = false - previouslyFinishedBlockElement = false + continueBlock() currentRun.append(string) case .startTag(let name, let selfClosing, let attributes): currentElementIsEmpty = true - previouslyFinishedBlockElement = false let action = Callbacks.elementAction(name: name, attributes: attributes) actionStack.append(action) handleStartTag(name, selfClosing: selfClosing, attributes: attributes) @@ -62,13 +61,6 @@ public struct TextConverter { } } - if previouslyFinishedBlockElement { - if configuration.insertNewlines { - currentRun.removeLast(2) - } else { - currentRun.removeLast(1) - } - } finishRun() return str @@ -82,6 +74,8 @@ public struct TextConverter { } else { currentRun.append(" ") } + case "pre", "blockquote", "p", "ol", "ul": + startBlockIfNecessary() default: break } @@ -90,24 +84,67 @@ public struct TextConverter { private mutating func handleEndTag(_ name: String) { switch name { case "pre", "blockquote", "p", "ol", "ul": - finishRun() finishBlockElement() + finishRun() default: break } } - private mutating func finishBlockElement() { - if !currentElementIsEmpty { - previouslyFinishedBlockElement = true + private mutating func startBlockIfNecessary() { + switch blockState { + case .unstarted: + blockState = .started(false) + case .started: + break + case .ongoing: if configuration.insertNewlines { currentRun.append("\n\n") } else { currentRun.append(" ") } + blockState = .started(true) + case .finished(let nonEmpty): + if nonEmpty { + if configuration.insertNewlines { + currentRun.append("\n\n") + } else { + currentRun.append(" ") + } + } + blockState = .started(nonEmpty) } } + private mutating func continueBlock() { + switch blockState { + case .unstarted, .started(_): + blockState = .ongoing + case .ongoing: + break + case .finished(let nonEmpty): + if nonEmpty { + if configuration.insertNewlines { + currentRun.append("\n\n") + } else { + currentRun.append(" ") + } + } + blockState = .ongoing + } + } + + private mutating func finishBlockElement() { + if blockState == .started(true) && currentElementIsEmpty { + if configuration.insertNewlines { + currentRun.removeLast(2) + } else { + currentRun.removeLast(1) + } + } + blockState = .finished(blockState == .ongoing) + } + private mutating func finishRun() { if actionStack.contains(.skip) { currentRun = "" diff --git a/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift b/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift index f3a520d..8a8f5b7 100644 --- a/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift +++ b/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift @@ -319,7 +319,7 @@ final class AttributedStringConverterTests: XCTestCase { XCTAssertEqual(convert("

inside
quote
after

"), result) } - func testParagraphFollowedByList() { + func testFollowedByList() { let result = NSMutableAttributedString() result.append(NSAttributedString(string: "a\n\n", attributes: [ .font: font, @@ -332,6 +332,8 @@ final class AttributedStringConverterTests: XCTestCase { .foregroundColor: color, ])) XCTAssertEqual(convert("

a

  1. b
  2. c
"), result) + XCTAssertEqual(convert("a
  1. b
  2. c
"), result) + XCTAssertEqual(convert("a
  1. b
  2. c
"), result) } }