diff --git a/BlockState.dot b/BlockState.dot new file mode 100644 index 0000000..ab9fea6 --- /dev/null +++ b/BlockState.dot @@ -0,0 +1,126 @@ +digraph blockstate { + /* rankdir=LR; */ + node [shape = doublecircle, fontsize = 18]; end; + node [shape = circle, fontsize = 18]; + edge [fontsize = 18]; + init [label = "", shape=none, height = .0, width = .0]; + start; + emptyBlock [label = "empty block"]; + nonEmptyBlock [label = "non-empty block"]; + emittedSpace [label = "emitted space"]; + lineBreakTag [label = "line break tag"]; + atLeastTwoLineBreakTags [label = ">=2 line break tags"]; + emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "empty block w/ >=2 prev line break tags"]; + beginListItem [label = "begin list item"]; + endListItem [label = "end list item"]; + listItemContent [label = "list item content"]; + emittedSpaceInListItemContent [label = "emitted space in text in list item content"]; + lineBreakTagInListItemContent [label = "line break tag in list item content"]; + atLeastTwoLineBreakTagsInListItemContent [label = ">= 2 line break tags in list item content"]; + preformattedStart [label = "preformatted start"]; + preformattedEmptyBlock [label = "preformatted empty block"]; + preformattedNonEmptyBlock [label = "preformatted non-empty block"]; + preformattedLineBreak [label = "preformatted line break"]; + preformattedAtLeastTwoLineBreaks [label = "preformatted >=2 line breaks"]; + afterPreStartTag [label = "after
 start tag"];
+	afterPreStartTagWithLeadingWhitespace [label = "after 
 start tag w/ leading whitespace"];
+	preformattedNonEmptyBlockWithTrailingWhitespace [label = "preformatted non-empty block w/ trailing whitespace"];
+	preformattedEmptyBlockWithLeadingWhitespace [label = "preformatted empty block w/ leading whitespace"];
+
+	init -> start;
+	start -> start [label = "whitespace (skip)\n
(skip)\n
\nstart/end block"]; + start -> nonEmptyBlock [label = "non-whitespace"]; + start -> preformattedStart [label = "
 (depth = 1)"];
+	start -> beginListItem [label = "
  • "]; + nonEmptyBlock -> nonEmptyBlock [label = "non-whitespace"]; + nonEmptyBlock -> emptyBlock [label = "start/end block"]; + nonEmptyBlock -> emittedSpace [label = "whitespace (emit space)"]; + nonEmptyBlock -> lineBreakTag [label = "
    (append to tmp)"]; + nonEmptyBlock -> beginListItem [label = "
  • "]; + nonEmptyBlock -> endListItem [label = "
  • "]; + emittedSpace -> nonEmptyBlock [label = "non-whitespace"]; + emittedSpace -> emittedSpace [label = "whitespace (skip)"]; + emittedSpace -> emptyBlock [label = "start/end block (remove 1)"]; + emittedSpace -> lineBreakTag [label = "
    (append to tmp)"]; + emittedSpace -> end [label = "EOF (remove 1)"]; + emptyBlock -> nonEmptyBlock [label = "non-whitespace (block break)"]; + emptyBlock -> emptyBlock [label = "whitespace (skip)\n
    \n
    \nstart/end block"]; + emptyBlock -> afterPreStartTag [label = "
     (depth = 1)"];
    +	emptyBlock -> beginListItem [label = "
  • "]; + emptyBlock -> endListItem [label = "
  • "]; + lineBreakTag -> lineBreakTag [label = "whitespace (skip)"]; + lineBreakTag -> atLeastTwoLineBreakTags [label = "
    (append to tmp)"]; + lineBreakTag -> emptyBlock [label = "start/end block (clear tmp)"]; + lineBreakTag -> nonEmptyBlock [label = "non-whitespace (emit tmp)"]; + atLeastTwoLineBreakTags -> atLeastTwoLineBreakTags [label = "whitespace (skip)\n
    (append to tmp)"]; + atLeastTwoLineBreakTags -> nonEmptyBlock [label = "non-whitespace (emit tmp)"]; + atLeastTwoLineBreakTags -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "start/end block"]; + emptyBlockWithAtLeastTwoPreviousLineBreakTags -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "whitespace (skip)\n
    \n
    \nstart/end block"]; + emptyBlockWithAtLeastTwoPreviousLineBreakTags -> nonEmptyBlock [label = "non-whitespace (emit tmp)"]; + emptyBlockWithAtLeastTwoPreviousLineBreakTags -> afterPreStartTagWithLeadingWhitespace [label = "
     (depth = 1)"];
    +	beginListItem -> beginListItem [label = "
  • \nwhitespace (skip)\n
    \nstart/end block"]; + beginListItem -> listItemContent [label = "non-whitespace"]; + beginListItem -> endListItem [label = "
  • "]; + beginListItem -> afterPreStartTagWithLeadingWhitespace [label = "
    "];
    +	endListItem -> endListItem [label = "whitespace (skip)\n"];
    +	endListItem -> beginListItem [label = "
  • (line break)"]; + endListItem -> emptyBlock [label = "start/end block"]; + endListItem -> listItemContent [label = "non-whitespace (line break, indent)"]; + endListItem -> lineBreakTagInListItemContent [label = "
    (append to tmp)"]; + listItemContent -> listItemContent [label = "non-whitespace"]; + listItemContent -> beginListItem [label = "
  • (line break)"]; + listItemContent -> lineBreakTagInListItemContent [label = "
    (append to tmp)"]; + listItemContent -> emittedSpaceInListItemContent [label = "whitespace (emit space)"]; + listItemContent -> emptyBlock [label = "start/end block"]; + listItemContent -> endListItem [label = "
  • "]; + emittedSpaceInListItemContent -> emittedSpaceInListItemContent [label = "whitespace (skip)"]; + emittedSpaceInListItemContent -> listItemContent [label = "non-whitespace"]; + emittedSpaceInListItemContent -> end [label = "EOF (remove 1)"]; + emittedSpaceInListItemContent -> emptyBlock [label = "start/end block (remove 1)"]; + emittedSpaceInListItemContent -> beginListItem [label = "
  • (remove 1, line break)"]; + emittedSpaceInListItemContent -> lineBreakTagInListItemContent [label = "
    (append to tmp)"]; + emittedSpaceInListItemContent -> endListItem [label = "
  • (remove 1)"]; + lineBreakTagInListItemContent -> lineBreakTagInListItemContent [label = "whitespace (skip)"]; + lineBreakTagInListItemContent -> emptyBlock [label = "start/end block (clear tmp)"]; + lineBreakTagInListItemContent -> beginListItem [label = "
  • (emit tmp, line break)"]; + lineBreakTagInListItemContent -> listItemContent [label = "non-whitespace (emit tmp)"]; + lineBreakTagInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "
    (append to tmp)"]; + lineBreakTagInListItemContent -> endListItem [label = "
  • (clear tmp)"]; + atLeastTwoLineBreakTagsInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "
    (append to tmp)\nwhitespace (skip)"]; + atLeastTwoLineBreakTagsInListItemContent -> beginListItem [label = "
  • (emit tmp, line break)"]; + atLeastTwoLineBreakTagsInListItemContent -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "start/end block"]; + atLeastTwoLineBreakTagsInListItemContent -> listItemContent [label = "non-whitespace (emit tmp)"]; + atLeastTwoLineBreakTagsInListItemContent -> endListItem [label = "
  • (clear tmp)"]; + afterPreStartTag -> preformattedLineBreak [label = "
    (append to tmp, append block break to tmp)"]; + afterPreStartTag -> preformattedNonEmptyBlock [label = "non \\n (block break)"]; + afterPreStartTag -> preformattedEmptyBlock [label = "\\n (skip)\nstart/end block"]; + preformattedLineBreak -> preformattedNonEmptyBlock [label = "non-whitespace (emit tmp)"]; + preformattedLineBreak -> preformattedNonEmptyBlockWithTrailingWhitespace [label = "other whitespace (append to tmp)"]; + preformattedLineBreak -> preformattedAtLeastTwoLineBreaks [label = "\\n or
    (append to tmp)"]; + preformattedAtLeastTwoLineBreaks -> preformattedAtLeastTwoLineBreaks [label = "\\n or
    (append to tmp)"]; + preformattedAtLeastTwoLineBreaks -> preformattedNonEmptyBlock [label = "non \\n or
    (emit tmp)"]; + preformattedAtLeastTwoLineBreaks -> preformattedEmptyBlockWithLeadingWhitespace [label = "start/end block"]; + preformattedEmptyBlockWithLeadingWhitespace -> preformattedEmptyBlockWithLeadingWhitespace [label = "whitespace (append to tmp)\nstart/end block\n
    if depth>1&&tmp.count>=2 (depth - 1, remove 1 from tmp)"]; + preformattedEmptyBlockWithLeadingWhitespace -> preformattedLineBreak [label = "\\n or
    (append to tmp)"]; + preformattedEmptyBlockWithLeadingWhitespace -> afterPreStartTagWithLeadingWhitespace [label = "
     (depth + 1)"];
    +	preformattedEmptyBlockWithLeadingWhitespace -> preformattedEmptyBlock [label = "
    if depth>1&&tmp.count<2 (depth - 1, remove 1 from tmp)"]; + preformattedEmptyBlockWithLeadingWhitespace -> emptyBlock [label = "
    if depth<=1 (clear tmp)"]; + preformattedEmptyBlock -> preformattedEmptyBlock [label = "start/end block\n
    if depth>1 (depth - 1)"]; + preformattedEmptyBlock -> afterPreStartTag [label = "
     (depth + 1)"];
    +	preformattedEmptyBlock -> preformattedNonEmptyBlock [label = "non-whitespace (block break)"];
    +	preformattedEmptyBlock -> preformattedEmptyBlockWithLeadingWhitespace [label = "whitespace (append to tmp)"];
    +	preformattedEmptyBlock -> preformattedLineBreak [label = "
    (append to tmp)"]; + preformattedNonEmptyBlock -> preformattedNonEmptyBlock [label = "non-whitespace"]; + preformattedNonEmptyBlock -> preformattedLineBreak [label = "\\n or
    (append to tmp)"]; + preformattedNonEmptyBlock -> preformattedNonEmptyBlockWithTrailingWhitespace [label = "other whitespace (append to tmp)"]; + preformattedNonEmptyBlock -> preformattedEmptyBlock [label = "start/end block"]; + preformattedNonEmptyBlockWithTrailingWhitespace -> preformattedNonEmptyBlockWithTrailingWhitespace [label = "whitespace (append to tmp)"]; + preformattedNonEmptyBlockWithTrailingWhitespace -> preformattedNonEmptyBlock [label = "non-whitespace (emit tmp)"]; + preformattedNonEmptyBlockWithTrailingWhitespace -> preformattedLineBreak [label = "\\n or
    (append to tmp)"]; + preformattedNonEmptyBlockWithTrailingWhitespace -> preformattedEmptyBlockWithLeadingWhitespace [label = "start/end block (append block break to tmp)"]; + afterPreStartTagWithLeadingWhitespace -> preformattedNonEmptyBlock [label = "non-whitespace (emit tmp)"]; + afterPreStartTagWithLeadingWhitespace -> preformattedEmptyBlockWithLeadingWhitespace [label = "\\n (skip)\nother whitespace (append to tmp)\n
    (append to tmp)\nstart/end block"]; + preformattedStart -> preformattedStart [label = "
     (depth + 1)\n
    if depth>1 (depth - 1)\n\\n or
    (skip)\nstart/end block"]; + preformattedStart -> start [label = "
    if depth<=1"]; + preformattedStart -> preformattedNonEmptyBlock [label = "non \\n"]; +} diff --git a/Sources/HTMLStreamer/AttributedStringConverter.swift b/Sources/HTMLStreamer/AttributedStringConverter.swift index 9710dc8..580df6e 100644 --- a/Sources/HTMLStreamer/AttributedStringConverter.swift +++ b/Sources/HTMLStreamer/AttributedStringConverter.swift @@ -27,6 +27,7 @@ public struct AttributedStringConverter: Blo private var actionStack: [ElementAction] = [] private var styleStack: [Style] = [] var blockState = BlockState.start + var temporaryBuffer: String = "" private var currentElementIsEmpty = true private var previouslyFinishedListItem = false // The current run of text w/o styles changing @@ -47,6 +48,7 @@ public struct AttributedStringConverter: Blo actionStack = [] styleStack = [] blockState = .start + temporaryBuffer = "" currentElementIsEmpty = true previouslyFinishedListItem = false currentRun = "" @@ -55,12 +57,16 @@ public struct AttributedStringConverter: Blo switch token { case .character(let c): currentElementIsEmpty = false - continueBlock() - currentRun.unicodeScalars.append(c) + if continueBlock(char: c) { + currentRun.unicodeScalars.append(c) + } case .characterSequence(let s): currentElementIsEmpty = false - continueBlock() - currentRun.append(s) + for c in s.unicodeScalars { + if continueBlock(char: c) { + currentRun.unicodeScalars.append(c) + } + } case .comment: // ignored continue @@ -87,6 +93,7 @@ public struct AttributedStringConverter: Blo } } + endBlocks() finishRun() return str @@ -94,7 +101,7 @@ public struct AttributedStringConverter: Blo private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) { if name == "br" { - currentRun.append("\n") + breakTag() return } // self closing tags are ignored since they have no content @@ -126,29 +133,25 @@ public struct AttributedStringConverter: Blo finishRun() styleStack.append(.monospace) case "pre": - startOrFinishBlock() + startOrEndBlock() + 
startPreformatted() finishRun() styleStack.append(.monospace) case "blockquote": - startOrFinishBlock() + startOrEndBlock() finishRun() styleStack.append(.blockquote) case "p": - startOrFinishBlock() + startOrEndBlock() case "ol": - startOrFinishBlock() + startOrEndBlock() finishRun() styleStack.append(.orderedList(nextElementOrdinal: 1)) case "ul": - startOrFinishBlock() + startOrEndBlock() finishRun() styleStack.append(.unorderedList) case "li": - if previouslyFinishedListItem { - currentRun.append("\n") - } else { - continueBlock() - } let marker: String if case .orderedList(let nextElementOrdinal) = styleStack.last { marker = orderedTextList.marker(forItemNumber: nextElementOrdinal) @@ -158,6 +161,7 @@ public struct AttributedStringConverter: Blo } else { break } + startListItem() currentRun.append("\t\(marker)\t") default: break @@ -186,33 +190,55 @@ public struct AttributedStringConverter: Blo case "pre": finishRun() removeLastStyle(.monospace) - startOrFinishBlock() + startOrEndBlock() + endPreformatted() case "blockquote": finishRun() removeLastStyle(.blockquote) - startOrFinishBlock() + startOrEndBlock() case "p": - startOrFinishBlock() + startOrEndBlock() case "ol": finishRun() removeLastStyle(.orderedList) - startOrFinishBlock() + startOrEndBlock() previouslyFinishedListItem = false case "ul": finishRun() removeLastStyle(.unorderedList) - startOrFinishBlock() + startOrEndBlock() previouslyFinishedListItem = false case "li": finishRun() previouslyFinishedListItem = true + endListItem() default: break } } - mutating func insertBlockBreak() { - currentRun.append("\n\n") + var blockBreak: String { + "\n\n" + } + + var lineBreak: String { + "\n" + } + + var listIndentForContentOutsideItem: String { + "\t\t" + } + + mutating func append(_ s: String) { + currentRun.append(s) + } + + mutating func removeChar() { + if currentRun.isEmpty { + str.deleteCharacters(in: NSRange(location: str.length - 1, length: 1)) + } else { + currentRun.removeLast() + } } // Finds 
the last currently-open style of the given type. diff --git a/Sources/HTMLStreamer/BlockState.swift b/Sources/HTMLStreamer/BlockState.swift index 51b4173..86130e5 100644 --- a/Sources/HTMLStreamer/BlockState.swift +++ b/Sources/HTMLStreamer/BlockState.swift @@ -7,39 +7,572 @@ import Foundation +/* + + This gnarly mess of a state machine is responsible for: + 1) Inserting line breaks in the right places corresponding to boundaries between block elements + 2) Preventing leading/trailing whitespace from being emitted + 3) Collapsing whitespace within the string like https://www.w3.org/TR/css-text-3/#white-space-phase-1 + 4) Handling whitespace inside
     <pre> elements
    + 
    + DO NOT TOUCH THE CODE WITHOUT CHECKING/UPDATING THE DIAGRAM.
    +
    + */
    +
     protocol BlockRenderer {
         var blockState: BlockState { get set }
    -    mutating func insertBlockBreak()
    +    var blockBreak: String { get } // Text appended between blocks once a new block receives content.
    +    var lineBreak: String { get } // Text for one line break (buffered for <br> tags and for "\n" in preformatted text).
    +    var listIndentForContentOutsideItem: String { get } // Appended after a line break when content follows an ended list item.
    +    var temporaryBuffer: String { get set } // Pending text that is emitted later if content follows, or cleared if the block ends first.
    +    mutating func append(_ s: String) // Adds `s` to the renderer's output.
    +    mutating func removeChar() // Removes the most recently emitted character; the state machine uses it to retract a trailing space.
     }
     
     extension BlockRenderer {
    -    mutating func startOrFinishBlock() {
    +    /// Advances the state machine at a block boundary (the start or end tag of a block-level element).
    +    ///
    +    /// A block that already has content moves to the matching "empty block" state. On the way:
    +    /// - a trailing collapsed space is retracted with `removeChar()`;
    +    /// - a single pending `<br>` is dropped by clearing `temporaryBuffer`;
    +    /// - two or more pending `<br>`s stay buffered (the state remembers them);
    +    /// - inside preformatted text, pending whitespace stays in `temporaryBuffer`.
    +    /// States that are already at a block boundary are left untouched.
    +    mutating func startOrEndBlock() {
             switch blockState {
             case .start:
                 break
    +        case .emptyBlock:
    +            break
             case .nonEmptyBlock:
                 blockState = .emptyBlock
    -        case .emptyBlock:
    +        case .emittedSpace:
    +            blockState = .emptyBlock
    +            // The space emitted for collapsed whitespace is now trailing; take it back.
    +            removeChar()
    +        case .lineBreakTag:
    +            blockState = .emptyBlock
    +            // A single <br> right before a block boundary is discarded.
    +            temporaryBuffer = ""
    +        case .atLeastTwoLineBreakTags:
    +            blockState = .emptyBlockWithAtLeastTwoPreviousLineBreakTags
    +        case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
    +            break
    +        case .beginListItem:
    +            break
    +        case .endListItem:
    +            blockState = .emptyBlock
    +        case .listItemContent:
    +            blockState = .emptyBlock
    +        case .emittedSpaceInListItemContent:
    +            blockState = .emptyBlock
    +            removeChar()
    +        case .lineBreakTagInListItemContent:
    +            blockState = .emptyBlock
    +            temporaryBuffer = ""
    +        case .atLeastTwoLineBreakTagsInListItemContent:
    +            blockState = .emptyBlockWithAtLeastTwoPreviousLineBreakTags
    +        case .preformattedStart(depth: _):
    +            break
    +        case .preformattedEmptyBlock(depth: _):
    +            break
    +        case .preformattedNonEmptyBlock(let depth):
    +            blockState = .preformattedEmptyBlock(depth: depth)
    +        case .preformattedLineBreak(depth: let depth):
    +            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
    +            temporaryBuffer.append(lineBreak)
    +        case .preformattedAtLeastTwoLineBreaks(let depth):
    +            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
    +        case .afterPreStartTag(let depth):
    +            blockState = .preformattedEmptyBlock(depth: depth)
    +        case .afterPreStartTagWithLeadingWhitespace(let depth):
    +            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
    +        case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth):
    +            // Leave the "non-empty" state, as the start/end-block edge for this state in
    +            // BlockState.dot does. Staying here made every further block boundary buffer another
    +            // block break, and left a state that startPreformatted() treats as unreachable
    +            // (fatalError) when a <pre> tag follows trailing whitespace.
    +            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
    +            temporaryBuffer.append(blockBreak)
    +        case .preformattedEmptyBlockWithLeadingWhitespace(depth: _):
                 break
             }
         }
         
    -    mutating func continueBlock() {
    +    mutating func continueBlock(char: UnicodeScalar) -> Bool { // Advances the state machine by one text character. Returns true when the caller should emit `char` itself; false when it was skipped, replaced by a collapsed space, or held in temporaryBuffer.
             switch blockState {
             case .start:
    -            blockState = .nonEmptyBlock
    -        case .nonEmptyBlock:
    -            break
    +            if char.properties.isWhitespace {
    +                return false // leading whitespace is skipped
    +            } else {
    +                blockState = .nonEmptyBlock
    +                return true
    +            }
             case .emptyBlock:
    -            insertBlockBreak()
    -            blockState = .nonEmptyBlock
    +            if char.properties.isWhitespace {
    +                return false // whitespace between blocks is skipped
    +            } else {
    +                blockState = .nonEmptyBlock
    +                append(blockBreak) // the block break is only emitted once the new block has content
    +                return true
    +            }
    +        case .nonEmptyBlock:
    +            if char.properties.isWhitespace {
    +                blockState = .emittedSpace
    +                append(" ") // any whitespace character is emitted as a single space
    +                return false
    +            } else {
    +                return true
    +            }
    +        case .emittedSpace:
    +            if char.properties.isWhitespace {
    +                return false // the rest of a whitespace run collapses into the space already emitted
    +            } else {
    +                blockState = .nonEmptyBlock
    +                return true
    +            }
    +        case .lineBreakTag:
    +            if char.properties.isWhitespace {
    +                return false
    +            } else {
    +                blockState = .nonEmptyBlock
    +                append(temporaryBuffer) // content follows, so the buffered line break is emitted
    +                temporaryBuffer = ""
    +                return true
    +            }
    +        case .atLeastTwoLineBreakTags:
    +            if char.properties.isWhitespace {
    +                return false
    +            } else {
    +                blockState = .nonEmptyBlock
    +                append(temporaryBuffer)
    +                temporaryBuffer = ""
    +                return true
    +            }
    +        case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
    +            if char.properties.isWhitespace {
    +                return false
    +            } else {
    +                blockState = .nonEmptyBlock
    +                append(temporaryBuffer) // the buffered line breaks are emitted here; no blockBreak is appended in this state
    +                temporaryBuffer = ""
    +                return true
    +            }
    +        case .beginListItem:
    +            if char.properties.isWhitespace {
    +                return false
    +            } else {
    +                blockState = .listItemContent
    +                return true
    +            }
    +        case .endListItem:
    +            if char.properties.isWhitespace {
    +                return false
    +            } else {
    +                blockState = .listItemContent
    +                append(lineBreak) // content after an ended list item starts on a new line...
    +                append(listIndentForContentOutsideItem) // ...followed by the list indent
    +                return true
    +            }
    +        case .listItemContent:
    +            if char.properties.isWhitespace {
    +                blockState = .emittedSpaceInListItemContent
    +                append(" ")
    +                return false
    +            } else {
    +                return true
    +            }
    +        case .emittedSpaceInListItemContent:
    +            if char.properties.isWhitespace {
    +                return false
    +            } else {
    +                blockState = .listItemContent
    +                return true
    +            }
    +        case .lineBreakTagInListItemContent:
    +            if char.properties.isWhitespace {
    +                return false
    +            } else {
    +                blockState = .listItemContent
    +                append(temporaryBuffer)
    +                temporaryBuffer = ""
    +                return true
    +            }
    +        case .atLeastTwoLineBreakTagsInListItemContent:
    +            if char.properties.isWhitespace {
    +                return false
    +            } else {
    +                blockState = .listItemContent
    +                append(temporaryBuffer)
    +                temporaryBuffer = ""
    +                return true
    +            }
    +        case .preformattedStart(let depth):
    +            if char == "\n" { // only a newline is skipped here; any other character, including other whitespace, is emitted as content
    +                return false
    +            } else {
    +                blockState = .preformattedNonEmptyBlock(depth: depth)
    +                return true
    +            }
    +        case .preformattedEmptyBlock(depth: let depth):
    +            if char.properties.isWhitespace {
    +                blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
    +                temporaryBuffer.unicodeScalars.append(char) // whitespace is buffered verbatim rather than collapsed
    +                return false
    +            } else {
    +                blockState = .preformattedNonEmptyBlock(depth: depth)
    +                append(blockBreak)
    +                return true
    +            }
    +        case .preformattedNonEmptyBlock(let depth):
    +            if char == "\n" {
    +                blockState = .preformattedLineBreak(depth: depth)
    +                temporaryBuffer.append(lineBreak) // a newline is buffered as the renderer's lineBreak string
    +                return false
    +            } else if char.properties.isWhitespace {
    +                blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
    +                temporaryBuffer.unicodeScalars.append(char) // held back until it is known not to be trailing
    +                return false
    +            } else {
    +                return true
    +            }
    +        case .preformattedLineBreak(let depth):
    +            if char == "\n" {
    +                blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
    +                temporaryBuffer.append(lineBreak)
    +                return false
    +            } else if char.properties.isWhitespace {
    +                blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
    +                temporaryBuffer.unicodeScalars.append(char)
    +                return false
    +            } else {
    +                blockState = .preformattedNonEmptyBlock(depth: depth)
    +                append(temporaryBuffer)
    +                temporaryBuffer = ""
    +                return true
    +            }
    +        case .preformattedAtLeastTwoLineBreaks(let depth):
    +            if char.properties.isWhitespace { // NOTE(review): BlockState.dot labels this self-loop "\n or <br>" only, while any whitespace is buffered here - confirm which is intended
    +                temporaryBuffer.unicodeScalars.append(char)
    +                return false
    +            } else {
    +                blockState = .preformattedNonEmptyBlock(depth: depth)
    +                append(temporaryBuffer)
    +                temporaryBuffer = ""
    +                return true
    +            }
    +        case .afterPreStartTag(let depth):
    +            if char == "\n" {
    +                blockState = .preformattedEmptyBlock(depth: depth) // the newline is skipped, not buffered
    +                return false
    +            } else {
    +                blockState = .preformattedNonEmptyBlock(depth: depth)
    +                append(blockBreak)
    +                return true
    +            }
    +        case .afterPreStartTagWithLeadingWhitespace(let depth):
    +            if char == "\n" {
    +                blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth) // the newline is skipped; only the state changes
    +                return false
    +            } else if char.properties.isWhitespace {
    +                blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
    +                temporaryBuffer.unicodeScalars.append(char)
    +                return false
    +            } else {
    +                blockState = .preformattedNonEmptyBlock(depth: depth)
    +                append(temporaryBuffer)
    +                temporaryBuffer = ""
    +                return true
    +            }
    +        case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth):
    +            if char == "\n" {
    +                blockState = .preformattedLineBreak(depth: depth)
    +                temporaryBuffer.append(lineBreak)
    +                return false
    +            } else if char.properties.isWhitespace {
    +                temporaryBuffer.unicodeScalars.append(char)
    +                return false
    +            } else {
    +                blockState = .preformattedNonEmptyBlock(depth: depth)
    +                append(temporaryBuffer) // the whitespace was not trailing after all, so it is emitted before the character
    +                temporaryBuffer = ""
    +                return true
    +            }
    +        case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
    +            if char == "\n" {
    +                blockState = .preformattedLineBreak(depth: depth)
    +                temporaryBuffer.append(lineBreak)
    +                return false
    +            } else if char.properties.isWhitespace {
    +                temporaryBuffer.unicodeScalars.append(char)
    +                return false
    +            } else {
    +                blockState = .preformattedNonEmptyBlock(depth: depth)
    +                append(temporaryBuffer)
    +                temporaryBuffer = ""
    +                return true
    +            }
             }
         }
         
    +    /// Handles a line break tag (`<br>`).
    +    ///
    +    /// Before any content the break is dropped. In an empty block, or right at the beginning of a
    +    /// list item, it is appended to the output immediately. In every other state it is collected
    +    /// in `temporaryBuffer`, and `blockState` records whether one or several breaks are pending.
    +    mutating func breakTag() {
    +        switch blockState {
    +        case .start, .preformattedStart(depth: _):
    +            // Nothing has been emitted yet: ignore the break.
    +            break
    +        case .emptyBlock, .emptyBlockWithAtLeastTwoPreviousLineBreakTags, .beginListItem:
    +            // Emitted directly; the state does not change.
    +            append(lineBreak)
    +        case .nonEmptyBlock, .emittedSpace:
    +            // First pending break after regular content.
    +            blockState = .lineBreakTag
    +            temporaryBuffer.append(lineBreak)
    +        case .lineBreakTag:
    +            blockState = .atLeastTwoLineBreakTags
    +            temporaryBuffer.append(lineBreak)
    +        case .endListItem, .listItemContent, .emittedSpaceInListItemContent:
    +            // First pending break in the list item states.
    +            blockState = .lineBreakTagInListItemContent
    +            temporaryBuffer.append(lineBreak)
    +        case .lineBreakTagInListItemContent:
    +            blockState = .atLeastTwoLineBreakTagsInListItemContent
    +            temporaryBuffer.append(lineBreak)
    +        case .atLeastTwoLineBreakTags,
    +             .atLeastTwoLineBreakTagsInListItemContent,
    +             .preformattedAtLeastTwoLineBreaks(depth: _):
    +            // Already tracking several breaks: just buffer one more.
    +            temporaryBuffer.append(lineBreak)
    +        case .preformattedEmptyBlock(let preDepth),
    +             .preformattedNonEmptyBlock(let preDepth),
    +             .preformattedNonEmptyBlockWithTrailingWhitespace(let preDepth),
    +             .preformattedEmptyBlockWithLeadingWhitespace(let preDepth):
    +            blockState = .preformattedLineBreak(depth: preDepth)
    +            temporaryBuffer.append(lineBreak)
    +        case .preformattedLineBreak(let preDepth):
    +            blockState = .preformattedAtLeastTwoLineBreaks(depth: preDepth)
    +            temporaryBuffer.append(lineBreak)
    +        case .afterPreStartTag(let preDepth):
    +            // A block break is buffered ahead of the line break in this state.
    +            blockState = .preformattedLineBreak(depth: preDepth)
    +            temporaryBuffer.append(blockBreak)
    +            temporaryBuffer.append(lineBreak)
    +        case .afterPreStartTagWithLeadingWhitespace(let preDepth):
    +            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: preDepth)
    +            temporaryBuffer.append(lineBreak)
    +        }
    +    }
    +    
    +    /// Enters a `<pre>` element: starts at depth 1 from a non-preformatted state, or increments
    +    /// the depth when already inside preformatted text.
    +    ///
    +    /// Only the states listed explicitly below are handled; every other state traps with
    +    /// `fatalError("unreachable")`.
    +    /// NOTE(review): the visible call site runs `startOrEndBlock()` immediately before this
    +    /// method - confirm that it always leaves one of the handled states.
    +    mutating func startPreformatted() {
    +        switch blockState {
    +        case .start:
    +            blockState = .preformattedStart(depth: 1)
    +        case .emptyBlock:
    +            blockState = .afterPreStartTag(depth: 1)
    +        case .emptyBlockWithAtLeastTwoPreviousLineBreakTags, .beginListItem:
    +            blockState = .afterPreStartTagWithLeadingWhitespace(depth: 1)
    +        case .preformattedStart(let outerDepth):
    +            blockState = .preformattedStart(depth: outerDepth + 1)
    +        case .preformattedEmptyBlock(let outerDepth):
    +            blockState = .afterPreStartTag(depth: outerDepth + 1)
    +        case .preformattedEmptyBlockWithLeadingWhitespace(let outerDepth):
    +            blockState = .afterPreStartTagWithLeadingWhitespace(depth: outerDepth + 1)
    +        case .nonEmptyBlock,
    +             .emittedSpace,
    +             .lineBreakTag,
    +             .atLeastTwoLineBreakTags,
    +             .endListItem,
    +             .listItemContent,
    +             .emittedSpaceInListItemContent,
    +             .lineBreakTagInListItemContent,
    +             .atLeastTwoLineBreakTagsInListItemContent,
    +             .preformattedNonEmptyBlock(depth: _),
    +             .preformattedLineBreak(depth: _),
    +             .preformattedAtLeastTwoLineBreaks(depth: _),
    +             .afterPreStartTag(depth: _),
    +             .afterPreStartTagWithLeadingWhitespace(depth: _),
    +             .preformattedNonEmptyBlockWithTrailingWhitespace(depth: _):
    +            fatalError("unreachable")
    +        }
    +    }
    +    
    +    mutating func endPreformatted() { // State transition for leaving one preformatted level: lowers depth by one, or returns to a non-preformatted state once depth <= 1.
    +        switch blockState {
    +        case .start:
    +            break // this state carries no preformatted depth, so there is nothing to undo
    +        case .emptyBlock:
    +            break
    +        case .nonEmptyBlock:
    +            fatalError("unreachable") // this and every other fatalError case below: the state is treated as impossible here, and reaching it traps
    +        case .emittedSpace:
    +            fatalError("unreachable")
    +        case .lineBreakTag:
    +            fatalError("unreachable")
    +        case .atLeastTwoLineBreakTags:
    +            fatalError("unreachable")
    +        case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
    +            break
    +        case .beginListItem:
    +            break
    +        case .endListItem:
    +            fatalError("unreachable")
    +        case .listItemContent:
    +            fatalError("unreachable")
    +        case .emittedSpaceInListItemContent:
    +            fatalError("unreachable")
    +        case .lineBreakTagInListItemContent:
    +            fatalError("unreachable")
    +        case .atLeastTwoLineBreakTagsInListItemContent:
    +            fatalError("unreachable")
    +        case .preformattedStart(let depth):
    +            if depth <= 1 { // closing the outermost level: fall back to the plain .start state
    +                blockState = .start
    +            } else {
    +                blockState = .preformattedStart(depth: depth - 1)
    +            }
    +        case .preformattedEmptyBlock(let depth):
    +            if depth <= 1 { // closing the outermost level: fall back to the plain .emptyBlock state
    +                blockState = .emptyBlock
    +            } else {
    +                blockState = .preformattedEmptyBlock(depth: depth - 1)
    +            }
    +        case .preformattedNonEmptyBlock(depth: _):
    +            fatalError("unreachable")
    +        case .preformattedLineBreak(depth: _):
    +            fatalError("unreachable")
    +        case .preformattedAtLeastTwoLineBreaks(depth: _):
    +            fatalError("unreachable")
    +        case .afterPreStartTag(depth: _):
    +            fatalError("unreachable")
    +        case .afterPreStartTagWithLeadingWhitespace(depth: _):
    +            fatalError("unreachable")
    +        case .preformattedNonEmptyBlockWithTrailingWhitespace(depth: _):
    +            fatalError("unreachable")
    +        case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
    +            if depth <= 1 {
    +                blockState = .emptyBlock
    +                temporaryBuffer = "" // pending buffer content is discarded here, not emitted
    +            } else {
    +                if temporaryBuffer.count >= 2 { // at least one buffered character survives the removal below, so stay in the leading-whitespace state
    +                    temporaryBuffer.removeLast()
    +                    blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth - 1)
    +                } else {
    +                    temporaryBuffer.removeLast() // NOTE(review): removeLast() traps on an empty buffer — confirm this state always has buffered content
    +                    blockState = .preformattedEmptyBlock(depth: depth - 1)
    +                }
    +            }
    +        }
    +    }
    +    
    +    mutating func startListItem() { // Moves to .beginListItem, first emitting whatever separator (if any) the current state calls for.
    +        switch blockState {
    +        case .start:
    +            blockState = .beginListItem // no separator is appended from the .start state
    +        case .emptyBlock:
    +            blockState = .beginListItem
    +            append(blockBreak) // coming from a block: separate with a block break
    +        case .nonEmptyBlock:
    +            blockState = .beginListItem
    +            append(blockBreak)
    +        case .beginListItem:
    +            break // already at the beginning of an item: no change, nothing emitted
    +        case .endListItem:
    +            blockState = .beginListItem
    +            append(lineBreak) // coming from a list item: separate with a single line break
    +        case .listItemContent:
    +            blockState = .beginListItem
    +            append(lineBreak)
    +        case .emittedSpaceInListItemContent:
    +            blockState = .beginListItem
    +            removeChar() // remove the most recently emitted character before breaking the line
    +            append(lineBreak)
    +        case .lineBreakTagInListItemContent:
    +            blockState = .beginListItem
    +            append(temporaryBuffer) // flush the buffered content, then clear the buffer
    +            temporaryBuffer = ""
    +            append(lineBreak)
    +        case .atLeastTwoLineBreakTagsInListItemContent:
    +            blockState = .beginListItem
    +            append(temporaryBuffer)
    +            temporaryBuffer = ""
    +            append(lineBreak)
    +        default: // every state not listed above, including all preformatted ones, is left untouched
    +            break
    +        }
    +    }
    +    
    +    mutating func endListItem() { // Moves to .endListItem from the block and list-item content states handled below; nothing is appended on any path.
    +        switch blockState {
    +        case .emptyBlock:
    +            blockState = .endListItem
    +        case .nonEmptyBlock:
    +            blockState = .endListItem
    +        case .listItemContent:
    +            blockState = .endListItem
    +        case .emittedSpaceInListItemContent:
    +            blockState = .endListItem
    +            removeChar() // remove the most recently emitted character
    +        case .lineBreakTagInListItemContent:
    +            blockState = .endListItem
    +            temporaryBuffer = "" // buffered content is dropped rather than emitted
    +        case .atLeastTwoLineBreakTagsInListItemContent:
    +            blockState = .endListItem
    +            temporaryBuffer = ""
    +        default: // remaining states (e.g. .start, .beginListItem, all preformatted ones) are left untouched
    +            break
    +        }
    +    }
    +    
    +    mutating func endBlocks() { // End-of-input cleanup: TextConverter calls this right before its final finishRun().
    +        switch blockState {
    +        case .emittedSpace:
    +            removeChar() // input ended in an emitted-space state: take the last emitted character back out
    +        case .emittedSpaceInListItemContent:
    +            removeChar()
    +        default: // blockState and temporaryBuffer are not modified by this method
    +            break
    +        }
    +    }
     }
     
     enum BlockState: Equatable {
         case start
    -    case nonEmptyBlock
         case emptyBlock
    +    case nonEmptyBlock
    +    case emittedSpace // endBlocks() calls removeChar() if input ends in this state
    +    case lineBreakTag
    +    case atLeastTwoLineBreakTags
    +    case emptyBlockWithAtLeastTwoPreviousLineBreakTags
    +    case beginListItem // entered via startListItem()
    +    case endListItem // entered via endListItem()
    +    case listItemContent
    +    case emittedSpaceInListItemContent // list-item counterpart of emittedSpace; handled the same way by endBlocks()
    +    case lineBreakTagInListItemContent
    +    case atLeastTwoLineBreakTagsInListItemContent
    +    case preformattedStart(depth: Int) // depth in all preformatted cases: nesting level; startPreformatted() sets 1 or adds 1, endPreformatted() subtracts 1
    +    case preformattedEmptyBlock(depth: Int)
    +    case preformattedNonEmptyBlock(depth: Int)
    +    case preformattedLineBreak(depth: Int)
    +    case preformattedAtLeastTwoLineBreaks(depth: Int)
    +    case afterPreStartTag(depth: Int)
    +    case afterPreStartTagWithLeadingWhitespace(depth: Int)
    +    case preformattedNonEmptyBlockWithTrailingWhitespace(depth: Int)
    +    case preformattedEmptyBlockWithLeadingWhitespace(depth: Int)
     }
    diff --git a/Sources/HTMLStreamer/TextConverter.swift b/Sources/HTMLStreamer/TextConverter.swift
    index 57dac99..75acc4a 100644
    --- a/Sources/HTMLStreamer/TextConverter.swift
    +++ b/Sources/HTMLStreamer/TextConverter.swift
    @@ -8,7 +8,6 @@
     import Foundation
     
     public struct TextConverter: BlockRenderer {
    -    
         private let configuration: TextConverterConfiguration
         
         private var tokenizer: Tokenizer!
    @@ -16,6 +15,7 @@ public struct TextConverter: BlockRenderer {
         
         private var actionStack: [ElementAction] = []
         var blockState = BlockState.start
    +    var temporaryBuffer: String = ""
         private var currentElementIsEmpty = true
         private var currentRun = ""
         
    @@ -32,6 +32,7 @@ public struct TextConverter: BlockRenderer {
             str = ""
             
             blockState = .start
    +        temporaryBuffer = ""
             currentElementIsEmpty = true
             currentRun = ""
             
    @@ -39,12 +40,16 @@ public struct TextConverter: BlockRenderer {
                 switch token {
                 case .character(let scalar):
                     currentElementIsEmpty = false
    -                continueBlock()
    -                currentRun.unicodeScalars.append(scalar)
    +                if continueBlock(char: scalar) {
    +                    currentRun.unicodeScalars.append(scalar)
    +                }
                 case .characterSequence(let string):
                     currentElementIsEmpty = false
    -                continueBlock()
    -                currentRun.append(string)
    +                for c in string.unicodeScalars {
    +                    if continueBlock(char: c) {
    +                        currentRun.unicodeScalars.append(c)
    +                    }
    +                }
                 case .startTag(let name, let selfClosing, let attributes):
                     currentElementIsEmpty = true
                     let action = Callbacks.elementAction(name: name, attributes: attributes)
    @@ -66,6 +71,7 @@ public struct TextConverter: BlockRenderer {
                 }
             }
             
    +        endBlocks()
             finishRun()
             
             return str
    @@ -74,13 +80,9 @@ public struct TextConverter: BlockRenderer {
         private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
             switch name {
             case "br":
    -            if configuration.insertNewlines {
    -                currentRun.append("\n")
    -            } else {
    -                currentRun.append(" ")
    -            }
    +            breakTag()
             case "pre", "blockquote", "p", "ol", "ul":
    -            startOrFinishBlock()
    +            startOrEndBlock()
             default:
                 break
             }
    @@ -89,21 +91,45 @@ public struct TextConverter: BlockRenderer {
         private mutating func handleEndTag(_ name: String) {
             switch name {
             case "pre", "blockquote", "p", "ol", "ul":
    -            startOrFinishBlock()
    +            startOrEndBlock() // same call as for the matching start tags in handleStartTag; here it is followed by finishRun()
                 finishRun()
             default:
                 break
             }
         }
         
    -    mutating func insertBlockBreak() {
    +    var blockBreak: String { // Block separator text; unlike the removed insertBlockBreak(), this only returns the string and leaves appending to the caller.
             if configuration.insertNewlines {
    -            currentRun.append("\n\n")
    +            "\n\n" // two newlines when newlines are enabled
             } else {
    -            currentRun.append(" ")
    +            " " // otherwise a single space
             }
         }
    -
    +    
    +    var lineBreak: String { // Line separator text: one newline when configuration.insertNewlines is set, otherwise a single space.
    +        if configuration.insertNewlines {
    +            "\n"
    +        } else {
    +            " " // same fallback as blockBreak, so both separators are a single space in this mode
    +        }
    +    }
    +    
    +    var listIndentForContentOutsideItem: String { // Constant single space, independent of configuration. NOTE(review): no use of this property is visible in this chunk.
    +        " "
    +    }
    +    
    +    mutating func append(_ s: String) { // Appends s to the pending run (currentRun); str is not touched here.
    +        currentRun.append(s)
    +    }
    +    
    +    mutating func removeChar() { // Removes the most recently emitted character: from the pending run if it has one, otherwise from the already-finished output in str.
    +        if !currentRun.isEmpty {
    +            currentRun.removeLast()
    +        } else if !str.isEmpty { // removeLast() traps on an empty String; finishRun() can clear currentRun (skip action), so both buffers may be empty here
    +            str.removeLast()
    +        }
    +    }
    +    
         private mutating func finishRun() {
             if actionStack.contains(.skip) {
                 currentRun = ""
    diff --git a/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift b/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift
    index 2dacd30..ed2e79e 100644
    --- a/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift
    +++ b/Tests/HTMLStreamerTests/AttributedStringConverterTests.swift
    @@ -327,6 +327,15 @@ final class AttributedStringConverterTests: XCTestCase {
             XCTAssertEqual(convert("a
    1. b
    2. c
    "), result) } + func testListItemOutsideList() { + let result = NSAttributedString(string: "a", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
  • a
  • "), result) + } + func testSkipElementActionFollowingUnfinishedRun() { struct Callbacks: HTMLConversionCallbacks { static func elementAction(name: String, attributes: [Attribute]) -> ElementAction { @@ -347,7 +356,7 @@ final class AttributedStringConverterTests: XCTestCase { XCTAssertEqual(convert(""), .init()) } - func testWTF() { + func testMultipleClosingBlockTagsBeforeOpeningBlockTag() { let result = NSMutableAttributedString() result.append(NSAttributedString(string: "a", attributes: [ .font: italicFont, @@ -362,4 +371,234 @@ final class AttributedStringConverterTests: XCTestCase { XCTAssertEqual(convert(#"

    a

    b

    "#), result) } + func testNewlineBetweenClosingAndOpeningBlockTag() { + let result = NSAttributedString(string: "a\n\nb", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("

    a

    \n

    b

    "), result) + XCTAssertEqual(convert("

    a

    \nb

    "), result) + } + + func testEndAfterNewlineInBlockContent() { + let result = NSAttributedString(string: "a", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("

    a\n\n

    "), result) + XCTAssertEqual(convert("

    a\n\n

    \n"), result) + XCTAssertEqual(convert("

    \n\na

    "), result) + XCTAssertEqual(convert("

    \n\na

    \n"), result) + let result2 = NSAttributedString(string: "a b", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("

    a\n\n\nb

    "), result2) + } + + func testBRAtBlockElementBoundary() { + let two = NSAttributedString(string: "a\n\nb", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("

    a

    b

    "), two) + let three = NSAttributedString(string: "a\n\n\nb", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("

    a


    b

    "), three) + } + + func testPreFollowedByP() { + let result = NSMutableAttributedString() + result.append(NSAttributedString(string: "a", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ])) + result.append(NSAttributedString(string: "\n\nb", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ])) + XCTAssertEqual(convert("
    a

    b

    "), result) + } + + func testPreFollowedByPre() { + let result = NSAttributedString(string: "a\n\nb", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    a
    b
    "), result) + } + + func testBRAtPreBoundary() { + let two = NSAttributedString(string: "a\n\nb", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    a
    b
    "), two) + let three = NSAttributedString(string: "a\n\n\nb", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    a

    b
    "), three) + } + + func testNestedPre() { + let one = NSAttributedString(string: "a", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    a
    "), one) + let two = NSAttributedString(string: "a\n\nb", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    a
    b
    "), two) + XCTAssertEqual(convert("
    a
    b
    "), two) + let three = NSAttributedString(string: "a\n\n\nb", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    a

    b
    "), three) + } + + func testIgnoreLeadingNewlineInPre() { + let one = NSAttributedString(string: "a", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    \na
    "), one) + let two = NSMutableAttributedString() + two.append(NSAttributedString(string: "a", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ])) + two.append(NSAttributedString(string: "\n\nb", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ])) + XCTAssertEqual(convert("a
    \nb
    "), two) + } + + func testPreFollowingChar() { + let result = NSMutableAttributedString() + result.append(NSAttributedString(string: "a", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ])) + result.append(NSAttributedString(string: "\n\nb", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ])) + XCTAssertEqual(convert("a
    b
    "), result) + } + + func testSkipLeadingTrailingWhitespace() { + let result = NSAttributedString(string: "a", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert(" \n\ta"), result) + XCTAssertEqual(convert(" \n\t

    a

    "), result) + XCTAssertEqual(convert("a \n\t"), result) + XCTAssertEqual(convert("

    a

    \n\t"), result) + + let pre = NSAttributedString(string: "a", attributes: [ + .font: monospaceFont, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert(" \n\t
    a
    "), pre) + XCTAssertEqual(convert("
    a
    \n\t"), pre) + } + + func testWhitespaceCollapsing() { + let result = NSAttributedString(string: "a b", attributes: [ + .font: font, + .paragraphStyle: NSParagraphStyle.default, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("

    a \t\nb

    "), result) + } + + func testParagraphInsideListItem() { + let result = NSAttributedString(string: "\t1.\ta\n\t2.\tb", attributes: [ + .font: font, + .paragraphStyle: listParagraphStyle, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    1. a

    2. b

    "), result) + } + + func testBreakBetweenListItems() { + let result = NSAttributedString(string: "\t1.\ta\n\n\t2.\tb", attributes: [ + .font: font, + .paragraphStyle: listParagraphStyle, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    1. a

    2. b
    "), result) + } + + func testCharacterBetweenListItems() { + let result = NSAttributedString(string: "\t1.\ta\n\t\tc\n\t2.\tb", attributes: [ + .font: font, + .paragraphStyle: listParagraphStyle, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    1. a
    2. c
    3. b
    "), result) + XCTAssertEqual(convert("
    1. a
    2. c
    3. b
    "), result) + } + + func testWhitespaceCollapsingInTextBetweenListItems() { + let result = NSAttributedString(string: "\t1.\ta\n\t\tc d\n\t2.\tb", attributes: [ + .font: font, + .paragraphStyle: listParagraphStyle, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    1. a
    2. c d
    3. b
    "), result) + } + + func testImplicitlyClosedListItem() { + let result = NSAttributedString(string: "\t1.\ta\n\t2.\tb", attributes: [ + .font: font, + .paragraphStyle: listParagraphStyle, + .foregroundColor: color, + ]) + XCTAssertEqual(convert("
    1. a
    2. b
    "), result) + } + + func testPreInsideListItem() { + let result = NSMutableAttributedString() + result.append(NSAttributedString(string: "\t1.\t", attributes: [ + .font: font, + .paragraphStyle: listParagraphStyle, + .foregroundColor: color, + ])) + result.append(NSAttributedString(string: "a", attributes: [ + .font: monospaceFont, + .paragraphStyle: listParagraphStyle, + .foregroundColor: color, + ])) + XCTAssertEqual(convert("
    1. a
    "), result) + } + }