Compare commits
2 Commits
fa03efedbb
...
a2ca8fd650
Author | SHA1 | Date |
---|---|---|
Shadowfacts | a2ca8fd650 | |
Shadowfacts | 1f26c4923c |
|
@ -0,0 +1,126 @@
|
|||
digraph blockstate {
|
||||
/* rankdir=LR; */
|
||||
node [shape = doublecircle, fontsize = 18]; end;
|
||||
node [shape = circle, fontsize = 18];
|
||||
edge [fontsize = 18];
|
||||
init [label = "", shape=none, height = .0, width = .0];
|
||||
start;
|
||||
emptyBlock [label = "empty block"];
|
||||
nonEmptyBlock [label = "non-empty block"];
|
||||
emittedSpace [label = "emitted space"];
|
||||
lineBreakTag [label = "line break tag"];
|
||||
atLeastTwoLineBreakTags [label = ">=2 line break tags"];
|
||||
emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "empty block w/ >=2 prev line break tags"];
|
||||
beginListItem [label = "begin list item"];
|
||||
endListItem [label = "end list item"];
|
||||
listItemContent [label = "list item content"];
|
||||
emittedSpaceInListItemContent [label = "emitted space in text in list item content"];
|
||||
lineBreakTagInListItemContent [label = "line break tag in list item content"];
|
||||
atLeastTwoLineBreakTagsInListItemContent [label = ">= 2 line break tags in list item content"];
|
||||
preformattedStart [label = "preformatted start"];
|
||||
preformattedEmptyBlock [label = "preformatted empty block"];
|
||||
preformattedNonEmptyBlock [label = "preformatted non-empty block"];
|
||||
preformattedLineBreak [label = "preformatted line break"];
|
||||
preformattedAtLeastTwoLineBreaks [label = "preformatted >=2 line breaks"];
|
||||
afterPreStartTag [label = "after <pre> start tag"];
|
||||
afterPreStartTagWithLeadingWhitespace [label = "after <pre> start tag w/ leading whitespace"];
|
||||
preformattedNonEmptyBlockWithTrailingWhitespace [label = "preformatted non-empty block w/ trailing whitespace"];
|
||||
preformattedEmptyBlockWithLeadingWhitespace [label = "preformatted empty block w/ leading whitespace"];
|
||||
|
||||
init -> start;
|
||||
start -> start [label = "whitespace (skip)\n<br> (skip)\n</pre>\nstart/end block"];
|
||||
start -> nonEmptyBlock [label = "non-whitespace"];
|
||||
start -> preformattedStart [label = "<pre> (depth = 1)"];
|
||||
start -> beginListItem [label = "<li>"];
|
||||
nonEmptyBlock -> nonEmptyBlock [label = "non-whitespace"];
|
||||
nonEmptyBlock -> emptyBlock [label = "start/end block"];
|
||||
nonEmptyBlock -> emittedSpace [label = "whitespace (emit space)"];
|
||||
nonEmptyBlock -> lineBreakTag [label = "<br> (append to tmp)"];
|
||||
nonEmptyBlock -> beginListItem [label = "<li>"];
|
||||
nonEmptyBlock -> endListItem [label = "</li>"];
|
||||
emittedSpace -> nonEmptyBlock [label = "non-whitespace"];
|
||||
emittedSpace -> emittedSpace [label = "whitespace (skip)"];
|
||||
emittedSpace -> emptyBlock [label = "start/end block (remove 1)"];
|
||||
emittedSpace -> lineBreakTag [label = "<br> (append to tmp)"];
|
||||
emittedSpace -> end [label = "EOF (remove 1)"];
|
||||
emptyBlock -> nonEmptyBlock [label = "non-whitespace (block break)"];
|
||||
emptyBlock -> emptyBlock [label = "whitespace (skip)\n<br>\n</pre>\nstart/end block"];
|
||||
emptyBlock -> afterPreStartTag [label = "<pre> (depth = 1)"];
|
||||
emptyBlock -> beginListItem [label = "<li>"];
|
||||
emptyBlock -> endListItem [label = "</li>"];
|
||||
lineBreakTag -> lineBreakTag [label = "whitespace (skip)"];
|
||||
lineBreakTag -> atLeastTwoLineBreakTags [label = "<br> (append to tmp)"];
|
||||
lineBreakTag -> emptyBlock [label = "start/end block (clear tmp)"];
|
||||
lineBreakTag -> nonEmptyBlock [label = "non-whitespace (emit tmp)"];
|
||||
atLeastTwoLineBreakTags -> atLeastTwoLineBreakTags [label = "whitespace (skip)\n<br> (append to tmp)"];
|
||||
atLeastTwoLineBreakTags -> nonEmptyBlock [label = "non-whitespace (emit tmp)"];
|
||||
atLeastTwoLineBreakTags -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "start/end block"];
|
||||
emptyBlockWithAtLeastTwoPreviousLineBreakTags -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "whitespace (skip)\n<br>\n</pre>\nstart/end block"];
|
||||
emptyBlockWithAtLeastTwoPreviousLineBreakTags -> nonEmptyBlock [label = "non-whitespace (emit tmp)"];
|
||||
emptyBlockWithAtLeastTwoPreviousLineBreakTags -> afterPreStartTagWithLeadingWhitespace [label = "<pre> (depth = 1)"];
|
||||
beginListItem -> beginListItem [label = "<li>\nwhitespace (skip)\n<br>\nstart/end block"];
|
||||
beginListItem -> listItemContent [label = "non-whitespace"];
|
||||
beginListItem -> endListItem [label = "</li>"];
|
||||
beginListItem -> afterPreStartTagWithLeadingWhitespace [label = "<pre>"];
|
||||
endListItem -> endListItem [label = "whitespace (skip)\n</li>"];
|
||||
endListItem -> beginListItem [label = "<li> (line break)"];
|
||||
endListItem -> emptyBlock [label = "start/end block"];
|
||||
endListItem -> listItemContent [label = "non-whitespace (line break, indent)"];
|
||||
endListItem -> lineBreakTagInListItemContent [label = "<br> (append to tmp)"];
|
||||
listItemContent -> listItemContent [label = "non-whitespace"];
|
||||
listItemContent -> beginListItem [label = "<li> (line break)"];
|
||||
listItemContent -> lineBreakTagInListItemContent [label = "<br> (append to tmp)"];
|
||||
listItemContent -> emittedSpaceInListItemContent [label = "whitespace (emit space)"];
|
||||
listItemContent -> emptyBlock [label = "start/end block"];
|
||||
listItemContent -> endListItem [label = "</li>"];
|
||||
emittedSpaceInListItemContent -> emittedSpaceInListItemContent [label = "whitespace (skip)"];
|
||||
emittedSpaceInListItemContent -> listItemContent [label = "non-whitespace"];
|
||||
emittedSpaceInListItemContent -> end [label = "EOF (remove 1)"];
|
||||
emittedSpaceInListItemContent -> emptyBlock [label = "start/end block (remove 1)"];
|
||||
emittedSpaceInListItemContent -> beginListItem [label = "<li> (remove 1, line break)"];
|
||||
emittedSpaceInListItemContent -> lineBreakTagInListItemContent [label = "<br> (append to tmp)"];
|
||||
emittedSpaceInListItemContent -> endListItem [label = "</li> (remove 1)"];
|
||||
lineBreakTagInListItemContent -> lineBreakTagInListItemContent [label = "whitespace (skip)"];
|
||||
lineBreakTagInListItemContent -> emptyBlock [label = "start/end block (clear tmp)"];
|
||||
lineBreakTagInListItemContent -> beginListItem [label = "<li> (emit tmp, line break)"];
|
||||
lineBreakTagInListItemContent -> listItemContent [label = "non-whitespace (emit tmp)"];
|
||||
lineBreakTagInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "<br> (append to tmp)"];
|
||||
lineBreakTagInListItemContent -> endListItem [label = "</li> (clear tmp)"];
|
||||
atLeastTwoLineBreakTagsInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "<br> (append to tmp)\nwhitespace (skip)"];
|
||||
atLeastTwoLineBreakTagsInListItemContent -> beginListItem [label = "<li> (emit tmp, line break)"];
|
||||
atLeastTwoLineBreakTagsInListItemContent -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "start/end block"];
|
||||
atLeastTwoLineBreakTagsInListItemContent -> listItemContent [label = "non-whitespace (emit tmp)"];
|
||||
atLeastTwoLineBreakTagsInListItemContent -> endListItem [label = "</li> (clear tmp)"];
|
||||
afterPreStartTag -> preformattedLineBreak [label = "<br> (append to tmp, append block break to tmp)"];
|
||||
afterPreStartTag -> preformattedNonEmptyBlock [label = "non \\n (block break)"];
|
||||
afterPreStartTag -> preformattedEmptyBlock [label = "\\n (skip)\nstart/end block"];
|
||||
preformattedLineBreak -> preformattedNonEmptyBlock [label = "non-whitespace (emit tmp)"];
|
||||
preformattedLineBreak -> preformattedNonEmptyBlockWithTrailingWhitespace [label = "other whitespace (append to tmp)"];
|
||||
preformattedLineBreak -> preformattedAtLeastTwoLineBreaks [label = "\\n or <br> (append to tmp)"];
|
||||
preformattedAtLeastTwoLineBreaks -> preformattedAtLeastTwoLineBreaks [label = "\\n or <br> (append to tmp)"];
|
||||
preformattedAtLeastTwoLineBreaks -> preformattedNonEmptyBlock [label = "non \\n or <br> (emit tmp)"];
|
||||
preformattedAtLeastTwoLineBreaks -> preformattedEmptyBlockWithLeadingWhitespace [label = "start/end block"];
|
||||
preformattedEmptyBlockWithLeadingWhitespace -> preformattedEmptyBlockWithLeadingWhitespace [label = "whitespace (append to tmp)\nstart/end block\n</pre> if depth>1&&tmp.count>=2 (depth - 1, remove 1 from tmp)"];
|
||||
preformattedEmptyBlockWithLeadingWhitespace -> preformattedLineBreak [label = "\\n or <br> (append to tmp)"];
|
||||
preformattedEmptyBlockWithLeadingWhitespace -> afterPreStartTagWithLeadingWhitespace [label = "<pre> (depth + 1)"];
|
||||
preformattedEmptyBlockWithLeadingWhitespace -> preformattedEmptyBlock [label = "</pre> if depth>1&&tmp.count<2 (depth - 1, remove 1 from tmp)"];
|
||||
preformattedEmptyBlockWithLeadingWhitespace -> emptyBlock [label = "</pre> if depth<=1 (clear tmp)"];
|
||||
preformattedEmptyBlock -> preformattedEmptyBlock [label = "start/end block\n</pre> if depth>1 (depth - 1)"];
|
||||
preformattedEmptyBlock -> afterPreStartTag [label = "<pre> (depth + 1)"];
|
||||
preformattedEmptyBlock -> preformattedNonEmptyBlock [label = "non-whitespace (block break)"];
|
||||
preformattedEmptyBlock -> preformattedEmptyBlockWithLeadingWhitespace [label = "whitespace (append to tmp)"];
|
||||
preformattedEmptyBlock -> preformattedLineBreak [label = "<br> (append to tmp)"];
|
||||
preformattedNonEmptyBlock -> preformattedNonEmptyBlock [label = "non-whitespace"];
|
||||
preformattedNonEmptyBlock -> preformattedLineBreak [label = "\\n or <br> (append to tmp)"];
|
||||
preformattedNonEmptyBlock -> preformattedNonEmptyBlockWithTrailingWhitespace [label = "other whitespace (append to tmp)"];
|
||||
preformattedNonEmptyBlock -> preformattedEmptyBlock [label = "start/end block"];
|
||||
preformattedNonEmptyBlockWithTrailingWhitespace -> preformattedNonEmptyBlockWithTrailingWhitespace [label = "whitespace (append to tmp)"];
|
||||
preformattedNonEmptyBlockWithTrailingWhitespace -> preformattedNonEmptyBlock [label = "non-whitespace (emit tmp)"];
|
||||
preformattedNonEmptyBlockWithTrailingWhitespace -> preformattedLineBreak [label = "\\n or <br> (append to tmp)"];
|
||||
preformattedNonEmptyBlockWithTrailingWhitespace -> preformattedEmptyBlockWithLeadingWhitespace [label = "start/end block (append block break to tmp)"];
|
||||
afterPreStartTagWithLeadingWhitespace -> preformattedNonEmptyBlock [label = "non-whitespace (emit tmp)"];
|
||||
afterPreStartTagWithLeadingWhitespace -> preformattedEmptyBlockWithLeadingWhitespace [label = "\\n (skip)\nother whitespace (append to tmp)\n<br> (append to tmp)\nstart/end block"];
|
||||
preformattedStart -> preformattedStart [label = "<pre> (depth + 1)\n</pre> if depth>1 (depth - 1)\n\\n or <br> (skip)\nstart/end block"];
|
||||
preformattedStart -> start [label = "</pre> if depth<=1"];
|
||||
preformattedStart -> preformattedNonEmptyBlock [label = "non \\n"];
|
||||
}
|
|
@ -17,7 +17,7 @@ private typealias PlatformFont = UIFont
|
|||
private typealias PlatformFont = NSFont
|
||||
#endif
|
||||
|
||||
public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
||||
public class AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
|
||||
private let configuration: AttributedStringConverterConfiguration
|
||||
private var fontCache: [FontTrait: PlatformFont] = [:]
|
||||
|
||||
|
@ -26,13 +26,13 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
|||
|
||||
private var actionStack: [ElementAction] = []
|
||||
private var styleStack: [Style] = []
|
||||
var blockState = BlockState.start
|
||||
private var blockStateMachine = BlockStateMachine(blockBreak: "", lineBreak: "", listIndentForContentOutsideItem: "", append: { _ in }, removeChar: {})
|
||||
private var currentElementIsEmpty = true
|
||||
private var previouslyFinishedListItem = false
|
||||
// The current run of text w/o styles changing
|
||||
private var currentRun: String = ""
|
||||
|
||||
public init(configuration: AttributedStringConverterConfiguration) where Callbacks == DefaultCallbacks {
|
||||
public convenience init(configuration: AttributedStringConverterConfiguration) where Callbacks == DefaultCallbacks {
|
||||
self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
|
||||
}
|
||||
|
||||
|
@ -40,13 +40,17 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
|||
self.configuration = configuration
|
||||
}
|
||||
|
||||
public mutating func convert(html: String) -> NSAttributedString {
|
||||
public func convert(html: String) -> NSAttributedString {
|
||||
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
|
||||
str = NSMutableAttributedString()
|
||||
|
||||
actionStack = []
|
||||
styleStack = []
|
||||
blockState = .start
|
||||
blockStateMachine = BlockStateMachine(blockBreak: "\n\n", lineBreak: "\n", listIndentForContentOutsideItem: "\t\t", append: { [unowned self] in
|
||||
self.append($0)
|
||||
}, removeChar: { [unowned self] in
|
||||
self.removeChar()
|
||||
})
|
||||
currentElementIsEmpty = true
|
||||
previouslyFinishedListItem = false
|
||||
currentRun = ""
|
||||
|
@ -55,12 +59,16 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
|||
switch token {
|
||||
case .character(let c):
|
||||
currentElementIsEmpty = false
|
||||
continueBlock()
|
||||
currentRun.unicodeScalars.append(c)
|
||||
if blockStateMachine.continueBlock(char: c) {
|
||||
currentRun.unicodeScalars.append(c)
|
||||
}
|
||||
case .characterSequence(let s):
|
||||
currentElementIsEmpty = false
|
||||
continueBlock()
|
||||
currentRun.append(s)
|
||||
for c in s.unicodeScalars {
|
||||
if blockStateMachine.continueBlock(char: c) {
|
||||
currentRun.unicodeScalars.append(c)
|
||||
}
|
||||
}
|
||||
case .comment:
|
||||
// ignored
|
||||
continue
|
||||
|
@ -87,14 +95,15 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
|||
}
|
||||
}
|
||||
|
||||
blockStateMachine.endBlocks()
|
||||
finishRun()
|
||||
|
||||
return str
|
||||
}
|
||||
|
||||
private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
|
||||
private func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
|
||||
if name == "br" {
|
||||
currentRun.append("\n")
|
||||
blockStateMachine.breakTag()
|
||||
return
|
||||
}
|
||||
// self closing tags are ignored since they have no content
|
||||
|
@ -126,29 +135,25 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
|||
finishRun()
|
||||
styleStack.append(.monospace)
|
||||
case "pre":
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
blockStateMachine.startPreformatted()
|
||||
finishRun()
|
||||
styleStack.append(.monospace)
|
||||
case "blockquote":
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
finishRun()
|
||||
styleStack.append(.blockquote)
|
||||
case "p":
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
case "ol":
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
finishRun()
|
||||
styleStack.append(.orderedList(nextElementOrdinal: 1))
|
||||
case "ul":
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
finishRun()
|
||||
styleStack.append(.unorderedList)
|
||||
case "li":
|
||||
if previouslyFinishedListItem {
|
||||
currentRun.append("\n")
|
||||
} else {
|
||||
continueBlock()
|
||||
}
|
||||
let marker: String
|
||||
if case .orderedList(let nextElementOrdinal) = styleStack.last {
|
||||
marker = orderedTextList.marker(forItemNumber: nextElementOrdinal)
|
||||
|
@ -158,13 +163,14 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
|||
} else {
|
||||
break
|
||||
}
|
||||
blockStateMachine.startListItem()
|
||||
currentRun.append("\t\(marker)\t")
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
private mutating func handleEndTag(_ name: String) {
|
||||
private func handleEndTag(_ name: String) {
|
||||
switch name {
|
||||
case "a":
|
||||
if case .link(.some(_)) = lastStyle(.link) {
|
||||
|
@ -186,38 +192,60 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
|||
case "pre":
|
||||
finishRun()
|
||||
removeLastStyle(.monospace)
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
blockStateMachine.endPreformatted()
|
||||
case "blockquote":
|
||||
finishRun()
|
||||
removeLastStyle(.blockquote)
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
case "p":
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
case "ol":
|
||||
finishRun()
|
||||
removeLastStyle(.orderedList)
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
previouslyFinishedListItem = false
|
||||
case "ul":
|
||||
finishRun()
|
||||
removeLastStyle(.unorderedList)
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
previouslyFinishedListItem = false
|
||||
case "li":
|
||||
finishRun()
|
||||
previouslyFinishedListItem = true
|
||||
blockStateMachine.endListItem()
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
mutating func insertBlockBreak() {
|
||||
currentRun.append("\n\n")
|
||||
/// Separator text for a break between two blocks: two newlines.
var blockBreak: String {
    return "\n\n"
}
|
||||
|
||||
/// Separator text for a single line break: one newline.
var lineBreak: String {
    return "\n"
}
|
||||
|
||||
/// Indentation (two tabs) for content that appears in a list but outside of any list item.
var listIndentForContentOutsideItem: String {
    return "\t\t"
}
|
||||
|
||||
/// Adds `s` to the end of the current run of text.
///
/// - Parameter s: The text to add to `currentRun`.
func append(_ s: String) {
    currentRun += s
}
|
||||
|
||||
/// Removes the most recently emitted character.
///
/// The character is taken from the pending `currentRun` when that is non-empty;
/// otherwise it is deleted from the end of the already-built `str`. When nothing
/// has been emitted at all, this is a no-op rather than an out-of-bounds delete.
func removeChar() {
    if !currentRun.isEmpty {
        currentRun.removeLast()
    } else if str.length > 0 {
        // The current run has already been flushed, so trim the attributed string instead.
        str.deleteCharacters(in: NSRange(location: str.length - 1, length: 1))
    }
    // Otherwise both buffers are empty: `str.length - 1` would be -1, which is an
    // invalid range for `deleteCharacters(in:)`, so there is nothing to do.
}
|
||||
|
||||
// Finds the last currently-open style of the given type.
|
||||
// We can't just use the last one because we need to handle mis-nested tags.
|
||||
private mutating func removeLastStyle(_ type: Style.StyleType) {
|
||||
private func removeLastStyle(_ type: Style.StyleType) {
|
||||
var i = styleStack.index(before: styleStack.endIndex)
|
||||
while i >= styleStack.startIndex {
|
||||
if styleStack[i].type == type {
|
||||
|
@ -252,7 +280,7 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
|||
return style
|
||||
}()
|
||||
|
||||
private mutating func finishRun() {
|
||||
private func finishRun() {
|
||||
if actionStack.contains(.skip) {
|
||||
currentRun = ""
|
||||
return
|
||||
|
@ -300,7 +328,7 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
|||
currentRun = ""
|
||||
}
|
||||
|
||||
private mutating func getFont(traits: FontTrait) -> PlatformFont? {
|
||||
private func getFont(traits: FontTrait) -> PlatformFont? {
|
||||
if let cached = fontCache[traits] {
|
||||
return cached
|
||||
}
|
||||
|
|
|
@ -7,39 +7,581 @@
|
|||
|
||||
import Foundation
|
||||
|
||||
protocol BlockRenderer {
|
||||
var blockState: BlockState { get set }
|
||||
mutating func insertBlockBreak()
|
||||
/*
|
||||
|
||||
This gnarly mess of a state machine is responsible for:
|
||||
1) Inserting line breaks in the right places corresponding to boundaries between block elements
|
||||
2) Preventing leading/trailing whitespace from being emitted
|
||||
3) Collapsing whitespace within the string like https://www.w3.org/TR/css-text-3/#white-space-phase-1
|
||||
4) Handling whitespace inside <pre> elements
|
||||
|
||||
DO NOT TOUCH THE CODE WITHOUT CHECKING/UPDATING THE DIAGRAM.
|
||||
|
||||
*/
|
||||
|
||||
/// State plus output hooks for the block/whitespace state machine described in
/// the comment above. The stored properties are declared in a fixed order so the
/// memberwise initializer keeps its argument order.
struct BlockStateMachine {
    /// The state the machine is currently in; a new machine begins in `.start`.
    var blockState = BlockState.start
    /// Text emitted (or buffered) for a break between blocks.
    let blockBreak: String
    /// Text emitted (or buffered) for a single line break.
    let lineBreak: String
    /// Indentation emitted before content that follows the end of a list item.
    let listIndentForContentOutsideItem: String
    /// Output that is held back until the machine knows whether it should be emitted or discarded.
    var temporaryBuffer = ""
    /// Called with text the machine wants to emit.
    let append: (String) -> Void
    /// Called when the machine wants the most recently emitted character removed.
    let removeChar: () -> Void
}
|
||||
|
||||
extension BlockRenderer {
|
||||
mutating func startOrFinishBlock() {
|
||||
extension BlockStateMachine {
|
||||
/// Records that a block-level element has started or ended.
///
/// Non-empty states collapse into the matching "empty block" state. A trailing
/// emitted space is removed, and a single pending `<br>` is discarded. Inside
/// preformatted text, pending whitespace is kept in `temporaryBuffer` so it can
/// be emitted if more content follows.
mutating func startOrEndBlock() {
    switch blockState {
    case .start:
        break
    case .emptyBlock:
        break
    case .nonEmptyBlock:
        blockState = .emptyBlock
    case .emittedSpace:
        blockState = .emptyBlock
        // The collapsed space turned out to be trailing whitespace; take it back.
        removeChar()
    case .lineBreakTag:
        blockState = .emptyBlock
        // A single <br> directly before a block boundary is dropped.
        temporaryBuffer = ""
    case .atLeastTwoLineBreakTags:
        // The buffered breaks are kept; they are emitted if content follows.
        blockState = .emptyBlockWithAtLeastTwoPreviousLineBreakTags
    case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
        break
    case .beginListItem:
        break
    case .endListItem:
        blockState = .emptyBlock
    case .listItemContent:
        blockState = .emptyBlock
    case .emittedSpaceInListItemContent:
        blockState = .emptyBlock
        removeChar()
    case .lineBreakTagInListItemContent:
        blockState = .emptyBlock
        temporaryBuffer = ""
    case .atLeastTwoLineBreakTagsInListItemContent:
        blockState = .emptyBlockWithAtLeastTwoPreviousLineBreakTags
    case .preformattedStart(depth: _):
        break
    case .preformattedEmptyBlock(depth: _):
        break
    case .preformattedNonEmptyBlock(let depth):
        blockState = .preformattedEmptyBlock(depth: depth)
    case .preformattedLineBreak(let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
        temporaryBuffer.append(lineBreak)
    case .preformattedAtLeastTwoLineBreaks(let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
    case .afterPreStartTag(let depth):
        blockState = .preformattedEmptyBlock(depth: depth)
    case .afterPreStartTagWithLeadingWhitespace(let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
    case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth):
        // The state must change here, as the diagram specifies. Otherwise each
        // further block boundary would buffer another block break, and a following
        // <pre> start tag would reach a state startPreformatted() treats as unreachable.
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
        temporaryBuffer.append(blockBreak)
    case .preformattedEmptyBlockWithLeadingWhitespace(depth: _):
        break
    }
}
|
||||
|
||||
mutating func continueBlock() {
|
||||
/// Feeds one character of text content into the state machine.
///
/// - Parameter char: The scalar read from the document.
/// - Returns: `true` if the caller should emit `char` itself; `false` if the
///   character was skipped, buffered in `temporaryBuffer`, or replaced by text
///   this method emitted through `append`.
mutating func continueBlock(char: UnicodeScalar) -> Bool {
    let newline = char == "\n"
    // A newline always counts as whitespace, regardless of what isWhitespace(_:) reports.
    let blank = newline || isWhitespace(char)

    switch blockState {
    case .start, .emittedSpace:
        // Whitespace is skipped; anything else (re)enters a non-empty block.
        guard !blank else { return false }
        blockState = .nonEmptyBlock
        return true

    case .emptyBlock:
        guard !blank else { return false }
        // First real content after a block boundary: separate it from the previous block.
        blockState = .nonEmptyBlock
        append(blockBreak)
        return true

    case .nonEmptyBlock:
        guard blank else { return true }
        // A run of whitespace is represented by one emitted space.
        blockState = .emittedSpace
        append(" ")
        return false

    case .lineBreakTag,
         .atLeastTwoLineBreakTags,
         .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
        guard !blank else { return false }
        // Content follows the buffered line breaks, so they are emitted now.
        blockState = .nonEmptyBlock
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .beginListItem, .emittedSpaceInListItemContent:
        guard !blank else { return false }
        blockState = .listItemContent
        return true

    case .endListItem:
        guard !blank else { return false }
        // Content after a closed list item goes on its own indented line.
        blockState = .listItemContent
        append(lineBreak)
        append(listIndentForContentOutsideItem)
        return true

    case .listItemContent:
        guard blank else { return true }
        blockState = .emittedSpaceInListItemContent
        append(" ")
        return false

    case .lineBreakTagInListItemContent, .atLeastTwoLineBreakTagsInListItemContent:
        guard !blank else { return false }
        blockState = .listItemContent
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .preformattedStart(let depth):
        // Only newlines are skipped here; other whitespace counts as content.
        guard !newline else { return false }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        return true

    case .preformattedEmptyBlock(let depth):
        if blank {
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(blockBreak)
        return true

    case .preformattedNonEmptyBlock(let depth):
        if newline {
            blockState = .preformattedLineBreak(depth: depth)
            temporaryBuffer.append(lineBreak)
            return false
        }
        if blank {
            blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        return true

    case .preformattedLineBreak(let depth):
        if newline {
            blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
            temporaryBuffer.append(lineBreak)
            return false
        }
        if blank {
            blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .preformattedAtLeastTwoLineBreaks(let depth):
        if blank {
            // Any whitespace (newline or not) is buffered verbatim; the state is unchanged.
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .afterPreStartTag(let depth):
        if newline {
            // A newline directly after the <pre> start tag is skipped.
            blockState = .preformattedEmptyBlock(depth: depth)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(blockBreak)
        return true

    case .afterPreStartTagWithLeadingWhitespace(let depth):
        if newline {
            // Skipped like above, but whitespace buffered before the tag stays pending.
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
            return false
        }
        if blank {
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth),
         .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        if newline {
            blockState = .preformattedLineBreak(depth: depth)
            temporaryBuffer.append(lineBreak)
            return false
        }
        if blank {
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true
    }
}
|
||||
|
||||
/// Handles a `<br>` tag.
///
/// Depending on the state, the line break is ignored, emitted immediately via
/// `append`, or held in `temporaryBuffer` until it is known whether content follows.
mutating func breakTag() {
    switch blockState {
    case .start, .preformattedStart:
        // Nothing has been emitted yet, so a leading break is ignored.
        break

    case .emptyBlock, .emptyBlockWithAtLeastTwoPreviousLineBreakTags, .beginListItem:
        append(lineBreak)

    case .nonEmptyBlock, .emittedSpace:
        blockState = .lineBreakTag
        temporaryBuffer.append(lineBreak)

    case .lineBreakTag:
        blockState = .atLeastTwoLineBreakTags
        temporaryBuffer.append(lineBreak)

    case .atLeastTwoLineBreakTags,
         .atLeastTwoLineBreakTagsInListItemContent,
         .preformattedAtLeastTwoLineBreaks:
        // Already counting two or more breaks: just buffer one more.
        temporaryBuffer.append(lineBreak)

    case .endListItem, .listItemContent, .emittedSpaceInListItemContent:
        blockState = .lineBreakTagInListItemContent
        temporaryBuffer.append(lineBreak)

    case .lineBreakTagInListItemContent:
        blockState = .atLeastTwoLineBreakTagsInListItemContent
        temporaryBuffer.append(lineBreak)

    case .preformattedEmptyBlock(let depth),
         .preformattedNonEmptyBlock(let depth),
         .preformattedNonEmptyBlockWithTrailingWhitespace(let depth),
         .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        blockState = .preformattedLineBreak(depth: depth)
        temporaryBuffer.append(lineBreak)

    case .preformattedLineBreak(let depth):
        blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
        temporaryBuffer.append(lineBreak)

    case .afterPreStartTag(let depth):
        // The pending block break is buffered ahead of the line break.
        blockState = .preformattedLineBreak(depth: depth)
        temporaryBuffer.append(blockBreak)
        temporaryBuffer.append(lineBreak)

    case .afterPreStartTagWithLeadingWhitespace(let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
        temporaryBuffer.append(lineBreak)
    }
}
|
||||
|
||||
/// Enters a `<pre>` element, starting at depth 1 or nesting one level deeper.
///
/// Only the "empty" states are handled. Every other state traps with
/// `fatalError("unreachable")`.
mutating func startPreformatted() {
    switch blockState {
    case .start:
        blockState = .preformattedStart(depth: 1)

    case .emptyBlock:
        blockState = .afterPreStartTag(depth: 1)

    case .emptyBlockWithAtLeastTwoPreviousLineBreakTags, .beginListItem:
        blockState = .afterPreStartTagWithLeadingWhitespace(depth: 1)

    case .preformattedStart(let depth):
        blockState = .preformattedStart(depth: depth + 1)

    case .preformattedEmptyBlock(let depth):
        blockState = .afterPreStartTag(depth: depth + 1)

    case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        blockState = .afterPreStartTagWithLeadingWhitespace(depth: depth + 1)

    case .nonEmptyBlock,
         .emittedSpace,
         .lineBreakTag,
         .atLeastTwoLineBreakTags,
         .endListItem,
         .listItemContent,
         .emittedSpaceInListItemContent,
         .lineBreakTagInListItemContent,
         .atLeastTwoLineBreakTagsInListItemContent,
         .preformattedNonEmptyBlock,
         .preformattedLineBreak,
         .preformattedAtLeastTwoLineBreaks,
         .afterPreStartTag,
         .afterPreStartTagWithLeadingWhitespace,
         .preformattedNonEmptyBlockWithTrailingWhitespace:
        fatalError("unreachable")
    }
}
|
||||
|
||||
/// Advances the block state for a closing `</pre>` tag.
///
/// Non-preformatted states that can be active here are left unchanged.
/// Preformatted states drop one nesting level, returning to the matching
/// non-preformatted state once the outermost `<pre>` (depth <= 1) is closed.
/// Every state the state machine treats as unreachable at this point traps
/// with `fatalError("unreachable")`.
mutating func endPreformatted() {
    switch blockState {
    case .start,
         .emptyBlock,
         .emptyBlockWithAtLeastTwoPreviousLineBreakTags,
         .beginListItem:
        // Not inside a <pre>: nothing to unwind.
        break

    case .preformattedStart(let depth):
        blockState = depth > 1 ? .preformattedStart(depth: depth - 1) : .start

    case .preformattedEmptyBlock(let depth):
        blockState = depth > 1 ? .preformattedEmptyBlock(depth: depth - 1) : .emptyBlock

    case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        guard depth > 1 else {
            // Closing the outermost <pre>: the buffered whitespace is discarded.
            blockState = .emptyBlock
            temporaryBuffer = ""
            return
        }
        // Decide the next state from the buffer length *before* trimming it.
        let stillHasBufferedWhitespace = temporaryBuffer.count >= 2
        temporaryBuffer.removeLast()
        if stillHasBufferedWhitespace {
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth - 1)
        } else {
            blockState = .preformattedEmptyBlock(depth: depth - 1)
        }

    case .nonEmptyBlock,
         .emittedSpace,
         .lineBreakTag,
         .atLeastTwoLineBreakTags,
         .endListItem,
         .listItemContent,
         .emittedSpaceInListItemContent,
         .lineBreakTagInListItemContent,
         .atLeastTwoLineBreakTagsInListItemContent,
         .preformattedNonEmptyBlock,
         .preformattedLineBreak,
         .preformattedAtLeastTwoLineBreaks,
         .afterPreStartTag,
         .afterPreStartTagWithLeadingWhitespace,
         .preformattedNonEmptyBlockWithTrailingWhitespace:
        fatalError("unreachable")
    }
}
|
||||
|
||||
/// Advances the block state for an opening `<li>` tag.
///
/// Every handled state ends up in `.beginListItem`. Depending on the state
/// being left, a block break or line break is emitted first, a trailing
/// emitted character is removed, or buffered line breaks are flushed.
/// States not listed here are left untouched.
mutating func startListItem() {
    switch blockState {
    case .start:
        blockState = .beginListItem

    case .emptyBlock, .nonEmptyBlock:
        blockState = .beginListItem
        append(blockBreak)

    case .beginListItem:
        // Already at the start of a list item.
        break

    case .endListItem, .listItemContent:
        blockState = .beginListItem
        append(lineBreak)

    case .emittedSpaceInListItemContent:
        blockState = .beginListItem
        // Drop the previously emitted space before breaking the line.
        removeChar()
        append(lineBreak)

    case .lineBreakTagInListItemContent, .atLeastTwoLineBreakTagsInListItemContent:
        blockState = .beginListItem
        // Flush the buffered line breaks, then separate the items.
        append(temporaryBuffer)
        temporaryBuffer = ""
        append(lineBreak)

    default:
        break
    }
}
|
||||
|
||||
/// Advances the block state for a closing `</li>` tag.
///
/// Handled states move to `.endListItem`; a trailing emitted character is
/// removed and buffered line breaks are discarded where applicable. States
/// not listed here are left untouched.
mutating func endListItem() {
    switch blockState {
    case .emptyBlock, .nonEmptyBlock, .listItemContent:
        blockState = .endListItem

    case .emittedSpaceInListItemContent:
        blockState = .endListItem
        // Drop the previously emitted space at the end of the item.
        removeChar()

    case .lineBreakTagInListItemContent, .atLeastTwoLineBreakTagsInListItemContent:
        blockState = .endListItem
        // Buffered line breaks at the end of an item are thrown away.
        temporaryBuffer = ""

    default:
        break
    }
}
|
||||
|
||||
/// Finishes processing: if the state machine is in one of the two
/// "emitted space" states, the last emitted character is removed.
/// All other states require no cleanup.
mutating func endBlocks() {
    let endsWithEmittedSpace =
        blockState == .emittedSpace || blockState == .emittedSpaceInListItemContent
    if endsWithEmittedSpace {
        removeChar()
    }
}
|
||||
}
|
||||
|
||||
/// The states of the block-level state machine.
///
/// The preformatted states carry a `depth` payload: `startPreformatted()`
/// sets it to 1 for the outermost `<pre>` and increments it for nested ones,
/// and `endPreformatted()` decrements it again.
enum BlockState: Equatable {
    // MARK: Regular content
    case start
    case emptyBlock
    case nonEmptyBlock
    case emittedSpace
    case lineBreakTag
    case atLeastTwoLineBreakTags
    case emptyBlockWithAtLeastTwoPreviousLineBreakTags

    // MARK: List items
    case beginListItem
    case endListItem
    case listItemContent
    case emittedSpaceInListItemContent
    case lineBreakTagInListItemContent
    case atLeastTwoLineBreakTagsInListItemContent

    // MARK: Preformatted content (`depth` = `<pre>` nesting level)
    case preformattedStart(depth: Int32)
    case preformattedEmptyBlock(depth: Int32)
    case preformattedNonEmptyBlock(depth: Int32)
    case preformattedLineBreak(depth: Int32)
    case preformattedAtLeastTwoLineBreaks(depth: Int32)
    case afterPreStartTag(depth: Int32)
    case afterPreStartTagWithLeadingWhitespace(depth: Int32)
    case preformattedNonEmptyBlockWithTrailingWhitespace(depth: Int32)
    case preformattedEmptyBlockWithLeadingWhitespace(depth: Int32)
}
|
||||
|
||||
/// Returns `true` when `c` is a space, line feed, tab, or no-break space.
@inline(__always)
private func isWhitespace(_ c: UnicodeScalar) -> Bool {
    // this is not strictly correct, but checking the actual unicode properties is slow
    // and this should cover the vast majority of actual use
    switch c {
    case " ", "\n", "\t", "\u{A0}" /* NO-BREAK SPACE */:
        return true
    default:
        return false
    }
}
|
||||
|
|
|
@ -7,19 +7,18 @@
|
|||
|
||||
import Foundation
|
||||
|
||||
public struct TextConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
||||
|
||||
public class TextConverter<Callbacks: HTMLConversionCallbacks> {
|
||||
private let configuration: TextConverterConfiguration
|
||||
|
||||
private var tokenizer: Tokenizer<String.UnicodeScalarView.Iterator>!
|
||||
private var str: String!
|
||||
|
||||
private var actionStack: [ElementAction] = []
|
||||
var blockState = BlockState.start
|
||||
var blockStateMachine = BlockStateMachine(blockBreak: "", lineBreak: "", listIndentForContentOutsideItem: "", append: { _ in }, removeChar: {})
|
||||
private var currentElementIsEmpty = true
|
||||
private var currentRun = ""
|
||||
|
||||
public init(configuration: TextConverterConfiguration = .init()) where Callbacks == DefaultCallbacks {
|
||||
public convenience init(configuration: TextConverterConfiguration = .init()) where Callbacks == DefaultCallbacks {
|
||||
self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
|
||||
}
|
||||
|
||||
|
@ -27,11 +26,19 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
|||
self.configuration = configuration
|
||||
}
|
||||
|
||||
public mutating func convert(html: String) -> String {
|
||||
public func convert(html: String) -> String {
|
||||
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
|
||||
str = ""
|
||||
|
||||
blockState = .start
|
||||
blockStateMachine = BlockStateMachine(
|
||||
blockBreak: configuration.insertNewlines ? "\n\n" : " " ,
|
||||
lineBreak: configuration.insertNewlines ? "\n" : " " ,
|
||||
listIndentForContentOutsideItem: "",
|
||||
append: { [unowned self] in
|
||||
self.append($0)
|
||||
}, removeChar: { [unowned self] in
|
||||
self.removeChar()
|
||||
})
|
||||
currentElementIsEmpty = true
|
||||
currentRun = ""
|
||||
|
||||
|
@ -39,12 +46,16 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
|||
switch token {
|
||||
case .character(let scalar):
|
||||
currentElementIsEmpty = false
|
||||
continueBlock()
|
||||
currentRun.unicodeScalars.append(scalar)
|
||||
if blockStateMachine.continueBlock(char: scalar) {
|
||||
currentRun.unicodeScalars.append(scalar)
|
||||
}
|
||||
case .characterSequence(let string):
|
||||
currentElementIsEmpty = false
|
||||
continueBlock()
|
||||
currentRun.append(string)
|
||||
for c in string.unicodeScalars {
|
||||
if blockStateMachine.continueBlock(char: c) {
|
||||
currentRun.unicodeScalars.append(c)
|
||||
}
|
||||
}
|
||||
case .startTag(let name, let selfClosing, let attributes):
|
||||
currentElementIsEmpty = true
|
||||
let action = Callbacks.elementAction(name: name, attributes: attributes)
|
||||
|
@ -66,45 +77,66 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
|||
}
|
||||
}
|
||||
|
||||
blockStateMachine.endBlocks()
|
||||
finishRun()
|
||||
|
||||
return str
|
||||
}
|
||||
|
||||
private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
|
||||
private func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
|
||||
switch name {
|
||||
case "br":
|
||||
if configuration.insertNewlines {
|
||||
currentRun.append("\n")
|
||||
} else {
|
||||
currentRun.append(" ")
|
||||
}
|
||||
blockStateMachine.breakTag()
|
||||
case "pre", "blockquote", "p", "ol", "ul":
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
private mutating func handleEndTag(_ name: String) {
|
||||
private func handleEndTag(_ name: String) {
|
||||
switch name {
|
||||
case "pre", "blockquote", "p", "ol", "ul":
|
||||
startOrFinishBlock()
|
||||
blockStateMachine.startOrEndBlock()
|
||||
finishRun()
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
mutating func insertBlockBreak() {
|
||||
var blockBreak: String {
|
||||
if configuration.insertNewlines {
|
||||
currentRun.append("\n\n")
|
||||
"\n\n"
|
||||
} else {
|
||||
currentRun.append(" ")
|
||||
" "
|
||||
}
|
||||
}
|
||||
|
||||
private mutating func finishRun() {
|
||||
/// The separator used for a line break: a newline when the configuration
/// asks for newlines to be inserted, otherwise a single space.
var lineBreak: String {
    configuration.insertNewlines ? "\n" : " "
}
|
||||
|
||||
var listIndentForContentOutsideItem: String {
|
||||
" "
|
||||
}
|
||||
|
||||
/// Appends `s` to the run of text currently being accumulated.
func append(_ s: String) {
    currentRun += s
}
|
||||
|
||||
/// Removes the most recently emitted character: from the current run when
/// it has any content, otherwise from the already-accumulated output string.
func removeChar() {
    guard currentRun.isEmpty else {
        currentRun.removeLast()
        return
    }
    str.removeLast()
}
|
||||
|
||||
private func finishRun() {
|
||||
if actionStack.contains(.skip) {
|
||||
currentRun = ""
|
||||
return
|
||||
|
|
|
@ -327,6 +327,15 @@ final class AttributedStringConverterTests: XCTestCase {
|
|||
XCTAssertEqual(convert("a<ol><li>b</li><li>c</li></ol>"), result)
|
||||
}
|
||||
|
||||
/// A `<li>` with no enclosing list converts to just its text.
func testListItemOutsideList() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]
    let expected = NSAttributedString(string: "a", attributes: attributes)
    XCTAssertEqual(convert("<li>a</li>"), expected)
}
|
||||
|
||||
func testSkipElementActionFollowingUnfinishedRun() {
|
||||
struct Callbacks: HTMLConversionCallbacks {
|
||||
static func elementAction(name: String, attributes: [Attribute]) -> ElementAction {
|
||||
|
@ -347,7 +356,7 @@ final class AttributedStringConverterTests: XCTestCase {
|
|||
XCTAssertEqual(convert("</span>"), .init())
|
||||
}
|
||||
|
||||
func testWTF() {
|
||||
func testMultipleClosingBlockTagsBeforeOpeningBlockTag() {
|
||||
let result = NSMutableAttributedString()
|
||||
result.append(NSAttributedString(string: "a", attributes: [
|
||||
.font: italicFont,
|
||||
|
@ -362,4 +371,234 @@ final class AttributedStringConverterTests: XCTestCase {
|
|||
XCTAssertEqual(convert(#"<blockquote><p>a</p></blockquote><p>b</p>"#), result)
|
||||
}
|
||||
|
||||
/// A newline between `</p>` and `<p>`, or directly after `<p>`, does not
/// add anything beyond the single block break between the paragraphs.
func testNewlineBetweenClosingAndOpeningBlockTag() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]
    let expected = NSAttributedString(string: "a\n\nb", attributes: attributes)
    for html in ["<p>a</p>\n<p>b</p>", "<p>a</p><p>\nb</p>"] {
        XCTAssertEqual(convert(html), expected)
    }
}
|
||||
|
||||
/// Newlines at the start or end of a paragraph's content are dropped, and
/// newlines between characters collapse to a single space.
func testEndAfterNewlineInBlockContent() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]

    let single = NSAttributedString(string: "a", attributes: attributes)
    let edgeNewlineInputs = [
        "<p>a\n\n</p>",
        "<p>a\n\n</p>\n",
        "<p>\n\na</p>",
        "<p>\n\na</p>\n",
    ]
    for html in edgeNewlineInputs {
        XCTAssertEqual(convert(html), single)
    }

    let collapsed = NSAttributedString(string: "a b", attributes: attributes)
    XCTAssertEqual(convert("<p>a\n\n\nb</p>"), collapsed)
}
|
||||
|
||||
/// A `<br>` right before `</p>` adds no extra line, while a `<br>` right
/// after `<p>` adds one line on top of the block break.
func testBRAtBlockElementBoundary() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]

    let twoNewlines = NSAttributedString(string: "a\n\nb", attributes: attributes)
    XCTAssertEqual(convert("<p>a<br></p><p>b</p>"), twoNewlines)

    let threeNewlines = NSAttributedString(string: "a\n\n\nb", attributes: attributes)
    XCTAssertEqual(convert("<p>a</p><p><br>b</p>"), threeNewlines)
}
|
||||
|
||||
/// A `<pre>` ending in `<br>` followed by a paragraph yields monospaced
/// text, then a block break and the paragraph text in the regular font.
func testPreFollowedByP() {
    let monospaceAttributes: [NSAttributedString.Key: Any] = [
        .font: monospaceFont,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]
    let regularAttributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]

    let expected = NSMutableAttributedString()
    expected.append(NSAttributedString(string: "a", attributes: monospaceAttributes))
    expected.append(NSAttributedString(string: "\n\nb", attributes: regularAttributes))

    XCTAssertEqual(convert("<pre>a<br></pre><p>b</p>"), expected)
}
|
||||
|
||||
/// Two adjacent `<pre>` elements are separated by a block break.
func testPreFollowedByPre() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: monospaceFont,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]
    let expected = NSAttributedString(string: "a\n\nb", attributes: attributes)
    XCTAssertEqual(convert("<pre>a</pre><pre>b</pre>"), expected)
}
|
||||
|
||||
/// A `<br>` right before `</pre>` adds no extra line, while a `<br>` right
/// after `<pre>` adds one line on top of the block break.
func testBRAtPreBoundary() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: monospaceFont,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]

    let twoNewlines = NSAttributedString(string: "a\n\nb", attributes: attributes)
    XCTAssertEqual(convert("<pre>a<br></pre><pre>b</pre>"), twoNewlines)

    let threeNewlines = NSAttributedString(string: "a\n\n\nb", attributes: attributes)
    XCTAssertEqual(convert("<pre>a</pre><pre><br>b</pre>"), threeNewlines)
}
|
||||
|
||||
/// Nested `<pre>` elements: directly nested tags add nothing, an inner
/// `<pre>` after content starts a new block, and a leading `<br>` inside the
/// inner `<pre>` adds one more line.
func testNestedPre() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: monospaceFont,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]

    let noBreak = NSAttributedString(string: "a", attributes: attributes)
    XCTAssertEqual(convert("<pre><pre>a</pre></pre>"), noBreak)

    let twoNewlines = NSAttributedString(string: "a\n\nb", attributes: attributes)
    for html in ["<pre>a<pre>b</pre></pre>", "<pre>a<br><pre>b</pre></pre>"] {
        XCTAssertEqual(convert(html), twoNewlines)
    }

    let threeNewlines = NSAttributedString(string: "a\n\n\nb", attributes: attributes)
    XCTAssertEqual(convert("<pre>a<pre><br>b</pre></pre>"), threeNewlines)
}
|
||||
|
||||
/// A newline immediately after the `<pre>` start tag is ignored, both at the
/// start of the document and after preceding text.
func testIgnoreLeadingNewlineInPre() {
    let monospaceAttributes: [NSAttributedString.Key: Any] = [
        .font: monospaceFont,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]
    let regularAttributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]

    let preOnly = NSAttributedString(string: "a", attributes: monospaceAttributes)
    XCTAssertEqual(convert("<pre>\na</pre>"), preOnly)

    let textThenPre = NSMutableAttributedString()
    textThenPre.append(NSAttributedString(string: "a", attributes: regularAttributes))
    textThenPre.append(NSAttributedString(string: "\n\nb", attributes: monospaceAttributes))
    XCTAssertEqual(convert("a<pre>\nb</pre>"), textThenPre)
}
|
||||
|
||||
/// A `<pre>` directly after plain text is separated from it by a block
/// break, with the break and the preformatted text in the monospace font.
func testPreFollowingChar() {
    let regularAttributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]
    let monospaceAttributes: [NSAttributedString.Key: Any] = [
        .font: monospaceFont,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]

    let expected = NSMutableAttributedString()
    expected.append(NSAttributedString(string: "a", attributes: regularAttributes))
    expected.append(NSAttributedString(string: "\n\nb", attributes: monospaceAttributes))

    XCTAssertEqual(convert("a<pre>b</pre>"), expected)
}
|
||||
|
||||
/// Whitespace before the first content and after the last content is
/// dropped, for bare text, paragraphs, and `<pre>` elements.
func testSkipLeadingTrailingWhitespace() {
    let regularAttributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]
    let regular = NSAttributedString(string: "a", attributes: regularAttributes)
    let regularInputs = [
        " \n\ta",
        " \n\t<p>a</p>",
        "a \n\t",
        "<p>a</p> \n\t",
    ]
    for html in regularInputs {
        XCTAssertEqual(convert(html), regular)
    }

    let monospaceAttributes: [NSAttributedString.Key: Any] = [
        .font: monospaceFont,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]
    let preformatted = NSAttributedString(string: "a", attributes: monospaceAttributes)
    for html in [" \n\t<pre>a</pre>", "<pre>a</pre> \n\t"] {
        XCTAssertEqual(convert(html), preformatted)
    }
}
|
||||
|
||||
/// A run of mixed whitespace between characters collapses to one space.
func testWhitespaceCollapsing() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: NSParagraphStyle.default,
        .foregroundColor: color,
    ]
    let expected = NSAttributedString(string: "a b", attributes: attributes)
    XCTAssertEqual(convert("<p>a \t\nb</p>"), expected)
}
|
||||
|
||||
/// A `<p>` wrapping a list item's content adds no extra breaks.
func testParagraphInsideListItem() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: listParagraphStyle,
        .foregroundColor: color,
    ]
    let expected = NSAttributedString(string: "\t1.\ta\n\t2.\tb", attributes: attributes)
    XCTAssertEqual(convert("<ol><li><p>a</p></li><li><p>b</p></li></ol>"), expected)
}
|
||||
|
||||
/// A `<br>` between two list items adds one extra line between them.
func testBreakBetweenListItems() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: listParagraphStyle,
        .foregroundColor: color,
    ]
    let expected = NSAttributedString(string: "\t1.\ta\n\n\t2.\tb", attributes: attributes)
    XCTAssertEqual(convert("<ol><li>a</li><br><li>b</li></ol>"), expected)
}
|
||||
|
||||
/// Text placed between two list items gets its own indented line, and a
/// trailing space after that text is dropped.
func testCharacterBetweenListItems() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: listParagraphStyle,
        .foregroundColor: color,
    ]
    let expected = NSAttributedString(string: "\t1.\ta\n\t\tc\n\t2.\tb", attributes: attributes)
    for html in ["<ol><li>a</li>c<li>b</li></ol>", "<ol><li>a</li>c <li>b</li></ol>"] {
        XCTAssertEqual(convert(html), expected)
    }
}
|
||||
|
||||
/// Text between two list items keeps a single space between its words.
func testWhitespaceCollapsingInTextBetweenListItems() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: listParagraphStyle,
        .foregroundColor: color,
    ]
    let expected = NSAttributedString(string: "\t1.\ta\n\t\tc d\n\t2.\tb", attributes: attributes)
    XCTAssertEqual(convert("<ol><li>a</li>c d<li>b</li></ol>"), expected)
}
|
||||
|
||||
/// List items without closing `</li>` tags render the same as closed ones.
func testImplicitlyClosedListItem() {
    let attributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: listParagraphStyle,
        .foregroundColor: color,
    ]
    let expected = NSAttributedString(string: "\t1.\ta\n\t2.\tb", attributes: attributes)
    XCTAssertEqual(convert("<ol><li>a<li>b</ol>"), expected)
}
|
||||
|
||||
/// A `<pre>` inside a list item follows the list marker directly, with only
/// the preformatted text in the monospace font.
func testPreInsideListItem() {
    let markerAttributes: [NSAttributedString.Key: Any] = [
        .font: font,
        .paragraphStyle: listParagraphStyle,
        .foregroundColor: color,
    ]
    let contentAttributes: [NSAttributedString.Key: Any] = [
        .font: monospaceFont,
        .paragraphStyle: listParagraphStyle,
        .foregroundColor: color,
    ]

    let expected = NSMutableAttributedString()
    expected.append(NSAttributedString(string: "\t1.\t", attributes: markerAttributes))
    expected.append(NSAttributedString(string: "a", attributes: contentAttributes))

    XCTAssertEqual(convert("<ol><li><pre>a</pre></li></ol>"), expected)
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue