BlockStateMachine performance improvements
This commit is contained in:
parent
1f26c4923c
commit
a2ca8fd650
|
@ -17,7 +17,7 @@ private typealias PlatformFont = UIFont
|
||||||
private typealias PlatformFont = NSFont
|
private typealias PlatformFont = NSFont
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
public class AttributedStringConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
private let configuration: AttributedStringConverterConfiguration
|
private let configuration: AttributedStringConverterConfiguration
|
||||||
private var fontCache: [FontTrait: PlatformFont] = [:]
|
private var fontCache: [FontTrait: PlatformFont] = [:]
|
||||||
|
|
||||||
|
@ -26,14 +26,13 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
|
|
||||||
private var actionStack: [ElementAction] = []
|
private var actionStack: [ElementAction] = []
|
||||||
private var styleStack: [Style] = []
|
private var styleStack: [Style] = []
|
||||||
var blockState = BlockState.start
|
private var blockStateMachine = BlockStateMachine(blockBreak: "", lineBreak: "", listIndentForContentOutsideItem: "", append: { _ in }, removeChar: {})
|
||||||
var temporaryBuffer: String = ""
|
|
||||||
private var currentElementIsEmpty = true
|
private var currentElementIsEmpty = true
|
||||||
private var previouslyFinishedListItem = false
|
private var previouslyFinishedListItem = false
|
||||||
// The current run of text w/o styles changing
|
// The current run of text w/o styles changing
|
||||||
private var currentRun: String = ""
|
private var currentRun: String = ""
|
||||||
|
|
||||||
public init(configuration: AttributedStringConverterConfiguration) where Callbacks == DefaultCallbacks {
|
public convenience init(configuration: AttributedStringConverterConfiguration) where Callbacks == DefaultCallbacks {
|
||||||
self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
|
self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,14 +40,17 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
self.configuration = configuration
|
self.configuration = configuration
|
||||||
}
|
}
|
||||||
|
|
||||||
public mutating func convert(html: String) -> NSAttributedString {
|
public func convert(html: String) -> NSAttributedString {
|
||||||
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
|
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
|
||||||
str = NSMutableAttributedString()
|
str = NSMutableAttributedString()
|
||||||
|
|
||||||
actionStack = []
|
actionStack = []
|
||||||
styleStack = []
|
styleStack = []
|
||||||
blockState = .start
|
blockStateMachine = BlockStateMachine(blockBreak: "\n\n", lineBreak: "\n", listIndentForContentOutsideItem: "\t\t", append: { [unowned self] in
|
||||||
temporaryBuffer = ""
|
self.append($0)
|
||||||
|
}, removeChar: { [unowned self] in
|
||||||
|
self.removeChar()
|
||||||
|
})
|
||||||
currentElementIsEmpty = true
|
currentElementIsEmpty = true
|
||||||
previouslyFinishedListItem = false
|
previouslyFinishedListItem = false
|
||||||
currentRun = ""
|
currentRun = ""
|
||||||
|
@ -57,13 +59,13 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
switch token {
|
switch token {
|
||||||
case .character(let c):
|
case .character(let c):
|
||||||
currentElementIsEmpty = false
|
currentElementIsEmpty = false
|
||||||
if continueBlock(char: c) {
|
if blockStateMachine.continueBlock(char: c) {
|
||||||
currentRun.unicodeScalars.append(c)
|
currentRun.unicodeScalars.append(c)
|
||||||
}
|
}
|
||||||
case .characterSequence(let s):
|
case .characterSequence(let s):
|
||||||
currentElementIsEmpty = false
|
currentElementIsEmpty = false
|
||||||
for c in s.unicodeScalars {
|
for c in s.unicodeScalars {
|
||||||
if continueBlock(char: c) {
|
if blockStateMachine.continueBlock(char: c) {
|
||||||
currentRun.unicodeScalars.append(c)
|
currentRun.unicodeScalars.append(c)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,15 +95,15 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
endBlocks()
|
blockStateMachine.endBlocks()
|
||||||
finishRun()
|
finishRun()
|
||||||
|
|
||||||
return str
|
return str
|
||||||
}
|
}
|
||||||
|
|
||||||
private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
|
private func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
|
||||||
if name == "br" {
|
if name == "br" {
|
||||||
breakTag()
|
blockStateMachine.breakTag()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// self closing tags are ignored since they have no content
|
// self closing tags are ignored since they have no content
|
||||||
|
@ -133,22 +135,22 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
finishRun()
|
finishRun()
|
||||||
styleStack.append(.monospace)
|
styleStack.append(.monospace)
|
||||||
case "pre":
|
case "pre":
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
startPreformatted()
|
blockStateMachine.startPreformatted()
|
||||||
finishRun()
|
finishRun()
|
||||||
styleStack.append(.monospace)
|
styleStack.append(.monospace)
|
||||||
case "blockquote":
|
case "blockquote":
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
finishRun()
|
finishRun()
|
||||||
styleStack.append(.blockquote)
|
styleStack.append(.blockquote)
|
||||||
case "p":
|
case "p":
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
case "ol":
|
case "ol":
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
finishRun()
|
finishRun()
|
||||||
styleStack.append(.orderedList(nextElementOrdinal: 1))
|
styleStack.append(.orderedList(nextElementOrdinal: 1))
|
||||||
case "ul":
|
case "ul":
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
finishRun()
|
finishRun()
|
||||||
styleStack.append(.unorderedList)
|
styleStack.append(.unorderedList)
|
||||||
case "li":
|
case "li":
|
||||||
|
@ -161,14 +163,14 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
} else {
|
} else {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
startListItem()
|
blockStateMachine.startListItem()
|
||||||
currentRun.append("\t\(marker)\t")
|
currentRun.append("\t\(marker)\t")
|
||||||
default:
|
default:
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private mutating func handleEndTag(_ name: String) {
|
private func handleEndTag(_ name: String) {
|
||||||
switch name {
|
switch name {
|
||||||
case "a":
|
case "a":
|
||||||
if case .link(.some(_)) = lastStyle(.link) {
|
if case .link(.some(_)) = lastStyle(.link) {
|
||||||
|
@ -190,28 +192,28 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
case "pre":
|
case "pre":
|
||||||
finishRun()
|
finishRun()
|
||||||
removeLastStyle(.monospace)
|
removeLastStyle(.monospace)
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
endPreformatted()
|
blockStateMachine.endPreformatted()
|
||||||
case "blockquote":
|
case "blockquote":
|
||||||
finishRun()
|
finishRun()
|
||||||
removeLastStyle(.blockquote)
|
removeLastStyle(.blockquote)
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
case "p":
|
case "p":
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
case "ol":
|
case "ol":
|
||||||
finishRun()
|
finishRun()
|
||||||
removeLastStyle(.orderedList)
|
removeLastStyle(.orderedList)
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
previouslyFinishedListItem = false
|
previouslyFinishedListItem = false
|
||||||
case "ul":
|
case "ul":
|
||||||
finishRun()
|
finishRun()
|
||||||
removeLastStyle(.unorderedList)
|
removeLastStyle(.unorderedList)
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
previouslyFinishedListItem = false
|
previouslyFinishedListItem = false
|
||||||
case "li":
|
case "li":
|
||||||
finishRun()
|
finishRun()
|
||||||
previouslyFinishedListItem = true
|
previouslyFinishedListItem = true
|
||||||
endListItem()
|
blockStateMachine.endListItem()
|
||||||
default:
|
default:
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -229,11 +231,11 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
"\t\t"
|
"\t\t"
|
||||||
}
|
}
|
||||||
|
|
||||||
mutating func append(_ s: String) {
|
func append(_ s: String) {
|
||||||
currentRun.append(s)
|
currentRun.append(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
mutating func removeChar() {
|
func removeChar() {
|
||||||
if currentRun.isEmpty {
|
if currentRun.isEmpty {
|
||||||
str.deleteCharacters(in: NSRange(location: str.length - 1, length: 1))
|
str.deleteCharacters(in: NSRange(location: str.length - 1, length: 1))
|
||||||
} else {
|
} else {
|
||||||
|
@ -243,7 +245,7 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
|
|
||||||
// Finds the last currently-open style of the given type.
|
// Finds the last currently-open style of the given type.
|
||||||
// We can't just use the last one because we need to handle mis-nested tags.
|
// We can't just use the last one because we need to handle mis-nested tags.
|
||||||
private mutating func removeLastStyle(_ type: Style.StyleType) {
|
private func removeLastStyle(_ type: Style.StyleType) {
|
||||||
var i = styleStack.index(before: styleStack.endIndex)
|
var i = styleStack.index(before: styleStack.endIndex)
|
||||||
while i >= styleStack.startIndex {
|
while i >= styleStack.startIndex {
|
||||||
if styleStack[i].type == type {
|
if styleStack[i].type == type {
|
||||||
|
@ -278,7 +280,7 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
return style
|
return style
|
||||||
}()
|
}()
|
||||||
|
|
||||||
private mutating func finishRun() {
|
private func finishRun() {
|
||||||
if actionStack.contains(.skip) {
|
if actionStack.contains(.skip) {
|
||||||
currentRun = ""
|
currentRun = ""
|
||||||
return
|
return
|
||||||
|
@ -326,7 +328,7 @@ public struct AttributedStringConverter<Callbacks: HTMLConversionCallbacks>: Blo
|
||||||
currentRun = ""
|
currentRun = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
private mutating func getFont(traits: FontTrait) -> PlatformFont? {
|
private func getFont(traits: FontTrait) -> PlatformFont? {
|
||||||
if let cached = fontCache[traits] {
|
if let cached = fontCache[traits] {
|
||||||
return cached
|
return cached
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,17 +19,17 @@ import Foundation
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
protocol BlockRenderer {
|
struct BlockStateMachine {
|
||||||
var blockState: BlockState { get set }
|
var blockState: BlockState = .start
|
||||||
var blockBreak: String { get }
|
let blockBreak: String
|
||||||
var lineBreak: String { get }
|
let lineBreak: String
|
||||||
var listIndentForContentOutsideItem: String { get }
|
let listIndentForContentOutsideItem: String
|
||||||
var temporaryBuffer: String { get set }
|
var temporaryBuffer: String = ""
|
||||||
mutating func append(_ s: String)
|
let append: (String) -> Void
|
||||||
mutating func removeChar()
|
let removeChar: () -> Void
|
||||||
}
|
}
|
||||||
|
|
||||||
extension BlockRenderer {
|
extension BlockStateMachine {
|
||||||
mutating func startOrEndBlock() {
|
mutating func startOrEndBlock() {
|
||||||
switch blockState {
|
switch blockState {
|
||||||
case .start:
|
case .start:
|
||||||
|
@ -85,16 +85,18 @@ extension BlockRenderer {
|
||||||
}
|
}
|
||||||
|
|
||||||
mutating func continueBlock(char: UnicodeScalar) -> Bool {
|
mutating func continueBlock(char: UnicodeScalar) -> Bool {
|
||||||
|
let isNewline = char == "\n"
|
||||||
|
let isWhitespace = isNewline || isWhitespace(char)
|
||||||
switch blockState {
|
switch blockState {
|
||||||
case .start:
|
case .start:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .nonEmptyBlock
|
blockState = .nonEmptyBlock
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .emptyBlock:
|
case .emptyBlock:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .nonEmptyBlock
|
blockState = .nonEmptyBlock
|
||||||
|
@ -102,7 +104,7 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .nonEmptyBlock:
|
case .nonEmptyBlock:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
blockState = .emittedSpace
|
blockState = .emittedSpace
|
||||||
append(" ")
|
append(" ")
|
||||||
return false
|
return false
|
||||||
|
@ -110,14 +112,14 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .emittedSpace:
|
case .emittedSpace:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .nonEmptyBlock
|
blockState = .nonEmptyBlock
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .lineBreakTag:
|
case .lineBreakTag:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .nonEmptyBlock
|
blockState = .nonEmptyBlock
|
||||||
|
@ -126,7 +128,7 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .atLeastTwoLineBreakTags:
|
case .atLeastTwoLineBreakTags:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .nonEmptyBlock
|
blockState = .nonEmptyBlock
|
||||||
|
@ -135,7 +137,7 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
|
case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .nonEmptyBlock
|
blockState = .nonEmptyBlock
|
||||||
|
@ -144,14 +146,14 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .beginListItem:
|
case .beginListItem:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .listItemContent
|
blockState = .listItemContent
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .endListItem:
|
case .endListItem:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .listItemContent
|
blockState = .listItemContent
|
||||||
|
@ -160,7 +162,7 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .listItemContent:
|
case .listItemContent:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
blockState = .emittedSpaceInListItemContent
|
blockState = .emittedSpaceInListItemContent
|
||||||
append(" ")
|
append(" ")
|
||||||
return false
|
return false
|
||||||
|
@ -168,14 +170,14 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .emittedSpaceInListItemContent:
|
case .emittedSpaceInListItemContent:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .listItemContent
|
blockState = .listItemContent
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .lineBreakTagInListItemContent:
|
case .lineBreakTagInListItemContent:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .listItemContent
|
blockState = .listItemContent
|
||||||
|
@ -184,7 +186,7 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .atLeastTwoLineBreakTagsInListItemContent:
|
case .atLeastTwoLineBreakTagsInListItemContent:
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .listItemContent
|
blockState = .listItemContent
|
||||||
|
@ -193,14 +195,14 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .preformattedStart(let depth):
|
case .preformattedStart(let depth):
|
||||||
if char == "\n" {
|
if isNewline {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
blockState = .preformattedNonEmptyBlock(depth: depth)
|
blockState = .preformattedNonEmptyBlock(depth: depth)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .preformattedEmptyBlock(depth: let depth):
|
case .preformattedEmptyBlock(depth: let depth):
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
||||||
temporaryBuffer.unicodeScalars.append(char)
|
temporaryBuffer.unicodeScalars.append(char)
|
||||||
return false
|
return false
|
||||||
|
@ -210,11 +212,11 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .preformattedNonEmptyBlock(let depth):
|
case .preformattedNonEmptyBlock(let depth):
|
||||||
if char == "\n" {
|
if isNewline {
|
||||||
blockState = .preformattedLineBreak(depth: depth)
|
blockState = .preformattedLineBreak(depth: depth)
|
||||||
temporaryBuffer.append(lineBreak)
|
temporaryBuffer.append(lineBreak)
|
||||||
return false
|
return false
|
||||||
} else if char.properties.isWhitespace {
|
} else if isWhitespace {
|
||||||
blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
|
blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
|
||||||
temporaryBuffer.unicodeScalars.append(char)
|
temporaryBuffer.unicodeScalars.append(char)
|
||||||
return false
|
return false
|
||||||
|
@ -222,11 +224,11 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .preformattedLineBreak(let depth):
|
case .preformattedLineBreak(let depth):
|
||||||
if char == "\n" {
|
if isNewline {
|
||||||
blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
|
blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
|
||||||
temporaryBuffer.append(lineBreak)
|
temporaryBuffer.append(lineBreak)
|
||||||
return false
|
return false
|
||||||
} else if char.properties.isWhitespace {
|
} else if isWhitespace {
|
||||||
blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
|
blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
|
||||||
temporaryBuffer.unicodeScalars.append(char)
|
temporaryBuffer.unicodeScalars.append(char)
|
||||||
return false
|
return false
|
||||||
|
@ -237,7 +239,7 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .preformattedAtLeastTwoLineBreaks(let depth):
|
case .preformattedAtLeastTwoLineBreaks(let depth):
|
||||||
if char.properties.isWhitespace {
|
if isWhitespace {
|
||||||
temporaryBuffer.unicodeScalars.append(char)
|
temporaryBuffer.unicodeScalars.append(char)
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
|
@ -247,7 +249,7 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .afterPreStartTag(let depth):
|
case .afterPreStartTag(let depth):
|
||||||
if char == "\n" {
|
if isNewline {
|
||||||
blockState = .preformattedEmptyBlock(depth: depth)
|
blockState = .preformattedEmptyBlock(depth: depth)
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
|
@ -256,10 +258,10 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .afterPreStartTagWithLeadingWhitespace(let depth):
|
case .afterPreStartTagWithLeadingWhitespace(let depth):
|
||||||
if char == "\n" {
|
if isNewline {
|
||||||
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
||||||
return false
|
return false
|
||||||
} else if char.properties.isWhitespace {
|
} else if isWhitespace {
|
||||||
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
||||||
temporaryBuffer.unicodeScalars.append(char)
|
temporaryBuffer.unicodeScalars.append(char)
|
||||||
return false
|
return false
|
||||||
|
@ -270,11 +272,11 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth):
|
case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth):
|
||||||
if char == "\n" {
|
if isNewline {
|
||||||
blockState = .preformattedLineBreak(depth: depth)
|
blockState = .preformattedLineBreak(depth: depth)
|
||||||
temporaryBuffer.append(lineBreak)
|
temporaryBuffer.append(lineBreak)
|
||||||
return false
|
return false
|
||||||
} else if char.properties.isWhitespace {
|
} else if isWhitespace {
|
||||||
temporaryBuffer.unicodeScalars.append(char)
|
temporaryBuffer.unicodeScalars.append(char)
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
|
@ -284,11 +286,11 @@ extension BlockRenderer {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
|
case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
|
||||||
if char == "\n" {
|
if isNewline {
|
||||||
blockState = .preformattedLineBreak(depth: depth)
|
blockState = .preformattedLineBreak(depth: depth)
|
||||||
temporaryBuffer.append(lineBreak)
|
temporaryBuffer.append(lineBreak)
|
||||||
return false
|
return false
|
||||||
} else if char.properties.isWhitespace {
|
} else if isWhitespace {
|
||||||
temporaryBuffer.unicodeScalars.append(char)
|
temporaryBuffer.unicodeScalars.append(char)
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
|
@ -566,13 +568,20 @@ enum BlockState: Equatable {
|
||||||
case emittedSpaceInListItemContent
|
case emittedSpaceInListItemContent
|
||||||
case lineBreakTagInListItemContent
|
case lineBreakTagInListItemContent
|
||||||
case atLeastTwoLineBreakTagsInListItemContent
|
case atLeastTwoLineBreakTagsInListItemContent
|
||||||
case preformattedStart(depth: Int)
|
case preformattedStart(depth: Int32)
|
||||||
case preformattedEmptyBlock(depth: Int)
|
case preformattedEmptyBlock(depth: Int32)
|
||||||
case preformattedNonEmptyBlock(depth: Int)
|
case preformattedNonEmptyBlock(depth: Int32)
|
||||||
case preformattedLineBreak(depth: Int)
|
case preformattedLineBreak(depth: Int32)
|
||||||
case preformattedAtLeastTwoLineBreaks(depth: Int)
|
case preformattedAtLeastTwoLineBreaks(depth: Int32)
|
||||||
case afterPreStartTag(depth: Int)
|
case afterPreStartTag(depth: Int32)
|
||||||
case afterPreStartTagWithLeadingWhitespace(depth: Int)
|
case afterPreStartTagWithLeadingWhitespace(depth: Int32)
|
||||||
case preformattedNonEmptyBlockWithTrailingWhitespace(depth: Int)
|
case preformattedNonEmptyBlockWithTrailingWhitespace(depth: Int32)
|
||||||
case preformattedEmptyBlockWithLeadingWhitespace(depth: Int)
|
case preformattedEmptyBlockWithLeadingWhitespace(depth: Int32)
|
||||||
|
}
|
||||||
|
|
||||||
|
@inline(__always)
|
||||||
|
private func isWhitespace(_ c: UnicodeScalar) -> Bool {
|
||||||
|
// this is not strictly correct, but checking the actual unicode properties is slow
|
||||||
|
// and this should cover the vast majority of actual use
|
||||||
|
c == " " || c == "\n" || c == "\t" || c == "\u{A0}" /* NO-BREAK SPACE */
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,19 +7,18 @@
|
||||||
|
|
||||||
import Foundation
|
import Foundation
|
||||||
|
|
||||||
public struct TextConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
public class TextConverter<Callbacks: HTMLConversionCallbacks> {
|
||||||
private let configuration: TextConverterConfiguration
|
private let configuration: TextConverterConfiguration
|
||||||
|
|
||||||
private var tokenizer: Tokenizer<String.UnicodeScalarView.Iterator>!
|
private var tokenizer: Tokenizer<String.UnicodeScalarView.Iterator>!
|
||||||
private var str: String!
|
private var str: String!
|
||||||
|
|
||||||
private var actionStack: [ElementAction] = []
|
private var actionStack: [ElementAction] = []
|
||||||
var blockState = BlockState.start
|
var blockStateMachine = BlockStateMachine(blockBreak: "", lineBreak: "", listIndentForContentOutsideItem: "", append: { _ in }, removeChar: {})
|
||||||
var temporaryBuffer: String = ""
|
|
||||||
private var currentElementIsEmpty = true
|
private var currentElementIsEmpty = true
|
||||||
private var currentRun = ""
|
private var currentRun = ""
|
||||||
|
|
||||||
public init(configuration: TextConverterConfiguration = .init()) where Callbacks == DefaultCallbacks {
|
public convenience init(configuration: TextConverterConfiguration = .init()) where Callbacks == DefaultCallbacks {
|
||||||
self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
|
self.init(configuration: configuration, callbacks: DefaultCallbacks.self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,12 +26,19 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
||||||
self.configuration = configuration
|
self.configuration = configuration
|
||||||
}
|
}
|
||||||
|
|
||||||
public mutating func convert(html: String) -> String {
|
public func convert(html: String) -> String {
|
||||||
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
|
tokenizer = Tokenizer(chars: html.unicodeScalars.makeIterator())
|
||||||
str = ""
|
str = ""
|
||||||
|
|
||||||
blockState = .start
|
blockStateMachine = BlockStateMachine(
|
||||||
temporaryBuffer = ""
|
blockBreak: configuration.insertNewlines ? "\n\n" : " " ,
|
||||||
|
lineBreak: configuration.insertNewlines ? "\n" : " " ,
|
||||||
|
listIndentForContentOutsideItem: "",
|
||||||
|
append: { [unowned self] in
|
||||||
|
self.append($0)
|
||||||
|
}, removeChar: { [unowned self] in
|
||||||
|
self.removeChar()
|
||||||
|
})
|
||||||
currentElementIsEmpty = true
|
currentElementIsEmpty = true
|
||||||
currentRun = ""
|
currentRun = ""
|
||||||
|
|
||||||
|
@ -40,13 +46,13 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
||||||
switch token {
|
switch token {
|
||||||
case .character(let scalar):
|
case .character(let scalar):
|
||||||
currentElementIsEmpty = false
|
currentElementIsEmpty = false
|
||||||
if continueBlock(char: scalar) {
|
if blockStateMachine.continueBlock(char: scalar) {
|
||||||
currentRun.unicodeScalars.append(scalar)
|
currentRun.unicodeScalars.append(scalar)
|
||||||
}
|
}
|
||||||
case .characterSequence(let string):
|
case .characterSequence(let string):
|
||||||
currentElementIsEmpty = false
|
currentElementIsEmpty = false
|
||||||
for c in string.unicodeScalars {
|
for c in string.unicodeScalars {
|
||||||
if continueBlock(char: c) {
|
if blockStateMachine.continueBlock(char: c) {
|
||||||
currentRun.unicodeScalars.append(c)
|
currentRun.unicodeScalars.append(c)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -71,27 +77,27 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
endBlocks()
|
blockStateMachine.endBlocks()
|
||||||
finishRun()
|
finishRun()
|
||||||
|
|
||||||
return str
|
return str
|
||||||
}
|
}
|
||||||
|
|
||||||
private mutating func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
|
private func handleStartTag(_ name: String, selfClosing: Bool, attributes: [Attribute]) {
|
||||||
switch name {
|
switch name {
|
||||||
case "br":
|
case "br":
|
||||||
breakTag()
|
blockStateMachine.breakTag()
|
||||||
case "pre", "blockquote", "p", "ol", "ul":
|
case "pre", "blockquote", "p", "ol", "ul":
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
default:
|
default:
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private mutating func handleEndTag(_ name: String) {
|
private func handleEndTag(_ name: String) {
|
||||||
switch name {
|
switch name {
|
||||||
case "pre", "blockquote", "p", "ol", "ul":
|
case "pre", "blockquote", "p", "ol", "ul":
|
||||||
startOrEndBlock()
|
blockStateMachine.startOrEndBlock()
|
||||||
finishRun()
|
finishRun()
|
||||||
default:
|
default:
|
||||||
break
|
break
|
||||||
|
@ -118,11 +124,11 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
||||||
" "
|
" "
|
||||||
}
|
}
|
||||||
|
|
||||||
mutating func append(_ s: String) {
|
func append(_ s: String) {
|
||||||
currentRun.append(s)
|
currentRun.append(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
mutating func removeChar() {
|
func removeChar() {
|
||||||
if currentRun.isEmpty {
|
if currentRun.isEmpty {
|
||||||
str.removeLast()
|
str.removeLast()
|
||||||
} else {
|
} else {
|
||||||
|
@ -130,7 +136,7 @@ public struct TextConverter<Callbacks: HTMLConversionCallbacks>: BlockRenderer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private mutating func finishRun() {
|
private func finishRun() {
|
||||||
if actionStack.contains(.skip) {
|
if actionStack.contains(.skip) {
|
||||||
currentRun = ""
|
currentRun = ""
|
||||||
return
|
return
|
||||||
|
|
Loading…
Reference in New Issue