551 lines
20 KiB
Swift
551 lines
20 KiB
Swift
//
|
|
// BlockState.swift
|
|
// HTMLStreamer
|
|
//
|
|
// Created by Shadowfacts on 2/14/24.
|
|
//
|
|
|
|
import Foundation
|
|
|
|
/*
|
|
|
|
This gnarly mess of a state machine is responsible for:
|
|
1) Inserting line breaks in the right places corresponding to boundaries between block elements
|
|
2) Preventing leading/trailing whitespace from being emitted
|
|
3) Collapsing whitespace within the string like https://www.w3.org/TR/css-text-3/#white-space-phase-1
|
|
4) Handling whitespace inside <pre> elements
|
|
|
|
DO NOT TOUCH THE CODE WITHOUT CHECKING/UPDATING THE DIAGRAM.
|
|
|
|
*/
|
|
|
|
struct BlockStateMachine {
|
|
var blockState: BlockState = .start
|
|
let blockBreak: String
|
|
let lineBreak: String
|
|
let listIndentForContentOutsideItem: String
|
|
var temporaryBuffer: String = ""
|
|
let append: (String) -> Void
|
|
let removeChar: () -> Void
|
|
}
|
|
|
|
extension BlockStateMachine {
|
|
mutating func startOrEndBlock() {
|
|
switch blockState {
|
|
case .start:
|
|
break
|
|
case .emptyBlock:
|
|
break
|
|
case .nonEmptyBlock:
|
|
blockState = .emptyBlock
|
|
case .lineBreakTag:
|
|
blockState = .emptyBlock
|
|
temporaryBuffer = ""
|
|
case .atLeastTwoLineBreakTags:
|
|
blockState = .emptyBlockWithAtLeastTwoPreviousLineBreakTags
|
|
case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
|
|
break
|
|
case .beginListItem:
|
|
break
|
|
case .endListItem:
|
|
blockState = .emptyBlock
|
|
case .listItemContent:
|
|
blockState = .emptyBlock
|
|
case .lineBreakTagInListItemContent:
|
|
blockState = .emptyBlock
|
|
temporaryBuffer = ""
|
|
case .atLeastTwoLineBreakTagsInListItemContent:
|
|
blockState = .emptyBlockWithAtLeastTwoPreviousLineBreakTags
|
|
case .preformattedStart(depth: _):
|
|
break
|
|
case .preformattedEmptyBlock(depth: _):
|
|
break
|
|
case .preformattedNonEmptyBlock(let depth):
|
|
blockState = .preformattedEmptyBlock(depth: depth)
|
|
case .preformattedLineBreak(depth: let depth):
|
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
case .preformattedAtLeastTwoLineBreaks(let depth):
|
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
|
case .afterPreStartTag(let depth):
|
|
blockState = .preformattedEmptyBlock(depth: depth)
|
|
case .afterPreStartTagWithLeadingWhitespace(let depth):
|
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
|
case .preformattedNonEmptyBlockWithTrailingWhitespace(depth: _):
|
|
temporaryBuffer.append(blockBreak)
|
|
case .preformattedEmptyBlockWithLeadingWhitespace(depth: _):
|
|
break
|
|
}
|
|
}
|
|
|
|
mutating func continueBlock(char: UnicodeScalar) -> Bool {
|
|
let isNewline = char == "\n"
|
|
let isWhitespace = isNewline || isWhitespace(char)
|
|
switch blockState {
|
|
case .start:
|
|
if isWhitespace {
|
|
return false
|
|
} else {
|
|
blockState = .nonEmptyBlock
|
|
return true
|
|
}
|
|
case .emptyBlock:
|
|
if isWhitespace {
|
|
return false
|
|
} else {
|
|
blockState = .nonEmptyBlock
|
|
append(blockBreak)
|
|
return true
|
|
}
|
|
case .nonEmptyBlock:
|
|
if isNewline {
|
|
blockState = .lineBreakTag
|
|
temporaryBuffer.append("\n")
|
|
return false
|
|
} else {
|
|
return true
|
|
}
|
|
case .lineBreakTag:
|
|
if isWhitespace {
|
|
if isNewline {
|
|
blockState = .atLeastTwoLineBreakTags
|
|
}
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
blockState = .nonEmptyBlock
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
return true
|
|
}
|
|
case .atLeastTwoLineBreakTags:
|
|
if isWhitespace {
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
blockState = .nonEmptyBlock
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
return true
|
|
}
|
|
case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
|
|
if isWhitespace {
|
|
return false
|
|
} else {
|
|
blockState = .nonEmptyBlock
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
return true
|
|
}
|
|
case .beginListItem:
|
|
if isWhitespace {
|
|
return false
|
|
} else {
|
|
blockState = .listItemContent
|
|
return true
|
|
}
|
|
case .endListItem:
|
|
if isWhitespace {
|
|
return false
|
|
} else {
|
|
blockState = .listItemContent
|
|
append(lineBreak)
|
|
append(listIndentForContentOutsideItem)
|
|
return true
|
|
}
|
|
case .listItemContent:
|
|
if isNewline {
|
|
blockState = .lineBreakTagInListItemContent
|
|
temporaryBuffer.append("\n")
|
|
return false
|
|
} else {
|
|
return true
|
|
}
|
|
case .lineBreakTagInListItemContent:
|
|
if isWhitespace {
|
|
if isNewline {
|
|
blockState = .atLeastTwoLineBreakTagsInListItemContent
|
|
}
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
blockState = .listItemContent
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
return true
|
|
}
|
|
case .atLeastTwoLineBreakTagsInListItemContent:
|
|
if isWhitespace {
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
blockState = .listItemContent
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
return true
|
|
}
|
|
case .preformattedStart(let depth):
|
|
if isNewline {
|
|
return false
|
|
} else {
|
|
blockState = .preformattedNonEmptyBlock(depth: depth)
|
|
return true
|
|
}
|
|
case .preformattedEmptyBlock(depth: let depth):
|
|
if isWhitespace {
|
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
blockState = .preformattedNonEmptyBlock(depth: depth)
|
|
append(blockBreak)
|
|
return true
|
|
}
|
|
case .preformattedNonEmptyBlock(let depth):
|
|
if isNewline {
|
|
blockState = .preformattedLineBreak(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
return false
|
|
} else if isWhitespace {
|
|
blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
return true
|
|
}
|
|
case .preformattedLineBreak(let depth):
|
|
if isNewline {
|
|
blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
return false
|
|
} else if isWhitespace {
|
|
blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
blockState = .preformattedNonEmptyBlock(depth: depth)
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
return true
|
|
}
|
|
case .preformattedAtLeastTwoLineBreaks(let depth):
|
|
if isWhitespace {
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
blockState = .preformattedNonEmptyBlock(depth: depth)
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
return true
|
|
}
|
|
case .afterPreStartTag(let depth):
|
|
if isNewline {
|
|
blockState = .preformattedEmptyBlock(depth: depth)
|
|
return false
|
|
} else {
|
|
blockState = .preformattedNonEmptyBlock(depth: depth)
|
|
append(blockBreak)
|
|
return true
|
|
}
|
|
case .afterPreStartTagWithLeadingWhitespace(let depth):
|
|
if isNewline {
|
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
|
return false
|
|
} else if isWhitespace {
|
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
blockState = .preformattedNonEmptyBlock(depth: depth)
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
return true
|
|
}
|
|
case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth):
|
|
if isNewline {
|
|
blockState = .preformattedLineBreak(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
return false
|
|
} else if isWhitespace {
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
blockState = .preformattedNonEmptyBlock(depth: depth)
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
return true
|
|
}
|
|
case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
|
|
if isNewline {
|
|
blockState = .preformattedLineBreak(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
return false
|
|
} else if isWhitespace {
|
|
temporaryBuffer.unicodeScalars.append(char)
|
|
return false
|
|
} else {
|
|
blockState = .preformattedNonEmptyBlock(depth: depth)
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
|
|
mutating func breakTag() {
|
|
switch blockState {
|
|
case .start:
|
|
break
|
|
case .emptyBlock:
|
|
append(lineBreak)
|
|
case .nonEmptyBlock:
|
|
blockState = .lineBreakTag
|
|
temporaryBuffer.append(lineBreak)
|
|
case .lineBreakTag:
|
|
blockState = .atLeastTwoLineBreakTags
|
|
temporaryBuffer.append(lineBreak)
|
|
case .atLeastTwoLineBreakTags:
|
|
temporaryBuffer.append(lineBreak)
|
|
case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
|
|
append(lineBreak)
|
|
case .beginListItem:
|
|
append(lineBreak)
|
|
case .endListItem:
|
|
blockState = .lineBreakTagInListItemContent
|
|
temporaryBuffer.append(lineBreak)
|
|
case .listItemContent:
|
|
blockState = .lineBreakTagInListItemContent
|
|
temporaryBuffer.append(lineBreak)
|
|
case .lineBreakTagInListItemContent:
|
|
blockState = .atLeastTwoLineBreakTagsInListItemContent
|
|
temporaryBuffer.append(lineBreak)
|
|
case .atLeastTwoLineBreakTagsInListItemContent:
|
|
temporaryBuffer.append(lineBreak)
|
|
case .preformattedStart(depth: _):
|
|
break
|
|
case .preformattedEmptyBlock(let depth):
|
|
blockState = .preformattedLineBreak(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
case .preformattedNonEmptyBlock(let depth):
|
|
blockState = .preformattedLineBreak(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
case .preformattedLineBreak(let depth):
|
|
blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
case .preformattedAtLeastTwoLineBreaks(depth: _):
|
|
temporaryBuffer.append(lineBreak)
|
|
case .afterPreStartTag(let depth):
|
|
blockState = .preformattedLineBreak(depth: depth)
|
|
temporaryBuffer.append(blockBreak)
|
|
temporaryBuffer.append(lineBreak)
|
|
case .afterPreStartTagWithLeadingWhitespace(let depth):
|
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth):
|
|
blockState = .preformattedLineBreak(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
|
|
blockState = .preformattedLineBreak(depth: depth)
|
|
temporaryBuffer.append(lineBreak)
|
|
}
|
|
}
|
|
|
|
mutating func startPreformatted() {
|
|
switch blockState {
|
|
case .start:
|
|
blockState = .preformattedStart(depth: 1)
|
|
case .emptyBlock:
|
|
blockState = .afterPreStartTag(depth: 1)
|
|
case .nonEmptyBlock:
|
|
fatalError("unreachable")
|
|
case .lineBreakTag:
|
|
fatalError("unreachable")
|
|
case .atLeastTwoLineBreakTags:
|
|
fatalError("unreachable")
|
|
case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
|
|
blockState = .afterPreStartTagWithLeadingWhitespace(depth: 1)
|
|
case .beginListItem:
|
|
blockState = .afterPreStartTagWithLeadingWhitespace(depth: 1)
|
|
case .endListItem:
|
|
fatalError("unreachable")
|
|
case .listItemContent:
|
|
fatalError("unreachable")
|
|
case .lineBreakTagInListItemContent:
|
|
fatalError("unreachable")
|
|
case .atLeastTwoLineBreakTagsInListItemContent:
|
|
fatalError("unreachable")
|
|
case .preformattedStart(let depth):
|
|
blockState = .preformattedStart(depth: depth + 1)
|
|
case .preformattedEmptyBlock(let depth):
|
|
blockState = .afterPreStartTag(depth: depth + 1)
|
|
case .preformattedNonEmptyBlock(depth: _):
|
|
fatalError("unreachable")
|
|
case .preformattedLineBreak(depth: _):
|
|
fatalError("unreachable")
|
|
case .preformattedAtLeastTwoLineBreaks(depth: _):
|
|
fatalError("unreachable")
|
|
case .afterPreStartTag(depth: _):
|
|
fatalError("unreachable")
|
|
case .afterPreStartTagWithLeadingWhitespace(depth: _):
|
|
fatalError("unreachable")
|
|
case .preformattedNonEmptyBlockWithTrailingWhitespace(depth: _):
|
|
fatalError("unreachable")
|
|
case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
|
|
blockState = .afterPreStartTagWithLeadingWhitespace(depth: depth + 1)
|
|
}
|
|
}
|
|
|
|
mutating func endPreformatted() {
|
|
switch blockState {
|
|
case .start:
|
|
break
|
|
case .emptyBlock:
|
|
break
|
|
case .nonEmptyBlock:
|
|
fatalError("unreachable")
|
|
case .lineBreakTag:
|
|
fatalError("unreachable")
|
|
case .atLeastTwoLineBreakTags:
|
|
fatalError("unreachable")
|
|
case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
|
|
break
|
|
case .beginListItem:
|
|
break
|
|
case .endListItem:
|
|
fatalError("unreachable")
|
|
case .listItemContent:
|
|
fatalError("unreachable")
|
|
case .lineBreakTagInListItemContent:
|
|
fatalError("unreachable")
|
|
case .atLeastTwoLineBreakTagsInListItemContent:
|
|
fatalError("unreachable")
|
|
case .preformattedStart(let depth):
|
|
if depth <= 1 {
|
|
blockState = .start
|
|
} else {
|
|
blockState = .preformattedStart(depth: depth - 1)
|
|
}
|
|
case .preformattedEmptyBlock(let depth):
|
|
if depth <= 1 {
|
|
blockState = .emptyBlock
|
|
} else {
|
|
blockState = .preformattedEmptyBlock(depth: depth - 1)
|
|
}
|
|
case .preformattedNonEmptyBlock(depth: _):
|
|
fatalError("unreachable")
|
|
case .preformattedLineBreak(depth: _):
|
|
fatalError("unreachable")
|
|
case .preformattedAtLeastTwoLineBreaks(depth: _):
|
|
fatalError("unreachable")
|
|
case .afterPreStartTag(depth: _):
|
|
fatalError("unreachable")
|
|
case .afterPreStartTagWithLeadingWhitespace(depth: _):
|
|
fatalError("unreachable")
|
|
case .preformattedNonEmptyBlockWithTrailingWhitespace(depth: _):
|
|
fatalError("unreachable")
|
|
case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
|
|
if depth <= 1 {
|
|
blockState = .emptyBlock
|
|
temporaryBuffer = ""
|
|
} else {
|
|
if temporaryBuffer.count >= 2 {
|
|
temporaryBuffer.removeLast()
|
|
blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth - 1)
|
|
} else {
|
|
temporaryBuffer.removeLast()
|
|
blockState = .preformattedEmptyBlock(depth: depth - 1)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
mutating func startListItem() {
|
|
switch blockState {
|
|
case .start:
|
|
blockState = .beginListItem
|
|
case .emptyBlock:
|
|
blockState = .beginListItem
|
|
append(blockBreak)
|
|
case .nonEmptyBlock:
|
|
blockState = .beginListItem
|
|
append(blockBreak)
|
|
case .beginListItem:
|
|
break
|
|
case .endListItem:
|
|
blockState = .beginListItem
|
|
append(lineBreak)
|
|
case .listItemContent:
|
|
blockState = .beginListItem
|
|
append(lineBreak)
|
|
case .lineBreakTagInListItemContent:
|
|
blockState = .beginListItem
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
append(lineBreak)
|
|
case .atLeastTwoLineBreakTagsInListItemContent:
|
|
blockState = .beginListItem
|
|
append(temporaryBuffer)
|
|
temporaryBuffer = ""
|
|
append(lineBreak)
|
|
default:
|
|
break
|
|
}
|
|
}
|
|
|
|
mutating func endListItem() {
|
|
switch blockState {
|
|
case .emptyBlock:
|
|
blockState = .endListItem
|
|
case .nonEmptyBlock:
|
|
blockState = .endListItem
|
|
case .listItemContent:
|
|
blockState = .endListItem
|
|
case .lineBreakTagInListItemContent:
|
|
blockState = .endListItem
|
|
temporaryBuffer = ""
|
|
case .atLeastTwoLineBreakTagsInListItemContent:
|
|
blockState = .endListItem
|
|
temporaryBuffer = ""
|
|
default:
|
|
break
|
|
}
|
|
}
|
|
|
|
mutating func endBlocks() {
|
|
switch blockState {
|
|
default:
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
enum BlockState: Equatable {
|
|
case start
|
|
case emptyBlock
|
|
case nonEmptyBlock
|
|
case lineBreakTag
|
|
case atLeastTwoLineBreakTags
|
|
case emptyBlockWithAtLeastTwoPreviousLineBreakTags
|
|
case beginListItem
|
|
case endListItem
|
|
case listItemContent
|
|
case lineBreakTagInListItemContent
|
|
case atLeastTwoLineBreakTagsInListItemContent
|
|
case preformattedStart(depth: Int32)
|
|
case preformattedEmptyBlock(depth: Int32)
|
|
case preformattedNonEmptyBlock(depth: Int32)
|
|
case preformattedLineBreak(depth: Int32)
|
|
case preformattedAtLeastTwoLineBreaks(depth: Int32)
|
|
case afterPreStartTag(depth: Int32)
|
|
case afterPreStartTagWithLeadingWhitespace(depth: Int32)
|
|
case preformattedNonEmptyBlockWithTrailingWhitespace(depth: Int32)
|
|
case preformattedEmptyBlockWithLeadingWhitespace(depth: Int32)
|
|
}
|
|
|
|
@inline(__always)
|
|
private func isWhitespace(_ c: UnicodeScalar) -> Bool {
|
|
// this is not strictly correct, but checking the actual unicode properties is slow
|
|
// and this should cover the vast majority of actual use
|
|
c == " " || c == "\n" || c == "\t"
|
|
}
|