//
//  BlockState.swift
//  HTMLStreamer
//
//  Created by Shadowfacts on 2/14/24.
//
|
|
|
|
import Foundation
|
|
|
|
/*

 This gnarly mess of a state machine is responsible for:
 1) Inserting line breaks in the right places, corresponding to boundaries between block elements
 2) Preventing leading/trailing whitespace from being emitted
 3) Collapsing whitespace within the string, as described in https://www.w3.org/TR/css-text-3/#white-space-phase-1
 4) Handling whitespace inside <pre> elements

 DO NOT TOUCH THE CODE WITHOUT CHECKING/UPDATING THE DIAGRAM.

 */
|
|
|
|
/// Storage and output hooks required by the block/whitespace state machine that is
/// implemented in the `BlockRenderer` extension.
protocol BlockRenderer {
    // MARK: State owned by the state machine

    /// Current state of the block/whitespace state machine.
    var blockState: BlockState { get set }

    /// Text the state machine is holding back. It is either flushed through `append(_:)`
    /// once non-whitespace content follows, or cleared without ever being emitted.
    var temporaryBuffer: String { get set }

    // MARK: Separators supplied by the conforming renderer

    /// Separator emitted before the first content of a block that follows an earlier block.
    var blockBreak: String { get }

    /// Separator used for line-break tags and for newlines inside preformatted text.
    var lineBreak: String { get }

    /// Emitted right after `lineBreak` when content shows up after a list item has ended.
    var listIndentForContentOutsideItem: String { get }

    // MARK: Output

    /// Appends `s` to whatever the renderer is building (presumably the rendered output —
    /// confirm against the conforming types).
    mutating func append(_ s: String)

    /// Invoked when the state machine wants to retract the single collapsed space it appended
    /// most recently (presumably removes the last appended character — confirm against the
    /// conforming types).
    mutating func removeChar()
}
|
|
|
|
extension BlockRenderer {
|
|
/// Records a block-element boundary (presumably invoked for both the opening and the closing
/// tag of a block element — confirm against callers).
///
/// No separator is written to the output here. The method only moves the state machine into
/// one of its "empty block" states so that `continueBlock(char:)` can emit the separator
/// lazily, and it discards or keeps whatever is pending in `temporaryBuffer`.
mutating func startOrEndBlock() {
    switch blockState {
    // Already sitting on a boundary (or nothing emitted yet): nothing to do.
    case .start,
         .emptyBlock,
         .emptyBlockWithAtLeastTwoPreviousLineBreakTags,
         .beginListItem,
         .preformattedStart,
         .preformattedEmptyBlock,
         .preformattedEmptyBlockWithLeadingWhitespace:
        break

    // Content was emitted; the next content belongs to a new block.
    case .nonEmptyBlock, .endListItem, .listItemContent:
        blockState = .emptyBlock

    // The block ended on a collapsed space: retract it so no trailing whitespace is left.
    case .emittedSpace, .emittedSpaceInListItemContent:
        blockState = .emptyBlock
        removeChar()

    // A single pending line break right before the boundary is dropped.
    case .lineBreakTag, .lineBreakTagInListItemContent:
        blockState = .emptyBlock
        temporaryBuffer = ""

    // Two or more pending line breaks survive the boundary; the buffer is kept untouched.
    case .atLeastTwoLineBreakTags, .atLeastTwoLineBreakTagsInListItemContent:
        blockState = .emptyBlockWithAtLeastTwoPreviousLineBreakTags

    case .preformattedNonEmptyBlock(let depth), .afterPreStartTag(let depth):
        blockState = .preformattedEmptyBlock(depth: depth)

    // One pending newline: a second line break is queued behind it.
    case .preformattedLineBreak(let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
        temporaryBuffer.append(lineBreak)

    case .preformattedAtLeastTwoLineBreaks(let depth),
         .afterPreStartTagWithLeadingWhitespace(let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)

    // The state is left unchanged; `blockBreak` is queued behind the buffered trailing
    // whitespace.
    // NOTE(review): this is the only non-boundary state that does not transition here —
    // confirm against the diagram.
    case .preformattedNonEmptyBlockWithTrailingWhitespace:
        temporaryBuffer.append(blockBreak)
    }
}
|
|
|
|
/// Feeds one character of text content through the state machine.
///
/// Depending on the current state the character is dropped, replaced by a single collapsed
/// space, or stored in `temporaryBuffer`; pending separators are flushed right before
/// non-whitespace content.
///
/// NOTE(review): `isWhitespace` is the Unicode `White_Space` property, which also covers
/// characters such as U+00A0 NO-BREAK SPACE, whereas the CSS rules linked at the top of this
/// file only collapse spaces, tabs and segment breaks — confirm this is intended.
///
/// - Parameter char: The scalar that was just read.
/// - Returns: `true` when the state machine did not consume `char` (presumably the caller
///   then emits it itself — confirm against callers); `false` when it was dropped, collapsed
///   or buffered here.
mutating func continueBlock(char: UnicodeScalar) -> Bool {
    let isNewline = char == "\n"
    let isBlank = char.properties.isWhitespace

    switch blockState {
    // MARK: Regular flow

    case .start, .emittedSpace:
        guard !isBlank else { return false }
        blockState = .nonEmptyBlock
        return true

    case .emptyBlock:
        guard !isBlank else { return false }
        blockState = .nonEmptyBlock
        append(blockBreak)
        return true

    case .nonEmptyBlock:
        guard isBlank else { return true }
        // First whitespace after content collapses to exactly one space.
        blockState = .emittedSpace
        append(" ")
        return false

    case .lineBreakTag,
         .atLeastTwoLineBreakTags,
         .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
        guard !isBlank else { return false }
        // Content follows: the held-back line breaks become real output.
        blockState = .nonEmptyBlock
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    // MARK: List items

    case .beginListItem, .emittedSpaceInListItemContent:
        guard !isBlank else { return false }
        blockState = .listItemContent
        return true

    case .endListItem:
        guard !isBlank else { return false }
        // Content after a closed item goes on its own, indented line.
        blockState = .listItemContent
        append(lineBreak)
        append(listIndentForContentOutsideItem)
        return true

    case .listItemContent:
        guard isBlank else { return true }
        blockState = .emittedSpaceInListItemContent
        append(" ")
        return false

    case .lineBreakTagInListItemContent, .atLeastTwoLineBreakTagsInListItemContent:
        guard !isBlank else { return false }
        blockState = .listItemContent
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    // MARK: Preformatted text

    case .preformattedStart(let depth):
        // Only newlines are skipped here; any other character starts the content.
        guard !isNewline else { return false }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        return true

    case .preformattedEmptyBlock(let depth):
        if isBlank {
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(blockBreak)
        return true

    case .preformattedNonEmptyBlock(let depth):
        if isNewline {
            blockState = .preformattedLineBreak(depth: depth)
            temporaryBuffer.append(lineBreak)
            return false
        }
        if isBlank {
            blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        return true

    case .preformattedLineBreak(let depth):
        if isNewline {
            blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
            temporaryBuffer.append(lineBreak)
            return false
        }
        if isBlank {
            blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .preformattedAtLeastTwoLineBreaks(let depth):
        if isBlank {
            // Any whitespace, newlines included, is buffered verbatim in this state.
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .afterPreStartTag(let depth):
        if isNewline {
            // The newline right after the start tag is swallowed.
            blockState = .preformattedEmptyBlock(depth: depth)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(blockBreak)
        return true

    case .afterPreStartTagWithLeadingWhitespace(let depth):
        if isNewline {
            // Swallowed, nothing is added to the buffer.
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
            return false
        }
        if isBlank {
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth),
         .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        if isNewline {
            blockState = .preformattedLineBreak(depth: depth)
            temporaryBuffer.append(lineBreak)
            return false
        }
        if isBlank {
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true
    }
}
|
|
|
|
/// Handles a line-break tag (presumably `<br>` — confirm against callers).
///
/// After content, the break is parked in `temporaryBuffer` so it can still be dropped if no
/// content follows. On an empty block or a fresh list item it is written straight to the
/// output. Before any content (`.start`, `.preformattedStart`) it is ignored.
mutating func breakTag() {
    switch blockState {
    // Nothing has been emitted yet: leading breaks are ignored.
    case .start, .preformattedStart:
        break

    // No content in the current block: the break goes directly to the output.
    case .emptyBlock,
         .emptyBlockWithAtLeastTwoPreviousLineBreakTags,
         .beginListItem:
        append(lineBreak)

    // First break after regular content.
    case .nonEmptyBlock, .emittedSpace:
        blockState = .lineBreakTag
        temporaryBuffer.append(lineBreak)

    case .lineBreakTag:
        blockState = .atLeastTwoLineBreakTags
        temporaryBuffer.append(lineBreak)

    // Already counting "two or more": just queue another break.
    case .atLeastTwoLineBreakTags,
         .atLeastTwoLineBreakTagsInListItemContent,
         .preformattedAtLeastTwoLineBreaks:
        temporaryBuffer.append(lineBreak)

    // First break in (or right after) a list item.
    case .endListItem, .listItemContent, .emittedSpaceInListItemContent:
        blockState = .lineBreakTagInListItemContent
        temporaryBuffer.append(lineBreak)

    case .lineBreakTagInListItemContent:
        blockState = .atLeastTwoLineBreakTagsInListItemContent
        temporaryBuffer.append(lineBreak)

    // First break inside preformatted text.
    case .preformattedEmptyBlock(let depth),
         .preformattedNonEmptyBlock(let depth),
         .preformattedNonEmptyBlockWithTrailingWhitespace(let depth),
         .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        blockState = .preformattedLineBreak(depth: depth)
        temporaryBuffer.append(lineBreak)

    case .preformattedLineBreak(let depth):
        blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
        temporaryBuffer.append(lineBreak)

    // Directly after the start tag the block separator is still owed, so it is queued first.
    case .afterPreStartTag(let depth):
        blockState = .preformattedLineBreak(depth: depth)
        temporaryBuffer.append(blockBreak)
        temporaryBuffer.append(lineBreak)

    case .afterPreStartTagWithLeadingWhitespace(let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
        temporaryBuffer.append(lineBreak)
    }
}
|
|
|
|
/// Enters a preformatted element, or nests one level deeper when already inside one.
///
/// Only states that sit on a block boundary are accepted. Every other state traps with
/// `fatalError("unreachable")`, so callers are expected to have registered the block
/// boundary first (presumably via `startOrEndBlock()` — confirm against callers).
mutating func startPreformatted() {
    switch blockState {
    // Entering from regular flow: depth starts at 1.
    case .start:
        blockState = .preformattedStart(depth: 1)

    case .emptyBlock:
        blockState = .afterPreStartTag(depth: 1)

    case .emptyBlockWithAtLeastTwoPreviousLineBreakTags, .beginListItem:
        blockState = .afterPreStartTagWithLeadingWhitespace(depth: 1)

    // Nested preformatted element: same kind of state, one level deeper.
    case .preformattedStart(let depth):
        blockState = .preformattedStart(depth: depth + 1)

    case .preformattedEmptyBlock(let depth):
        blockState = .afterPreStartTag(depth: depth + 1)

    case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        blockState = .afterPreStartTagWithLeadingWhitespace(depth: depth + 1)

    // States that are not on a block boundary.
    case .nonEmptyBlock,
         .emittedSpace,
         .lineBreakTag,
         .atLeastTwoLineBreakTags,
         .endListItem,
         .listItemContent,
         .emittedSpaceInListItemContent,
         .lineBreakTagInListItemContent,
         .atLeastTwoLineBreakTagsInListItemContent,
         .preformattedNonEmptyBlock,
         .preformattedLineBreak,
         .preformattedAtLeastTwoLineBreaks,
         .afterPreStartTag,
         .afterPreStartTagWithLeadingWhitespace,
         .preformattedNonEmptyBlockWithTrailingWhitespace:
        fatalError("unreachable")
    }
}
|
|
|
|
/// Leaves a preformatted element: either drops back to regular flow (depth 1) or moves one
/// nesting level up.
///
/// Only states that sit on a block boundary are accepted. Every other state traps with
/// `fatalError("unreachable")`, so callers are expected to have registered the block
/// boundary first (presumably via `startOrEndBlock()` — confirm against callers).
///
/// Compared with the previous revision, the only change is that the last buffered character
/// is removed only when the buffer is non-empty. State transitions are unchanged.
mutating func endPreformatted() {
    switch blockState {
    // Not inside preformatted text: nothing to close.
    case .start,
         .emptyBlock,
         .emptyBlockWithAtLeastTwoPreviousLineBreakTags,
         .beginListItem:
        break

    case .preformattedStart(let depth):
        blockState = depth <= 1 ? .start : .preformattedStart(depth: depth - 1)

    case .preformattedEmptyBlock(let depth):
        blockState = depth <= 1 ? .emptyBlock : .preformattedEmptyBlock(depth: depth - 1)

    case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        guard depth > 1 else {
            // Outermost element closed: pending whitespace is trailing and gets discarded.
            blockState = .emptyBlock
            temporaryBuffer = ""
            return
        }
        // Still nested: drop the last buffered character and keep the "leading whitespace"
        // flavour only if something remains buffered afterwards.
        let keepsLeadingWhitespace = temporaryBuffer.count >= 2
        // The buffer can be empty here (for example when the enclosing element was opened
        // from `.beginListItem` with nothing buffered). `removeLast()` traps on an empty
        // string, so the removal is skipped in that case.
        if !temporaryBuffer.isEmpty {
            temporaryBuffer.removeLast()
        }
        if keepsLeadingWhitespace {
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth - 1)
        } else {
            blockState = .preformattedEmptyBlock(depth: depth - 1)
        }

    // States that are not on a block boundary.
    case .nonEmptyBlock,
         .emittedSpace,
         .lineBreakTag,
         .atLeastTwoLineBreakTags,
         .endListItem,
         .listItemContent,
         .emittedSpaceInListItemContent,
         .lineBreakTagInListItemContent,
         .atLeastTwoLineBreakTagsInListItemContent,
         .preformattedNonEmptyBlock,
         .preformattedLineBreak,
         .preformattedAtLeastTwoLineBreaks,
         .afterPreStartTag,
         .afterPreStartTagWithLeadingWhitespace,
         .preformattedNonEmptyBlockWithTrailingWhitespace:
        fatalError("unreachable")
    }
}
|
|
|
|
/// Begins a list item: emits the separator that belongs between the previous output and the
/// new item, then moves to `.beginListItem`.
///
/// The switch is exhaustive on purpose (no `default`), matching the other transition
/// functions, so adding a `BlockState` case forces a decision here. The states listed as
/// no-ops are exactly the ones previously covered by `default: break`.
mutating func startListItem() {
    switch blockState {
    // Nothing emitted yet: no separator needed.
    case .start:
        blockState = .beginListItem

    // The item follows earlier block content.
    case .emptyBlock, .nonEmptyBlock:
        blockState = .beginListItem
        append(blockBreak)

    // The item follows a previous item: items are separated by a line break.
    case .endListItem, .listItemContent:
        blockState = .beginListItem
        append(lineBreak)

    // The previous item ended on a collapsed space: retract it before the line break.
    case .emittedSpaceInListItemContent:
        blockState = .beginListItem
        removeChar()
        append(lineBreak)

    // Pending line breaks from the previous item are flushed, followed by the separator.
    case .lineBreakTagInListItemContent, .atLeastTwoLineBreakTagsInListItemContent:
        blockState = .beginListItem
        append(temporaryBuffer)
        temporaryBuffer = ""
        append(lineBreak)

    // No transition is defined from these states; they are left untouched.
    // NOTE(review): `.emittedSpace` and the pending-line-break states are no-ops here while
    // `.nonEmptyBlock` transitions — confirm against the diagram.
    case .beginListItem,
         .emittedSpace,
         .lineBreakTag,
         .atLeastTwoLineBreakTags,
         .emptyBlockWithAtLeastTwoPreviousLineBreakTags,
         .preformattedStart,
         .preformattedEmptyBlock,
         .preformattedNonEmptyBlock,
         .preformattedLineBreak,
         .preformattedAtLeastTwoLineBreaks,
         .afterPreStartTag,
         .afterPreStartTagWithLeadingWhitespace,
         .preformattedNonEmptyBlockWithTrailingWhitespace,
         .preformattedEmptyBlockWithLeadingWhitespace:
        break
    }
}
|
|
|
|
/// Ends the current list item: trailing whitespace and pending line breaks are discarded and
/// the state machine moves to `.endListItem`.
///
/// The switch is exhaustive on purpose (no `default`), matching the other transition
/// functions, so adding a `BlockState` case forces a decision here. The states listed as
/// no-ops are exactly the ones previously covered by `default: break`.
mutating func endListItem() {
    switch blockState {
    case .emptyBlock, .nonEmptyBlock, .listItemContent:
        blockState = .endListItem

    // The item ended on a collapsed space: retract it.
    case .emittedSpaceInListItemContent:
        blockState = .endListItem
        removeChar()

    // Line breaks at the very end of the item are dropped.
    case .lineBreakTagInListItemContent, .atLeastTwoLineBreakTagsInListItemContent:
        blockState = .endListItem
        temporaryBuffer = ""

    // No transition is defined from these states; they are left untouched.
    // NOTE(review): `.emittedSpace` and `.beginListItem` (an item without content) are no-ops
    // here while `.nonEmptyBlock` transitions — confirm against the diagram.
    case .start,
         .emittedSpace,
         .lineBreakTag,
         .atLeastTwoLineBreakTags,
         .emptyBlockWithAtLeastTwoPreviousLineBreakTags,
         .beginListItem,
         .endListItem,
         .preformattedStart,
         .preformattedEmptyBlock,
         .preformattedNonEmptyBlock,
         .preformattedLineBreak,
         .preformattedAtLeastTwoLineBreaks,
         .afterPreStartTag,
         .afterPreStartTagWithLeadingWhitespace,
         .preformattedNonEmptyBlockWithTrailingWhitespace,
         .preformattedEmptyBlockWithLeadingWhitespace:
        break
    }
}
|
|
|
|
/// Retracts the trailing collapsed space when the last thing the state machine appended was
/// one (presumably called once after all input has been processed — confirm against callers).
///
/// The state itself is left unchanged.
mutating func endBlocks() {
    let current = blockState
    let endsOnCollapsedSpace = current == .emittedSpace || current == .emittedSpaceInListItemContent
    if endsOnCollapsedSpace {
        removeChar()
    }
}
|
|
}
|
|
|
|
/// States of the block/whitespace state machine implemented in the `BlockRenderer` extension.
///
/// For the preformatted states, `depth` counts the currently open preformatted elements:
/// `startPreformatted()` raises it by one and `endPreformatted()` lowers it again.
enum BlockState: Equatable {
    // MARK: Regular flow

    /// Leading whitespace and break tags are ignored; the first content is emitted without a
    /// preceding separator.
    case start
    /// A block boundary was seen; the next content is preceded by `blockBreak`.
    case emptyBlock
    /// The last character seen was non-whitespace content.
    case nonEmptyBlock
    /// A single collapsed space was appended after content; further whitespace is dropped.
    case emittedSpace
    /// One line break is pending in `temporaryBuffer`.
    case lineBreakTag
    /// Two or more line breaks are pending in `temporaryBuffer`.
    case atLeastTwoLineBreakTags
    /// A block boundary was reached while two or more line breaks were still pending; they
    /// are flushed before the next content.
    case emptyBlockWithAtLeastTwoPreviousLineBreakTags

    // MARK: List items

    /// A list item was opened and has no content yet.
    case beginListItem
    /// A list item was closed.
    case endListItem
    /// List-item counterpart of `nonEmptyBlock`.
    case listItemContent
    /// List-item counterpart of `emittedSpace`.
    case emittedSpaceInListItemContent
    /// List-item counterpart of `lineBreakTag`.
    case lineBreakTagInListItemContent
    /// List-item counterpart of `atLeastTwoLineBreakTags`.
    case atLeastTwoLineBreakTagsInListItemContent

    // MARK: Preformatted text

    /// A preformatted element was opened from `start`; newlines are skipped until content.
    case preformattedStart(depth: Int)
    /// Inside preformatted text on a block boundary with nothing buffered.
    case preformattedEmptyBlock(depth: Int)
    /// Inside preformatted text; the last character seen was non-whitespace content.
    case preformattedNonEmptyBlock(depth: Int)
    /// Inside preformatted text with one line break pending in `temporaryBuffer`.
    case preformattedLineBreak(depth: Int)
    /// Inside preformatted text with two or more line breaks pending in `temporaryBuffer`.
    case preformattedAtLeastTwoLineBreaks(depth: Int)
    /// Directly after a preformatted start tag that followed an empty block.
    case afterPreStartTag(depth: Int)
    /// Directly after a preformatted start tag, entered from a state that may already have
    /// whitespace pending in `temporaryBuffer`.
    case afterPreStartTagWithLeadingWhitespace(depth: Int)
    /// Preformatted content followed by whitespace that is held in `temporaryBuffer`.
    case preformattedNonEmptyBlockWithTrailingWhitespace(depth: Int)
    /// Preformatted block boundary followed by whitespace that is held in `temporaryBuffer`.
    case preformattedEmptyBlockWithLeadingWhitespace(depth: Int)
}
|