HTMLStreamer/Sources/HTMLStreamer/BlockState.swift

579 lines
21 KiB
Swift

//
// BlockState.swift
// HTMLStreamer
//
// Created by Shadowfacts on 2/14/24.
//
import Foundation
/*
This gnarly mess of a state machine is responsible for:
1) Inserting line breaks in the right places corresponding to boundaries between block elements
2) Preventing leading/trailing whitespace from being emitted
3) Collapsing whitespace within the string like https://www.w3.org/TR/css-text-3/#white-space-phase-1
4) Handling whitespace inside <pre> elements
DO NOT TOUCH THE CODE WITHOUT CHECKING/UPDATING THE DIAGRAM.
*/
/// A text renderer whose output is filtered through the `BlockState` state machine.
///
/// The conforming type supplies the output sink and the separator strings; the
/// extension on this protocol implements the state transitions.
protocol BlockRenderer {
/// Current state of the whitespace / block-boundary state machine.
var blockState: BlockState { get set }
/// String emitted before the first content of a block that follows earlier content.
var blockBreak: String { get }
/// String emitted (or buffered) for a single line break.
var lineBreak: String { get }
/// Emitted after `lineBreak` when content shows up after a list item has ended
/// (see the `.endListItem` arm of `continueBlock(char:)`).
var listIndentForContentOutsideItem: String { get }
/// Pending line breaks / whitespace that are appended to the output only if
/// non-whitespace content follows, and are discarded or reset otherwise.
var temporaryBuffer: String { get set }
/// Appends `s` to the rendered output.
mutating func append(_ s: String)
/// Called to take back a collapsed space that was emitted via `append(" ")`.
/// NOTE(review): presumably removes the last character of the output — confirm in conformers.
mutating func removeChar()
}
extension BlockRenderer {
/// Records that a block-level element boundary (start or end tag) was reached.
///
/// Every "non-empty" state is moved to a matching "empty block" state so that the next
/// non-whitespace character is preceded by a block break (or by whatever is pending in
/// `temporaryBuffer`). States that already represent a boundary are left untouched, so
/// calling this several times in a row has no additional effect.
///
/// After this call the state is never one of the states that `startPreformatted()` /
/// `endPreformatted()` treat as unreachable.
mutating func startOrEndBlock() {
    switch blockState {
    case .start:
        break
    case .emptyBlock:
        break
    case .nonEmptyBlock:
        blockState = .emptyBlock
    case .emittedSpace:
        blockState = .emptyBlock
        // Take back the collapsed space emitted by continueBlock; it would be trailing whitespace.
        removeChar()
    case .lineBreakTag:
        blockState = .emptyBlock
        // A single pending line break is subsumed by the block boundary.
        temporaryBuffer = ""
    case .atLeastTwoLineBreakTags:
        // Keep the buffered line breaks; they are flushed instead of a block break.
        blockState = .emptyBlockWithAtLeastTwoPreviousLineBreakTags
    case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
        break
    case .beginListItem:
        break
    case .endListItem:
        blockState = .emptyBlock
    case .listItemContent:
        blockState = .emptyBlock
    case .emittedSpaceInListItemContent:
        blockState = .emptyBlock
        removeChar()
    case .lineBreakTagInListItemContent:
        blockState = .emptyBlock
        temporaryBuffer = ""
    case .atLeastTwoLineBreakTagsInListItemContent:
        blockState = .emptyBlockWithAtLeastTwoPreviousLineBreakTags
    case .preformattedStart(depth: _):
        break
    case .preformattedEmptyBlock(depth: _):
        break
    case .preformattedNonEmptyBlock(let depth):
        blockState = .preformattedEmptyBlock(depth: depth)
    case .preformattedLineBreak(depth: let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
        // One line break is already buffered; a second one makes up the block separation.
        temporaryBuffer.append(lineBreak)
    case .preformattedAtLeastTwoLineBreaks(let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
    case .afterPreStartTag(let depth):
        blockState = .preformattedEmptyBlock(depth: depth)
    case .afterPreStartTagWithLeadingWhitespace(let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
    case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth):
        // Fix: leave the "non-empty" state. Previously the state was kept, so a following
        // startPreformatted()/endPreformatted() hit fatalError("unreachable") and repeated
        // boundaries buffered one block break each.
        // The block break goes into the buffer because the target state flushes the buffer
        // as-is (without adding a block break) when content follows.
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
        temporaryBuffer.append(blockBreak)
    case .preformattedEmptyBlockWithLeadingWhitespace(depth: _):
        break
    }
}
/// Feeds one scalar of text content through the state machine.
///
/// Outside preformatted states, runs of whitespace collapse to a single `" "` and
/// whitespace at the start of a block is dropped. In preformatted states, whitespace
/// and newlines are held in `temporaryBuffer` and flushed once a non-whitespace
/// scalar arrives.
///
/// - Parameter char: The scalar being processed.
/// - Returns: `true` if `char` was not consumed here (presumably the caller then emits
///   it — confirm at call sites); `false` if it was dropped, replaced by a collapsed
///   space, or stored in `temporaryBuffer`.
mutating func continueBlock(char: UnicodeScalar) -> Bool {
    let isWhitespace: Bool = char.properties.isWhitespace
    let isNewline: Bool = (char == "\n")

    switch blockState {
    case .start, .emittedSpace:
        guard !isWhitespace else { return false }
        blockState = .nonEmptyBlock
        return true

    case .emptyBlock:
        guard !isWhitespace else { return false }
        blockState = .nonEmptyBlock
        append(blockBreak)
        return true

    case .nonEmptyBlock:
        guard isWhitespace else { return true }
        blockState = .emittedSpace
        append(" ")
        return false

    case .lineBreakTag,
         .atLeastTwoLineBreakTags,
         .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
        guard !isWhitespace else { return false }
        blockState = .nonEmptyBlock
        // Content follows, so the buffered line breaks become real output.
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .beginListItem, .emittedSpaceInListItemContent:
        guard !isWhitespace else { return false }
        blockState = .listItemContent
        return true

    case .endListItem:
        guard !isWhitespace else { return false }
        blockState = .listItemContent
        append(lineBreak)
        append(listIndentForContentOutsideItem)
        return true

    case .listItemContent:
        guard isWhitespace else { return true }
        blockState = .emittedSpaceInListItemContent
        append(" ")
        return false

    case .lineBreakTagInListItemContent,
         .atLeastTwoLineBreakTagsInListItemContent:
        guard !isWhitespace else { return false }
        blockState = .listItemContent
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .preformattedStart(let depth):
        // Only a newline is dropped here; any other scalar (including other whitespace) is content.
        guard !isNewline else { return false }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        return true

    case .preformattedEmptyBlock(let depth):
        if isWhitespace {
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(blockBreak)
        return true

    case .preformattedNonEmptyBlock(let depth):
        if isNewline {
            blockState = .preformattedLineBreak(depth: depth)
            temporaryBuffer.append(lineBreak)
            return false
        }
        if isWhitespace {
            blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        return true

    case .preformattedLineBreak(let depth):
        if isNewline {
            blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
            temporaryBuffer.append(lineBreak)
            return false
        }
        if isWhitespace {
            blockState = .preformattedNonEmptyBlockWithTrailingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .preformattedAtLeastTwoLineBreaks(let depth):
        if isWhitespace {
            // Newlines are stored verbatim here (the scalar itself, not `lineBreak`).
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .afterPreStartTag(let depth):
        if isNewline {
            blockState = .preformattedEmptyBlock(depth: depth)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(blockBreak)
        return true

    case .afterPreStartTagWithLeadingWhitespace(let depth):
        if isNewline {
            // The newline itself is not buffered.
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
            return false
        }
        if isWhitespace {
            blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true

    case .preformattedNonEmptyBlockWithTrailingWhitespace(let depth),
         .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        if isNewline {
            blockState = .preformattedLineBreak(depth: depth)
            temporaryBuffer.append(lineBreak)
            return false
        }
        if isWhitespace {
            temporaryBuffer.unicodeScalars.append(char)
            return false
        }
        blockState = .preformattedNonEmptyBlock(depth: depth)
        append(temporaryBuffer)
        temporaryBuffer = ""
        return true
    }
}
/// Handles an explicit line break tag.
///
/// In states that already have output pending or content before them, the line break is
/// stored in `temporaryBuffer` (and only emitted if content follows). In boundary states
/// (`.emptyBlock`, `.emptyBlockWithAtLeastTwoPreviousLineBreakTags`, `.beginListItem`)
/// it is appended to the output immediately. At the very start it is ignored.
mutating func breakTag() {
    switch blockState {
    case .start, .preformattedStart(depth: _):
        // Nothing has been emitted yet; a leading break is dropped.
        break

    case .emptyBlock,
         .emptyBlockWithAtLeastTwoPreviousLineBreakTags,
         .beginListItem:
        append(lineBreak)

    case .nonEmptyBlock, .emittedSpace:
        blockState = .lineBreakTag
        temporaryBuffer.append(lineBreak)

    case .lineBreakTag:
        blockState = .atLeastTwoLineBreakTags
        temporaryBuffer.append(lineBreak)

    case .atLeastTwoLineBreakTags,
         .atLeastTwoLineBreakTagsInListItemContent,
         .preformattedAtLeastTwoLineBreaks(depth: _):
        // Already in an "at least two" state: just keep buffering.
        temporaryBuffer.append(lineBreak)

    case .endListItem,
         .listItemContent,
         .emittedSpaceInListItemContent:
        blockState = .lineBreakTagInListItemContent
        temporaryBuffer.append(lineBreak)

    case .lineBreakTagInListItemContent:
        blockState = .atLeastTwoLineBreakTagsInListItemContent
        temporaryBuffer.append(lineBreak)

    case .preformattedEmptyBlock(let depth),
         .preformattedNonEmptyBlock(let depth),
         .preformattedNonEmptyBlockWithTrailingWhitespace(let depth),
         .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        blockState = .preformattedLineBreak(depth: depth)
        temporaryBuffer.append(lineBreak)

    case .preformattedLineBreak(let depth):
        blockState = .preformattedAtLeastTwoLineBreaks(depth: depth)
        temporaryBuffer.append(lineBreak)

    case .afterPreStartTag(let depth):
        blockState = .preformattedLineBreak(depth: depth)
        // The block break owed by the preceding boundary is buffered ahead of the line break.
        temporaryBuffer.append(blockBreak)
        temporaryBuffer.append(lineBreak)

    case .afterPreStartTagWithLeadingWhitespace(let depth):
        blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth)
        temporaryBuffer.append(lineBreak)
    }
}
/// Enters a preformatted element, switching to (or nesting deeper into) the
/// preformatted family of states.
///
/// Only boundary states are accepted. Every other state traps with
/// `fatalError("unreachable")`; those are states that `startOrEndBlock()` transitions
/// out of, so presumably the caller always invokes `startOrEndBlock()` first —
/// confirm at call sites.
mutating func startPreformatted() {
    switch blockState {
    // Entering from outside any preformatted element: depth starts at 1.
    case .start:
        blockState = .preformattedStart(depth: 1)
    case .emptyBlock:
        blockState = .afterPreStartTag(depth: 1)
    case .emptyBlockWithAtLeastTwoPreviousLineBreakTags, .beginListItem:
        blockState = .afterPreStartTagWithLeadingWhitespace(depth: 1)

    // Already inside a preformatted element: nest one level deeper.
    case .preformattedStart(let depth):
        blockState = .preformattedStart(depth: depth + 1)
    case .preformattedEmptyBlock(let depth):
        blockState = .afterPreStartTag(depth: depth + 1)
    case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        blockState = .afterPreStartTagWithLeadingWhitespace(depth: depth + 1)

    case .nonEmptyBlock,
         .emittedSpace,
         .lineBreakTag,
         .atLeastTwoLineBreakTags,
         .endListItem,
         .listItemContent,
         .emittedSpaceInListItemContent,
         .lineBreakTagInListItemContent,
         .atLeastTwoLineBreakTagsInListItemContent,
         .preformattedNonEmptyBlock(depth: _),
         .preformattedLineBreak(depth: _),
         .preformattedAtLeastTwoLineBreaks(depth: _),
         .afterPreStartTag(depth: _),
         .afterPreStartTagWithLeadingWhitespace(depth: _),
         .preformattedNonEmptyBlockWithTrailingWhitespace(depth: _):
        fatalError("unreachable")
    }
}
/// Leaves a preformatted element, either decrementing the nesting depth or, at depth 1,
/// returning to the corresponding non-preformatted state.
///
/// Only boundary states are accepted. Every other state traps with
/// `fatalError("unreachable")`; those are states that `startOrEndBlock()` is expected to
/// have moved out of — presumably the caller always invokes it first; confirm at call sites.
/// Calling this while not inside a preformatted element is a no-op.
mutating func endPreformatted() {
    switch blockState {
    case .start:
        break
    case .emptyBlock:
        break
    case .nonEmptyBlock:
        fatalError("unreachable")
    case .emittedSpace:
        fatalError("unreachable")
    case .lineBreakTag:
        fatalError("unreachable")
    case .atLeastTwoLineBreakTags:
        fatalError("unreachable")
    case .emptyBlockWithAtLeastTwoPreviousLineBreakTags:
        break
    case .beginListItem:
        break
    case .endListItem:
        fatalError("unreachable")
    case .listItemContent:
        fatalError("unreachable")
    case .emittedSpaceInListItemContent:
        fatalError("unreachable")
    case .lineBreakTagInListItemContent:
        fatalError("unreachable")
    case .atLeastTwoLineBreakTagsInListItemContent:
        fatalError("unreachable")
    case .preformattedStart(let depth):
        if depth <= 1 {
            blockState = .start
        } else {
            blockState = .preformattedStart(depth: depth - 1)
        }
    case .preformattedEmptyBlock(let depth):
        if depth <= 1 {
            blockState = .emptyBlock
        } else {
            blockState = .preformattedEmptyBlock(depth: depth - 1)
        }
    case .preformattedNonEmptyBlock(depth: _):
        fatalError("unreachable")
    case .preformattedLineBreak(depth: _):
        fatalError("unreachable")
    case .preformattedAtLeastTwoLineBreaks(depth: _):
        fatalError("unreachable")
    case .afterPreStartTag(depth: _):
        fatalError("unreachable")
    case .afterPreStartTagWithLeadingWhitespace(depth: _):
        fatalError("unreachable")
    case .preformattedNonEmptyBlockWithTrailingWhitespace(depth: _):
        fatalError("unreachable")
    case .preformattedEmptyBlockWithLeadingWhitespace(let depth):
        if depth <= 1 {
            // Leaving the outermost preformatted element: pending whitespace is discarded.
            blockState = .emptyBlock
            temporaryBuffer = ""
        } else {
            // Drop the last buffered character, if there is one. The buffer can be empty in
            // this state (e.g. when it was reached from `.beginListItem` through
            // `.afterPreStartTagWithLeadingWhitespace`, which buffers nothing), and
            // `String.removeLast()` traps on an empty string.
            if !temporaryBuffer.isEmpty {
                temporaryBuffer.removeLast()
            }
            // Same outcome as before for non-empty buffers: stay in the
            // "leading whitespace" state only while something is still pending.
            if temporaryBuffer.isEmpty {
                blockState = .preformattedEmptyBlock(depth: depth - 1)
            } else {
                blockState = .preformattedEmptyBlockWithLeadingWhitespace(depth: depth - 1)
            }
        }
    }
}
/// Begins a list item: emits the separator required by the current state and moves to
/// `.beginListItem`.
///
/// States not listed explicitly (including all preformatted states) are left unchanged
/// and emit nothing.
mutating func startListItem() {
    switch blockState {
    case .start:
        // Nothing emitted yet, so no separator is needed.
        blockState = .beginListItem

    case .emptyBlock, .nonEmptyBlock:
        blockState = .beginListItem
        append(blockBreak)

    case .endListItem, .listItemContent:
        blockState = .beginListItem
        append(lineBreak)

    case .emittedSpaceInListItemContent:
        blockState = .beginListItem
        // Take back the collapsed trailing space before breaking the line.
        removeChar()
        append(lineBreak)

    case .lineBreakTagInListItemContent,
         .atLeastTwoLineBreakTagsInListItemContent:
        blockState = .beginListItem
        // Buffered line breaks are flushed, then the item separator is added.
        append(temporaryBuffer)
        temporaryBuffer = ""
        append(lineBreak)

    case .beginListItem:
        break

    default:
        break
    }
}
/// Ends a list item: discards trailing whitespace / pending line breaks and moves to
/// `.endListItem`.
///
/// States not listed explicitly (including `.beginListItem`) are left unchanged.
mutating func endListItem() {
    switch blockState {
    case .emptyBlock, .nonEmptyBlock, .listItemContent:
        blockState = .endListItem

    case .emittedSpaceInListItemContent:
        blockState = .endListItem
        // Take back the collapsed trailing space.
        removeChar()

    case .lineBreakTagInListItemContent,
         .atLeastTwoLineBreakTagsInListItemContent:
        blockState = .endListItem
        // Line breaks at the end of an item are dropped rather than emitted.
        temporaryBuffer = ""

    default:
        break
    }
}
/// Final cleanup: if the last thing emitted was a collapsed space, take it back via
/// `removeChar()`. The state itself is not changed.
mutating func endBlocks() {
    let endsWithCollapsedSpace =
        blockState == .emittedSpace || blockState == .emittedSpaceInListItemContent
    if endsWithCollapsedSpace {
        removeChar()
    }
}
}
/// States of the whitespace / block-boundary state machine implemented in the
/// `BlockRenderer` extension.
///
/// Cases carrying a `depth` belong to the preformatted family: `startPreformatted()`
/// enters at depth 1 and increments on nesting, `endPreformatted()` decrements.
enum BlockState: Equatable {
// Nothing emitted yet: leading whitespace is dropped and the first content gets no block break.
case start
// At a block boundary: the next non-whitespace character is preceded by `blockBreak`.
case emptyBlock
// Inside block content: a whitespace character is emitted as a single " ".
case nonEmptyBlock
// A collapsed " " was just emitted: further whitespace is dropped, and the space is
// taken back with `removeChar()` if the block ends here.
case emittedSpace
// One `breakTag()` since the last content; the line break is held in `temporaryBuffer`.
case lineBreakTag
// Two or more consecutive `breakTag()` calls; all line breaks are held in `temporaryBuffer`.
case atLeastTwoLineBreakTags
// Block boundary reached from an "at least two line breaks" state: the buffered line
// breaks are kept and flushed (instead of a block break) when content follows.
case emptyBlockWithAtLeastTwoPreviousLineBreakTags
// Set by `startListItem()`: leading whitespace is dropped, content moves to `listItemContent`.
case beginListItem
// Set by `endListItem()`: content arriving here is prefixed with `lineBreak` and
// `listIndentForContentOutsideItem`.
case endListItem
// List-item counterpart of `nonEmptyBlock`.
case listItemContent
// List-item counterpart of `emittedSpace`.
case emittedSpaceInListItemContent
// List-item counterpart of `lineBreakTag`.
case lineBreakTagInListItemContent
// List-item counterpart of `atLeastTwoLineBreakTags`.
case atLeastTwoLineBreakTagsInListItemContent
// Preformatted element entered from `start`: a "\n" is dropped, anything else is content.
case preformattedStart(depth: Int)
// Block boundary inside a preformatted element: the next non-whitespace character is
// preceded by `blockBreak`; whitespace is buffered.
case preformattedEmptyBlock(depth: Int)
// Content inside a preformatted element: newlines and other whitespace are buffered
// in `temporaryBuffer` rather than collapsed.
case preformattedNonEmptyBlock(depth: Int)
// One line break (newline character or break tag) is pending in `temporaryBuffer`.
case preformattedLineBreak(depth: Int)
// Two or more line breaks are pending in `temporaryBuffer`.
case preformattedAtLeastTwoLineBreaks(depth: Int)
// Preformatted element entered from an empty block: a "\n" is dropped; other content
// is preceded by `blockBreak`.
case afterPreStartTag(depth: Int)
// Preformatted element entered from a state that must not add a block break
// (`emptyBlockWithAtLeastTwoPreviousLineBreakTags`, `beginListItem`, or the nested
// `preformattedEmptyBlockWithLeadingWhitespace`): content flushes `temporaryBuffer` as-is.
case afterPreStartTagWithLeadingWhitespace(depth: Int)
// Whitespace following preformatted content is buffered until non-whitespace follows.
case preformattedNonEmptyBlockWithTrailingWhitespace(depth: Int)
// Block boundary inside a preformatted element with whitespace / line breaks pending
// in `temporaryBuffer`; content flushes the buffer without adding a block break.
case preformattedEmptyBlockWithLeadingWhitespace(depth: Int)
}