Use something more like white-space: pre-wrap;

This commit is contained in:
Shadowfacts 2024-02-28 17:55:48 -05:00
parent a2ca8fd650
commit 9c8b127f15
3 changed files with 43 additions and 85 deletions

View File

@ -7,14 +7,12 @@ digraph blockstate {
start;
emptyBlock [label = "empty block"];
nonEmptyBlock [label = "non-empty block"];
emittedSpace [label = "emitted space"];
lineBreakTag [label = "line break tag"];
atLeastTwoLineBreakTags [label = ">=2 line break tags"];
emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "empty block w/ >=2 prev line break tags"];
beginListItem [label = "begin list item"];
endListItem [label = "end list item"];
listItemContent [label = "list item content"];
emittedSpaceInListItemContent [label = "emitted space in text in list item content"];
lineBreakTagInListItemContent [label = "line break tag in list item content"];
atLeastTwoLineBreakTagsInListItemContent [label = ">= 2 line break tags in list item content"];
preformattedStart [label = "preformatted start"];
@ -32,27 +30,21 @@ digraph blockstate {
start -> nonEmptyBlock [label = "non-whitespace"];
start -> preformattedStart [label = "<pre> (depth = 1)"];
start -> beginListItem [label = "<li>"];
nonEmptyBlock -> nonEmptyBlock [label = "non-whitespace"];
nonEmptyBlock -> nonEmptyBlock [label = "non-newline"];
nonEmptyBlock -> emptyBlock [label = "start/end block"];
nonEmptyBlock -> emittedSpace [label = "whitespace (emit space)"];
nonEmptyBlock -> lineBreakTag [label = "<br> (append to tmp)"];
nonEmptyBlock -> lineBreakTag [label = "<br> or \\n (append to tmp)"];
nonEmptyBlock -> beginListItem [label = "<li>"];
nonEmptyBlock -> endListItem [label = "</li>"];
emittedSpace -> nonEmptyBlock [label = "non-whitespace"];
emittedSpace -> emittedSpace [label = "whitespace (skip)"];
emittedSpace -> emptyBlock [label = "start/end block (remove 1)"];
emittedSpace -> lineBreakTag [label = "<br> (append to tmp)"];
emittedSpace -> end [label = "EOF (remove 1)"];
emptyBlock -> nonEmptyBlock [label = "non-whitespace (block break)"];
emptyBlock -> emptyBlock [label = "whitespace (skip)\n<br>\n</pre>\nstart/end block"];
emptyBlock -> afterPreStartTag [label = "<pre> (depth = 1)"];
emptyBlock -> beginListItem [label = "<li>"];
emptyBlock -> endListItem [label = "</li>"];
lineBreakTag -> lineBreakTag [label = "whitespace (skip)"];
lineBreakTag -> atLeastTwoLineBreakTags [label = "<br> (append to tmp)"];
lineBreakTag -> lineBreakTag [label = "whitespace (append to tmp)"];
lineBreakTag -> atLeastTwoLineBreakTags [label = "<br> or \\n (append to tmp)"];
lineBreakTag -> emptyBlock [label = "start/end block (clear tmp)"];
lineBreakTag -> nonEmptyBlock [label = "non-whitespace (emit tmp)"];
atLeastTwoLineBreakTags -> atLeastTwoLineBreakTags [label = "whitespace (skip)\n<br> (append to tmp)"];
atLeastTwoLineBreakTags -> atLeastTwoLineBreakTags [label = "whitespace or <br> (append to tmp)"];
atLeastTwoLineBreakTags -> nonEmptyBlock [label = "non-whitespace (emit tmp)"];
atLeastTwoLineBreakTags -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "start/end block"];
emptyBlockWithAtLeastTwoPreviousLineBreakTags -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "whitespace (skip)\n<br>\n</pre>\nstart/end block"];
@ -70,23 +62,15 @@ digraph blockstate {
listItemContent -> listItemContent [label = "non-newline"];
listItemContent -> beginListItem [label = "<li> (line break)"];
listItemContent -> lineBreakTagInListItemContent [label = "<br> or \\n (append to tmp)"];
listItemContent -> emittedSpaceInListItemContent [label = "whitespace (emit space)"];
listItemContent -> emptyBlock [label = "start/end block"];
listItemContent -> endListItem [label = "</li>"];
emittedSpaceInListItemContent -> emittedSpaceInListItemContent [label = "whitespace (skip)"];
emittedSpaceInListItemContent -> listItemContent [label = "non-whitespace"];
emittedSpaceInListItemContent -> end [label = "EOF (remove 1)"];
emittedSpaceInListItemContent -> emptyBlock [label = "start/end block (remove 1)"];
emittedSpaceInListItemContent -> beginListItem [label = "<li> (remove 1, line break)"];
emittedSpaceInListItemContent -> lineBreakTagInListItemContent [label = "<br> (append to tmp)"];
emittedSpaceInListItemContent -> endListItem [label = "</li> (remove 1)"];
lineBreakTagInListItemContent -> lineBreakTagInListItemContent [label = "whitespace (skip)"];
lineBreakTagInListItemContent -> lineBreakTagInListItemContent [label = "whitespace (append to tmp)"];
lineBreakTagInListItemContent -> emptyBlock [label = "start/end block (clear tmp)"];
lineBreakTagInListItemContent -> beginListItem [label = "<li> (emit tmp, line break)"];
lineBreakTagInListItemContent -> listItemContent [label = "non-whitespace (emit tmp)"];
lineBreakTagInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "<br> (append to tmp)"];
lineBreakTagInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "<br> or \\n (append to tmp)"];
lineBreakTagInListItemContent -> endListItem [label = "</li> (clear tmp)"];
atLeastTwoLineBreakTagsInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "<br> (append to tmp)\nwhitespace (skip)"];
atLeastTwoLineBreakTagsInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "whitespace or <br> (append to tmp)"];
atLeastTwoLineBreakTagsInListItemContent -> beginListItem [label = "<li> (emit tmp, line break)"];
atLeastTwoLineBreakTagsInListItemContent -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "start/end block"];
atLeastTwoLineBreakTagsInListItemContent -> listItemContent [label = "non-whitespace (emit tmp)"];

View File

@ -38,9 +38,6 @@ extension BlockStateMachine {
break
case .nonEmptyBlock:
blockState = .emptyBlock
case .emittedSpace:
blockState = .emptyBlock
removeChar()
case .lineBreakTag:
blockState = .emptyBlock
temporaryBuffer = ""
@ -54,9 +51,6 @@ extension BlockStateMachine {
blockState = .emptyBlock
case .listItemContent:
blockState = .emptyBlock
case .emittedSpaceInListItemContent:
blockState = .emptyBlock
removeChar()
case .lineBreakTagInListItemContent:
blockState = .emptyBlock
temporaryBuffer = ""
@ -104,22 +98,19 @@ extension BlockStateMachine {
return true
}
case .nonEmptyBlock:
if isWhitespace {
blockState = .emittedSpace
append(" ")
if isNewline {
blockState = .lineBreakTag
temporaryBuffer.append("\n")
return false
} else {
return true
}
case .emittedSpace:
if isWhitespace {
return false
} else {
blockState = .nonEmptyBlock
return true
}
case .lineBreakTag:
if isWhitespace {
if isNewline {
blockState = .atLeastTwoLineBreakTags
}
temporaryBuffer.unicodeScalars.append(char)
return false
} else {
blockState = .nonEmptyBlock
@ -129,6 +120,7 @@ extension BlockStateMachine {
}
case .atLeastTwoLineBreakTags:
if isWhitespace {
temporaryBuffer.unicodeScalars.append(char)
return false
} else {
blockState = .nonEmptyBlock
@ -162,22 +154,19 @@ extension BlockStateMachine {
return true
}
case .listItemContent:
if isWhitespace {
blockState = .emittedSpaceInListItemContent
append(" ")
if isNewline {
blockState = .lineBreakTagInListItemContent
temporaryBuffer.append("\n")
return false
} else {
return true
}
case .emittedSpaceInListItemContent:
if isWhitespace {
return false
} else {
blockState = .listItemContent
return true
}
case .lineBreakTagInListItemContent:
if isWhitespace {
if isNewline {
blockState = .atLeastTwoLineBreakTagsInListItemContent
}
temporaryBuffer.unicodeScalars.append(char)
return false
} else {
blockState = .listItemContent
@ -187,6 +176,7 @@ extension BlockStateMachine {
}
case .atLeastTwoLineBreakTagsInListItemContent:
if isWhitespace {
temporaryBuffer.unicodeScalars.append(char)
return false
} else {
blockState = .listItemContent
@ -311,9 +301,6 @@ extension BlockStateMachine {
case .nonEmptyBlock:
blockState = .lineBreakTag
temporaryBuffer.append(lineBreak)
case .emittedSpace:
blockState = .lineBreakTag
temporaryBuffer.append(lineBreak)
case .lineBreakTag:
blockState = .atLeastTwoLineBreakTags
temporaryBuffer.append(lineBreak)
@ -329,9 +316,6 @@ extension BlockStateMachine {
case .listItemContent:
blockState = .lineBreakTagInListItemContent
temporaryBuffer.append(lineBreak)
case .emittedSpaceInListItemContent:
blockState = .lineBreakTagInListItemContent
temporaryBuffer.append(lineBreak)
case .lineBreakTagInListItemContent:
blockState = .atLeastTwoLineBreakTagsInListItemContent
temporaryBuffer.append(lineBreak)
@ -374,8 +358,6 @@ extension BlockStateMachine {
blockState = .afterPreStartTag(depth: 1)
case .nonEmptyBlock:
fatalError("unreachable")
case .emittedSpace:
fatalError("unreachable")
case .lineBreakTag:
fatalError("unreachable")
case .atLeastTwoLineBreakTags:
@ -388,8 +370,6 @@ extension BlockStateMachine {
fatalError("unreachable")
case .listItemContent:
fatalError("unreachable")
case .emittedSpaceInListItemContent:
fatalError("unreachable")
case .lineBreakTagInListItemContent:
fatalError("unreachable")
case .atLeastTwoLineBreakTagsInListItemContent:
@ -423,8 +403,6 @@ extension BlockStateMachine {
break
case .nonEmptyBlock:
fatalError("unreachable")
case .emittedSpace:
fatalError("unreachable")
case .lineBreakTag:
fatalError("unreachable")
case .atLeastTwoLineBreakTags:
@ -437,8 +415,6 @@ extension BlockStateMachine {
fatalError("unreachable")
case .listItemContent:
fatalError("unreachable")
case .emittedSpaceInListItemContent:
fatalError("unreachable")
case .lineBreakTagInListItemContent:
fatalError("unreachable")
case .atLeastTwoLineBreakTagsInListItemContent:
@ -501,10 +477,6 @@ extension BlockStateMachine {
case .listItemContent:
blockState = .beginListItem
append(lineBreak)
case .emittedSpaceInListItemContent:
blockState = .beginListItem
removeChar()
append(lineBreak)
case .lineBreakTagInListItemContent:
blockState = .beginListItem
append(temporaryBuffer)
@ -528,9 +500,6 @@ extension BlockStateMachine {
blockState = .endListItem
case .listItemContent:
blockState = .endListItem
case .emittedSpaceInListItemContent:
blockState = .endListItem
removeChar()
case .lineBreakTagInListItemContent:
blockState = .endListItem
temporaryBuffer = ""
@ -544,10 +513,6 @@ extension BlockStateMachine {
mutating func endBlocks() {
switch blockState {
case .emittedSpace:
removeChar()
case .emittedSpaceInListItemContent:
removeChar()
default:
break
}
@ -558,14 +523,12 @@ enum BlockState: Equatable {
case start
case emptyBlock
case nonEmptyBlock
case emittedSpace
case lineBreakTag
case atLeastTwoLineBreakTags
case emptyBlockWithAtLeastTwoPreviousLineBreakTags
case beginListItem
case endListItem
case listItemContent
case emittedSpaceInListItemContent
case lineBreakTagInListItemContent
case atLeastTwoLineBreakTagsInListItemContent
case preformattedStart(depth: Int32)
@ -583,5 +546,5 @@ enum BlockState: Equatable {
/// Fast, approximate whitespace test for a single Unicode scalar.
///
/// - Parameter c: The scalar to classify.
/// - Returns: `true` when `c` equals one of the literal scalars compared against below.
private func isWhitespace(_ c: UnicodeScalar) -> Bool {
// This is not strictly correct, but checking the actual Unicode properties is slow
// and this should cover the vast majority of actual use.
// NOTE(review): the two comparison lines below differ only in the NO-BREAK SPACE (U+00A0)
// check; this looks like the before/after pair of a diff — confirm which one is meant to remain.
c == " " || c == "\n" || c == "\t" || c == "\u{A0}" /* NO-BREAK SPACE */
c == " " || c == "\n" || c == "\t"
}

View File

@ -50,7 +50,7 @@ final class AttributedStringConverterTests: XCTestCase {
color: color,
paragraphStyle: .default
)
var converter = AttributedStringConverter<Callbacks>(configuration: config)
let converter = AttributedStringConverter<Callbacks>(configuration: config)
return converter.convert(html: html)
}
@ -391,7 +391,7 @@ final class AttributedStringConverterTests: XCTestCase {
XCTAssertEqual(convert("<p>a\n\n</p>\n"), result)
XCTAssertEqual(convert("<p>\n\na</p>"), result)
XCTAssertEqual(convert("<p>\n\na</p>\n"), result)
let result2 = NSAttributedString(string: "a b", attributes: [
let result2 = NSAttributedString(string: "a\n\n\nb", attributes: [
.font: font,
.paragraphStyle: NSParagraphStyle.default,
.foregroundColor: color,
@ -519,8 +519,14 @@ final class AttributedStringConverterTests: XCTestCase {
])
XCTAssertEqual(convert(" \n\ta"), result)
XCTAssertEqual(convert(" \n\t<p>a</p>"), result)
XCTAssertEqual(convert("a \n\t"), result)
XCTAssertEqual(convert("a\n\t"), result)
XCTAssertEqual(convert("<p>a</p> \n\t"), result)
let result2 = NSAttributedString(string: "a ", attributes: [
.font: font,
.paragraphStyle: NSParagraphStyle.default,
.foregroundColor: color,
])
XCTAssertEqual(convert("a \n\t"), result2)
let pre = NSAttributedString(string: "a", attributes: [
.font: monospaceFont,
@ -531,8 +537,8 @@ final class AttributedStringConverterTests: XCTestCase {
XCTAssertEqual(convert("<pre>a</pre> \n\t"), pre)
}
func testWhitespaceCollapsing() {
let result = NSAttributedString(string: "a b", attributes: [
func testDoesNotCollapseWhitespace() {
let result = NSAttributedString(string: "a \t\nb", attributes: [
.font: font,
.paragraphStyle: NSParagraphStyle.default,
.foregroundColor: color,
@ -565,11 +571,16 @@ final class AttributedStringConverterTests: XCTestCase {
.foregroundColor: color,
])
XCTAssertEqual(convert("<ol><li>a</li>c<li>b</li></ol>"), result)
XCTAssertEqual(convert("<ol><li>a</li>c <li>b</li></ol>"), result)
let result2 = NSAttributedString(string: "\t1.\ta\n\t\tc \n\t2.\tb", attributes: [
.font: font,
.paragraphStyle: listParagraphStyle,
.foregroundColor: color,
])
XCTAssertEqual(convert("<ol><li>a</li>c <li>b</li></ol>"), result2)
}
func testWhitespaceCollapsingInTextBetweenListItems() {
let result = NSAttributedString(string: "\t1.\ta\n\t\tc d\n\t2.\tb", attributes: [
let result = NSAttributedString(string: "\t1.\ta\n\t\tc d\n\t2.\tb", attributes: [
.font: font,
.paragraphStyle: listParagraphStyle,
.foregroundColor: color,