Use something more like CSS `white-space: pre-wrap;` (stop collapsing runs of whitespace; treat newlines in text as line breaks)

This commit is contained in:
Shadowfacts 2024-02-28 17:55:48 -05:00
parent a2ca8fd650
commit 9c8b127f15
3 changed files with 43 additions and 85 deletions

View File

@ -7,14 +7,12 @@ digraph blockstate {
start; start;
emptyBlock [label = "empty block"]; emptyBlock [label = "empty block"];
nonEmptyBlock [label = "non-empty block"]; nonEmptyBlock [label = "non-empty block"];
emittedSpace [label = "emitted space"];
lineBreakTag [label = "line break tag"]; lineBreakTag [label = "line break tag"];
atLeastTwoLineBreakTags [label = ">=2 line break tags"]; atLeastTwoLineBreakTags [label = ">=2 line break tags"];
emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "empty block w/ >=2 prev line break tags"]; emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "empty block w/ >=2 prev line break tags"];
beginListItem [label = "begin list item"]; beginListItem [label = "begin list item"];
endListItem [label = "end list item"]; endListItem [label = "end list item"];
listItemContent [label = "list item content"]; listItemContent [label = "list item content"];
emittedSpaceInListItemContent [label = "emitted space in text in list item content"];
lineBreakTagInListItemContent [label = "line break tag in list item content"]; lineBreakTagInListItemContent [label = "line break tag in list item content"];
atLeastTwoLineBreakTagsInListItemContent [label = ">= 2 line break tags in list item content"]; atLeastTwoLineBreakTagsInListItemContent [label = ">= 2 line break tags in list item content"];
preformattedStart [label = "preformatted start"]; preformattedStart [label = "preformatted start"];
@ -32,27 +30,21 @@ digraph blockstate {
start -> nonEmptyBlock [label = "non-whitespace"]; start -> nonEmptyBlock [label = "non-whitespace"];
start -> preformattedStart [label = "<pre> (depth = 1)"]; start -> preformattedStart [label = "<pre> (depth = 1)"];
start -> beginListItem [label = "<li>"]; start -> beginListItem [label = "<li>"];
nonEmptyBlock -> nonEmptyBlock [label = "non-whitespace"]; nonEmptyBlock -> nonEmptyBlock [label = "non-newline"];
nonEmptyBlock -> emptyBlock [label = "start/end block"]; nonEmptyBlock -> emptyBlock [label = "start/end block"];
nonEmptyBlock -> emittedSpace [label = "whitespace (emit space)"]; nonEmptyBlock -> lineBreakTag [label = "<br> or \\n (append to tmp)"];
nonEmptyBlock -> lineBreakTag [label = "<br> (append to tmp)"];
nonEmptyBlock -> beginListItem [label = "<li>"]; nonEmptyBlock -> beginListItem [label = "<li>"];
nonEmptyBlock -> endListItem [label = "</li>"]; nonEmptyBlock -> endListItem [label = "</li>"];
emittedSpace -> nonEmptyBlock [label = "non-whitespace"];
emittedSpace -> emittedSpace [label = "whitespace (skip)"];
emittedSpace -> emptyBlock [label = "start/end block (remove 1)"];
emittedSpace -> lineBreakTag [label = "<br> (append to tmp)"];
emittedSpace -> end [label = "EOF (remove 1)"];
emptyBlock -> nonEmptyBlock [label = "non-whitespace (block break)"]; emptyBlock -> nonEmptyBlock [label = "non-whitespace (block break)"];
emptyBlock -> emptyBlock [label = "whitespace (skip)\n<br>\n</pre>\nstart/end block"]; emptyBlock -> emptyBlock [label = "whitespace (skip)\n<br>\n</pre>\nstart/end block"];
emptyBlock -> afterPreStartTag [label = "<pre> (depth = 1)"]; emptyBlock -> afterPreStartTag [label = "<pre> (depth = 1)"];
emptyBlock -> beginListItem [label = "<li>"]; emptyBlock -> beginListItem [label = "<li>"];
emptyBlock -> endListItem [label = "</li>"]; emptyBlock -> endListItem [label = "</li>"];
lineBreakTag -> lineBreakTag [label = "whitespace (skip)"]; lineBreakTag -> lineBreakTag [label = "whitespace (append to tmp)"];
lineBreakTag -> atLeastTwoLineBreakTags [label = "<br> (append to tmp)"]; lineBreakTag -> atLeastTwoLineBreakTags [label = "<br> or \\n (append to tmp)"];
lineBreakTag -> emptyBlock [label = "start/end block (clear tmp)"]; lineBreakTag -> emptyBlock [label = "start/end block (clear tmp)"];
lineBreakTag -> nonEmptyBlock [label = "non-whitespace (emit tmp)"]; lineBreakTag -> nonEmptyBlock [label = "non-whitespace (emit tmp)"];
atLeastTwoLineBreakTags -> atLeastTwoLineBreakTags [label = "whitespace (skip)\n<br> (append to tmp)"]; atLeastTwoLineBreakTags -> atLeastTwoLineBreakTags [label = "whitespace or <br> (append to tmp)"];
atLeastTwoLineBreakTags -> nonEmptyBlock [label = "non-whitespace (emit tmp)"]; atLeastTwoLineBreakTags -> nonEmptyBlock [label = "non-whitespace (emit tmp)"];
atLeastTwoLineBreakTags -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "start/end block"]; atLeastTwoLineBreakTags -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "start/end block"];
emptyBlockWithAtLeastTwoPreviousLineBreakTags -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "whitespace (skip)\n<br>\n</pre>\nstart/end block"]; emptyBlockWithAtLeastTwoPreviousLineBreakTags -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "whitespace (skip)\n<br>\n</pre>\nstart/end block"];
@ -70,23 +62,15 @@ digraph blockstate {
listItemContent -> listItemContent [label = "non-whitespace"]; listItemContent -> listItemContent [label = "non-whitespace"];
listItemContent -> beginListItem [label = "<li> (line break)"]; listItemContent -> beginListItem [label = "<li> (line break)"];
listItemContent -> lineBreakTagInListItemContent [label = "<br> (append to tmp)"]; listItemContent -> lineBreakTagInListItemContent [label = "<br> (append to tmp)"];
listItemContent -> emittedSpaceInListItemContent [label = "whitespace (emit space)"];
listItemContent -> emptyBlock [label = "start/end block"]; listItemContent -> emptyBlock [label = "start/end block"];
listItemContent -> endListItem [label = "</li>"]; listItemContent -> endListItem [label = "</li>"];
emittedSpaceInListItemContent -> emittedSpaceInListItemContent [label = "whitespace (skip)"]; lineBreakTagInListItemContent -> lineBreakTagInListItemContent [label = "whitespace (append to tmp)"];
emittedSpaceInListItemContent -> listItemContent [label = "non-whitespace"];
emittedSpaceInListItemContent -> end [label = "EOF (remove 1)"];
emittedSpaceInListItemContent -> emptyBlock [label = "start/end block (remove 1)"];
emittedSpaceInListItemContent -> beginListItem [label = "<li> (remove 1, line break)"];
emittedSpaceInListItemContent -> lineBreakTagInListItemContent [label = "<br> (append to tmp)"];
emittedSpaceInListItemContent -> endListItem [label = "</li> (remove 1)"];
lineBreakTagInListItemContent -> lineBreakTagInListItemContent [label = "whitespace (skip)"];
lineBreakTagInListItemContent -> emptyBlock [label = "start/end block (clear tmp)"]; lineBreakTagInListItemContent -> emptyBlock [label = "start/end block (clear tmp)"];
lineBreakTagInListItemContent -> beginListItem [label = "<li> (emit tmp, line break)"]; lineBreakTagInListItemContent -> beginListItem [label = "<li> (emit tmp, line break)"];
lineBreakTagInListItemContent -> listItemContent [label = "non-whitespace (emit tmp)"]; lineBreakTagInListItemContent -> listItemContent [label = "non-whitespace (emit tmp)"];
lineBreakTagInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "<br> (append to tmp)"]; lineBreakTagInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "<br> or \\n (append to tmp)"];
lineBreakTagInListItemContent -> endListItem [label = "</li> (clear tmp)"]; lineBreakTagInListItemContent -> endListItem [label = "</li> (clear tmp)"];
atLeastTwoLineBreakTagsInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "<br> (append to tmp)\nwhitespace (skip)"]; atLeastTwoLineBreakTagsInListItemContent -> atLeastTwoLineBreakTagsInListItemContent [label = "whitespace or <br> (append to tmp)"];
atLeastTwoLineBreakTagsInListItemContent -> beginListItem [label = "<li> (emit tmp, line break)"]; atLeastTwoLineBreakTagsInListItemContent -> beginListItem [label = "<li> (emit tmp, line break)"];
atLeastTwoLineBreakTagsInListItemContent -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "start/end block"]; atLeastTwoLineBreakTagsInListItemContent -> emptyBlockWithAtLeastTwoPreviousLineBreakTags [label = "start/end block"];
atLeastTwoLineBreakTagsInListItemContent -> listItemContent [label = "non-whitespace (emit tmp)"]; atLeastTwoLineBreakTagsInListItemContent -> listItemContent [label = "non-whitespace (emit tmp)"];

View File

@ -38,9 +38,6 @@ extension BlockStateMachine {
break break
case .nonEmptyBlock: case .nonEmptyBlock:
blockState = .emptyBlock blockState = .emptyBlock
case .emittedSpace:
blockState = .emptyBlock
removeChar()
case .lineBreakTag: case .lineBreakTag:
blockState = .emptyBlock blockState = .emptyBlock
temporaryBuffer = "" temporaryBuffer = ""
@ -54,9 +51,6 @@ extension BlockStateMachine {
blockState = .emptyBlock blockState = .emptyBlock
case .listItemContent: case .listItemContent:
blockState = .emptyBlock blockState = .emptyBlock
case .emittedSpaceInListItemContent:
blockState = .emptyBlock
removeChar()
case .lineBreakTagInListItemContent: case .lineBreakTagInListItemContent:
blockState = .emptyBlock blockState = .emptyBlock
temporaryBuffer = "" temporaryBuffer = ""
@ -104,22 +98,19 @@ extension BlockStateMachine {
return true return true
} }
case .nonEmptyBlock: case .nonEmptyBlock:
if isWhitespace { if isNewline {
blockState = .emittedSpace blockState = .lineBreakTag
append(" ") temporaryBuffer.append("\n")
return false return false
} else { } else {
return true return true
} }
case .emittedSpace:
if isWhitespace {
return false
} else {
blockState = .nonEmptyBlock
return true
}
case .lineBreakTag: case .lineBreakTag:
if isWhitespace { if isWhitespace {
if isNewline {
blockState = .atLeastTwoLineBreakTags
}
temporaryBuffer.unicodeScalars.append(char)
return false return false
} else { } else {
blockState = .nonEmptyBlock blockState = .nonEmptyBlock
@ -129,6 +120,7 @@ extension BlockStateMachine {
} }
case .atLeastTwoLineBreakTags: case .atLeastTwoLineBreakTags:
if isWhitespace { if isWhitespace {
temporaryBuffer.unicodeScalars.append(char)
return false return false
} else { } else {
blockState = .nonEmptyBlock blockState = .nonEmptyBlock
@ -162,22 +154,19 @@ extension BlockStateMachine {
return true return true
} }
case .listItemContent: case .listItemContent:
if isWhitespace { if isNewline {
blockState = .emittedSpaceInListItemContent blockState = .lineBreakTagInListItemContent
append(" ") temporaryBuffer.append("\n")
return false return false
} else { } else {
return true return true
} }
case .emittedSpaceInListItemContent:
if isWhitespace {
return false
} else {
blockState = .listItemContent
return true
}
case .lineBreakTagInListItemContent: case .lineBreakTagInListItemContent:
if isWhitespace { if isWhitespace {
if isNewline {
blockState = .atLeastTwoLineBreakTagsInListItemContent
}
temporaryBuffer.unicodeScalars.append(char)
return false return false
} else { } else {
blockState = .listItemContent blockState = .listItemContent
@ -187,6 +176,7 @@ extension BlockStateMachine {
} }
case .atLeastTwoLineBreakTagsInListItemContent: case .atLeastTwoLineBreakTagsInListItemContent:
if isWhitespace { if isWhitespace {
temporaryBuffer.unicodeScalars.append(char)
return false return false
} else { } else {
blockState = .listItemContent blockState = .listItemContent
@ -311,9 +301,6 @@ extension BlockStateMachine {
case .nonEmptyBlock: case .nonEmptyBlock:
blockState = .lineBreakTag blockState = .lineBreakTag
temporaryBuffer.append(lineBreak) temporaryBuffer.append(lineBreak)
case .emittedSpace:
blockState = .lineBreakTag
temporaryBuffer.append(lineBreak)
case .lineBreakTag: case .lineBreakTag:
blockState = .atLeastTwoLineBreakTags blockState = .atLeastTwoLineBreakTags
temporaryBuffer.append(lineBreak) temporaryBuffer.append(lineBreak)
@ -329,9 +316,6 @@ extension BlockStateMachine {
case .listItemContent: case .listItemContent:
blockState = .lineBreakTagInListItemContent blockState = .lineBreakTagInListItemContent
temporaryBuffer.append(lineBreak) temporaryBuffer.append(lineBreak)
case .emittedSpaceInListItemContent:
blockState = .lineBreakTagInListItemContent
temporaryBuffer.append(lineBreak)
case .lineBreakTagInListItemContent: case .lineBreakTagInListItemContent:
blockState = .atLeastTwoLineBreakTagsInListItemContent blockState = .atLeastTwoLineBreakTagsInListItemContent
temporaryBuffer.append(lineBreak) temporaryBuffer.append(lineBreak)
@ -374,8 +358,6 @@ extension BlockStateMachine {
blockState = .afterPreStartTag(depth: 1) blockState = .afterPreStartTag(depth: 1)
case .nonEmptyBlock: case .nonEmptyBlock:
fatalError("unreachable") fatalError("unreachable")
case .emittedSpace:
fatalError("unreachable")
case .lineBreakTag: case .lineBreakTag:
fatalError("unreachable") fatalError("unreachable")
case .atLeastTwoLineBreakTags: case .atLeastTwoLineBreakTags:
@ -388,8 +370,6 @@ extension BlockStateMachine {
fatalError("unreachable") fatalError("unreachable")
case .listItemContent: case .listItemContent:
fatalError("unreachable") fatalError("unreachable")
case .emittedSpaceInListItemContent:
fatalError("unreachable")
case .lineBreakTagInListItemContent: case .lineBreakTagInListItemContent:
fatalError("unreachable") fatalError("unreachable")
case .atLeastTwoLineBreakTagsInListItemContent: case .atLeastTwoLineBreakTagsInListItemContent:
@ -423,8 +403,6 @@ extension BlockStateMachine {
break break
case .nonEmptyBlock: case .nonEmptyBlock:
fatalError("unreachable") fatalError("unreachable")
case .emittedSpace:
fatalError("unreachable")
case .lineBreakTag: case .lineBreakTag:
fatalError("unreachable") fatalError("unreachable")
case .atLeastTwoLineBreakTags: case .atLeastTwoLineBreakTags:
@ -437,8 +415,6 @@ extension BlockStateMachine {
fatalError("unreachable") fatalError("unreachable")
case .listItemContent: case .listItemContent:
fatalError("unreachable") fatalError("unreachable")
case .emittedSpaceInListItemContent:
fatalError("unreachable")
case .lineBreakTagInListItemContent: case .lineBreakTagInListItemContent:
fatalError("unreachable") fatalError("unreachable")
case .atLeastTwoLineBreakTagsInListItemContent: case .atLeastTwoLineBreakTagsInListItemContent:
@ -501,10 +477,6 @@ extension BlockStateMachine {
case .listItemContent: case .listItemContent:
blockState = .beginListItem blockState = .beginListItem
append(lineBreak) append(lineBreak)
case .emittedSpaceInListItemContent:
blockState = .beginListItem
removeChar()
append(lineBreak)
case .lineBreakTagInListItemContent: case .lineBreakTagInListItemContent:
blockState = .beginListItem blockState = .beginListItem
append(temporaryBuffer) append(temporaryBuffer)
@ -528,9 +500,6 @@ extension BlockStateMachine {
blockState = .endListItem blockState = .endListItem
case .listItemContent: case .listItemContent:
blockState = .endListItem blockState = .endListItem
case .emittedSpaceInListItemContent:
blockState = .endListItem
removeChar()
case .lineBreakTagInListItemContent: case .lineBreakTagInListItemContent:
blockState = .endListItem blockState = .endListItem
temporaryBuffer = "" temporaryBuffer = ""
@ -544,10 +513,6 @@ extension BlockStateMachine {
mutating func endBlocks() { mutating func endBlocks() {
switch blockState { switch blockState {
case .emittedSpace:
removeChar()
case .emittedSpaceInListItemContent:
removeChar()
default: default:
break break
} }
@ -558,14 +523,12 @@ enum BlockState: Equatable {
case start case start
case emptyBlock case emptyBlock
case nonEmptyBlock case nonEmptyBlock
case emittedSpace
case lineBreakTag case lineBreakTag
case atLeastTwoLineBreakTags case atLeastTwoLineBreakTags
case emptyBlockWithAtLeastTwoPreviousLineBreakTags case emptyBlockWithAtLeastTwoPreviousLineBreakTags
case beginListItem case beginListItem
case endListItem case endListItem
case listItemContent case listItemContent
case emittedSpaceInListItemContent
case lineBreakTagInListItemContent case lineBreakTagInListItemContent
case atLeastTwoLineBreakTagsInListItemContent case atLeastTwoLineBreakTagsInListItemContent
case preformattedStart(depth: Int32) case preformattedStart(depth: Int32)
@ -583,5 +546,5 @@ enum BlockState: Equatable {
private func isWhitespace(_ c: UnicodeScalar) -> Bool { private func isWhitespace(_ c: UnicodeScalar) -> Bool {
// this is not strictly correct, but checking the actual unicode properties is slow // this is not strictly correct, but checking the actual unicode properties is slow
// and this should cover the vast majority of actual use // and this should cover the vast majority of actual use
c == " " || c == "\n" || c == "\t" || c == "\u{A0}" /* NO-BREAK SPACE */ c == " " || c == "\n" || c == "\t"
} }

View File

@ -50,7 +50,7 @@ final class AttributedStringConverterTests: XCTestCase {
color: color, color: color,
paragraphStyle: .default paragraphStyle: .default
) )
var converter = AttributedStringConverter<Callbacks>(configuration: config) let converter = AttributedStringConverter<Callbacks>(configuration: config)
return converter.convert(html: html) return converter.convert(html: html)
} }
@ -391,7 +391,7 @@ final class AttributedStringConverterTests: XCTestCase {
XCTAssertEqual(convert("<p>a\n\n</p>\n"), result) XCTAssertEqual(convert("<p>a\n\n</p>\n"), result)
XCTAssertEqual(convert("<p>\n\na</p>"), result) XCTAssertEqual(convert("<p>\n\na</p>"), result)
XCTAssertEqual(convert("<p>\n\na</p>\n"), result) XCTAssertEqual(convert("<p>\n\na</p>\n"), result)
let result2 = NSAttributedString(string: "a b", attributes: [ let result2 = NSAttributedString(string: "a\n\n\nb", attributes: [
.font: font, .font: font,
.paragraphStyle: NSParagraphStyle.default, .paragraphStyle: NSParagraphStyle.default,
.foregroundColor: color, .foregroundColor: color,
@ -519,8 +519,14 @@ final class AttributedStringConverterTests: XCTestCase {
]) ])
XCTAssertEqual(convert(" \n\ta"), result) XCTAssertEqual(convert(" \n\ta"), result)
XCTAssertEqual(convert(" \n\t<p>a</p>"), result) XCTAssertEqual(convert(" \n\t<p>a</p>"), result)
XCTAssertEqual(convert("a \n\t"), result) XCTAssertEqual(convert("a\n\t"), result)
XCTAssertEqual(convert("<p>a</p> \n\t"), result) XCTAssertEqual(convert("<p>a</p> \n\t"), result)
let result2 = NSAttributedString(string: "a ", attributes: [
.font: font,
.paragraphStyle: NSParagraphStyle.default,
.foregroundColor: color,
])
XCTAssertEqual(convert("a \n\t"), result2)
let pre = NSAttributedString(string: "a", attributes: [ let pre = NSAttributedString(string: "a", attributes: [
.font: monospaceFont, .font: monospaceFont,
@ -531,8 +537,8 @@ final class AttributedStringConverterTests: XCTestCase {
XCTAssertEqual(convert("<pre>a</pre> \n\t"), pre) XCTAssertEqual(convert("<pre>a</pre> \n\t"), pre)
} }
func testWhitespaceCollapsing() { func testDoesNotCollapseWhitespace() {
let result = NSAttributedString(string: "a b", attributes: [ let result = NSAttributedString(string: "a \t\nb", attributes: [
.font: font, .font: font,
.paragraphStyle: NSParagraphStyle.default, .paragraphStyle: NSParagraphStyle.default,
.foregroundColor: color, .foregroundColor: color,
@ -565,11 +571,16 @@ final class AttributedStringConverterTests: XCTestCase {
.foregroundColor: color, .foregroundColor: color,
]) ])
XCTAssertEqual(convert("<ol><li>a</li>c<li>b</li></ol>"), result) XCTAssertEqual(convert("<ol><li>a</li>c<li>b</li></ol>"), result)
XCTAssertEqual(convert("<ol><li>a</li>c <li>b</li></ol>"), result) let result2 = NSAttributedString(string: "\t1.\ta\n\t\tc \n\t2.\tb", attributes: [
.font: font,
.paragraphStyle: listParagraphStyle,
.foregroundColor: color,
])
XCTAssertEqual(convert("<ol><li>a</li>c <li>b</li></ol>"), result2)
} }
func testWhitespaceCollapsingInTextBetweenListItems() { func testWhitespaceCollapsingInTextBetweenListItems() {
let result = NSAttributedString(string: "\t1.\ta\n\t\tc d\n\t2.\tb", attributes: [ let result = NSAttributedString(string: "\t1.\ta\n\t\tc d\n\t2.\tb", attributes: [
.font: font, .font: font,
.paragraphStyle: listParagraphStyle, .paragraphStyle: listParagraphStyle,
.foregroundColor: color, .foregroundColor: color,