Return temporary buffer as a .characterSequence

This commit is contained in:
Shadowfacts 2023-12-23 11:48:31 -05:00
parent f585a6b502
commit 38b1d2949b
2 changed files with 12 additions and 10 deletions

View File

@@ -30,11 +30,13 @@ struct Tokenizer<Chars: IteratorProtocol<Unicode.Scalar>>: IteratorProtocol {
mutating func next() -> Token? { mutating func next() -> Token? {
switch state { switch state {
case .flushingTemporaryBuffer(let returnState): case .flushingTemporaryBuffer(let returnState):
if temporaryBuffer == nil || temporaryBuffer!.isEmpty {
state = returnState state = returnState
if temporaryBuffer == nil || temporaryBuffer!.isEmpty {
return next() return next()
} else { } else {
return .character(temporaryBuffer!.unicodeScalars.removeFirst()) var buffer: String? = nil
swap(&buffer, &temporaryBuffer)
return .characterSequence(buffer!)
} }
case .endOfFile: case .endOfFile:
return nil return nil

View File

@@ -17,22 +17,22 @@ final class TokenizerTests: XCTestCase {
} }
func testNamedCharacterReferences() { func testNamedCharacterReferences() {
XCTAssertEqual(tokenize("&amp;"), [.character("&")]) XCTAssertEqual(tokenize("&amp;"), [.characterSequence("&")])
// missing-semicolon-after-character-reference: // missing-semicolon-after-character-reference:
XCTAssertEqual(tokenize("&not;in"), [.character("¬"), .characterSequence("in")]) XCTAssertEqual(tokenize("&not;in"), [.characterSequence("¬"), .characterSequence("in")])
XCTAssertEqual(tokenize("&notin"), [.character("¬"), .characterSequence("in")]) XCTAssertEqual(tokenize("&notin"), [.characterSequence("¬"), .characterSequence("in")])
// unknown-named-character-reference: // unknown-named-character-reference:
XCTAssertEqual(tokenize("&notit;"), [.character("¬"), .characterSequence("it;")]) XCTAssertEqual(tokenize("&notit;"), [.characterSequence("¬"), .characterSequence("it;")])
XCTAssertEqual(tokenize("&asdf"), "&asdf".unicodeScalars.map { .character($0) }) XCTAssertEqual(tokenize("&asdf"), [.characterSequence("&asdf")])
XCTAssertEqual(tokenize("&a"), "&a".unicodeScalars.map { .character($0) }) XCTAssertEqual(tokenize("&a"), [.characterSequence("&a")])
// attribute special case // attribute special case
XCTAssertEqual(tokenize("<a a='&nota' />"), [.startTag("a", selfClosing: true, attributes: [Attribute(name: "a", value: "&nota")])]) XCTAssertEqual(tokenize("<a a='&nota' />"), [.startTag("a", selfClosing: true, attributes: [Attribute(name: "a", value: "&nota")])])
} }
func testNumericCharacterReference() { func testNumericCharacterReference() {
XCTAssertEqual(tokenize("&#33;"), [.character("!")]) XCTAssertEqual(tokenize("&#33;"), [.characterSequence("!")])
XCTAssertEqual(tokenize("&#x21;"), [.character("!")]) XCTAssertEqual(tokenize("&#x21;"), [.characterSequence("!")])
} }
func testStartTag() { func testStartTag() {