diff --git a/Sources/HTMLStreamer/Tokenizer.swift b/Sources/HTMLStreamer/Tokenizer.swift index 81d7a36..ec07217 100644 --- a/Sources/HTMLStreamer/Tokenizer.swift +++ b/Sources/HTMLStreamer/Tokenizer.swift @@ -30,11 +30,13 @@ struct Tokenizer>: IteratorProtocol { mutating func next() -> Token? { switch state { case .flushingTemporaryBuffer(let returnState): + state = returnState if temporaryBuffer == nil || temporaryBuffer!.isEmpty { - state = returnState return next() } else { - return .character(temporaryBuffer!.unicodeScalars.removeFirst()) + var buffer: String? = nil + swap(&buffer, &temporaryBuffer) + return .characterSequence(buffer!) } case .endOfFile: return nil diff --git a/Tests/HTMLStreamerTests/TokenizerTests.swift b/Tests/HTMLStreamerTests/TokenizerTests.swift index d110e77..1f3ce8d 100644 --- a/Tests/HTMLStreamerTests/TokenizerTests.swift +++ b/Tests/HTMLStreamerTests/TokenizerTests.swift @@ -17,22 +17,22 @@ final class TokenizerTests: XCTestCase { } func testNamedCharacterReferences() { - XCTAssertEqual(tokenize("&"), [.character("&")]) + XCTAssertEqual(tokenize("&"), [.characterSequence("&")]) // missing-semicolon-after-character-reference: - XCTAssertEqual(tokenize("¬in"), [.character("¬"), .characterSequence("in")]) - XCTAssertEqual(tokenize("¬in"), [.character("¬"), .characterSequence("in")]) + XCTAssertEqual(tokenize("¬in"), [.characterSequence("¬"), .characterSequence("in")]) + XCTAssertEqual(tokenize("¬in"), [.characterSequence("¬"), .characterSequence("in")]) // unknown-named-character-reference: - XCTAssertEqual(tokenize("¬it;"), [.character("¬"), .characterSequence("it;")]) - XCTAssertEqual(tokenize("&asdf"), "&asdf".unicodeScalars.map { .character($0) }) - XCTAssertEqual(tokenize("&a"), "&a".unicodeScalars.map { .character($0) }) + XCTAssertEqual(tokenize("¬it;"), [.characterSequence("¬"), .characterSequence("it;")]) + XCTAssertEqual(tokenize("&asdf"), [.characterSequence("&asdf")]) + XCTAssertEqual(tokenize("&a"), [.characterSequence("&a")]) // attribute special case XCTAssertEqual(tokenize(""), [.startTag("a", selfClosing: true, attributes: [Attribute(name: "a", value: "¬a")])]) } func testNumericCharacterReference() { - XCTAssertEqual(tokenize("!"), [.character("!")]) - XCTAssertEqual(tokenize("!"), [.character("!")]) + XCTAssertEqual(tokenize("!"), [.characterSequence("!")]) + XCTAssertEqual(tokenize("!"), [.characterSequence("!")]) } func testStartTag() {