diff --git a/Sources/HTMLStreamer/Tokenizer.swift b/Sources/HTMLStreamer/Tokenizer.swift
index ec07217..cc90048 100644
--- a/Sources/HTMLStreamer/Tokenizer.swift
+++ b/Sources/HTMLStreamer/Tokenizer.swift
@@ -731,7 +731,7 @@ private extension Tokenizer {
         switch c {
         case "\t", "\n", "\u{000C}", " ":
             // ignore the character
-            return next()
+            return tokenizeBeforeAttributeName()
         case "/", ">", nil:
             reconsume(c)
             state = .afterAttributeName
@@ -794,13 +794,16 @@ private extension Tokenizer {
         switch nextChar() {
         case "\t", "\n", "\u{000C}", " ":
             // ignore the character
-            return tokenizeAttributeName()
+            return tokenizeAfterAttributeName()
         case "/":
             state = .selfClosingStartTag
             return tokenizeSelfClosingStartTag()
         case "=":
             state = .beforeAttributeValue
             return tokenizeBeforeAttributeValue()
+        case ">":
+            state = .data
+            return takeCurrentToken()
         case nil:
             // parse error: eof-in-tag
             state = .endOfFile
diff --git a/Tests/HTMLStreamerTests/TokenizerTests.swift b/Tests/HTMLStreamerTests/TokenizerTests.swift
index 1f3ce8d..47c6935 100644
--- a/Tests/HTMLStreamerTests/TokenizerTests.swift
+++ b/Tests/HTMLStreamerTests/TokenizerTests.swift
@@ -74,6 +74,12 @@ final class TokenizerTests: XCTestCase {
         XCTAssertEqual(tokenize("🇺🇸"), [.characterSequence("\u{1F1FA}\u{1F1F8}")])
     }
 
+    // Whitespace between an attribute name and the closing ">" must be skipped,
+    // leaving a start tag whose attribute has an empty value.
+    func testWhitespaceAfterAttributeName() {
+        XCTAssertEqual(tokenize("<a foo >"), [.startTag("a", selfClosing: false, attributes: [.init(name: "foo", value: "")])])
+    }
+
 }
 
 private struct PrintIterator: IteratorProtocol {