diff --git a/Sources/HTMLStreamer/Tokenizer.swift b/Sources/HTMLStreamer/Tokenizer.swift
index 2eeb415..d91fbbf 100644
--- a/Sources/HTMLStreamer/Tokenizer.swift
+++ b/Sources/HTMLStreamer/Tokenizer.swift
@@ -636,37 +636,39 @@ private extension Tokenizer {
}
mutating func tokenizeTagName() -> Token? {
- switch nextChar() {
- case "\t", "\n", "\u{000C}", " ":
- state = .beforeAttributeName
- return tokenizeBeforeAttributeName()
- case "/":
- state = .selfClosingStartTag
- return tokenizeSelfClosingStartTag()
- case ">":
- state = .data
- return takeCurrentToken()
- case nil:
- // parse error: eof-in-tag
- state = .endOfFile
- return nil
- case .some(var c):
- if c == "\0" {
- // parse error: unexpected-null-character
- c = "\u{FFFD}"
- } else if ("A"..."Z").contains(c) {
- c = c.asciiLowercase
- }
- if case .startTag(var s, let selfClosing, let attributes) = currentToken {
- s.append(c)
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
- return tokenizeTagName()
- } else if case .endTag(var s) = currentToken {
- s.append(c)
- currentToken = .endTag(s)
- return tokenizeTagName()
- } else {
- fatalError("bad current token")
+ while true { // iterate instead of self-recursing: each pass consumes one character via nextChar()
+ switch nextChar() {
+ case "\t", "\n", "\u{000C}", " ":
+ state = .beforeAttributeName
+ return tokenizeBeforeAttributeName()
+ case "/":
+ state = .selfClosingStartTag
+ return tokenizeSelfClosingStartTag()
+ case ">":
+ state = .data
+ return takeCurrentToken()
+ case nil:
+ // parse error: eof-in-tag (input ended inside the tag; returns nil without taking the current token)
+ state = .endOfFile
+ return nil
+ case .some(var c):
+ if c == "\0" {
+ // parse error: unexpected-null-character (the NUL is stored as U+FFFD instead)
+ c = "\u{FFFD}"
+ } else if ("A"..."Z").contains(c) {
+ c = c.asciiLowercase
+ }
+ if case .startTag(var s, let selfClosing, let attributes) = currentToken {
+ s.append(c)
+ currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+ continue // previously: return tokenizeTagName()
+ } else if case .endTag(var s) = currentToken {
+ s.append(c)
+ currentToken = .endTag(s)
+ continue // previously: return tokenizeTagName()
+ } else {
+ fatalError("bad current token")
+ }
}
}
}
@@ -732,32 +734,34 @@ private extension Tokenizer {
}
mutating func tokenizeAttributeName() -> Token? {
- let c = nextChar()
- switch c {
- case "\t", "\n", "\u{000C}", " ", "/", ">", nil:
- reconsume(c)
- state = .afterAttributeName
- return tokenizeAfterAttributeName()
- case "=":
- state = .beforeAttributeValue
- return tokenizeBeforeAttributeValue()
- case .some(var c):
- if ("A"..."Z").contains(c) {
- c = c.asciiLowercase
- }
- // if null, parse error: unexpected-null-character
- if c == "\0" {
- c = "\u{FFFD}"
- }
- // if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes[attributes.count - 1].name.append(c)
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
- return tokenizeAttributeName()
- } else if case .endTag(_) = currentToken {
- return tokenizeAttributeName()
- } else {
- fatalError("bad curren token")
+ while true { // iterate instead of self-recursing: each pass consumes one character via nextChar()
+ let c = nextChar()
+ switch c {
+ case "\t", "\n", "\u{000C}", " ", "/", ">", nil:
+ reconsume(c) // hand the terminator (or EOF) back so the next state sees it
+ state = .afterAttributeName
+ return tokenizeAfterAttributeName()
+ case "=":
+ state = .beforeAttributeValue
+ return tokenizeBeforeAttributeValue()
+ case .some(var c):
+ if ("A"..."Z").contains(c) {
+ c = c.asciiLowercase
+ }
+ // if null, parse error: unexpected-null-character (the NUL is stored as U+FFFD instead)
+ if c == "\0" {
+ c = "\u{FFFD}"
+ }
+ // if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
+ if case .startTag(let s, let selfClosing, var attributes) = currentToken {
+ attributes[attributes.count - 1].name.append(c) // assumes an attribute was already appended by an earlier state — TODO confirm
+ currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+ continue // previously: return tokenizeAttributeName()
+ } else if case .endTag(_) = currentToken {
+ continue // end tag: the character is consumed but not stored
+ } else {
+ fatalError("bad current token")
+ }
}
}
}
@@ -817,62 +821,66 @@ private extension Tokenizer {
}
mutating func tokenizeAttributeValue(quotes: AttributeValueQuotation) -> Token? {
- if quotes == .unquoted {
- switch nextChar() {
- case "\t", "\n", "\u{000C}", " ":
- state = .beforeAttributeName
- return tokenizeBeforeAttributeName()
- case "&":
- returnState = .attributeValue(.unquoted)
- state = .characterReference
- return tokenizeCharacterReference()
- case ">":
- state = .data
- return takeCurrentToken()
- case nil:
- // parse error: eof-in-tag
- state = .endOfFile
- return nil
- case .some(let c):
- // if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes[attributes.count - 1].value.append(c)
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
- return tokenizeAttributeValue(quotes: quotes)
- } else {
- fatalError("bad current token")
+ while true {
+ if quotes == .unquoted {
+ switch nextChar() {
+ case "\t", "\n", "\u{000C}", " ":
+ state = .beforeAttributeName
+ return tokenizeBeforeAttributeName()
+ case "&":
+ returnState = .attributeValue(.unquoted)
+ state = .characterReference
+ return tokenizeCharacterReference()
+ case ">":
+ state = .data
+ return takeCurrentToken()
+ case nil:
+ // parse error: eof-in-tag
+ state = .endOfFile
+ return nil
+ case .some(let c):
+ // if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
+ if case .startTag(let s, let selfClosing, var attributes) = currentToken {
+ attributes[attributes.count - 1].value.append(c)
+ currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+ continue
+ } else if case .endTag(_) = currentToken {
+ continue
+ } else {
+ fatalError("bad current token")
+ }
}
- }
- } else {
- let c = nextChar()
- switch c {
- case "\"" where quotes == .doubleQuoted:
- state = .afterAttributeValueQuoted
- return tokenizeAfterAttributeValueQuoted()
- case "'" where quotes == .singleQuoted:
- state = .afterAttributeValueQuoted
- return tokenizeAfterAttributeValueQuoted()
- case "&":
- returnState = .attributeValue(quotes)
- state = .characterReference
- return tokenizeCharacterReference()
- case nil:
- // parse error: eof-in-tag
- state = .endOfFile
- return nil
- case .some(var c):
- if c == "\0" {
- // parse error: unexpected-null-character
- c = "\u{FFFD}"
- }
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes[attributes.count - 1].value.append(c)
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
- return tokenizeAttributeValue(quotes: quotes)
- } else if case .endTag(_) = currentToken {
- return tokenizeAttributeValue(quotes: quotes)
- } else {
- fatalError("bad current token")
+ } else {
+ let c = nextChar()
+ switch c {
+ case "\"" where quotes == .doubleQuoted:
+ state = .afterAttributeValueQuoted
+ return tokenizeAfterAttributeValueQuoted()
+ case "'" where quotes == .singleQuoted:
+ state = .afterAttributeValueQuoted
+ return tokenizeAfterAttributeValueQuoted()
+ case "&":
+ returnState = .attributeValue(quotes)
+ state = .characterReference
+ return tokenizeCharacterReference()
+ case nil:
+ // parse error: eof-in-tag
+ state = .endOfFile
+ return nil
+ case .some(var c):
+ if c == "\0" {
+ // parse error: unexpected-null-character
+ c = "\u{FFFD}"
+ }
+ if case .startTag(let s, let selfClosing, var attributes) = currentToken {
+ attributes[attributes.count - 1].value.append(c)
+ currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+ continue
+ } else if case .endTag(_) = currentToken {
+ continue
+ } else {
+ fatalError("bad current token")
+ }
}
}
}