Use loops instead of recursion in hot path

Small but measurable perf win
This commit is contained in:
Shadowfacts 2023-11-27 00:04:10 -05:00
parent 29a065049e
commit 31bd174a69
1 changed file with 120 additions and 112 deletions

View File

@ -636,37 +636,39 @@ private extension Tokenizer {
} }
/// Tag name state: builds up the name stored in the current start or end tag token.
///
/// Reads one character per loop iteration instead of re-entering itself for every
/// character. The loop only exits by returning: either the result of the state that
/// is switched to, the finished current token on `>`, or `nil` at end of input.
mutating func tokenizeTagName() -> Token? {
    while true {
        guard var ch = nextChar() else {
            // parse error: eof-in-tag
            state = .endOfFile
            return nil
        }

        // Characters that end the tag name return from here; everything else is
        // normalized (if needed) and falls through to be appended below.
        switch ch {
        case "\t", "\n", "\u{000C}", " ":
            state = .beforeAttributeName
            return tokenizeBeforeAttributeName()
        case "/":
            state = .selfClosingStartTag
            return tokenizeSelfClosingStartTag()
        case ">":
            state = .data
            return takeCurrentToken()
        case "\0":
            // parse error: unexpected-null-character
            ch = "\u{FFFD}"
        case "A"..."Z":
            ch = ch.asciiLowercase
        default:
            break
        }

        // Append to whichever kind of tag is being built, then go around for the next character.
        switch currentToken {
        case .startTag(var name, let selfClosing, let attributes):
            name.append(ch)
            currentToken = .startTag(name, selfClosing: selfClosing, attributes: attributes)
        case .endTag(var name):
            name.append(ch)
            currentToken = .endTag(name)
        default:
            fatalError("bad current token")
        }
    }
}
@ -732,32 +734,34 @@ private extension Tokenizer {
} }
/// Attribute name state: accumulates the name of the last attribute of the current start tag.
///
/// Reads one character per loop iteration instead of re-entering itself for every
/// character. The loop only exits by returning the result of the state it hands off to:
/// `tokenizeAfterAttributeName()` when the name is ended by whitespace, `/`, `>` or end
/// of input, and `tokenizeBeforeAttributeValue()` when it is ended by `=`.
mutating func tokenizeAttributeName() -> Token? {
    while true {
        let c = nextChar()
        switch c {
        case "\t", "\n", "\u{000C}", " ", "/", ">", nil:
            // The terminating character (or nil) is passed to reconsume before moving on.
            reconsume(c)
            state = .afterAttributeName
            return tokenizeAfterAttributeName()
        case "=":
            state = .beforeAttributeValue
            return tokenizeBeforeAttributeValue()
        case .some(var c):
            if ("A"..."Z").contains(c) {
                c = c.asciiLowercase
            }
            // if null, parse error: unexpected-null-character
            if c == "\0" {
                c = "\u{FFFD}"
            }
            // if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
            if case .startTag(let s, let selfClosing, var attributes) = currentToken {
                attributes[attributes.count - 1].name.append(c)
                currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
                continue
            } else if case .endTag(_) = currentToken {
                // End tags store no attributes here: the character is consumed and not recorded.
                continue
            } else {
                // Message spelled the same way as in the other tokenizer states.
                fatalError("bad current token")
            }
        }
    }
}
@ -817,62 +821,66 @@ private extension Tokenizer {
} }
mutating func tokenizeAttributeValue(quotes: AttributeValueQuotation) -> Token? { mutating func tokenizeAttributeValue(quotes: AttributeValueQuotation) -> Token? {
if quotes == .unquoted { while true {
switch nextChar() { if quotes == .unquoted {
case "\t", "\n", "\u{000C}", " ": switch nextChar() {
state = .beforeAttributeName case "\t", "\n", "\u{000C}", " ":
return tokenizeBeforeAttributeName() state = .beforeAttributeName
case "&": return tokenizeBeforeAttributeName()
returnState = .attributeValue(.unquoted) case "&":
state = .characterReference returnState = .attributeValue(.unquoted)
return tokenizeCharacterReference() state = .characterReference
case ">": return tokenizeCharacterReference()
state = .data case ">":
return takeCurrentToken() state = .data
case nil: return takeCurrentToken()
// parse error: eof-in-tag case nil:
state = .endOfFile // parse error: eof-in-tag
return nil state = .endOfFile
case .some(let c): return nil
// if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value case .some(let c):
if case .startTag(let s, let selfClosing, var attributes) = currentToken { // if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
attributes[attributes.count - 1].value.append(c) if case .startTag(let s, let selfClosing, var attributes) = currentToken {
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) attributes[attributes.count - 1].value.append(c)
return tokenizeAttributeValue(quotes: quotes) currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
} else { continue
fatalError("bad current token") } else if case .endTag(_) = currentToken {
continue
} else {
fatalError("bad current token")
}
} }
} } else {
} else { let c = nextChar()
let c = nextChar() switch c {
switch c { case "\"" where quotes == .doubleQuoted:
case "\"" where quotes == .doubleQuoted: state = .afterAttributeValueQuoted
state = .afterAttributeValueQuoted return tokenizeAfterAttributeValueQuoted()
return tokenizeAfterAttributeValueQuoted() case "'" where quotes == .singleQuoted:
case "'" where quotes == .singleQuoted: state = .afterAttributeValueQuoted
state = .afterAttributeValueQuoted return tokenizeAfterAttributeValueQuoted()
return tokenizeAfterAttributeValueQuoted() case "&":
case "&": returnState = .attributeValue(quotes)
returnState = .attributeValue(quotes) state = .characterReference
state = .characterReference return tokenizeCharacterReference()
return tokenizeCharacterReference() case nil:
case nil: // parse error: eof-in-tag
// parse error: eof-in-tag state = .endOfFile
state = .endOfFile return nil
return nil case .some(var c):
case .some(var c): if c == "\0" {
if c == "\0" { // parse error: unexpected-null-character
// parse error: unexpected-null-character c = "\u{FFFD}"
c = "\u{FFFD}" }
} if case .startTag(let s, let selfClosing, var attributes) = currentToken {
if case .startTag(let s, let selfClosing, var attributes) = currentToken { attributes[attributes.count - 1].value.append(c)
attributes[attributes.count - 1].value.append(c) currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) continue
return tokenizeAttributeValue(quotes: quotes) } else if case .endTag(_) = currentToken {
} else if case .endTag(_) = currentToken { continue
return tokenizeAttributeValue(quotes: quotes) } else {
} else { fatalError("bad current token")
fatalError("bad current token") }
} }
} }
} }