Use loops instead of recursion in hot path

Small but measurable perf win
This commit is contained in:
Shadowfacts 2023-11-27 00:04:10 -05:00
parent 29a065049e
commit 31bd174a69
1 changed file with 120 additions and 112 deletions

View File

@ -636,37 +636,39 @@ private extension Tokenizer {
}
/// Handles the name portion of a start or end tag (presumably the HTML "tag name"
/// tokenizer state — confirm against the spec).
///
/// Reads characters via `nextChar()` and reacts as follows:
/// - tab, line feed, form feed, or space: sets `state` to `.beforeAttributeName` and
///   hands off to `tokenizeBeforeAttributeName()`.
/// - `/`: sets `state` to `.selfClosingStartTag` and hands off to `tokenizeSelfClosingStartTag()`.
/// - `>`: sets `state` to `.data` and returns `takeCurrentToken()`.
/// - end of input (`nil`): sets `state` to `.endOfFile` and returns `nil`.
/// - anything else: NUL becomes U+FFFD, `A`...`Z` is replaced by its `asciiLowercase`,
///   and the character is appended to the name stored in `currentToken`.
///
/// Traps with `fatalError` if `currentToken` is neither `.startTag` nor `.endTag`.
// NOTE(review): this is a diff hunk whose +/- markers are not visible. It appears to show the
// pre-change body (one self-recursive call per appended character) followed by the post-change
// body (a `while true` loop that uses `continue` instead) — confirm against the repository.
mutating func tokenizeTagName() -> Token? {
switch nextChar() {
case "\t", "\n", "\u{000C}", " ":
state = .beforeAttributeName
return tokenizeBeforeAttributeName()
case "/":
state = .selfClosingStartTag
return tokenizeSelfClosingStartTag()
case ">":
state = .data
return takeCurrentToken()
case nil:
// parse error: eof-in-tag
state = .endOfFile
return nil
case .some(var c):
if c == "\0" {
// parse error: unexpected-null-character
c = "\u{FFFD}"
} else if ("A"..."Z").contains(c) {
c = c.asciiLowercase
}
if case .startTag(var s, let selfClosing, let attributes) = currentToken {
s.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
// recursive form: re-enter this function for the next character
return tokenizeTagName()
} else if case .endTag(var s) = currentToken {
s.append(c)
currentToken = .endTag(s)
// recursive form: re-enter this function for the next character
return tokenizeTagName()
} else {
fatalError("bad current token")
// Loop-based form starts here: same cases as above, but ordinary characters `continue`
// the loop rather than calling `tokenizeTagName()` again.
while true {
switch nextChar() {
case "\t", "\n", "\u{000C}", " ":
state = .beforeAttributeName
return tokenizeBeforeAttributeName()
case "/":
state = .selfClosingStartTag
return tokenizeSelfClosingStartTag()
case ">":
state = .data
return takeCurrentToken()
case nil:
// parse error: eof-in-tag
state = .endOfFile
return nil
case .some(var c):
if c == "\0" {
// parse error: unexpected-null-character
c = "\u{FFFD}"
} else if ("A"..."Z").contains(c) {
c = c.asciiLowercase
}
// The enum payload is copied out, mutated, and written back to `currentToken`.
if case .startTag(var s, let selfClosing, let attributes) = currentToken {
s.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
continue
} else if case .endTag(var s) = currentToken {
s.append(c)
currentToken = .endTag(s)
continue
} else {
fatalError("bad current token")
}
}
}
}
@ -732,32 +734,34 @@ private extension Tokenizer {
}
/// Accumulates the name of the attribute currently being read.
///
/// Reads characters via `nextChar()` and reacts as follows:
/// - tab, line feed, form feed, space, `/`, `>`, or end of input: passes the character to
///   `reconsume`, sets `state` to `.afterAttributeName`, and hands off to
///   `tokenizeAfterAttributeName()`.
/// - `=`: sets `state` to `.beforeAttributeValue` and hands off to `tokenizeBeforeAttributeValue()`.
/// - anything else: `A`...`Z` is replaced by its `asciiLowercase`, NUL becomes U+FFFD, and the
///   character is appended to the `name` of the last element of the start tag's `attributes`.
///   When `currentToken` is an `.endTag`, the character is consumed but not stored.
///
/// Traps with `fatalError` if `currentToken` is neither `.startTag` nor `.endTag`.
// NOTE(review): this is a diff hunk whose +/- markers are not visible. It appears to show the
// pre-change self-recursive body followed by the post-change `while true` body — confirm
// against the repository.
// NOTE(review): `attributes[attributes.count - 1]` assumes at least one attribute already
// exists; presumably an earlier state appends it — verify against the caller.
mutating func tokenizeAttributeName() -> Token? {
let c = nextChar()
switch c {
case "\t", "\n", "\u{000C}", " ", "/", ">", nil:
reconsume(c)
state = .afterAttributeName
return tokenizeAfterAttributeName()
case "=":
state = .beforeAttributeValue
return tokenizeBeforeAttributeValue()
case .some(var c):
if ("A"..."Z").contains(c) {
c = c.asciiLowercase
}
// if null, parse error: unexpected-null-character
if c == "\0" {
c = "\u{FFFD}"
}
// if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].name.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
// recursive form: re-enter this function for the next character
return tokenizeAttributeName()
} else if case .endTag(_) = currentToken {
// end tag: nothing is stored for this character
return tokenizeAttributeName()
} else {
// NOTE(review): message below reads "curren" (typo for "current")
fatalError("bad curren token")
// Loop-based form starts here: same cases as above, but ordinary characters `continue`
// the loop rather than calling `tokenizeAttributeName()` again.
while true {
let c = nextChar()
switch c {
case "\t", "\n", "\u{000C}", " ", "/", ">", nil:
reconsume(c)
state = .afterAttributeName
return tokenizeAfterAttributeName()
case "=":
state = .beforeAttributeValue
return tokenizeBeforeAttributeValue()
case .some(var c):
if ("A"..."Z").contains(c) {
c = c.asciiLowercase
}
// if null, parse error: unexpected-null-character
if c == "\0" {
c = "\u{FFFD}"
}
// if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].name.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
continue
} else if case .endTag(_) = currentToken {
// end tag: nothing is stored for this character
continue
} else {
// NOTE(review): message below reads "curren" (typo for "current")
fatalError("bad curren token")
}
}
}
}
@ -817,62 +821,66 @@ private extension Tokenizer {
}
/// Accumulates the value of the attribute currently being read.
///
/// Unquoted values (`quotes == .unquoted`):
/// - tab, line feed, form feed, or space: sets `state` to `.beforeAttributeName` and hands off
///   to `tokenizeBeforeAttributeName()`.
/// - `&`: records `.attributeValue(.unquoted)` in `returnState`, sets `state` to
///   `.characterReference`, and hands off to `tokenizeCharacterReference()`.
/// - `>`: sets `state` to `.data` and returns `takeCurrentToken()`.
/// - end of input: sets `state` to `.endOfFile` and returns `nil`.
/// - anything else: appended unchanged to the `value` of the last attribute.
///
/// Quoted values:
/// - the quote character matching `quotes`: sets `state` to `.afterAttributeValueQuoted` and
///   hands off to `tokenizeAfterAttributeValueQuoted()`.
/// - `&`: records `.attributeValue(quotes)` in `returnState` and hands off to
///   `tokenizeCharacterReference()`.
/// - end of input: sets `state` to `.endOfFile` and returns `nil`.
/// - anything else: NUL becomes U+FFFD, then the character is appended to the `value` of the
///   last attribute. When `currentToken` is an `.endTag`, the character is not stored.
///
/// - Parameter quotes: How the value being read is delimited; compared here against
///   `.unquoted`, `.doubleQuoted`, and `.singleQuoted`.
/// - Returns: The result of the follow-on tokenizer function, `takeCurrentToken()` on `>`,
///   or `nil` at end of input.
// NOTE(review): this is a diff hunk whose +/- markers are not visible. Removed and added lines
// appear to alternate per branch: pre-change unquoted, post-change unquoted (inside
// `while true`), pre-change quoted, post-change quoted — confirm against the repository.
mutating func tokenizeAttributeValue(quotes: AttributeValueQuotation) -> Token? {
if quotes == .unquoted {
switch nextChar() {
case "\t", "\n", "\u{000C}", " ":
state = .beforeAttributeName
return tokenizeBeforeAttributeName()
case "&":
returnState = .attributeValue(.unquoted)
state = .characterReference
return tokenizeCharacterReference()
case ">":
state = .data
return takeCurrentToken()
case nil:
// parse error: eof-in-tag
state = .endOfFile
return nil
case .some(let c):
// if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].value.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
// recursive form: re-enter this function for the next character
return tokenizeAttributeValue(quotes: quotes)
} else {
// in this version an `.endTag` current token also lands here
fatalError("bad current token")
// Loop-based form starts here; `quotes` does not change, so the same branch is taken on
// every iteration.
while true {
if quotes == .unquoted {
switch nextChar() {
case "\t", "\n", "\u{000C}", " ":
state = .beforeAttributeName
return tokenizeBeforeAttributeName()
case "&":
returnState = .attributeValue(.unquoted)
state = .characterReference
return tokenizeCharacterReference()
case ">":
state = .data
return takeCurrentToken()
case nil:
// parse error: eof-in-tag
state = .endOfFile
return nil
case .some(let c):
// if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].value.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
continue
} else if case .endTag(_) = currentToken {
// Not present in the unquoted branch above: an end tag now drops the character
// instead of reaching `fatalError`, matching the quoted branch.
continue
} else {
fatalError("bad current token")
}
}
}
} else {
let c = nextChar()
switch c {
case "\"" where quotes == .doubleQuoted:
state = .afterAttributeValueQuoted
return tokenizeAfterAttributeValueQuoted()
case "'" where quotes == .singleQuoted:
state = .afterAttributeValueQuoted
return tokenizeAfterAttributeValueQuoted()
case "&":
returnState = .attributeValue(quotes)
state = .characterReference
return tokenizeCharacterReference()
case nil:
// parse error: eof-in-tag
state = .endOfFile
return nil
case .some(var c):
if c == "\0" {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].value.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
// recursive form: re-enter this function for the next character
return tokenizeAttributeValue(quotes: quotes)
} else if case .endTag(_) = currentToken {
// end tag: nothing is stored for this character
return tokenizeAttributeValue(quotes: quotes)
} else {
fatalError("bad current token")
} else {
let c = nextChar()
switch c {
// A quote character that does not match `quotes` falls through to `.some` below and is
// appended like any other character.
case "\"" where quotes == .doubleQuoted:
state = .afterAttributeValueQuoted
return tokenizeAfterAttributeValueQuoted()
case "'" where quotes == .singleQuoted:
state = .afterAttributeValueQuoted
return tokenizeAfterAttributeValueQuoted()
case "&":
returnState = .attributeValue(quotes)
state = .characterReference
return tokenizeCharacterReference()
case nil:
// parse error: eof-in-tag
state = .endOfFile
return nil
case .some(var c):
if c == "\0" {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].value.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
continue
} else if case .endTag(_) = currentToken {
// end tag: nothing is stored for this character
continue
} else {
fatalError("bad current token")
}
}
}
}