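Use loops instead of recursion in tokenizer hot paths
Small but measurable performance win: tokenizeTagName, tokenizeAttributeName and tokenizeAttributeValue now iterate with `while true` instead of calling themselves once per consumed character.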
This commit is contained in:
parent
29a065049e
commit
31bd174a69
|
@@ -636,37 +636,39 @@ private extension Tokenizer {
|
|||
}
|
||||
|
||||
/// Tag-name state of the tokenizer.
///
/// Reads characters with `nextChar()` and appends each one to the name carried by the
/// current `.startTag` / `.endTag` token. Tab, LF, FF or space hands off to
/// `tokenizeBeforeAttributeName()`, "/" to `tokenizeSelfClosingStartTag()`, ">" sets
/// `state = .data` and returns `takeCurrentToken()`, and end of input sets
/// `state = .endOfFile` and returns nil. NUL is replaced with U+FFFD and "A"..."Z" is
/// mapped through `asciiLowercase` before being appended.
/// Any other kind of current token ends in `fatalError`.
///
/// NOTE(review): this is a diff rendering without +/- markers. Going by the commit
/// title, the first body below (which re-enters via `return tokenizeTagName()`) is the
/// replaced version and the `while true` body after it is the new one.
mutating func tokenizeTagName() -> Token? {
|
||||
// Recursive body: every appended character triggers another call to tokenizeTagName().
switch nextChar() {
|
||||
case "\t", "\n", "\u{000C}", " ":
|
||||
state = .beforeAttributeName
|
||||
return tokenizeBeforeAttributeName()
|
||||
case "/":
|
||||
state = .selfClosingStartTag
|
||||
return tokenizeSelfClosingStartTag()
|
||||
case ">":
|
||||
state = .data
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-tag
|
||||
state = .endOfFile
|
||||
return nil
|
||||
case .some(var c):
|
||||
if c == "\0" {
|
||||
// parse error: unexpected-null-character
|
||||
c = "\u{FFFD}"
|
||||
} else if ("A"..."Z").contains(c) {
|
||||
c = c.asciiLowercase
|
||||
}
|
||||
if case .startTag(var s, let selfClosing, let attributes) = currentToken {
|
||||
s.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
return tokenizeTagName()
|
||||
} else if case .endTag(var s) = currentToken {
|
||||
s.append(c)
|
||||
currentToken = .endTag(s)
|
||||
return tokenizeTagName()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
// Iterative body: same cases as above, but `continue` takes the place of the self-call
// after a character has been appended.
while true {
|
||||
switch nextChar() {
|
||||
case "\t", "\n", "\u{000C}", " ":
|
||||
state = .beforeAttributeName
|
||||
return tokenizeBeforeAttributeName()
|
||||
case "/":
|
||||
state = .selfClosingStartTag
|
||||
return tokenizeSelfClosingStartTag()
|
||||
case ">":
|
||||
state = .data
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-tag
|
||||
state = .endOfFile
|
||||
return nil
|
||||
case .some(var c):
|
||||
if c == "\0" {
|
||||
// parse error: unexpected-null-character
|
||||
c = "\u{FFFD}"
|
||||
} else if ("A"..."Z").contains(c) {
|
||||
c = c.asciiLowercase
|
||||
}
|
||||
// The token is an enum with associated values: unpack it, mutate the name, and store it back.
if case .startTag(var s, let selfClosing, let attributes) = currentToken {
|
||||
s.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
continue
|
||||
} else if case .endTag(var s) = currentToken {
|
||||
s.append(c)
|
||||
currentToken = .endTag(s)
|
||||
continue
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -732,32 +734,34 @@ private extension Tokenizer {
|
|||
}
|
||||
|
||||
/// Attribute-name state of the tokenizer.
///
/// Appends consumed characters to the `name` of the last attribute of the current
/// `.startTag` token. Tab, LF, FF, space, "/", ">" and end of input are pushed back with
/// `reconsume(_:)` and handed to `tokenizeAfterAttributeName()`; "=" moves on to
/// `tokenizeBeforeAttributeValue()`. "A"..."Z" is mapped through `asciiLowercase` and NUL
/// becomes U+FFFD. When the current token is `.endTag` the character is consumed and
/// dropped. Any other kind of current token ends in `fatalError`.
///
/// NOTE(review): this is a diff rendering without +/- markers. Going by the commit
/// title, the first body below (which re-enters via `return tokenizeAttributeName()`) is
/// the replaced version and the `while true` body after it is the new one.
/// NOTE(review): the message "bad curren token" looks like a typo of "bad current token",
/// which the neighbouring tokenizer functions use — left untouched here because it is a
/// runtime string.
mutating func tokenizeAttributeName() -> Token? {
|
||||
// Recursive body: every consumed character triggers another call to tokenizeAttributeName().
let c = nextChar()
|
||||
switch c {
|
||||
case "\t", "\n", "\u{000C}", " ", "/", ">", nil:
|
||||
reconsume(c)
|
||||
state = .afterAttributeName
|
||||
return tokenizeAfterAttributeName()
|
||||
case "=":
|
||||
state = .beforeAttributeValue
|
||||
return tokenizeBeforeAttributeValue()
|
||||
case .some(var c):
|
||||
if ("A"..."Z").contains(c) {
|
||||
c = c.asciiLowercase
|
||||
}
|
||||
// if null, parse error: unexpected-null-character
|
||||
if c == "\0" {
|
||||
c = "\u{FFFD}"
|
||||
}
|
||||
// if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes[attributes.count - 1].name.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
return tokenizeAttributeName()
|
||||
} else if case .endTag(_) = currentToken {
|
||||
return tokenizeAttributeName()
|
||||
} else {
|
||||
fatalError("bad curren token")
|
||||
// Iterative body: same cases as above, but `continue` takes the place of the self-call.
while true {
|
||||
let c = nextChar()
|
||||
switch c {
|
||||
case "\t", "\n", "\u{000C}", " ", "/", ">", nil:
|
||||
reconsume(c)
|
||||
state = .afterAttributeName
|
||||
return tokenizeAfterAttributeName()
|
||||
case "=":
|
||||
state = .beforeAttributeValue
|
||||
return tokenizeBeforeAttributeValue()
|
||||
case .some(var c):
|
||||
if ("A"..."Z").contains(c) {
|
||||
c = c.asciiLowercase
|
||||
}
|
||||
// if null, parse error: unexpected-null-character
|
||||
if c == "\0" {
|
||||
c = "\u{FFFD}"
|
||||
}
|
||||
// if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
// Indexes the last attribute; assumes an earlier state already appended one — TODO confirm.
attributes[attributes.count - 1].name.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
continue
|
||||
} else if case .endTag(_) = currentToken {
|
||||
// End tags: the character is consumed but not stored anywhere.
continue
|
||||
} else {
|
||||
fatalError("bad curren token")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -817,62 +821,66 @@ private extension Tokenizer {
|
|||
}
|
||||
|
||||
/// Attribute-value state of the tokenizer for the quotation style given by `quotes`.
///
/// Appends consumed characters to the `value` of the last attribute of the current
/// `.startTag` token. "&" records the return state in `returnState` and switches to
/// `tokenizeCharacterReference()`; end of input sets `state = .endOfFile` and returns nil.
/// Unquoted values end at tab, LF, FF or space (-> `tokenizeBeforeAttributeName()`) or at
/// ">" (-> `takeCurrentToken()`); quoted values end at the matching quote character
/// (-> `tokenizeAfterAttributeValueQuoted()`).
///
/// NOTE(review): this is a diff rendering without +/- markers. Going by the commit
/// title, the bodies that re-enter via `return tokenizeAttributeValue(quotes: quotes)` are
/// the replaced version and the `while true` body is the new one. The closing braces
/// between the bodies appear to be a mix of added, shared and removed diff lines, and the
/// hunk appears to end before the function's final closing brace.
mutating func tokenizeAttributeValue(quotes: AttributeValueQuotation) -> Token? {
|
||||
// Recursive unquoted body. Note it has no `.endTag` branch: any current token other than
// `.startTag` reaches fatalError.
if quotes == .unquoted {
|
||||
switch nextChar() {
|
||||
case "\t", "\n", "\u{000C}", " ":
|
||||
state = .beforeAttributeName
|
||||
return tokenizeBeforeAttributeName()
|
||||
case "&":
|
||||
returnState = .attributeValue(.unquoted)
|
||||
state = .characterReference
|
||||
return tokenizeCharacterReference()
|
||||
case ">":
|
||||
state = .data
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-tag
|
||||
state = .endOfFile
|
||||
return nil
|
||||
case .some(let c):
|
||||
// if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes[attributes.count - 1].value.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
return tokenizeAttributeValue(quotes: quotes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
// Iterative version: one loop wraps both the unquoted and the quoted branch, and
// `continue` takes the place of the self-call.
while true {
|
||||
if quotes == .unquoted {
|
||||
switch nextChar() {
|
||||
case "\t", "\n", "\u{000C}", " ":
|
||||
state = .beforeAttributeName
|
||||
return tokenizeBeforeAttributeName()
|
||||
case "&":
|
||||
returnState = .attributeValue(.unquoted)
|
||||
state = .characterReference
|
||||
return tokenizeCharacterReference()
|
||||
case ">":
|
||||
state = .data
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-tag
|
||||
state = .endOfFile
|
||||
return nil
|
||||
// NOTE(review): unlike the quoted branch, no NUL -> U+FFFD replacement happens here; the
// WHATWG "attribute value (unquoted)" state specifies one — confirm whether this is intentional.
case .some(let c):
|
||||
// if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes[attributes.count - 1].value.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
continue
|
||||
// NOTE(review): this `.endTag` branch has no counterpart in the recursive unquoted body
// above, so end tags now drop the character here instead of hitting fatalError. That is a
// behaviour change beyond recursion -> loop; it mirrors the quoted branch.
} else if case .endTag(_) = currentToken {
|
||||
continue
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Recursive quoted body (single- or double-quoted, selected by the `where` clauses).
let c = nextChar()
|
||||
switch c {
|
||||
case "\"" where quotes == .doubleQuoted:
|
||||
state = .afterAttributeValueQuoted
|
||||
return tokenizeAfterAttributeValueQuoted()
|
||||
case "'" where quotes == .singleQuoted:
|
||||
state = .afterAttributeValueQuoted
|
||||
return tokenizeAfterAttributeValueQuoted()
|
||||
case "&":
|
||||
returnState = .attributeValue(quotes)
|
||||
state = .characterReference
|
||||
return tokenizeCharacterReference()
|
||||
case nil:
|
||||
// parse error: eof-in-tag
|
||||
state = .endOfFile
|
||||
return nil
|
||||
case .some(var c):
|
||||
if c == "\0" {
|
||||
// parse error: unexpected-null-character
|
||||
c = "\u{FFFD}"
|
||||
}
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes[attributes.count - 1].value.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
return tokenizeAttributeValue(quotes: quotes)
|
||||
} else if case .endTag(_) = currentToken {
|
||||
return tokenizeAttributeValue(quotes: quotes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
} else {
|
||||
// Iterative quoted body: same cases as the recursive quoted body, with `continue`
// replacing the self-call. A quote character of the other style falls through to
// `.some` and is appended to the value.
let c = nextChar()
|
||||
switch c {
|
||||
case "\"" where quotes == .doubleQuoted:
|
||||
state = .afterAttributeValueQuoted
|
||||
return tokenizeAfterAttributeValueQuoted()
|
||||
case "'" where quotes == .singleQuoted:
|
||||
state = .afterAttributeValueQuoted
|
||||
return tokenizeAfterAttributeValueQuoted()
|
||||
case "&":
|
||||
returnState = .attributeValue(quotes)
|
||||
state = .characterReference
|
||||
return tokenizeCharacterReference()
|
||||
case nil:
|
||||
// parse error: eof-in-tag
|
||||
state = .endOfFile
|
||||
return nil
|
||||
case .some(var c):
|
||||
if c == "\0" {
|
||||
// parse error: unexpected-null-character
|
||||
c = "\u{FFFD}"
|
||||
}
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes[attributes.count - 1].value.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
continue
|
||||
} else if case .endTag(_) = currentToken {
|
||||
continue
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue