Don't use enum with associated values for current token

Associated values cannot be modified in place, so every change to the current token resulted in a bunch of extra copies
This commit is contained in:
Shadowfacts 2023-11-28 10:36:04 -05:00
parent 31bd174a69
commit f412369cf7
1 changed files with 172 additions and 358 deletions

View File

@ -16,7 +16,12 @@ struct Tokenizer<Chars: IteratorProtocol<Character>>: IteratorProtocol {
private var returnState: State?
private var temporaryBuffer: String?
private var characterReferenceCode: UInt32?
private var currentToken: Token?
// Optimization: using an enum for the current token means we can't modify the associated values in-place
// Separate fields for everything increases the risk of invalid states, but nets us a small perf gain.
private var currentStartTag: (String, selfClosing: Bool, attributes: [Attribute])?
private var currentEndTag: String?
private var currentComment: String?
private var currentDoctype: (String, forceQuirks: Bool, publicIdentifier: String?, systemIdentifier: String?)?
init(chars: Chars) {
self.chars = chars
@ -187,8 +192,21 @@ struct Tokenizer<Chars: IteratorProtocol<Character>>: IteratorProtocol {
}
/// Hands back the token currently under construction and clears the storage that held it.
///
/// The four per-kind fields are inspected in a fixed order: start tag, end tag, comment, doctype.
/// The first one that is non-nil is reset to nil and converted into the matching `Token` case.
/// If none of the four fields is set, the function traps with `preconditionFailure`.
///
/// - Returns: The token built from whichever "current" field was populated.
private mutating func takeCurrentToken() -> Token {
// NOTE(review): the `defer` / `return currentToken!` pair below looks like the pre-change body
// (single `currentToken` enum field) shown interleaved with its replacement; as written here it
// returns before the `if let` chain is reached. Confirm against the actual file.
defer { currentToken = nil }
return currentToken!
// Start tag: the unlabeled tuple element `.0` is passed as the first `.startTag` argument.
if let currentStartTag {
self.currentStartTag = nil
return .startTag(currentStartTag.0, selfClosing: currentStartTag.selfClosing, attributes: currentStartTag.attributes)
} else if let currentEndTag {
self.currentEndTag = nil
return .endTag(currentEndTag)
} else if let currentComment {
self.currentComment = nil
return .comment(currentComment)
} else if let currentDoctype {
// Doctype: `.0` is passed first, followed by the quirks flag and the two optional identifiers.
self.currentDoctype = nil
return .doctype(currentDoctype.0, forceQuirks: currentDoctype.forceQuirks, publicIdentifier: currentDoctype.publicIdentifier, systemIdentifier: currentDoctype.systemIdentifier)
} else {
// No field populated: calling this without a token in progress is a programmer error.
preconditionFailure("takeCurrentToken called without current token")
}
}
}
@ -404,14 +422,9 @@ private extension Tokenizer {
mutating func flushCharacterReference() {
if case .attributeValue(_) = returnState {
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].value.append(temporaryBuffer!)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
temporaryBuffer = nil
state = returnState!
} else {
fatalError("bad current tag")
}
currentStartTag!.attributes.uncheckedLast.value.append(temporaryBuffer!)
temporaryBuffer = nil
state = returnState!
} else {
state = .flushingTemporaryBuffer(returnState!)
}
@ -561,13 +574,8 @@ private extension Tokenizer {
switch c {
case .some("0"..."9"), .some("a"..."z"), .some("A"..."Z"):
if case .attributeValue(_) = returnState {
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].value.append(c!)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
} else {
fatalError("bad current token")
}
return next()
currentStartTag!.attributes.uncheckedLast.value.append(c!)
return tokenizeAmbiguousAmpersand()
} else {
return .character(c!)
}
@ -590,7 +598,7 @@ private extension Tokenizer {
return tokenizeEndTagOpen()
case "?":
// parse error: unexpected-question-mark-instead-of-tag-name
currentToken = .comment("")
currentComment = ""
state = .bogusComment
return tokenizeBogusComment()
case nil:
@ -598,7 +606,7 @@ private extension Tokenizer {
state = .endOfFile
return .character("<")
case .some("a"..."z"), .some("A"..."Z"):
currentToken = .startTag("", selfClosing: false, attributes: [])
currentStartTag = ("", selfClosing: false, attributes: [])
reconsume(c)
state = .tagName
return tokenizeTagName()
@ -614,7 +622,7 @@ private extension Tokenizer {
let c = nextChar()
switch c {
case .some("a"..."z"), .some("A"..."Z"):
currentToken = .endTag("")
currentEndTag = ""
reconsume(c)
state = .tagName
return tokenizeTagName()
@ -628,7 +636,7 @@ private extension Tokenizer {
return .character("<")
case .some(let c):
// parse error: invalid-first-character-of-tag-name
currentToken = .comment("")
currentComment = ""
reconsume(c)
state = .bogusComment
return tokenizeBogusComment()
@ -636,6 +644,8 @@ private extension Tokenizer {
}
mutating func tokenizeTagName() -> Token? {
// Optimization: this is a hot path where we stay in this state for a while before emitting a token,
// and the function call overhead of recursion costs a bit of perf.
while true {
switch nextChar() {
case "\t", "\n", "\u{000C}", " ":
@ -658,13 +668,11 @@ private extension Tokenizer {
} else if ("A"..."Z").contains(c) {
c = c.asciiLowercase
}
if case .startTag(var s, let selfClosing, let attributes) = currentToken {
s.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
if currentStartTag != nil {
currentStartTag!.0.append(c)
continue
} else if case .endTag(var s) = currentToken {
s.append(c)
currentToken = .endTag(s)
} else if currentEndTag != nil {
currentEndTag!.append(c)
continue
} else {
fatalError("bad current token")
@ -676,11 +684,7 @@ private extension Tokenizer {
mutating func tokenizeSelfClosingStartTag() -> Token? {
switch nextChar() {
case ">":
if case .startTag(let s, _, let attributes) = currentToken {
currentToken = .startTag(s, selfClosing: true, attributes: attributes)
} else {
fatalError("bad current token")
}
currentStartTag!.selfClosing = true
state = .data
return takeCurrentToken()
case nil:
@ -707,22 +711,16 @@ private extension Tokenizer {
return tokenizeAfterAttributeName()
case "=":
// parse error: unexpected-equals-sign-before-attribute-name
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes.append(Attribute(name: "=", value: ""))
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
state = .attributeName
return tokenizeAttributeName()
} else {
fatalError("bad current token")
}
currentStartTag!.attributes.append(Attribute(name: "=", value: ""))
state = .attributeName
return tokenizeAttributeName()
default:
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes.append(Attribute(name: "", value: ""))
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
if currentStartTag != nil {
currentStartTag!.attributes.append(Attribute(name: "", value: ""))
reconsume(c)
state = .attributeName
return tokenizeAttributeName()
} else if case .endTag(_) = currentToken {
} else if currentEndTag != nil {
// ignore
reconsume(c)
state = .attributeName
@ -753,11 +751,10 @@ private extension Tokenizer {
c = "\u{FFFD}"
}
// if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].name.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
if currentStartTag != nil {
currentStartTag!.attributes.uncheckedLast.name.append(c)
continue
} else if case .endTag(_) = currentToken {
} else if currentEndTag != nil {
continue
} else {
fatalError("bad curren token")
@ -782,13 +779,12 @@ private extension Tokenizer {
state = .endOfFile
return nil
case .some(let c):
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes.append(Attribute(name: "", value: ""))
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
if currentStartTag != nil {
currentStartTag!.attributes.append(Attribute(name: "", value: ""))
reconsume(c)
state = .attributeName
return tokenizeAttributeName()
} else if case .endTag(_) = currentToken {
} else if currentEndTag != nil {
reconsume(c)
state = .attributeName
return tokenizeAttributeName()
@ -840,11 +836,10 @@ private extension Tokenizer {
return nil
case .some(let c):
// if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].value.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
if currentStartTag != nil {
currentStartTag!.attributes.uncheckedLast.value.append(c)
continue
} else if case .endTag(_) = currentToken {
} else if currentEndTag != nil {
continue
} else {
fatalError("bad current token")
@ -872,14 +867,11 @@ private extension Tokenizer {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
attributes[attributes.count - 1].value.append(c)
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
if currentStartTag != nil {
currentStartTag!.attributes.uncheckedLast.value.append(c)
continue
} else if case .endTag(_) = currentToken {
} else if currentEndTag != nil {
continue
} else {
fatalError("bad current token")
}
}
}
@ -922,13 +914,8 @@ private extension Tokenizer {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
if case .comment(var s) = currentToken {
s.append(c)
currentToken = .comment(s)
return tokenizeBogusComment()
} else {
fatalError("bad current token")
}
currentComment!.append(c)
return tokenizeBogusComment()
}
}
@ -936,7 +923,7 @@ private extension Tokenizer {
let peeked = peek(count: 7)
if peeked.starts(with: "--") {
consume(count: 2)
currentToken = .comment("")
currentComment = ""
state = .commentStart
return tokenizeCommentStart()
} else if peeked.lowercased() == "doctype" {
@ -946,12 +933,12 @@ private extension Tokenizer {
} else if peeked == "[CDATA[" {
// TODO: we don't do any of the tree construction stuff yet, so can't really handle this
// consume(count: 7)
currentToken = .comment("")
currentComment = ""
state = .bogusComment
return tokenizeBogusComment()
} else {
// parse error: incorrectly-opened-comment
currentToken = .comment("")
currentComment = ""
state = .bogusComment
return tokenizeBogusComment()
}
@ -986,29 +973,19 @@ private extension Tokenizer {
// parse error: eof-in-comment
return takeCurrentToken()
case .some(let c):
if case .comment(var s) = currentToken {
s.append("-")
currentToken = .comment(s)
reconsume(c)
state = .comment
return tokenizeComment()
} else {
fatalError("bad current token")
}
currentComment!.append("-")
reconsume(c)
state = .comment
return tokenizeComment()
}
}
mutating func tokenizeComment() -> Token? {
switch nextChar() {
case "<":
if case .comment(var s) = currentToken {
s.append("<")
currentToken = .comment(s)
state = .commentLessThanSign
return tokenizeCommentLessThanSign()
} else {
fatalError("bad current token")
}
currentComment!.append("<")
state = .commentLessThanSign
return tokenizeCommentLessThanSign()
case "-":
state = .commentEndDash
return tokenizeCommentEndDash()
@ -1021,35 +998,20 @@ private extension Tokenizer {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
if case .comment(var s) = currentToken {
s.append(c)
currentToken = .comment(s)
return tokenizeComment()
} else {
fatalError("bad current token")
}
currentComment!.append(c)
return tokenizeComment()
}
}
mutating func tokenizeCommentLessThanSign() -> Token? {
switch nextChar() {
case "!":
if case .comment(var s) = currentToken {
s.append("!")
currentToken = .comment(s)
state = .commentLessThanSignBang
return tokenizeCommentLessThanSignBang()
} else {
fatalError("bad current token")
}
currentComment!.append("!")
state = .commentLessThanSignBang
return tokenizeCommentLessThanSignBang()
case "<":
if case .comment(var s) = currentToken {
s.append("<")
currentToken = .comment(s)
return tokenizeComment()
} else {
fatalError("bad current token")
}
currentComment!.append("<")
return tokenizeComment()
case let c:
reconsume(c)
state = .comment
@ -1106,15 +1068,10 @@ private extension Tokenizer {
state = .endOfFile
return takeCurrentToken()
case let c:
if case .comment(var s) = currentToken {
s.append("-")
currentToken = .comment(s)
} else {
fatalError("bad current token")
}
currentComment!.append("-")
reconsume(c)
state = .comment
return next()
return tokenizeComment()
}
}
@ -1127,24 +1084,14 @@ private extension Tokenizer {
state = .commentEndBang
return tokenizeCommentEndBang()
case "-":
if case .comment(var s) = currentToken {
s.append("-")
currentToken = .comment(s)
return tokenizeCommentEnd()
} else {
fatalError("bad current token")
}
currentComment!.append("-")
return tokenizeCommentEnd()
case nil:
// parse error: eof-in-comment
state = .endOfFile
return takeCurrentToken()
case .some(let c):
if case .comment(var s) = currentToken {
s.append("--")
currentToken = .comment(s)
} else {
fatalError("bad current token")
}
currentComment!.append("--")
reconsume(c)
state = .comment
return tokenizeComment()
@ -1154,14 +1101,9 @@ private extension Tokenizer {
mutating func tokenizeCommentEndBang() -> Token? {
switch nextChar() {
case "-":
if case .comment(var s) = currentToken {
s.append("--!")
currentToken = .comment(s)
state = .commentEndDash
return tokenizeCommentEndDash()
} else {
fatalError("bad current token")
}
currentComment!.append("--!")
state = .commentEndDash
return tokenizeCommentEndDash()
case ">":
// parse error: incorrectly-closed-comment
state = .data
@ -1171,15 +1113,10 @@ private extension Tokenizer {
state = .endOfFile
return takeCurrentToken()
case .some(let c):
if case .comment(var s) = currentToken {
s.append("--!")
currentToken = .comment(s)
reconsume(c)
state = .comment
return tokenizeComment()
} else {
fatalError("bad current token")
}
currentComment!.append("--!")
reconsume(c)
state = .comment
return tokenizeComment()
}
}
@ -1210,12 +1147,12 @@ private extension Tokenizer {
// ignore the character
return tokenizeBeforeDoctypeName()
case .some(let c) where ("A"..."Z").contains(c):
currentToken = .doctype("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
currentDoctype = ("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
state = .doctypeName
return tokenizeDoctypeName()
case "\0":
// parse error: unexpected-null-character
currentToken = .doctype("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
currentDoctype = ("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
state = .doctypeName
return tokenizeDoctypeName()
case ">":
@ -1227,7 +1164,7 @@ private extension Tokenizer {
state = .endOfFile
return .doctype("", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
case .some(let c):
currentToken = .doctype("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
currentDoctype = ("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
state = .doctypeName
return tokenizeDoctypeName()
}
@ -1243,25 +1180,16 @@ private extension Tokenizer {
return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(var c):
if c == "\0" {
c = "\u{FFFD}"
} else if ("A"..."Z").contains(c) {
c = c.asciiLowercase
}
if case .doctype(var s, let forceQuirks, _, _) = currentToken {
s.append(c)
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: nil, systemIdentifier: nil)
return tokenizeDoctypeName()
} else {
fatalError("bad current token")
}
currentDoctype!.0.append(c)
return tokenizeDoctypeName()
}
}
@ -1276,12 +1204,8 @@ private extension Tokenizer {
case nil:
// parse error: eof-in-doctype
state = .endOfFile
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(let c):
reconsume(c)
let peeked = peek(count: 6).lowercased()
@ -1295,11 +1219,7 @@ private extension Tokenizer {
return tokenizeAfterDoctypeSystemKeyword()
} else {
// parse error: invalid-character-sequence-after-doctype-name
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
state = .bogusDoctype
return tokenizeBogusDoctype()
}
@ -1313,39 +1233,23 @@ private extension Tokenizer {
return tokenizeBeforeDoctypePublicIdentifier()
case .some(let c) where c == "\"" || c == "'":
// parse error: missing-whitespace-after-doctype-public-keyword
if case .doctype(let s, let forceQuirks, _, _) = currentToken {
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil)
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypePublicIdentifier(quotes)
return tokenizeDoctypePublicIdentifier(quotes: quotes)
} else {
fatalError("bad current token")
}
currentDoctype!.publicIdentifier = ""
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypePublicIdentifier(quotes)
return tokenizeDoctypePublicIdentifier(quotes: quotes)
case ">":
// parse error: missing-doctype-public-identifier
state = .data
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
state = .endOfFile
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-public-identifier
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
state = .bogusDoctype
reconsume(c)
return tokenizeBogusDoctype()
@ -1358,39 +1262,23 @@ private extension Tokenizer {
// ignore the character
return tokenizeBeforeDoctypePublicIdentifier()
case .some(let c) where c == "\"" || c == "'":
if case .doctype(let s, let forceQuirks, _, _) = currentToken {
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil)
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypePublicIdentifier(quotes)
return tokenizeDoctypePublicIdentifier(quotes: quotes)
} else {
fatalError("bad current token")
}
currentDoctype!.publicIdentifier = ""
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypePublicIdentifier(quotes)
return tokenizeDoctypePublicIdentifier(quotes: quotes)
case ">":
// parse error: missing-doctype-public-identifier
state = .data
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
state = .endOfFile
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-public-identifier
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
reconsume(c)
state = .bogusDoctype
return tokenizeBogusDoctype()
@ -1409,33 +1297,20 @@ private extension Tokenizer {
// parse error: abrupt-doctype-public-identifier
reconsume(">")
state = .data
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
state = .endOfFile
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(var c):
if c == "\0" {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
if case .doctype(let s, let forceQuirks, var publicIdentifier, _) = currentToken {
publicIdentifier!.append(c)
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: nil)
return tokenizeDoctypePublicIdentifier(quotes: quotes)
} else {
fatalError("bad current token")
}
currentDoctype!.publicIdentifier!.append(c)
return tokenizeDoctypePublicIdentifier(quotes: quotes)
}
}
@ -1449,30 +1324,18 @@ private extension Tokenizer {
return takeCurrentToken()
case .some(let c) where c == "\"" || c == "'":
// parse error: missing-whitespace-between-doctype-public-and-system-identifiers
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypeSystemIdentifier(quotes)
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
} else {
fatalError("bad current token")
}
currentDoctype!.systemIdentifier = ""
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypeSystemIdentifier(quotes)
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
case nil:
// parse error: eof-in-doctype
state = .endOfFile
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-system-identifier
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
reconsume(c)
state = .bogusDoctype
return tokenizeBogusDoctype()
@ -1488,30 +1351,18 @@ private extension Tokenizer {
state = .data
return takeCurrentToken()
case .some(let c) where c == "\"" || c == "'":
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypeSystemIdentifier(quotes)
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
} else {
fatalError("bad current token")
}
currentDoctype!.systemIdentifier = ""
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypeSystemIdentifier(quotes)
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
case nil:
// parse error: eof-in-doctype
state = .endOfFile
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-system-identifier
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
reconsume(c)
state = .bogusComment
return tokenizeBogusComment()
@ -1524,39 +1375,23 @@ private extension Tokenizer {
state = .beforeDoctypeSystemIdentifier
return tokenizeBeforeDoctypeSystemIdentifier()
case .some(let c) where c == "\"" || c == "'":
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypeSystemIdentifier(quotes)
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
} else {
fatalError("bad current token")
}
currentDoctype!.systemIdentifier = ""
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypeSystemIdentifier(quotes)
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
case ">":
// parse error: missing-doctype-system-identifier
state = .data
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
state = .endOfFile
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-system-identifier
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
reconsume(c)
state = .bogusDoctype
return tokenizeBogusDoctype()
@ -1569,39 +1404,23 @@ private extension Tokenizer {
// ignore the character
return tokenizeBeforeDoctypeSystemIdentifier()
case .some(let c) where c == "\"" || c == "'":
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: " ")
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypeSystemIdentifier(quotes)
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
} else {
fatalError("bad current token")
}
currentDoctype!.systemIdentifier = ""
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
state = .doctypeSystemIdentifier(quotes)
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
case ">":
// parse error: missing-doctype-system-identifier
state = .data
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
state = .endOfFile
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-system-identifier
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
reconsume(c)
state = .bogusDoctype
return tokenizeBogusDoctype()
@ -1619,33 +1438,20 @@ private extension Tokenizer {
case ">":
// parse error: abrupt-doctype-system-identifier
state = .data
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
state = .endOfFile
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(var c):
if c == "\0" {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
if case .doctype(let s, let forceQuirks, let publicIdentifier, var systemIdentifier) = currentToken {
systemIdentifier!.append(c)
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
} else {
fatalError("bad current token")
}
currentDoctype!.systemIdentifier!.append(c)
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
}
}
@ -1660,12 +1466,8 @@ private extension Tokenizer {
case nil:
// parse error: eof-in-doctype
state = .endOfFile
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
currentToken = nil
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
} else {
fatalError("bad current token")
}
currentDoctype!.forceQuirks = true
return takeCurrentToken()
case .some(let c):
// parse error: unexpected-character-after-doctype-system-identifier
// Note: This does not set the current DOCTYPE token's force-quirks flag to on.
@ -1699,3 +1501,15 @@ private extension Character {
return Character(Unicode.Scalar(asciiValue! + 0x20))
}
}
private extension Array {
    /// The final element of the array, exposed through yielding accessors.
    ///
    /// Optimization: the `_modify` accessor yields the element's storage directly, which
    /// allows the last element to be mutated in place (e.g. appending to a string held in
    /// the array).
    ///
    /// The element is reached through the ordinary subscript at the position just before
    /// `endIndex`, so the array must not be empty when this property is used.
    var uncheckedLast: Element {
        _read {
            let finalPosition = endIndex - 1
            yield self[finalPosition]
        }
        _modify {
            let finalPosition = endIndex - 1
            yield &self[finalPosition]
        }
    }
}