diff --git a/Sources/HTMLStreamer/Tokenizer.swift b/Sources/HTMLStreamer/Tokenizer.swift
index d91fbbf..2dd39d0 100644
--- a/Sources/HTMLStreamer/Tokenizer.swift
+++ b/Sources/HTMLStreamer/Tokenizer.swift
@@ -16,7 +16,12 @@ struct Tokenizer>: IteratorProtocol {
private var returnState: State?
private var temporaryBuffer: String?
private var characterReferenceCode: UInt32?
- private var currentToken: Token?
+ // Optimization: with an enum for the current token, associated values can't be modified in place; every change has to extract them and rebuild the case.
+ // Separate optional fields for each token kind increase the risk of invalid states, but net us a small perf gain.
+ private var currentStartTag: (String, selfClosing: Bool, attributes: [Attribute])?
+ private var currentEndTag: String?
+ private var currentComment: String?
+ private var currentDoctype: (String, forceQuirks: Bool, publicIdentifier: String?, systemIdentifier: String?)?
init(chars: Chars) {
self.chars = chars
@@ -187,8 +192,21 @@ struct Tokenizer>: IteratorProtocol {
}
private mutating func takeCurrentToken() -> Token {
- defer { currentToken = nil }
- return currentToken!
+ if let currentStartTag { // The pending-token fields are checked in a fixed order: start tag, end tag, comment, doctype.
+ self.currentStartTag = nil // Clear the stored field; the shadowing local copy is what gets returned.
+ return .startTag(currentStartTag.0, selfClosing: currentStartTag.selfClosing, attributes: currentStartTag.attributes)
+ } else if let currentEndTag {
+ self.currentEndTag = nil
+ return .endTag(currentEndTag)
+ } else if let currentComment {
+ self.currentComment = nil
+ return .comment(currentComment)
+ } else if let currentDoctype {
+ self.currentDoctype = nil
+ return .doctype(currentDoctype.0, forceQuirks: currentDoctype.forceQuirks, publicIdentifier: currentDoctype.publicIdentifier, systemIdentifier: currentDoctype.systemIdentifier)
+ } else {
+ preconditionFailure("takeCurrentToken called without current token") // Reached only when all four pending-token fields are nil.
+ }
}
}
@@ -404,14 +422,9 @@ private extension Tokenizer {
mutating func flushCharacterReference() {
if case .attributeValue(_) = returnState {
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes[attributes.count - 1].value.append(temporaryBuffer!)
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
- temporaryBuffer = nil
- state = returnState!
- } else {
- fatalError("bad current tag")
- }
+ currentStartTag!.attributes.uncheckedLast.value.append(temporaryBuffer!)
+ temporaryBuffer = nil
+ state = returnState!
} else {
state = .flushingTemporaryBuffer(returnState!)
}
@@ -561,13 +574,8 @@ private extension Tokenizer {
switch c {
case .some("0"..."9"), .some("a"..."z"), .some("A"..."Z"):
if case .attributeValue(_) = returnState {
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes[attributes.count - 1].value.append(c!)
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
- } else {
- fatalError("bad current token")
- }
- return next()
+ currentStartTag!.attributes.uncheckedLast.value.append(c!)
+ return tokenizeAmbiguousAmpersand()
} else {
return .character(c!)
}
@@ -590,7 +598,7 @@ private extension Tokenizer {
return tokenizeEndTagOpen()
case "?":
// parse error: unexpected-question-mark-instead-of-tag-name
- currentToken = .comment("")
+ currentComment = ""
state = .bogusComment
return tokenizeBogusComment()
case nil:
@@ -598,7 +606,7 @@ private extension Tokenizer {
state = .endOfFile
return .character("<")
case .some("a"..."z"), .some("A"..."Z"):
- currentToken = .startTag("", selfClosing: false, attributes: [])
+ currentStartTag = ("", selfClosing: false, attributes: [])
reconsume(c)
state = .tagName
return tokenizeTagName()
@@ -614,7 +622,7 @@ private extension Tokenizer {
let c = nextChar()
switch c {
case .some("a"..."z"), .some("A"..."Z"):
- currentToken = .endTag("")
+ currentEndTag = ""
reconsume(c)
state = .tagName
return tokenizeTagName()
@@ -628,7 +636,7 @@ private extension Tokenizer {
return .character("<")
case .some(let c):
// parse error: invalid-first-character-of-tag-name
- currentToken = .comment("")
+ currentComment = ""
reconsume(c)
state = .bogusComment
return tokenizeBogusComment()
@@ -636,6 +644,8 @@ private extension Tokenizer {
}
mutating func tokenizeTagName() -> Token? {
+ // Optimization: this is a hot path where we stay in this state for a while before emitting a token,
+ // and the function call overhead of recursion costs a bit of perf, so this state loops instead of recursing.
while true {
switch nextChar() {
case "\t", "\n", "\u{000C}", " ":
@@ -658,13 +668,11 @@ private extension Tokenizer {
} else if ("A"..."Z").contains(c) {
c = c.asciiLowercase
}
- if case .startTag(var s, let selfClosing, let attributes) = currentToken {
- s.append(c)
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+ if currentStartTag != nil {
+ currentStartTag!.0.append(c)
continue
- } else if case .endTag(var s) = currentToken {
- s.append(c)
- currentToken = .endTag(s)
+ } else if currentEndTag != nil {
+ currentEndTag!.append(c)
continue
} else {
fatalError("bad current token")
@@ -676,11 +684,7 @@ private extension Tokenizer {
mutating func tokenizeSelfClosingStartTag() -> Token? {
switch nextChar() {
case ">":
- if case .startTag(let s, _, let attributes) = currentToken {
- currentToken = .startTag(s, selfClosing: true, attributes: attributes)
- } else {
- fatalError("bad current token")
- }
+ currentStartTag!.selfClosing = true
state = .data
return takeCurrentToken()
case nil:
@@ -707,22 +711,16 @@ private extension Tokenizer {
return tokenizeAfterAttributeName()
case "=":
// parse error: unexpected-equals-sign-before-attribute-name
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes.append(Attribute(name: "=", value: ""))
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
- state = .attributeName
- return tokenizeAttributeName()
- } else {
- fatalError("bad current token")
- }
+ currentStartTag!.attributes.append(Attribute(name: "=", value: ""))
+ state = .attributeName
+ return tokenizeAttributeName()
default:
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes.append(Attribute(name: "", value: ""))
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+ if currentStartTag != nil {
+ currentStartTag!.attributes.append(Attribute(name: "", value: ""))
reconsume(c)
state = .attributeName
return tokenizeAttributeName()
- } else if case .endTag(_) = currentToken {
+ } else if currentEndTag != nil {
// ignore
reconsume(c)
state = .attributeName
@@ -753,11 +751,10 @@ private extension Tokenizer {
c = "\u{FFFD}"
}
// if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes[attributes.count - 1].name.append(c)
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+ if currentStartTag != nil {
+ currentStartTag!.attributes.uncheckedLast.name.append(c)
continue
- } else if case .endTag(_) = currentToken {
+ } else if currentEndTag != nil {
continue
} else {
fatalError("bad curren token")
@@ -782,13 +779,12 @@ private extension Tokenizer {
state = .endOfFile
return nil
case .some(let c):
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes.append(Attribute(name: "", value: ""))
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+ if currentStartTag != nil {
+ currentStartTag!.attributes.append(Attribute(name: "", value: ""))
reconsume(c)
state = .attributeName
return tokenizeAttributeName()
- } else if case .endTag(_) = currentToken {
+ } else if currentEndTag != nil {
reconsume(c)
state = .attributeName
return tokenizeAttributeName()
@@ -840,11 +836,10 @@ private extension Tokenizer {
return nil
case .some(let c):
// if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes[attributes.count - 1].value.append(c)
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+ if currentStartTag != nil {
+ currentStartTag!.attributes.uncheckedLast.value.append(c)
continue
- } else if case .endTag(_) = currentToken {
+ } else if currentEndTag != nil {
continue
} else {
fatalError("bad current token")
@@ -872,14 +867,11 @@ private extension Tokenizer {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
- if case .startTag(let s, let selfClosing, var attributes) = currentToken {
- attributes[attributes.count - 1].value.append(c)
- currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
+ if currentStartTag != nil {
+ currentStartTag!.attributes.uncheckedLast.value.append(c)
continue
- } else if case .endTag(_) = currentToken {
+ } else if currentEndTag != nil {
continue
- } else {
- fatalError("bad current token")
}
}
}
@@ -922,13 +914,8 @@ private extension Tokenizer {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
- if case .comment(var s) = currentToken {
- s.append(c)
- currentToken = .comment(s)
- return tokenizeBogusComment()
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append(c)
+ return tokenizeBogusComment()
}
}
@@ -936,7 +923,7 @@ private extension Tokenizer {
let peeked = peek(count: 7)
if peeked.starts(with: "--") {
consume(count: 2)
- currentToken = .comment("")
+ currentComment = ""
state = .commentStart
return tokenizeCommentStart()
} else if peeked.lowercased() == "doctype" {
@@ -946,12 +933,12 @@ private extension Tokenizer {
} else if peeked == "[CDATA[" {
// TODO: we don't do any of the tree construction stuff yet, so can't really handle this
// consume(count: 7)
- currentToken = .comment("")
+ currentComment = ""
state = .bogusComment
return tokenizeBogusComment()
} else {
// parse error: incorrectly-opened-comment
- currentToken = .comment("")
+ currentComment = ""
state = .bogusComment
return tokenizeBogusComment()
}
@@ -986,29 +973,19 @@ private extension Tokenizer {
// parse error: eof-in-comment
return takeCurrentToken()
case .some(let c):
- if case .comment(var s) = currentToken {
- s.append("-")
- currentToken = .comment(s)
- reconsume(c)
- state = .comment
- return tokenizeComment()
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append("-")
+ reconsume(c)
+ state = .comment
+ return tokenizeComment()
}
}
mutating func tokenizeComment() -> Token? {
switch nextChar() {
case "<":
- if case .comment(var s) = currentToken {
- s.append("<")
- currentToken = .comment(s)
- state = .commentLessThanSign
- return tokenizeCommentLessThanSign()
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append("<")
+ state = .commentLessThanSign
+ return tokenizeCommentLessThanSign()
case "-":
state = .commentEndDash
return tokenizeCommentEndDash()
@@ -1021,35 +998,20 @@ private extension Tokenizer {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
- if case .comment(var s) = currentToken {
- s.append(c)
- currentToken = .comment(s)
- return tokenizeComment()
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append(c)
+ return tokenizeComment()
}
}
mutating func tokenizeCommentLessThanSign() -> Token? {
switch nextChar() {
case "!":
- if case .comment(var s) = currentToken {
- s.append("!")
- currentToken = .comment(s)
- state = .commentLessThanSignBang
- return tokenizeCommentLessThanSignBang()
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append("!")
+ state = .commentLessThanSignBang
+ return tokenizeCommentLessThanSignBang()
case "<":
- if case .comment(var s) = currentToken {
- s.append("<")
- currentToken = .comment(s)
- return tokenizeComment()
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append("<")
+ return tokenizeComment()
case let c:
reconsume(c)
state = .comment
@@ -1106,15 +1068,10 @@ private extension Tokenizer {
state = .endOfFile
return takeCurrentToken()
case let c:
- if case .comment(var s) = currentToken {
- s.append("-")
- currentToken = .comment(s)
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append("-")
reconsume(c)
state = .comment
- return next()
+ return tokenizeComment()
}
}
@@ -1127,24 +1084,14 @@ private extension Tokenizer {
state = .commentEndBang
return tokenizeCommentEndBang()
case "-":
- if case .comment(var s) = currentToken {
- s.append("-")
- currentToken = .comment(s)
- return tokenizeCommentEnd()
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append("-")
+ return tokenizeCommentEnd()
case nil:
// parse error: eof-in-comment
state = .endOfFile
return takeCurrentToken()
case .some(let c):
- if case .comment(var s) = currentToken {
- s.append("--")
- currentToken = .comment(s)
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append("--")
reconsume(c)
state = .comment
return tokenizeComment()
@@ -1154,14 +1101,9 @@ private extension Tokenizer {
mutating func tokenizeCommentEndBang() -> Token? {
switch nextChar() {
case "-":
- if case .comment(var s) = currentToken {
- s.append("--!")
- currentToken = .comment(s)
- state = .commentEndDash
- return tokenizeCommentEndDash()
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append("--!")
+ state = .commentEndDash
+ return tokenizeCommentEndDash()
case ">":
// parse error: incorrectly-closed-comment
state = .data
@@ -1171,15 +1113,10 @@ private extension Tokenizer {
state = .endOfFile
return takeCurrentToken()
case .some(let c):
- if case .comment(var s) = currentToken {
- s.append("--!")
- currentToken = .comment(s)
- reconsume(c)
- state = .comment
- return tokenizeComment()
- } else {
- fatalError("bad current token")
- }
+ currentComment!.append("--!")
+ reconsume(c)
+ state = .comment
+ return tokenizeComment()
}
}
@@ -1210,12 +1147,12 @@ private extension Tokenizer {
// ignore the character
return tokenizeBeforeDoctypeName()
case .some(let c) where ("A"..."Z").contains(c):
- currentToken = .doctype("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
+ currentDoctype = ("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
state = .doctypeName
return tokenizeDoctypeName()
case "\0":
// parse error: unexpected-null-character
- currentToken = .doctype("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
+ currentDoctype = ("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
state = .doctypeName
return tokenizeDoctypeName()
case ">":
@@ -1227,7 +1164,7 @@ private extension Tokenizer {
state = .endOfFile
return .doctype("", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
case .some(let c):
- currentToken = .doctype("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
+ currentDoctype = ("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
state = .doctypeName
return tokenizeDoctypeName()
}
@@ -1243,25 +1180,16 @@ private extension Tokenizer {
return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(var c):
if c == "\0" {
c = "\u{FFFD}"
} else if ("A"..."Z").contains(c) {
c = c.asciiLowercase
}
- if case .doctype(var s, let forceQuirks, _, _) = currentToken {
- s.append(c)
- currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: nil, systemIdentifier: nil)
- return tokenizeDoctypeName()
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.0.append(c)
+ return tokenizeDoctypeName()
}
}
@@ -1276,12 +1204,8 @@ private extension Tokenizer {
case nil:
// parse error: eof-in-doctype
state = .endOfFile
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(let c):
reconsume(c)
let peeked = peek(count: 6).lowercased()
@@ -1295,11 +1219,7 @@ private extension Tokenizer {
return tokenizeAfterDoctypeSystemKeyword()
} else {
// parse error: invalid-character-sequence-after-doctype-name
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
state = .bogusDoctype
return tokenizeBogusDoctype()
}
@@ -1313,39 +1233,23 @@ private extension Tokenizer {
return tokenizeBeforeDoctypePublicIdentifier()
case .some(let c) where c == "\"" || c == "'":
// parse error: missing-whitespace-after-doctype-public-keyword
- if case .doctype(let s, let forceQuirks, _, _) = currentToken {
- currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil)
- let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
- state = .doctypePublicIdentifier(quotes)
- return tokenizeDoctypePublicIdentifier(quotes: quotes)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.publicIdentifier = ""
+ let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
+ state = .doctypePublicIdentifier(quotes)
+ return tokenizeDoctypePublicIdentifier(quotes: quotes)
case ">":
// parse error: missing-doctype-public-identifier
state = .data
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
state = .endOfFile
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-public-identifier
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
state = .bogusDoctype
reconsume(c)
return tokenizeBogusDoctype()
@@ -1358,39 +1262,23 @@ private extension Tokenizer {
// ignore the character
return tokenizeBeforeDoctypePublicIdentifier()
case .some(let c) where c == "\"" || c == "'":
- if case .doctype(let s, let forceQuirks, _, _) = currentToken {
- currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil)
- let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
- state = .doctypePublicIdentifier(quotes)
- return tokenizeDoctypePublicIdentifier(quotes: quotes)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.publicIdentifier = ""
+ let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
+ state = .doctypePublicIdentifier(quotes)
+ return tokenizeDoctypePublicIdentifier(quotes: quotes)
case ">":
// parse error: missing-doctype-public-identifier
state = .data
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
state = .endOfFile
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-public-identifier
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
reconsume(c)
state = .bogusDoctype
return tokenizeBogusDoctype()
@@ -1409,33 +1297,20 @@ private extension Tokenizer {
// parse error: abrupt-doctype-public-identifier
reconsume(">")
state = .data
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
state = .endOfFile
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(var c):
if c == "\0" {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
- if case .doctype(let s, let forceQuirks, var publicIdentifier, _) = currentToken {
- publicIdentifier!.append(c)
- currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: nil)
- return tokenizeDoctypePublicIdentifier(quotes: quotes)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.publicIdentifier!.append(c)
+ return tokenizeDoctypePublicIdentifier(quotes: quotes)
}
}
@@ -1449,30 +1324,18 @@ private extension Tokenizer {
return takeCurrentToken()
case .some(let c) where c == "\"" || c == "'":
// parse error: missing-whitespace-between-doctype-public-and-system-identifiers
- if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
- currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
- let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
- state = .doctypeSystemIdentifier(quotes)
- return tokenizeDoctypeSystemIdentifier(quotes: quotes)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.systemIdentifier = ""
+ let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
+ state = .doctypeSystemIdentifier(quotes)
+ return tokenizeDoctypeSystemIdentifier(quotes: quotes)
case nil:
// parse error: eof-in-doctype
state = .endOfFile
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-system-identifier
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
reconsume(c)
state = .bogusDoctype
return tokenizeBogusDoctype()
@@ -1488,30 +1351,18 @@ private extension Tokenizer {
state = .data
return takeCurrentToken()
case .some(let c) where c == "\"" || c == "'":
- if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
- currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
- let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
- state = .doctypeSystemIdentifier(quotes)
- return tokenizeDoctypeSystemIdentifier(quotes: quotes)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.systemIdentifier = ""
+ let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
+ state = .doctypeSystemIdentifier(quotes)
+ return tokenizeDoctypeSystemIdentifier(quotes: quotes)
case nil:
// parse error: eof-in-doctype
state = .endOfFile
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-system-identifier
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
reconsume(c)
state = .bogusComment
return tokenizeBogusComment()
@@ -1524,39 +1375,23 @@ private extension Tokenizer {
state = .beforeDoctypeSystemIdentifier
return tokenizeBeforeDoctypeSystemIdentifier()
case .some(let c) where c == "\"" || c == "'":
- if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
- currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
- let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
- state = .doctypeSystemIdentifier(quotes)
- return tokenizeDoctypeSystemIdentifier(quotes: quotes)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.systemIdentifier = ""
+ let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
+ state = .doctypeSystemIdentifier(quotes)
+ return tokenizeDoctypeSystemIdentifier(quotes: quotes)
case ">":
// parse error: missing-doctype-system-identifier
state = .data
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case nil:
// parse error: eof-in-doctype:
state = .endOfFile
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-system-identifier
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
reconsume(c)
state = .bogusDoctype
return tokenizeBogusDoctype()
@@ -1569,39 +1404,23 @@ private extension Tokenizer {
// ignore the character
return tokenizeBeforeDoctypeSystemIdentifier()
case .some(let c) where c == "\"" || c == "'":
- if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
- currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: " ")
- let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
- state = .doctypeSystemIdentifier(quotes)
- return tokenizeDoctypeSystemIdentifier(quotes: quotes)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.systemIdentifier = ""
+ let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
+ state = .doctypeSystemIdentifier(quotes)
+ return tokenizeDoctypeSystemIdentifier(quotes: quotes)
case ">":
// parse error: missing-doctype-system-identifier
state = .data
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case nil:
// parse error: eof-in-doctype:
state = .endOfFile
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(let c):
// parse error: missing-quote-before-doctype-system-identifier
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
reconsume(c)
state = .bogusDoctype
return tokenizeBogusDoctype()
@@ -1619,33 +1438,20 @@ private extension Tokenizer {
case ">":
// parse error: abrupt-doctype-system-identifier
state = .data
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case nil:
// parse error: eof-in-doctype
state = .endOfFile
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(var c):
if c == "\0" {
// parse error: unexpected-null-character
c = "\u{FFFD}"
}
- if case .doctype(let s, let forceQuirks, let publicIdentifier, var systemIdentifier) = currentToken {
- systemIdentifier!.append(c)
- currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- return tokenizeDoctypeSystemIdentifier(quotes: quotes)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.systemIdentifier!.append(c)
+ return tokenizeDoctypeSystemIdentifier(quotes: quotes)
}
}
@@ -1660,12 +1466,8 @@ private extension Tokenizer {
case nil:
// parse error: eof-in-doctype
state = .endOfFile
- if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
- currentToken = nil
- return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
- } else {
- fatalError("bad current token")
- }
+ currentDoctype!.forceQuirks = true
+ return takeCurrentToken()
case .some(let c):
// parse error: unexpected-character-after-doctype-system-identifier
// Note: This does not set the current DOCTYPE token's force-quirks flag to on.
@@ -1699,3 +1501,15 @@ private extension Character {
return Character(Unicode.Scalar(asciiValue! + 0x20))
}
}
+
+private extension Array {
+ // Optimization: non-optional access to the last element (`self[count - 1]`) that allows modifying it in place. Callers must ensure the array is non-empty.
+ var uncheckedLast: Element {
+ _read { // Read access: yields the last element.
+ yield self[count - 1]
+ }
+ _modify { // Write access: yields the last element by reference so the caller's mutation applies to it in place.
+ yield &self[count - 1]
+ }
+ }
+}