Don't use an enum with associated values for the current token
Associated values can't be modified in place, so every update had to copy the payload out and rebuild the case, resulting in a bunch of extra copies
This commit is contained in:
parent
31bd174a69
commit
f412369cf7
|
@ -16,7 +16,12 @@ struct Tokenizer<Chars: IteratorProtocol<Character>>: IteratorProtocol {
|
|||
private var returnState: State?
|
||||
private var temporaryBuffer: String?
|
||||
private var characterReferenceCode: UInt32?
|
||||
private var currentToken: Token?
|
||||
// Optimization: using an enum for the current token means we can't modify the associated values in-place
|
||||
// Separate fields for everything increases the risk of invalid states, but nets us a small perf gain.
|
||||
private var currentStartTag: (String, selfClosing: Bool, attributes: [Attribute])?
|
||||
private var currentEndTag: String?
|
||||
private var currentComment: String?
|
||||
private var currentDoctype: (String, forceQuirks: Bool, publicIdentifier: String?, systemIdentifier: String?)?
|
||||
|
||||
init(chars: Chars) {
|
||||
self.chars = chars
|
||||
|
@ -187,8 +192,21 @@ struct Tokenizer<Chars: IteratorProtocol<Character>>: IteratorProtocol {
|
|||
}
|
||||
|
||||
private mutating func takeCurrentToken() -> Token {
|
||||
defer { currentToken = nil }
|
||||
return currentToken!
|
||||
if let currentStartTag {
|
||||
self.currentStartTag = nil
|
||||
return .startTag(currentStartTag.0, selfClosing: currentStartTag.selfClosing, attributes: currentStartTag.attributes)
|
||||
} else if let currentEndTag {
|
||||
self.currentEndTag = nil
|
||||
return .endTag(currentEndTag)
|
||||
} else if let currentComment {
|
||||
self.currentComment = nil
|
||||
return .comment(currentComment)
|
||||
} else if let currentDoctype {
|
||||
self.currentDoctype = nil
|
||||
return .doctype(currentDoctype.0, forceQuirks: currentDoctype.forceQuirks, publicIdentifier: currentDoctype.publicIdentifier, systemIdentifier: currentDoctype.systemIdentifier)
|
||||
} else {
|
||||
preconditionFailure("takeCurrentToken called without current token")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -404,14 +422,9 @@ private extension Tokenizer {
|
|||
|
||||
mutating func flushCharacterReference() {
|
||||
if case .attributeValue(_) = returnState {
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes[attributes.count - 1].value.append(temporaryBuffer!)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
temporaryBuffer = nil
|
||||
state = returnState!
|
||||
} else {
|
||||
fatalError("bad current tag")
|
||||
}
|
||||
currentStartTag!.attributes.uncheckedLast.value.append(temporaryBuffer!)
|
||||
temporaryBuffer = nil
|
||||
state = returnState!
|
||||
} else {
|
||||
state = .flushingTemporaryBuffer(returnState!)
|
||||
}
|
||||
|
@ -561,13 +574,8 @@ private extension Tokenizer {
|
|||
switch c {
|
||||
case .some("0"..."9"), .some("a"..."z"), .some("A"..."Z"):
|
||||
if case .attributeValue(_) = returnState {
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes[attributes.count - 1].value.append(c!)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
return next()
|
||||
currentStartTag!.attributes.uncheckedLast.value.append(c!)
|
||||
return tokenizeAmbiguousAmpersand()
|
||||
} else {
|
||||
return .character(c!)
|
||||
}
|
||||
|
@ -590,7 +598,7 @@ private extension Tokenizer {
|
|||
return tokenizeEndTagOpen()
|
||||
case "?":
|
||||
// parse error: unexpected-question-mark-instead-of-tag-name
|
||||
currentToken = .comment("")
|
||||
currentComment = ""
|
||||
state = .bogusComment
|
||||
return tokenizeBogusComment()
|
||||
case nil:
|
||||
|
@ -598,7 +606,7 @@ private extension Tokenizer {
|
|||
state = .endOfFile
|
||||
return .character("<")
|
||||
case .some("a"..."z"), .some("A"..."Z"):
|
||||
currentToken = .startTag("", selfClosing: false, attributes: [])
|
||||
currentStartTag = ("", selfClosing: false, attributes: [])
|
||||
reconsume(c)
|
||||
state = .tagName
|
||||
return tokenizeTagName()
|
||||
|
@ -614,7 +622,7 @@ private extension Tokenizer {
|
|||
let c = nextChar()
|
||||
switch c {
|
||||
case .some("a"..."z"), .some("A"..."Z"):
|
||||
currentToken = .endTag("")
|
||||
currentEndTag = ""
|
||||
reconsume(c)
|
||||
state = .tagName
|
||||
return tokenizeTagName()
|
||||
|
@ -628,7 +636,7 @@ private extension Tokenizer {
|
|||
return .character("<")
|
||||
case .some(let c):
|
||||
// parse error: invalid-first-character-of-tag-name
|
||||
currentToken = .comment("")
|
||||
currentComment = ""
|
||||
reconsume(c)
|
||||
state = .bogusComment
|
||||
return tokenizeBogusComment()
|
||||
|
@ -636,6 +644,8 @@ private extension Tokenizer {
|
|||
}
|
||||
|
||||
mutating func tokenizeTagName() -> Token? {
|
||||
// Optimization: this is a hot path where we stay in this state for a while before emitting a token,
|
||||
// and the function call overhead of recursion costs a bit of perf.
|
||||
while true {
|
||||
switch nextChar() {
|
||||
case "\t", "\n", "\u{000C}", " ":
|
||||
|
@ -658,13 +668,11 @@ private extension Tokenizer {
|
|||
} else if ("A"..."Z").contains(c) {
|
||||
c = c.asciiLowercase
|
||||
}
|
||||
if case .startTag(var s, let selfClosing, let attributes) = currentToken {
|
||||
s.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
if currentStartTag != nil {
|
||||
currentStartTag!.0.append(c)
|
||||
continue
|
||||
} else if case .endTag(var s) = currentToken {
|
||||
s.append(c)
|
||||
currentToken = .endTag(s)
|
||||
} else if currentEndTag != nil {
|
||||
currentEndTag!.append(c)
|
||||
continue
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
|
@ -676,11 +684,7 @@ private extension Tokenizer {
|
|||
mutating func tokenizeSelfClosingStartTag() -> Token? {
|
||||
switch nextChar() {
|
||||
case ">":
|
||||
if case .startTag(let s, _, let attributes) = currentToken {
|
||||
currentToken = .startTag(s, selfClosing: true, attributes: attributes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentStartTag!.selfClosing = true
|
||||
state = .data
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
|
@ -707,22 +711,16 @@ private extension Tokenizer {
|
|||
return tokenizeAfterAttributeName()
|
||||
case "=":
|
||||
// parse error: unexpected-equals-sign-before-attribute-name
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes.append(Attribute(name: "=", value: ""))
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
state = .attributeName
|
||||
return tokenizeAttributeName()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentStartTag!.attributes.append(Attribute(name: "=", value: ""))
|
||||
state = .attributeName
|
||||
return tokenizeAttributeName()
|
||||
default:
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes.append(Attribute(name: "", value: ""))
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
if currentStartTag != nil {
|
||||
currentStartTag!.attributes.append(Attribute(name: "", value: ""))
|
||||
reconsume(c)
|
||||
state = .attributeName
|
||||
return tokenizeAttributeName()
|
||||
} else if case .endTag(_) = currentToken {
|
||||
} else if currentEndTag != nil {
|
||||
// ignore
|
||||
reconsume(c)
|
||||
state = .attributeName
|
||||
|
@ -753,11 +751,10 @@ private extension Tokenizer {
|
|||
c = "\u{FFFD}"
|
||||
}
|
||||
// if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes[attributes.count - 1].name.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
if currentStartTag != nil {
|
||||
currentStartTag!.attributes.uncheckedLast.name.append(c)
|
||||
continue
|
||||
} else if case .endTag(_) = currentToken {
|
||||
} else if currentEndTag != nil {
|
||||
continue
|
||||
} else {
|
||||
fatalError("bad curren token")
|
||||
|
@ -782,13 +779,12 @@ private extension Tokenizer {
|
|||
state = .endOfFile
|
||||
return nil
|
||||
case .some(let c):
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes.append(Attribute(name: "", value: ""))
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
if currentStartTag != nil {
|
||||
currentStartTag!.attributes.append(Attribute(name: "", value: ""))
|
||||
reconsume(c)
|
||||
state = .attributeName
|
||||
return tokenizeAttributeName()
|
||||
} else if case .endTag(_) = currentToken {
|
||||
} else if currentEndTag != nil {
|
||||
reconsume(c)
|
||||
state = .attributeName
|
||||
return tokenizeAttributeName()
|
||||
|
@ -840,11 +836,10 @@ private extension Tokenizer {
|
|||
return nil
|
||||
case .some(let c):
|
||||
// if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes[attributes.count - 1].value.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
if currentStartTag != nil {
|
||||
currentStartTag!.attributes.uncheckedLast.value.append(c)
|
||||
continue
|
||||
} else if case .endTag(_) = currentToken {
|
||||
} else if currentEndTag != nil {
|
||||
continue
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
|
@ -872,14 +867,11 @@ private extension Tokenizer {
|
|||
// parse error: unexpected-null-character
|
||||
c = "\u{FFFD}"
|
||||
}
|
||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
||||
attributes[attributes.count - 1].value.append(c)
|
||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
||||
if currentStartTag != nil {
|
||||
currentStartTag!.attributes.uncheckedLast.value.append(c)
|
||||
continue
|
||||
} else if case .endTag(_) = currentToken {
|
||||
} else if currentEndTag != nil {
|
||||
continue
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -922,13 +914,8 @@ private extension Tokenizer {
|
|||
// parse error: unexpected-null-character
|
||||
c = "\u{FFFD}"
|
||||
}
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append(c)
|
||||
currentToken = .comment(s)
|
||||
return tokenizeBogusComment()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append(c)
|
||||
return tokenizeBogusComment()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -936,7 +923,7 @@ private extension Tokenizer {
|
|||
let peeked = peek(count: 7)
|
||||
if peeked.starts(with: "--") {
|
||||
consume(count: 2)
|
||||
currentToken = .comment("")
|
||||
currentComment = ""
|
||||
state = .commentStart
|
||||
return tokenizeCommentStart()
|
||||
} else if peeked.lowercased() == "doctype" {
|
||||
|
@ -946,12 +933,12 @@ private extension Tokenizer {
|
|||
} else if peeked == "[CDATA[" {
|
||||
// TODO: we don't do any of the tree construction stuff yet, so can't really handle this
|
||||
// consume(count: 7)
|
||||
currentToken = .comment("")
|
||||
currentComment = ""
|
||||
state = .bogusComment
|
||||
return tokenizeBogusComment()
|
||||
} else {
|
||||
// parse error: incorrectly-opened-comment
|
||||
currentToken = .comment("")
|
||||
currentComment = ""
|
||||
state = .bogusComment
|
||||
return tokenizeBogusComment()
|
||||
}
|
||||
|
@ -986,29 +973,19 @@ private extension Tokenizer {
|
|||
// parse error: eof-in-comment
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append("-")
|
||||
currentToken = .comment(s)
|
||||
reconsume(c)
|
||||
state = .comment
|
||||
return tokenizeComment()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append("-")
|
||||
reconsume(c)
|
||||
state = .comment
|
||||
return tokenizeComment()
|
||||
}
|
||||
}
|
||||
|
||||
mutating func tokenizeComment() -> Token? {
|
||||
switch nextChar() {
|
||||
case "<":
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append("<")
|
||||
currentToken = .comment(s)
|
||||
state = .commentLessThanSign
|
||||
return tokenizeCommentLessThanSign()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append("<")
|
||||
state = .commentLessThanSign
|
||||
return tokenizeCommentLessThanSign()
|
||||
case "-":
|
||||
state = .commentEndDash
|
||||
return tokenizeCommentEndDash()
|
||||
|
@ -1021,35 +998,20 @@ private extension Tokenizer {
|
|||
// parse error: unexpected-null-character
|
||||
c = "\u{FFFD}"
|
||||
}
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append(c)
|
||||
currentToken = .comment(s)
|
||||
return tokenizeComment()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append(c)
|
||||
return tokenizeComment()
|
||||
}
|
||||
}
|
||||
|
||||
mutating func tokenizeCommentLessThanSign() -> Token? {
|
||||
switch nextChar() {
|
||||
case "!":
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append("!")
|
||||
currentToken = .comment(s)
|
||||
state = .commentLessThanSignBang
|
||||
return tokenizeCommentLessThanSignBang()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append("!")
|
||||
state = .commentLessThanSignBang
|
||||
return tokenizeCommentLessThanSignBang()
|
||||
case "<":
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append("<")
|
||||
currentToken = .comment(s)
|
||||
return tokenizeComment()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append("<")
|
||||
return tokenizeComment()
|
||||
case let c:
|
||||
reconsume(c)
|
||||
state = .comment
|
||||
|
@ -1106,15 +1068,10 @@ private extension Tokenizer {
|
|||
state = .endOfFile
|
||||
return takeCurrentToken()
|
||||
case let c:
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append("-")
|
||||
currentToken = .comment(s)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append("-")
|
||||
reconsume(c)
|
||||
state = .comment
|
||||
return next()
|
||||
return tokenizeComment()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1127,24 +1084,14 @@ private extension Tokenizer {
|
|||
state = .commentEndBang
|
||||
return tokenizeCommentEndBang()
|
||||
case "-":
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append("-")
|
||||
currentToken = .comment(s)
|
||||
return tokenizeCommentEnd()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append("-")
|
||||
return tokenizeCommentEnd()
|
||||
case nil:
|
||||
// parse error: eof-in-comment
|
||||
state = .endOfFile
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append("--")
|
||||
currentToken = .comment(s)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append("--")
|
||||
reconsume(c)
|
||||
state = .comment
|
||||
return tokenizeComment()
|
||||
|
@ -1154,14 +1101,9 @@ private extension Tokenizer {
|
|||
mutating func tokenizeCommentEndBang() -> Token? {
|
||||
switch nextChar() {
|
||||
case "-":
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append("--!")
|
||||
currentToken = .comment(s)
|
||||
state = .commentEndDash
|
||||
return tokenizeCommentEndDash()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append("--!")
|
||||
state = .commentEndDash
|
||||
return tokenizeCommentEndDash()
|
||||
case ">":
|
||||
// parse error: incorrectly-closed-comment
|
||||
state = .data
|
||||
|
@ -1171,15 +1113,10 @@ private extension Tokenizer {
|
|||
state = .endOfFile
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
if case .comment(var s) = currentToken {
|
||||
s.append("--!")
|
||||
currentToken = .comment(s)
|
||||
reconsume(c)
|
||||
state = .comment
|
||||
return tokenizeComment()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentComment!.append("--!")
|
||||
reconsume(c)
|
||||
state = .comment
|
||||
return tokenizeComment()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1210,12 +1147,12 @@ private extension Tokenizer {
|
|||
// ignore the character
|
||||
return tokenizeBeforeDoctypeName()
|
||||
case .some(let c) where ("A"..."Z").contains(c):
|
||||
currentToken = .doctype("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||
currentDoctype = ("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||
state = .doctypeName
|
||||
return tokenizeDoctypeName()
|
||||
case "\0":
|
||||
// parse error: unexpected-null-character
|
||||
currentToken = .doctype("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||
currentDoctype = ("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||
state = .doctypeName
|
||||
return tokenizeDoctypeName()
|
||||
case ">":
|
||||
|
@ -1227,7 +1164,7 @@ private extension Tokenizer {
|
|||
state = .endOfFile
|
||||
return .doctype("", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||
case .some(let c):
|
||||
currentToken = .doctype("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||
currentDoctype = ("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||
state = .doctypeName
|
||||
return tokenizeDoctypeName()
|
||||
}
|
||||
|
@ -1243,25 +1180,16 @@ private extension Tokenizer {
|
|||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-doctype
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(var c):
|
||||
if c == "\0" {
|
||||
c = "\u{FFFD}"
|
||||
} else if ("A"..."Z").contains(c) {
|
||||
c = c.asciiLowercase
|
||||
}
|
||||
if case .doctype(var s, let forceQuirks, _, _) = currentToken {
|
||||
s.append(c)
|
||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: nil, systemIdentifier: nil)
|
||||
return tokenizeDoctypeName()
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.0.append(c)
|
||||
return tokenizeDoctypeName()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1276,12 +1204,8 @@ private extension Tokenizer {
|
|||
case nil:
|
||||
// parse error: eof-in-doctype
|
||||
state = .endOfFile
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
reconsume(c)
|
||||
let peeked = peek(count: 6).lowercased()
|
||||
|
@ -1295,11 +1219,7 @@ private extension Tokenizer {
|
|||
return tokenizeAfterDoctypeSystemKeyword()
|
||||
} else {
|
||||
// parse error: invalid-character-sequence-after-doctype-name
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
state = .bogusDoctype
|
||||
return tokenizeBogusDoctype()
|
||||
}
|
||||
|
@ -1313,39 +1233,23 @@ private extension Tokenizer {
|
|||
return tokenizeBeforeDoctypePublicIdentifier()
|
||||
case .some(let c) where c == "\"" || c == "'":
|
||||
// parse error: missing-whitespace-after-doctype-public-keyword
|
||||
if case .doctype(let s, let forceQuirks, _, _) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil)
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypePublicIdentifier(quotes)
|
||||
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.publicIdentifier = ""
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypePublicIdentifier(quotes)
|
||||
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
||||
case ">":
|
||||
// parse error: missing-doctype-public-identifier
|
||||
state = .data
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-doctype
|
||||
state = .endOfFile
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
// parse error: missing-quote-before-doctype-public-identifier
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
state = .bogusDoctype
|
||||
reconsume(c)
|
||||
return tokenizeBogusDoctype()
|
||||
|
@ -1358,39 +1262,23 @@ private extension Tokenizer {
|
|||
// ignore the character
|
||||
return tokenizeBeforeDoctypePublicIdentifier()
|
||||
case .some(let c) where c == "\"" || c == "'":
|
||||
if case .doctype(let s, let forceQuirks, _, _) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil)
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypePublicIdentifier(quotes)
|
||||
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.publicIdentifier = ""
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypePublicIdentifier(quotes)
|
||||
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
||||
case ">":
|
||||
// parse error: missing-doctype-public-identifier
|
||||
state = .data
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-doctype
|
||||
state = .endOfFile
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
// parse error: missing-quote-before-doctype-public-identifier
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
reconsume(c)
|
||||
state = .bogusDoctype
|
||||
return tokenizeBogusDoctype()
|
||||
|
@ -1409,33 +1297,20 @@ private extension Tokenizer {
|
|||
// parse error: abrupt-doctype-public-identifier
|
||||
reconsume(">")
|
||||
state = .data
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-doctype
|
||||
state = .endOfFile
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(var c):
|
||||
if c == "\0" {
|
||||
// parse error: unexpected-null-character
|
||||
c = "\u{FFFD}"
|
||||
}
|
||||
if case .doctype(let s, let forceQuirks, var publicIdentifier, _) = currentToken {
|
||||
publicIdentifier!.append(c)
|
||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: nil)
|
||||
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.publicIdentifier!.append(c)
|
||||
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1449,30 +1324,18 @@ private extension Tokenizer {
|
|||
return takeCurrentToken()
|
||||
case .some(let c) where c == "\"" || c == "'":
|
||||
// parse error: missing-whitespace-between-doctype-public-and-system-identifiers
|
||||
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypeSystemIdentifier(quotes)
|
||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.systemIdentifier = ""
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypeSystemIdentifier(quotes)
|
||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||
case nil:
|
||||
// parse error: eof-in-doctype
|
||||
state = .endOfFile
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
// parse error: missing-quote-before-doctype-system-identifier
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
reconsume(c)
|
||||
state = .bogusDoctype
|
||||
return tokenizeBogusDoctype()
|
||||
|
@ -1488,30 +1351,18 @@ private extension Tokenizer {
|
|||
state = .data
|
||||
return takeCurrentToken()
|
||||
case .some(let c) where c == "\"" || c == "'":
|
||||
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypeSystemIdentifier(quotes)
|
||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.systemIdentifier = ""
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypeSystemIdentifier(quotes)
|
||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||
case nil:
|
||||
// parse error: eof-in-doctype
|
||||
state = .endOfFile
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
// parse error: missing-quote-before-doctype-system-identifier
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
reconsume(c)
|
||||
state = .bogusComment
|
||||
return tokenizeBogusComment()
|
||||
|
@ -1524,39 +1375,23 @@ private extension Tokenizer {
|
|||
state = .beforeDoctypeSystemIdentifier
|
||||
return tokenizeBeforeDoctypeSystemIdentifier()
|
||||
case .some(let c) where c == "\"" || c == "'":
|
||||
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypeSystemIdentifier(quotes)
|
||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.systemIdentifier = ""
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypeSystemIdentifier(quotes)
|
||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||
case ">":
|
||||
// parse error: missing-doctype-system-identifier
|
||||
state = .data
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-doctype
|
||||
state = .endOfFile
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
// parse error: missing-quote-before-doctype-system-identifier
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
reconsume(c)
|
||||
state = .bogusDoctype
|
||||
return tokenizeBogusDoctype()
|
||||
|
@ -1569,39 +1404,23 @@ private extension Tokenizer {
|
|||
// ignore the character
|
||||
return tokenizeBeforeDoctypeSystemIdentifier()
|
||||
case .some(let c) where c == "\"" || c == "'":
|
||||
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: " ")
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypeSystemIdentifier(quotes)
|
||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.systemIdentifier = ""
|
||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||
state = .doctypeSystemIdentifier(quotes)
|
||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||
case ">":
|
||||
// parse error: missing-doctype-system-identifier
|
||||
state = .data
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-doctype:
|
||||
state = .endOfFile
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
// parse error: missing-quote-before-doctype-system-identifier
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
reconsume(c)
|
||||
state = .bogusDoctype
|
||||
return tokenizeBogusDoctype()
|
||||
|
@ -1619,33 +1438,20 @@ private extension Tokenizer {
|
|||
case ">":
|
||||
// parse error: abrupt-doctype-system-identifier
|
||||
state = .data
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case nil:
|
||||
// parse error: eof-in-doctype
|
||||
state = .endOfFile
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(var c):
|
||||
if c == "\0" {
|
||||
// parse error: unexpected-null-character
|
||||
c = "\u{FFFD}"
|
||||
}
|
||||
if case .doctype(let s, let forceQuirks, let publicIdentifier, var systemIdentifier) = currentToken {
|
||||
systemIdentifier!.append(c)
|
||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.systemIdentifier!.append(c)
|
||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1660,12 +1466,8 @@ private extension Tokenizer {
|
|||
case nil:
|
||||
// parse error: eof-in-doctype
|
||||
state = .endOfFile
|
||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
||||
currentToken = nil
|
||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
||||
} else {
|
||||
fatalError("bad current token")
|
||||
}
|
||||
currentDoctype!.forceQuirks = true
|
||||
return takeCurrentToken()
|
||||
case .some(let c):
|
||||
// parse error: unexpected-character-after-doctype-system-identifier
|
||||
// Note: This does not set the current DOCTYPE token's force-quirks flag to on.
|
||||
|
@ -1699,3 +1501,15 @@ private extension Character {
|
|||
return Character(Unicode.Scalar(asciiValue! + 0x20))
|
||||
}
|
||||
}
|
||||
|
||||
private extension Array {
    /// The final element of the array, readable and writable in place.
    ///
    /// Optimization: the `_modify` accessor yields the element's storage directly,
    /// so a caller can write `array.uncheckedLast.append(x)` and mutate the last
    /// element where it lives.
    ///
    /// "Unchecked" because no emptiness test is performed here: on an empty array
    /// the computed index is `-1`, which is out of bounds for the subscript.
    /// Callers must only use this when the array is known to be non-empty.
    var uncheckedLast: Element {
        _read {
            // For Array, endIndex equals count, so this is the index of the last element.
            let lastIndex = endIndex - 1
            yield self[lastIndex]
        }
        _modify {
            let lastIndex = endIndex - 1
            yield &self[lastIndex]
        }
    }
}
|
||||
|
|
Loading…
Reference in New Issue