From f412369cf787d49989272ab7db89f25dad72f433 Mon Sep 17 00:00:00 2001 From: Shadowfacts Date: Tue, 28 Nov 2023 10:36:04 -0500 Subject: [PATCH] Don't use enum with associated values for current token They prevent in-place modification, resulting in a bunch of extra copies --- Sources/HTMLStreamer/Tokenizer.swift | 530 +++++++++------------------ 1 file changed, 172 insertions(+), 358 deletions(-) diff --git a/Sources/HTMLStreamer/Tokenizer.swift b/Sources/HTMLStreamer/Tokenizer.swift index d91fbbf..2dd39d0 100644 --- a/Sources/HTMLStreamer/Tokenizer.swift +++ b/Sources/HTMLStreamer/Tokenizer.swift @@ -16,7 +16,12 @@ struct Tokenizer>: IteratorProtocol { private var returnState: State? private var temporaryBuffer: String? private var characterReferenceCode: UInt32? - private var currentToken: Token? + // Optimization: using an enum for the current token means we can't modify the associated values in-place + // Separate fields for everything increases the risk of invalid states, but nets us a small perf gain. + private var currentStartTag: (String, selfClosing: Bool, attributes: [Attribute])? + private var currentEndTag: String? + private var currentComment: String? + private var currentDoctype: (String, forceQuirks: Bool, publicIdentifier: String?, systemIdentifier: String?)? init(chars: Chars) { self.chars = chars @@ -187,8 +192,21 @@ struct Tokenizer>: IteratorProtocol { } private mutating func takeCurrentToken() -> Token { - defer { currentToken = nil } - return currentToken! + if let currentStartTag { + self.currentStartTag = nil + return .startTag(currentStartTag.0, selfClosing: currentStartTag.selfClosing, attributes: currentStartTag.attributes) + } else if let currentEndTag { + self.currentEndTag = nil + return .endTag(currentEndTag) + } else if let currentComment { + self.currentComment = nil + return .comment(currentComment) + } else if let currentDoctype { + self.currentDoctype = nil + return .doctype(currentDoctype.0, forceQuirks: currentDoctype.forceQuirks, publicIdentifier: currentDoctype.publicIdentifier, systemIdentifier: currentDoctype.systemIdentifier) + } else { + preconditionFailure("takeCurrentToken called without current token") + } } } @@ -404,14 +422,9 @@ private extension Tokenizer { mutating func flushCharacterReference() { if case .attributeValue(_) = returnState { - if case .startTag(let s, let selfClosing, var attributes) = currentToken { - attributes[attributes.count - 1].value.append(temporaryBuffer!) - currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) - temporaryBuffer = nil - state = returnState! - } else { - fatalError("bad current tag") - } + currentStartTag!.attributes.uncheckedLast.value.append(temporaryBuffer!) + temporaryBuffer = nil + state = returnState! } else { state = .flushingTemporaryBuffer(returnState!) } @@ -561,13 +574,8 @@ private extension Tokenizer { switch c { case .some("0"..."9"), .some("a"..."z"), .some("A"..."Z"): if case .attributeValue(_) = returnState { - if case .startTag(let s, let selfClosing, var attributes) = currentToken { - attributes[attributes.count - 1].value.append(c!) - currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) - } else { - fatalError("bad current token") - } - return next() + currentStartTag!.attributes.uncheckedLast.value.append(c!) + return tokenizeAmbiguousAmpersand() } else { return .character(c!) } @@ -590,7 +598,7 @@ private extension Tokenizer { return tokenizeEndTagOpen() case "?": // parse error: unexpected-question-mark-instead-of-tag-name - currentToken = .comment("") + currentComment = "" state = .bogusComment return tokenizeBogusComment() case nil: @@ -598,7 +606,7 @@ private extension Tokenizer { state = .endOfFile return .character("<") case .some("a"..."z"), .some("A"..."Z"): - currentToken = .startTag("", selfClosing: false, attributes: []) + currentStartTag = ("", selfClosing: false, attributes: []) reconsume(c) state = .tagName return tokenizeTagName() @@ -614,7 +622,7 @@ private extension Tokenizer { let c = nextChar() switch c { case .some("a"..."z"), .some("A"..."Z"): - currentToken = .endTag("") + currentEndTag = "" reconsume(c) state = .tagName return tokenizeTagName() @@ -628,7 +636,7 @@ private extension Tokenizer { return .character("<") case .some(let c): // parse error: invalid-first-character-of-tag-name - currentToken = .comment("") + currentComment = "" reconsume(c) state = .bogusComment return tokenizeBogusComment() @@ -636,6 +644,8 @@ private extension Tokenizer { } mutating func tokenizeTagName() -> Token? { + // Optimization: this is a hot path where we stay in this state for a while before emitting a token, + // and the function call overhead of recursion costs a bit of perf. while true { switch nextChar() { case "\t", "\n", "\u{000C}", " ": @@ -658,13 +668,11 @@ private extension Tokenizer { } else if ("A"..."Z").contains(c) { c = c.asciiLowercase } - if case .startTag(var s, let selfClosing, let attributes) = currentToken { - s.append(c) - currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) + if currentStartTag != nil { + currentStartTag!.0.append(c) continue - } else if case .endTag(var s) = currentToken { - s.append(c) - currentToken = .endTag(s) + } else if currentEndTag != nil { + currentEndTag!.append(c) continue } else { fatalError("bad current token") @@ -676,11 +684,7 @@ private extension Tokenizer { mutating func tokenizeSelfClosingStartTag() -> Token? { switch nextChar() { case ">": - if case .startTag(let s, _, let attributes) = currentToken { - currentToken = .startTag(s, selfClosing: true, attributes: attributes) - } else { - fatalError("bad current token") - } + currentStartTag!.selfClosing = true state = .data return takeCurrentToken() case nil: @@ -707,22 +711,16 @@ private extension Tokenizer { return tokenizeAfterAttributeName() case "=": // parse error: unexpected-equals-sign-before-attribute-name - if case .startTag(let s, let selfClosing, var attributes) = currentToken { - attributes.append(Attribute(name: "=", value: "")) - currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) - state = .attributeName - return tokenizeAttributeName() - } else { - fatalError("bad current token") - } + currentStartTag!.attributes.append(Attribute(name: "=", value: "")) + state = .attributeName + return tokenizeAttributeName() default: - if case .startTag(let s, let selfClosing, var attributes) = currentToken { - attributes.append(Attribute(name: "", value: "")) - currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) + if currentStartTag != nil { + currentStartTag!.attributes.append(Attribute(name: "", value: "")) reconsume(c) state = .attributeName return tokenizeAttributeName() - } else if case .endTag(_) = currentToken { + } else if currentEndTag != nil { // ignore reconsume(c) state = .attributeName @@ -753,11 +751,10 @@ private extension Tokenizer { c = "\u{FFFD}" } // if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name - if case .startTag(let s, let selfClosing, var attributes) = currentToken { - attributes[attributes.count - 1].name.append(c) - currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) + if currentStartTag != nil { + currentStartTag!.attributes.uncheckedLast.name.append(c) continue - } else if case .endTag(_) = currentToken { + } else if currentEndTag != nil { continue } else { fatalError("bad curren token") @@ -782,13 +779,12 @@ private extension Tokenizer { state = .endOfFile return nil case .some(let c): - if case .startTag(let s, let selfClosing, var attributes) = currentToken { - attributes.append(Attribute(name: "", value: "")) - currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) + if currentStartTag != nil { + currentStartTag!.attributes.append(Attribute(name: "", value: "")) reconsume(c) state = .attributeName return tokenizeAttributeName() - } else if case .endTag(_) = currentToken { + } else if currentEndTag != nil { reconsume(c) state = .attributeName return tokenizeAttributeName() @@ -840,11 +836,10 @@ private extension Tokenizer { return nil case .some(let c): // if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value - if case .startTag(let s, let selfClosing, var attributes) = currentToken { - attributes[attributes.count - 1].value.append(c) - currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) + if currentStartTag != nil { + currentStartTag!.attributes.uncheckedLast.value.append(c) continue - } else if case .endTag(_) = currentToken { + } else if currentEndTag != nil { continue } else { fatalError("bad current token") @@ -872,14 +867,11 @@ private extension Tokenizer { // parse error: unexpected-null-character c = "\u{FFFD}" } - if case .startTag(let s, let selfClosing, var attributes) = currentToken { - attributes[attributes.count - 1].value.append(c) - currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes) + if currentStartTag != nil { + currentStartTag!.attributes.uncheckedLast.value.append(c) continue - } else if case .endTag(_) = currentToken { + } else if currentEndTag != nil { continue - } else { - fatalError("bad current token") } } } @@ -922,13 +914,8 @@ private extension Tokenizer { // parse error: unexpected-null-character c = "\u{FFFD}" } - if case .comment(var s) = currentToken { - s.append(c) - currentToken = .comment(s) - return tokenizeBogusComment() - } else { - fatalError("bad current token") - } + currentComment!.append(c) + return tokenizeBogusComment() } } @@ -936,7 +923,7 @@ private extension Tokenizer { let peeked = peek(count: 7) if peeked.starts(with: "--") { consume(count: 2) - currentToken = .comment("") + currentComment = "" state = .commentStart return tokenizeCommentStart() } else if peeked.lowercased() == "doctype" { @@ -946,12 +933,12 @@ private extension Tokenizer { } else if peeked == "[CDATA[" { // TODO: we don't do any of the tree construction stuff yet, so can't really handle this // consume(count: 7) - currentToken = .comment("") + currentComment = "" state = .bogusComment return tokenizeBogusComment() } else { // parse error: incorrectly-opened-comment - currentToken = .comment("") + currentComment = "" state = .bogusComment return tokenizeBogusComment() } @@ -986,29 +973,19 @@ private extension Tokenizer { // parse error: eof-in-comment return takeCurrentToken() case .some(let c): - if case .comment(var s) = currentToken { - s.append("-") - currentToken = .comment(s) - reconsume(c) - state = .comment - return tokenizeComment() - } else { - fatalError("bad current token") - } + currentComment!.append("-") + reconsume(c) + state = .comment + return tokenizeComment() } } mutating func tokenizeComment() -> Token? { switch nextChar() { case "<": - if case .comment(var s) = currentToken { - s.append("<") - currentToken = .comment(s) - state = .commentLessThanSign - return tokenizeCommentLessThanSign() - } else { - fatalError("bad current token") - } + currentComment!.append("<") + state = .commentLessThanSign + return tokenizeCommentLessThanSign() case "-": state = .commentEndDash return tokenizeCommentEndDash() @@ -1021,35 +998,20 @@ private extension Tokenizer { // parse error: unexpected-null-character c = "\u{FFFD}" } - if case .comment(var s) = currentToken { - s.append(c) - currentToken = .comment(s) - return tokenizeComment() - } else { - fatalError("bad current token") - } + currentComment!.append(c) + return tokenizeComment() } } mutating func tokenizeCommentLessThanSign() -> Token? { switch nextChar() { case "!": - if case .comment(var s) = currentToken { - s.append("!") - currentToken = .comment(s) - state = .commentLessThanSignBang - return tokenizeCommentLessThanSignBang() - } else { - fatalError("bad current token") - } + currentComment!.append("!") + state = .commentLessThanSignBang + return tokenizeCommentLessThanSignBang() case "<": - if case .comment(var s) = currentToken { - s.append("<") - currentToken = .comment(s) - return tokenizeComment() - } else { - fatalError("bad current token") - } + currentComment!.append("<") + return tokenizeComment() case let c: reconsume(c) state = .comment @@ -1106,15 +1068,10 @@ private extension Tokenizer { state = .endOfFile return takeCurrentToken() case let c: - if case .comment(var s) = currentToken { - s.append("-") - currentToken = .comment(s) - } else { - fatalError("bad current token") - } + currentComment!.append("-") reconsume(c) state = .comment - return next() + return tokenizeComment() } } @@ -1127,24 +1084,14 @@ private extension Tokenizer { state = .commentEndBang return tokenizeCommentEndBang() case "-": - if case .comment(var s) = currentToken { - s.append("-") - currentToken = .comment(s) - return tokenizeCommentEnd() - } else { - fatalError("bad current token") - } + currentComment!.append("-") + return tokenizeCommentEnd() case nil: // parse error: eof-in-comment state = .endOfFile return takeCurrentToken() case .some(let c): - if case .comment(var s) = currentToken { - s.append("--") - currentToken = .comment(s) - } else { - fatalError("bad current token") - } + currentComment!.append("--") reconsume(c) state = .comment return tokenizeComment() @@ -1154,14 +1101,9 @@ private extension Tokenizer { mutating func tokenizeCommentEndBang() -> Token? { switch nextChar() { case "-": - if case .comment(var s) = currentToken { - s.append("--!") - currentToken = .comment(s) - state = .commentEndDash - return tokenizeCommentEndDash() - } else { - fatalError("bad current token") - } + currentComment!.append("--!") + state = .commentEndDash + return tokenizeCommentEndDash() case ">": // parse error: incorrectly-closed-comment state = .data @@ -1171,15 +1113,10 @@ private extension Tokenizer { state = .endOfFile return takeCurrentToken() case .some(let c): - if case .comment(var s) = currentToken { - s.append("--!") - currentToken = .comment(s) - reconsume(c) - state = .comment - return tokenizeComment() - } else { - fatalError("bad current token") - } + currentComment!.append("--!") + reconsume(c) + state = .comment + return tokenizeComment() } } @@ -1210,12 +1147,12 @@ private extension Tokenizer { // ignore the character return tokenizeBeforeDoctypeName() case .some(let c) where ("A"..."Z").contains(c): - currentToken = .doctype("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil) + currentDoctype = ("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil) state = .doctypeName return tokenizeDoctypeName() case "\0": // parse error: unexpected-null-character - currentToken = .doctype("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil) + currentDoctype = ("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil) state = .doctypeName return tokenizeDoctypeName() case ">": @@ -1227,7 +1164,7 @@ private extension Tokenizer { state = .endOfFile return .doctype("", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil) case .some(let c): - currentToken = .doctype("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil) + currentDoctype = ("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil) state = .doctypeName return tokenizeDoctypeName() } @@ -1243,25 +1180,16 @@ private extension Tokenizer { return takeCurrentToken() case nil: // parse error: eof-in-doctype - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(var c): if c == "\0" { c = "\u{FFFD}" } else if ("A"..."Z").contains(c) { c = c.asciiLowercase } - if case .doctype(var s, let forceQuirks, _, _) = currentToken { - s.append(c) - currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: nil, systemIdentifier: nil) - return tokenizeDoctypeName() - } else { - fatalError("bad current token") - } + currentDoctype!.0.append(c) + return tokenizeDoctypeName() } } @@ -1276,12 +1204,8 @@ private extension Tokenizer { case nil: // parse error: eof-in-doctype state = .endOfFile - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(let c): reconsume(c) let peeked = peek(count: 6).lowercased() @@ -1295,11 +1219,7 @@ private extension Tokenizer { return tokenizeAfterDoctypeSystemKeyword() } else { // parse error: invalid-character-sequence-after-doctype-name - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true state = .bogusDoctype return tokenizeBogusDoctype() } @@ -1313,39 +1233,23 @@ private extension Tokenizer { return tokenizeBeforeDoctypePublicIdentifier() case .some(let c) where c == "\"" || c == "'": // parse error: missing-whitespace-after-doctype-public-keyword - if case .doctype(let s, let forceQuirks, _, _) = currentToken { - currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil) - let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted - state = .doctypePublicIdentifier(quotes) - return tokenizeDoctypePublicIdentifier(quotes: quotes) - } else { - fatalError("bad current token") - } + currentDoctype!.publicIdentifier = "" + let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypePublicIdentifier(quotes) + return tokenizeDoctypePublicIdentifier(quotes: quotes) case ">": // parse error: missing-doctype-public-identifier state = .data - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case nil: // parse error: eof-in-doctype state = .endOfFile - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(let c): // parse error: missing-quote-before-doctype-public-identifier - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true state = .bogusDoctype reconsume(c) return tokenizeBogusDoctype() @@ -1358,39 +1262,23 @@ private extension Tokenizer { // ignore the character return tokenizeBeforeDoctypePublicIdentifier() case .some(let c) where c == "\"" || c == "'": - if case .doctype(let s, let forceQuirks, _, _) = currentToken { - currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil) - let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted - state = .doctypePublicIdentifier(quotes) - return tokenizeDoctypePublicIdentifier(quotes: quotes) - } else { - fatalError("bad current token") - } + currentDoctype!.publicIdentifier = "" + let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypePublicIdentifier(quotes) + return tokenizeDoctypePublicIdentifier(quotes: quotes) case ">": // parse error: missing-doctype-public-identifier state = .data - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case nil: // parse error: eof-in-doctype state = .endOfFile - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(let c): // parse error: missing-quote-before-doctype-public-identifier - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true reconsume(c) state = .bogusDoctype return tokenizeBogusDoctype() @@ -1409,33 +1297,20 @@ private extension Tokenizer { // parse error: abrupt-doctype-public-identifier reconsume(">") state = .data - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case nil: // parse error: eof-in-doctype state = .endOfFile - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(var c): if c == "\0" { // parse error: unexpected-null-character c = "\u{FFFD}" } - if case .doctype(let s, let forceQuirks, var publicIdentifier, _) = currentToken { - publicIdentifier!.append(c) - currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: nil) - return tokenizeDoctypePublicIdentifier(quotes: quotes) - } else { - fatalError("bad current token") - } + currentDoctype!.publicIdentifier!.append(c) + return tokenizeDoctypePublicIdentifier(quotes: quotes) } } @@ -1449,30 +1324,18 @@ private extension Tokenizer { return takeCurrentToken() case .some(let c) where c == "\"" || c == "'": // parse error: missing-whitespace-between-doctype-public-and-system-identifiers - if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken { - currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "") - let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted - state = .doctypeSystemIdentifier(quotes) - return tokenizeDoctypeSystemIdentifier(quotes: quotes) - } else { - fatalError("bad current token") - } + currentDoctype!.systemIdentifier = "" + let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypeSystemIdentifier(quotes) + return tokenizeDoctypeSystemIdentifier(quotes: quotes) case nil: // parse error: eof-in-doctype state = .endOfFile - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(let c): // parse error: missing-quote-before-doctype-system-identifier - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true reconsume(c) state = .bogusDoctype return tokenizeBogusDoctype() @@ -1488,30 +1351,18 @@ private extension Tokenizer { state = .data return takeCurrentToken() case .some(let c) where c == "\"" || c == "'": - if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken { - currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "") - let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted - state = .doctypeSystemIdentifier(quotes) - return tokenizeDoctypeSystemIdentifier(quotes: quotes) - } else { - fatalError("bad current token") - } + currentDoctype!.systemIdentifier = "" + let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypeSystemIdentifier(quotes) + return tokenizeDoctypeSystemIdentifier(quotes: quotes) case nil: // parse error: eof-in-doctype state = .endOfFile - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(let c): // parse error: missing-quote-before-doctype-system-identifier - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true reconsume(c) state = .bogusComment return tokenizeBogusComment() @@ -1524,39 +1375,23 @@ private extension Tokenizer { state = .beforeDoctypeSystemIdentifier return tokenizeBeforeDoctypeSystemIdentifier() case .some(let c) where c == "\"" || c == "'": - if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken { - currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "") - let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted - state = .doctypeSystemIdentifier(quotes) - return tokenizeDoctypeSystemIdentifier(quotes: quotes) - } else { - fatalError("bad current token") - } + currentDoctype!.systemIdentifier = "" + let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypeSystemIdentifier(quotes) + return tokenizeDoctypeSystemIdentifier(quotes: quotes) case ">": // parse error: missing-doctype-system-identifier state = .data - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case nil: // parse error: eof-in-doctype: state = .endOfFile - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(let c): // parse error: missing-quote-before-doctype-system-identifier - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true reconsume(c) state = .bogusDoctype return tokenizeBogusDoctype() @@ -1569,39 +1404,23 @@ private extension Tokenizer { // ignore the character return tokenizeBeforeDoctypeSystemIdentifier() case .some(let c) where c == "\"" || c == "'": - if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken { - currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: " ") - let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted - state = .doctypeSystemIdentifier(quotes) - return tokenizeDoctypeSystemIdentifier(quotes: quotes) - } else { - fatalError("bad current token") - } + currentDoctype!.systemIdentifier = "" + let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted + state = .doctypeSystemIdentifier(quotes) + return tokenizeDoctypeSystemIdentifier(quotes: quotes) case ">": // parse error: missing-doctype-system-identifier state = .data - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case nil: // parse error: eof-in-doctype: state = .endOfFile - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(let c): // parse error: missing-quote-before-doctype-system-identifier - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true reconsume(c) state = .bogusDoctype return tokenizeBogusDoctype() @@ -1619,33 +1438,20 @@ private extension Tokenizer { case ">": // parse error: abrupt-doctype-system-identifier state = .data - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case nil: // parse error: eof-in-doctype state = .endOfFile - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(var c): if c == "\0" { // parse error: unexpected-null-character c = "\u{FFFD}" } - if case .doctype(let s, let forceQuirks, let publicIdentifier, var systemIdentifier) = currentToken { - systemIdentifier!.append(c) - currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - return tokenizeDoctypeSystemIdentifier(quotes: quotes) - } else { - fatalError("bad current token") - } + currentDoctype!.systemIdentifier!.append(c) + return tokenizeDoctypeSystemIdentifier(quotes: quotes) } } @@ -1660,12 +1466,8 @@ private extension Tokenizer { case nil: // parse error: eof-in-doctype state = .endOfFile - if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken { - currentToken = nil - return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier) - } else { - fatalError("bad current token") - } + currentDoctype!.forceQuirks = true + return takeCurrentToken() case .some(let c): // parse error: unexpected-character-after-doctype-system-identifier // Note: This does not set the current DOCTYPE token's force-quirks flag to on. @@ -1699,3 +1501,15 @@ private extension Character { return Character(Unicode.Scalar(asciiValue! + 0x20)) } } + +private extension Array { + // Optimization: allows in-place modification of the last element of the array. + var uncheckedLast: Element { + _read { + yield self[count - 1] + } + _modify { + yield &self[count - 1] + } + } +}