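Don't use an enum with associated values for the current token
Associated values can't be modified in place: every change had to bind the payload out of the enum, mutate the copy, and rebuild the token, resulting in a bunch of extra copies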
This commit is contained in:
parent
31bd174a69
commit
f412369cf7
|
@ -16,7 +16,12 @@ struct Tokenizer<Chars: IteratorProtocol<Character>>: IteratorProtocol {
|
||||||
private var returnState: State?
|
private var returnState: State?
|
||||||
private var temporaryBuffer: String?
|
private var temporaryBuffer: String?
|
||||||
private var characterReferenceCode: UInt32?
|
private var characterReferenceCode: UInt32?
|
||||||
private var currentToken: Token?
|
// Optimization: using an enum for the current token means we can't modify the associated values in-place
|
||||||
|
// Separate fields for everything increases the risk of invalid states, but nets us a small perf gain.
|
||||||
|
private var currentStartTag: (String, selfClosing: Bool, attributes: [Attribute])?
|
||||||
|
private var currentEndTag: String?
|
||||||
|
private var currentComment: String?
|
||||||
|
private var currentDoctype: (String, forceQuirks: Bool, publicIdentifier: String?, systemIdentifier: String?)?
|
||||||
|
|
||||||
init(chars: Chars) {
|
init(chars: Chars) {
|
||||||
self.chars = chars
|
self.chars = chars
|
||||||
|
@ -187,8 +192,21 @@ struct Tokenizer<Chars: IteratorProtocol<Character>>: IteratorProtocol {
|
||||||
}
|
}
|
||||||
|
|
||||||
private mutating func takeCurrentToken() -> Token {
|
private mutating func takeCurrentToken() -> Token {
|
||||||
defer { currentToken = nil }
|
if let currentStartTag {
|
||||||
return currentToken!
|
self.currentStartTag = nil
|
||||||
|
return .startTag(currentStartTag.0, selfClosing: currentStartTag.selfClosing, attributes: currentStartTag.attributes)
|
||||||
|
} else if let currentEndTag {
|
||||||
|
self.currentEndTag = nil
|
||||||
|
return .endTag(currentEndTag)
|
||||||
|
} else if let currentComment {
|
||||||
|
self.currentComment = nil
|
||||||
|
return .comment(currentComment)
|
||||||
|
} else if let currentDoctype {
|
||||||
|
self.currentDoctype = nil
|
||||||
|
return .doctype(currentDoctype.0, forceQuirks: currentDoctype.forceQuirks, publicIdentifier: currentDoctype.publicIdentifier, systemIdentifier: currentDoctype.systemIdentifier)
|
||||||
|
} else {
|
||||||
|
preconditionFailure("takeCurrentToken called without current token")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -404,14 +422,9 @@ private extension Tokenizer {
|
||||||
|
|
||||||
mutating func flushCharacterReference() {
|
mutating func flushCharacterReference() {
|
||||||
if case .attributeValue(_) = returnState {
|
if case .attributeValue(_) = returnState {
|
||||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
currentStartTag!.attributes.uncheckedLast.value.append(temporaryBuffer!)
|
||||||
attributes[attributes.count - 1].value.append(temporaryBuffer!)
|
|
||||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
|
||||||
temporaryBuffer = nil
|
temporaryBuffer = nil
|
||||||
state = returnState!
|
state = returnState!
|
||||||
} else {
|
|
||||||
fatalError("bad current tag")
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
state = .flushingTemporaryBuffer(returnState!)
|
state = .flushingTemporaryBuffer(returnState!)
|
||||||
}
|
}
|
||||||
|
@ -561,13 +574,8 @@ private extension Tokenizer {
|
||||||
switch c {
|
switch c {
|
||||||
case .some("0"..."9"), .some("a"..."z"), .some("A"..."Z"):
|
case .some("0"..."9"), .some("a"..."z"), .some("A"..."Z"):
|
||||||
if case .attributeValue(_) = returnState {
|
if case .attributeValue(_) = returnState {
|
||||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
currentStartTag!.attributes.uncheckedLast.value.append(c!)
|
||||||
attributes[attributes.count - 1].value.append(c!)
|
return tokenizeAmbiguousAmpersand()
|
||||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
return next()
|
|
||||||
} else {
|
} else {
|
||||||
return .character(c!)
|
return .character(c!)
|
||||||
}
|
}
|
||||||
|
@ -590,7 +598,7 @@ private extension Tokenizer {
|
||||||
return tokenizeEndTagOpen()
|
return tokenizeEndTagOpen()
|
||||||
case "?":
|
case "?":
|
||||||
// parse error: unexpected-question-mark-instead-of-tag-name
|
// parse error: unexpected-question-mark-instead-of-tag-name
|
||||||
currentToken = .comment("")
|
currentComment = ""
|
||||||
state = .bogusComment
|
state = .bogusComment
|
||||||
return tokenizeBogusComment()
|
return tokenizeBogusComment()
|
||||||
case nil:
|
case nil:
|
||||||
|
@ -598,7 +606,7 @@ private extension Tokenizer {
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
return .character("<")
|
return .character("<")
|
||||||
case .some("a"..."z"), .some("A"..."Z"):
|
case .some("a"..."z"), .some("A"..."Z"):
|
||||||
currentToken = .startTag("", selfClosing: false, attributes: [])
|
currentStartTag = ("", selfClosing: false, attributes: [])
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .tagName
|
state = .tagName
|
||||||
return tokenizeTagName()
|
return tokenizeTagName()
|
||||||
|
@ -614,7 +622,7 @@ private extension Tokenizer {
|
||||||
let c = nextChar()
|
let c = nextChar()
|
||||||
switch c {
|
switch c {
|
||||||
case .some("a"..."z"), .some("A"..."Z"):
|
case .some("a"..."z"), .some("A"..."Z"):
|
||||||
currentToken = .endTag("")
|
currentEndTag = ""
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .tagName
|
state = .tagName
|
||||||
return tokenizeTagName()
|
return tokenizeTagName()
|
||||||
|
@ -628,7 +636,7 @@ private extension Tokenizer {
|
||||||
return .character("<")
|
return .character("<")
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
// parse error: invalid-first-character-of-tag-name
|
// parse error: invalid-first-character-of-tag-name
|
||||||
currentToken = .comment("")
|
currentComment = ""
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .bogusComment
|
state = .bogusComment
|
||||||
return tokenizeBogusComment()
|
return tokenizeBogusComment()
|
||||||
|
@ -636,6 +644,8 @@ private extension Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
mutating func tokenizeTagName() -> Token? {
|
mutating func tokenizeTagName() -> Token? {
|
||||||
|
// Optimization: this is a hot path where we stay in this state for a while before emitting a token,
|
||||||
|
// and the function call overhead of recursion costs a bit of perf.
|
||||||
while true {
|
while true {
|
||||||
switch nextChar() {
|
switch nextChar() {
|
||||||
case "\t", "\n", "\u{000C}", " ":
|
case "\t", "\n", "\u{000C}", " ":
|
||||||
|
@ -658,13 +668,11 @@ private extension Tokenizer {
|
||||||
} else if ("A"..."Z").contains(c) {
|
} else if ("A"..."Z").contains(c) {
|
||||||
c = c.asciiLowercase
|
c = c.asciiLowercase
|
||||||
}
|
}
|
||||||
if case .startTag(var s, let selfClosing, let attributes) = currentToken {
|
if currentStartTag != nil {
|
||||||
s.append(c)
|
currentStartTag!.0.append(c)
|
||||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
|
||||||
continue
|
continue
|
||||||
} else if case .endTag(var s) = currentToken {
|
} else if currentEndTag != nil {
|
||||||
s.append(c)
|
currentEndTag!.append(c)
|
||||||
currentToken = .endTag(s)
|
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
fatalError("bad current token")
|
fatalError("bad current token")
|
||||||
|
@ -676,11 +684,7 @@ private extension Tokenizer {
|
||||||
mutating func tokenizeSelfClosingStartTag() -> Token? {
|
mutating func tokenizeSelfClosingStartTag() -> Token? {
|
||||||
switch nextChar() {
|
switch nextChar() {
|
||||||
case ">":
|
case ">":
|
||||||
if case .startTag(let s, _, let attributes) = currentToken {
|
currentStartTag!.selfClosing = true
|
||||||
currentToken = .startTag(s, selfClosing: true, attributes: attributes)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
state = .data
|
state = .data
|
||||||
return takeCurrentToken()
|
return takeCurrentToken()
|
||||||
case nil:
|
case nil:
|
||||||
|
@ -707,22 +711,16 @@ private extension Tokenizer {
|
||||||
return tokenizeAfterAttributeName()
|
return tokenizeAfterAttributeName()
|
||||||
case "=":
|
case "=":
|
||||||
// parse error: unexpected-equals-sign-before-attribute-name
|
// parse error: unexpected-equals-sign-before-attribute-name
|
||||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
currentStartTag!.attributes.append(Attribute(name: "=", value: ""))
|
||||||
attributes.append(Attribute(name: "=", value: ""))
|
|
||||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
|
||||||
state = .attributeName
|
state = .attributeName
|
||||||
return tokenizeAttributeName()
|
return tokenizeAttributeName()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
if currentStartTag != nil {
|
||||||
attributes.append(Attribute(name: "", value: ""))
|
currentStartTag!.attributes.append(Attribute(name: "", value: ""))
|
||||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .attributeName
|
state = .attributeName
|
||||||
return tokenizeAttributeName()
|
return tokenizeAttributeName()
|
||||||
} else if case .endTag(_) = currentToken {
|
} else if currentEndTag != nil {
|
||||||
// ignore
|
// ignore
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .attributeName
|
state = .attributeName
|
||||||
|
@ -753,11 +751,10 @@ private extension Tokenizer {
|
||||||
c = "\u{FFFD}"
|
c = "\u{FFFD}"
|
||||||
}
|
}
|
||||||
// if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
|
// if c in ["\"", "'", "<"], parse error: unexpected-character-in-attribute-name
|
||||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
if currentStartTag != nil {
|
||||||
attributes[attributes.count - 1].name.append(c)
|
currentStartTag!.attributes.uncheckedLast.name.append(c)
|
||||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
|
||||||
continue
|
continue
|
||||||
} else if case .endTag(_) = currentToken {
|
} else if currentEndTag != nil {
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
fatalError("bad curren token")
|
fatalError("bad curren token")
|
||||||
|
@ -782,13 +779,12 @@ private extension Tokenizer {
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
return nil
|
return nil
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
if currentStartTag != nil {
|
||||||
attributes.append(Attribute(name: "", value: ""))
|
currentStartTag!.attributes.append(Attribute(name: "", value: ""))
|
||||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .attributeName
|
state = .attributeName
|
||||||
return tokenizeAttributeName()
|
return tokenizeAttributeName()
|
||||||
} else if case .endTag(_) = currentToken {
|
} else if currentEndTag != nil {
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .attributeName
|
state = .attributeName
|
||||||
return tokenizeAttributeName()
|
return tokenizeAttributeName()
|
||||||
|
@ -840,11 +836,10 @@ private extension Tokenizer {
|
||||||
return nil
|
return nil
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
// if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
|
// if c in ["\"", "'", "<", "=", "`"], parse error: unexpected-character-in-unquoted-attribute-value
|
||||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
if currentStartTag != nil {
|
||||||
attributes[attributes.count - 1].value.append(c)
|
currentStartTag!.attributes.uncheckedLast.value.append(c)
|
||||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
|
||||||
continue
|
continue
|
||||||
} else if case .endTag(_) = currentToken {
|
} else if currentEndTag != nil {
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
fatalError("bad current token")
|
fatalError("bad current token")
|
||||||
|
@ -872,14 +867,11 @@ private extension Tokenizer {
|
||||||
// parse error: unexpected-null-character
|
// parse error: unexpected-null-character
|
||||||
c = "\u{FFFD}"
|
c = "\u{FFFD}"
|
||||||
}
|
}
|
||||||
if case .startTag(let s, let selfClosing, var attributes) = currentToken {
|
if currentStartTag != nil {
|
||||||
attributes[attributes.count - 1].value.append(c)
|
currentStartTag!.attributes.uncheckedLast.value.append(c)
|
||||||
currentToken = .startTag(s, selfClosing: selfClosing, attributes: attributes)
|
|
||||||
continue
|
continue
|
||||||
} else if case .endTag(_) = currentToken {
|
} else if currentEndTag != nil {
|
||||||
continue
|
continue
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -922,13 +914,8 @@ private extension Tokenizer {
|
||||||
// parse error: unexpected-null-character
|
// parse error: unexpected-null-character
|
||||||
c = "\u{FFFD}"
|
c = "\u{FFFD}"
|
||||||
}
|
}
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append(c)
|
||||||
s.append(c)
|
|
||||||
currentToken = .comment(s)
|
|
||||||
return tokenizeBogusComment()
|
return tokenizeBogusComment()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -936,7 +923,7 @@ private extension Tokenizer {
|
||||||
let peeked = peek(count: 7)
|
let peeked = peek(count: 7)
|
||||||
if peeked.starts(with: "--") {
|
if peeked.starts(with: "--") {
|
||||||
consume(count: 2)
|
consume(count: 2)
|
||||||
currentToken = .comment("")
|
currentComment = ""
|
||||||
state = .commentStart
|
state = .commentStart
|
||||||
return tokenizeCommentStart()
|
return tokenizeCommentStart()
|
||||||
} else if peeked.lowercased() == "doctype" {
|
} else if peeked.lowercased() == "doctype" {
|
||||||
|
@ -946,12 +933,12 @@ private extension Tokenizer {
|
||||||
} else if peeked == "[CDATA[" {
|
} else if peeked == "[CDATA[" {
|
||||||
// TODO: we don't do any of the tree construction stuff yet, so can't really handle this
|
// TODO: we don't do any of the tree construction stuff yet, so can't really handle this
|
||||||
// consume(count: 7)
|
// consume(count: 7)
|
||||||
currentToken = .comment("")
|
currentComment = ""
|
||||||
state = .bogusComment
|
state = .bogusComment
|
||||||
return tokenizeBogusComment()
|
return tokenizeBogusComment()
|
||||||
} else {
|
} else {
|
||||||
// parse error: incorrectly-opened-comment
|
// parse error: incorrectly-opened-comment
|
||||||
currentToken = .comment("")
|
currentComment = ""
|
||||||
state = .bogusComment
|
state = .bogusComment
|
||||||
return tokenizeBogusComment()
|
return tokenizeBogusComment()
|
||||||
}
|
}
|
||||||
|
@ -986,29 +973,19 @@ private extension Tokenizer {
|
||||||
// parse error: eof-in-comment
|
// parse error: eof-in-comment
|
||||||
return takeCurrentToken()
|
return takeCurrentToken()
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append("-")
|
||||||
s.append("-")
|
|
||||||
currentToken = .comment(s)
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .comment
|
state = .comment
|
||||||
return tokenizeComment()
|
return tokenizeComment()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mutating func tokenizeComment() -> Token? {
|
mutating func tokenizeComment() -> Token? {
|
||||||
switch nextChar() {
|
switch nextChar() {
|
||||||
case "<":
|
case "<":
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append("<")
|
||||||
s.append("<")
|
|
||||||
currentToken = .comment(s)
|
|
||||||
state = .commentLessThanSign
|
state = .commentLessThanSign
|
||||||
return tokenizeCommentLessThanSign()
|
return tokenizeCommentLessThanSign()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case "-":
|
case "-":
|
||||||
state = .commentEndDash
|
state = .commentEndDash
|
||||||
return tokenizeCommentEndDash()
|
return tokenizeCommentEndDash()
|
||||||
|
@ -1021,35 +998,20 @@ private extension Tokenizer {
|
||||||
// parse error: unexpected-null-character
|
// parse error: unexpected-null-character
|
||||||
c = "\u{FFFD}"
|
c = "\u{FFFD}"
|
||||||
}
|
}
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append(c)
|
||||||
s.append(c)
|
|
||||||
currentToken = .comment(s)
|
|
||||||
return tokenizeComment()
|
return tokenizeComment()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mutating func tokenizeCommentLessThanSign() -> Token? {
|
mutating func tokenizeCommentLessThanSign() -> Token? {
|
||||||
switch nextChar() {
|
switch nextChar() {
|
||||||
case "!":
|
case "!":
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append("!")
|
||||||
s.append("!")
|
|
||||||
currentToken = .comment(s)
|
|
||||||
state = .commentLessThanSignBang
|
state = .commentLessThanSignBang
|
||||||
return tokenizeCommentLessThanSignBang()
|
return tokenizeCommentLessThanSignBang()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case "<":
|
case "<":
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append("<")
|
||||||
s.append("<")
|
|
||||||
currentToken = .comment(s)
|
|
||||||
return tokenizeComment()
|
return tokenizeComment()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case let c:
|
case let c:
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .comment
|
state = .comment
|
||||||
|
@ -1106,15 +1068,10 @@ private extension Tokenizer {
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
return takeCurrentToken()
|
return takeCurrentToken()
|
||||||
case let c:
|
case let c:
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append("-")
|
||||||
s.append("-")
|
|
||||||
currentToken = .comment(s)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .comment
|
state = .comment
|
||||||
return next()
|
return tokenizeComment()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1127,24 +1084,14 @@ private extension Tokenizer {
|
||||||
state = .commentEndBang
|
state = .commentEndBang
|
||||||
return tokenizeCommentEndBang()
|
return tokenizeCommentEndBang()
|
||||||
case "-":
|
case "-":
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append("-")
|
||||||
s.append("-")
|
|
||||||
currentToken = .comment(s)
|
|
||||||
return tokenizeCommentEnd()
|
return tokenizeCommentEnd()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-comment
|
// parse error: eof-in-comment
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
return takeCurrentToken()
|
return takeCurrentToken()
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append("--")
|
||||||
s.append("--")
|
|
||||||
currentToken = .comment(s)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .comment
|
state = .comment
|
||||||
return tokenizeComment()
|
return tokenizeComment()
|
||||||
|
@ -1154,14 +1101,9 @@ private extension Tokenizer {
|
||||||
mutating func tokenizeCommentEndBang() -> Token? {
|
mutating func tokenizeCommentEndBang() -> Token? {
|
||||||
switch nextChar() {
|
switch nextChar() {
|
||||||
case "-":
|
case "-":
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append("--!")
|
||||||
s.append("--!")
|
|
||||||
currentToken = .comment(s)
|
|
||||||
state = .commentEndDash
|
state = .commentEndDash
|
||||||
return tokenizeCommentEndDash()
|
return tokenizeCommentEndDash()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case ">":
|
case ">":
|
||||||
// parse error: incorrectly-closed-comment
|
// parse error: incorrectly-closed-comment
|
||||||
state = .data
|
state = .data
|
||||||
|
@ -1171,15 +1113,10 @@ private extension Tokenizer {
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
return takeCurrentToken()
|
return takeCurrentToken()
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
if case .comment(var s) = currentToken {
|
currentComment!.append("--!")
|
||||||
s.append("--!")
|
|
||||||
currentToken = .comment(s)
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .comment
|
state = .comment
|
||||||
return tokenizeComment()
|
return tokenizeComment()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1210,12 +1147,12 @@ private extension Tokenizer {
|
||||||
// ignore the character
|
// ignore the character
|
||||||
return tokenizeBeforeDoctypeName()
|
return tokenizeBeforeDoctypeName()
|
||||||
case .some(let c) where ("A"..."Z").contains(c):
|
case .some(let c) where ("A"..."Z").contains(c):
|
||||||
currentToken = .doctype("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
currentDoctype = ("\(c.asciiLowercase)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||||
state = .doctypeName
|
state = .doctypeName
|
||||||
return tokenizeDoctypeName()
|
return tokenizeDoctypeName()
|
||||||
case "\0":
|
case "\0":
|
||||||
// parse error: unexpected-null-character
|
// parse error: unexpected-null-character
|
||||||
currentToken = .doctype("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
currentDoctype = ("\u{FFFD}", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||||
state = .doctypeName
|
state = .doctypeName
|
||||||
return tokenizeDoctypeName()
|
return tokenizeDoctypeName()
|
||||||
case ">":
|
case ">":
|
||||||
|
@ -1227,7 +1164,7 @@ private extension Tokenizer {
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
return .doctype("", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
return .doctype("", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
currentToken = .doctype("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
currentDoctype = ("\(c)", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)
|
||||||
state = .doctypeName
|
state = .doctypeName
|
||||||
return tokenizeDoctypeName()
|
return tokenizeDoctypeName()
|
||||||
}
|
}
|
||||||
|
@ -1243,25 +1180,16 @@ private extension Tokenizer {
|
||||||
return takeCurrentToken()
|
return takeCurrentToken()
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(var c):
|
case .some(var c):
|
||||||
if c == "\0" {
|
if c == "\0" {
|
||||||
c = "\u{FFFD}"
|
c = "\u{FFFD}"
|
||||||
} else if ("A"..."Z").contains(c) {
|
} else if ("A"..."Z").contains(c) {
|
||||||
c = c.asciiLowercase
|
c = c.asciiLowercase
|
||||||
}
|
}
|
||||||
if case .doctype(var s, let forceQuirks, _, _) = currentToken {
|
currentDoctype!.0.append(c)
|
||||||
s.append(c)
|
|
||||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: nil, systemIdentifier: nil)
|
|
||||||
return tokenizeDoctypeName()
|
return tokenizeDoctypeName()
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1276,12 +1204,8 @@ private extension Tokenizer {
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
let peeked = peek(count: 6).lowercased()
|
let peeked = peek(count: 6).lowercased()
|
||||||
|
@ -1295,11 +1219,7 @@ private extension Tokenizer {
|
||||||
return tokenizeAfterDoctypeSystemKeyword()
|
return tokenizeAfterDoctypeSystemKeyword()
|
||||||
} else {
|
} else {
|
||||||
// parse error: invalid-character-sequence-after-doctype-name
|
// parse error: invalid-character-sequence-after-doctype-name
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
state = .bogusDoctype
|
state = .bogusDoctype
|
||||||
return tokenizeBogusDoctype()
|
return tokenizeBogusDoctype()
|
||||||
}
|
}
|
||||||
|
@ -1313,39 +1233,23 @@ private extension Tokenizer {
|
||||||
return tokenizeBeforeDoctypePublicIdentifier()
|
return tokenizeBeforeDoctypePublicIdentifier()
|
||||||
case .some(let c) where c == "\"" || c == "'":
|
case .some(let c) where c == "\"" || c == "'":
|
||||||
// parse error: missing-whitespace-after-doctype-public-keyword
|
// parse error: missing-whitespace-after-doctype-public-keyword
|
||||||
if case .doctype(let s, let forceQuirks, _, _) = currentToken {
|
currentDoctype!.publicIdentifier = ""
|
||||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil)
|
|
||||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||||
state = .doctypePublicIdentifier(quotes)
|
state = .doctypePublicIdentifier(quotes)
|
||||||
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case ">":
|
case ">":
|
||||||
// parse error: missing-doctype-public-identifier
|
// parse error: missing-doctype-public-identifier
|
||||||
state = .data
|
state = .data
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
// parse error: missing-quote-before-doctype-public-identifier
|
// parse error: missing-quote-before-doctype-public-identifier
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
state = .bogusDoctype
|
state = .bogusDoctype
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
return tokenizeBogusDoctype()
|
return tokenizeBogusDoctype()
|
||||||
|
@ -1358,39 +1262,23 @@ private extension Tokenizer {
|
||||||
// ignore the character
|
// ignore the character
|
||||||
return tokenizeBeforeDoctypePublicIdentifier()
|
return tokenizeBeforeDoctypePublicIdentifier()
|
||||||
case .some(let c) where c == "\"" || c == "'":
|
case .some(let c) where c == "\"" || c == "'":
|
||||||
if case .doctype(let s, let forceQuirks, _, _) = currentToken {
|
currentDoctype!.publicIdentifier = ""
|
||||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: "", systemIdentifier: nil)
|
|
||||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||||
state = .doctypePublicIdentifier(quotes)
|
state = .doctypePublicIdentifier(quotes)
|
||||||
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case ">":
|
case ">":
|
||||||
// parse error: missing-doctype-public-identifier
|
// parse error: missing-doctype-public-identifier
|
||||||
state = .data
|
state = .data
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
// parse error: missing-quote-before-doctype-public-identifier
|
// parse error: missing-quote-before-doctype-public-identifier
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .bogusDoctype
|
state = .bogusDoctype
|
||||||
return tokenizeBogusDoctype()
|
return tokenizeBogusDoctype()
|
||||||
|
@ -1409,33 +1297,20 @@ private extension Tokenizer {
|
||||||
// parse error: abrupt-doctype-public-identifier
|
// parse error: abrupt-doctype-public-identifier
|
||||||
reconsume(">")
|
reconsume(">")
|
||||||
state = .data
|
state = .data
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(var c):
|
case .some(var c):
|
||||||
if c == "\0" {
|
if c == "\0" {
|
||||||
// parse error: unexpected-null-character
|
// parse error: unexpected-null-character
|
||||||
c = "\u{FFFD}"
|
c = "\u{FFFD}"
|
||||||
}
|
}
|
||||||
if case .doctype(let s, let forceQuirks, var publicIdentifier, _) = currentToken {
|
currentDoctype!.publicIdentifier!.append(c)
|
||||||
publicIdentifier!.append(c)
|
|
||||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: nil)
|
|
||||||
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
return tokenizeDoctypePublicIdentifier(quotes: quotes)
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1449,30 +1324,18 @@ private extension Tokenizer {
|
||||||
return takeCurrentToken()
|
return takeCurrentToken()
|
||||||
case .some(let c) where c == "\"" || c == "'":
|
case .some(let c) where c == "\"" || c == "'":
|
||||||
// parse error: missing-whitespace-between-doctype-public-and-system-identifiers
|
// parse error: missing-whitespace-between-doctype-public-and-system-identifiers
|
||||||
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
|
currentDoctype!.systemIdentifier = ""
|
||||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
|
|
||||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||||
state = .doctypeSystemIdentifier(quotes)
|
state = .doctypeSystemIdentifier(quotes)
|
||||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
// parse error: missing-quote-before-doctype-system-identifier
|
// parse error: missing-quote-before-doctype-system-identifier
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .bogusDoctype
|
state = .bogusDoctype
|
||||||
return tokenizeBogusDoctype()
|
return tokenizeBogusDoctype()
|
||||||
|
@ -1488,30 +1351,18 @@ private extension Tokenizer {
|
||||||
state = .data
|
state = .data
|
||||||
return takeCurrentToken()
|
return takeCurrentToken()
|
||||||
case .some(let c) where c == "\"" || c == "'":
|
case .some(let c) where c == "\"" || c == "'":
|
||||||
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
|
currentDoctype!.systemIdentifier = ""
|
||||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
|
|
||||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||||
state = .doctypeSystemIdentifier(quotes)
|
state = .doctypeSystemIdentifier(quotes)
|
||||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
// parse error: missing-quote-before-doctype-system-identifier
|
// parse error: missing-quote-before-doctype-system-identifier
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .bogusComment
|
state = .bogusComment
|
||||||
return tokenizeBogusComment()
|
return tokenizeBogusComment()
|
||||||
|
@ -1524,39 +1375,23 @@ private extension Tokenizer {
|
||||||
state = .beforeDoctypeSystemIdentifier
|
state = .beforeDoctypeSystemIdentifier
|
||||||
return tokenizeBeforeDoctypeSystemIdentifier()
|
return tokenizeBeforeDoctypeSystemIdentifier()
|
||||||
case .some(let c) where c == "\"" || c == "'":
|
case .some(let c) where c == "\"" || c == "'":
|
||||||
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
|
currentDoctype!.systemIdentifier = ""
|
||||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: "")
|
|
||||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||||
state = .doctypeSystemIdentifier(quotes)
|
state = .doctypeSystemIdentifier(quotes)
|
||||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case ">":
|
case ">":
|
||||||
// parse error: missing-doctype-system-identifier
|
// parse error: missing-doctype-system-identifier
|
||||||
state = .data
|
state = .data
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
// parse error: missing-quote-before-doctype-system-identifier
|
// parse error: missing-quote-before-doctype-system-identifier
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .bogusDoctype
|
state = .bogusDoctype
|
||||||
return tokenizeBogusDoctype()
|
return tokenizeBogusDoctype()
|
||||||
|
@ -1569,39 +1404,23 @@ private extension Tokenizer {
|
||||||
// ignore the character
|
// ignore the character
|
||||||
return tokenizeBeforeDoctypeSystemIdentifier()
|
return tokenizeBeforeDoctypeSystemIdentifier()
|
||||||
case .some(let c) where c == "\"" || c == "'":
|
case .some(let c) where c == "\"" || c == "'":
|
||||||
if case .doctype(let s, let forceQuirks, let publicIdentifier, _) = currentToken {
|
currentDoctype!.systemIdentifier = ""
|
||||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: " ")
|
|
||||||
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
let quotes = c == "\"" ? DoctypeIdentifierQuotation.doubleQuoted : .singleQuoted
|
||||||
state = .doctypeSystemIdentifier(quotes)
|
state = .doctypeSystemIdentifier(quotes)
|
||||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case ">":
|
case ">":
|
||||||
// parse error: missing-doctype-system-identifier
|
// parse error: missing-doctype-system-identifier
|
||||||
state = .data
|
state = .data
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
// parse error: missing-quote-before-doctype-system-identifier
|
// parse error: missing-quote-before-doctype-system-identifier
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
reconsume(c)
|
reconsume(c)
|
||||||
state = .bogusDoctype
|
state = .bogusDoctype
|
||||||
return tokenizeBogusDoctype()
|
return tokenizeBogusDoctype()
|
||||||
|
@ -1619,33 +1438,20 @@ private extension Tokenizer {
|
||||||
case ">":
|
case ">":
|
||||||
// parse error: abrupt-doctype-system-identifier
|
// parse error: abrupt-doctype-system-identifier
|
||||||
state = .data
|
state = .data
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(var c):
|
case .some(var c):
|
||||||
if c == "\0" {
|
if c == "\0" {
|
||||||
// parse error: unexpected-null-character
|
// parse error: unexpected-null-character
|
||||||
c = "\u{FFFD}"
|
c = "\u{FFFD}"
|
||||||
}
|
}
|
||||||
if case .doctype(let s, let forceQuirks, let publicIdentifier, var systemIdentifier) = currentToken {
|
currentDoctype!.systemIdentifier!.append(c)
|
||||||
systemIdentifier!.append(c)
|
|
||||||
currentToken = .doctype(s, forceQuirks: forceQuirks, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
return tokenizeDoctypeSystemIdentifier(quotes: quotes)
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1660,12 +1466,8 @@ private extension Tokenizer {
|
||||||
case nil:
|
case nil:
|
||||||
// parse error: eof-in-doctype
|
// parse error: eof-in-doctype
|
||||||
state = .endOfFile
|
state = .endOfFile
|
||||||
if case .doctype(let s, _, let publicIdentifier, let systemIdentifier) = currentToken {
|
currentDoctype!.forceQuirks = true
|
||||||
currentToken = nil
|
return takeCurrentToken()
|
||||||
return .doctype(s, forceQuirks: true, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier)
|
|
||||||
} else {
|
|
||||||
fatalError("bad current token")
|
|
||||||
}
|
|
||||||
case .some(let c):
|
case .some(let c):
|
||||||
// parse error: unexpected-character-after-doctype-system-identifier
|
// parse error: unexpected-character-after-doctype-system-identifier
|
||||||
// Note: This does not set the current DOCTYPE token's force-quirks flag to on.
|
// Note: This does not set the current DOCTYPE token's force-quirks flag to on.
|
||||||
|
@ -1699,3 +1501,15 @@ private extension Character {
|
||||||
return Character(Unicode.Scalar(asciiValue! + 0x20))
|
return Character(Unicode.Scalar(asciiValue! + 0x20))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private extension Array {
|
||||||
|
// Optimization: allows in-place modification of the last element of the array.
// Both accessors go through `self[count - 1]`; the `_modify` accessor yields that
// element by reference, so a caller can mutate it (e.g. append to it) where it is stored.
// No emptiness check is performed here: on an empty array the index is -1, so callers
// must only use this property when the array has at least one element.
|
||||||
|
var uncheckedLast: Element {
|
||||||
|
// Read access: yields the element at the last index.
_read {
|
||||||
|
yield self[count - 1]
|
||||||
|
}
|
||||||
|
// Write access: yields the element at the last index as a mutable reference.
_modify {
|
||||||
|
yield &self[count - 1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue