95 lines
4.5 KiB
Swift
95 lines
4.5 KiB
Swift
//
//  TokenizerTests.swift
//
//
//  Created by Shadowfacts on 11/22/23.
//
|
|
|
|
import XCTest
|
|
@testable import HTMLStreamer
|
|
|
|
/// Unit tests for `Tokenizer`: each test feeds a small HTML fragment through the
/// tokenizer and compares the emitted tokens against the expected sequence.
///
/// Character references in the inputs are spelled out in their escaped source
/// form (`&amp;`, `&#x21;`, ...) so that the reference-decoding paths are what is
/// actually being exercised; invisible characters in expectations are written
/// as `\u{...}` escapes.
final class TokenizerTests: XCTestCase {

    /// Runs a `Tokenizer` over the Unicode scalars of `s` and collects every
    /// token it yields into an array.
    private func tokenize(_ s: String) -> [Token] {
        let iterator = Tokenizer(chars: s.unicodeScalars.makeIterator())
        // Debugging aid: swap in the line below to print each token as it is produced.
        // let iterator = PrintIterator(inner: Tokenizer(chars: s.unicodeScalars.makeIterator()))
        return Array(AnySequence({ iterator }))
    }

    /// Named character references in text content and inside an attribute value.
    func testNamedCharacterReferences() {
        XCTAssertEqual(tokenize("&amp;"), [.characterSequence("&")])
        // well-formed reference immediately followed by more text
        XCTAssertEqual(tokenize("&not;in"), [.characterSequence("¬"), .characterSequence("in")])
        // missing-semicolon-after-character-reference:
        XCTAssertEqual(tokenize("&notin"), [.characterSequence("¬"), .characterSequence("in")])
        // unknown-named-character-reference:
        XCTAssertEqual(tokenize("&notit;"), [.characterSequence("¬"), .characterSequence("it;")])
        XCTAssertEqual(tokenize("&asdf"), [.characterSequence("&asdf")])
        XCTAssertEqual(tokenize("&a"), [.characterSequence("&a")])

        // attribute special case: inside an attribute value, a reference without a
        // trailing semicolon that is followed by an alphanumeric is left undecoded
        XCTAssertEqual(tokenize("<a a='&nota' />"), [.startTag("a", selfClosing: true, attributes: [Attribute(name: "a", value: "&nota")])])
    }

    /// Numeric character references in hexadecimal and decimal notation.
    func testNumericCharacterReference() {
        // U+0021 written in hexadecimal and in decimal
        XCTAssertEqual(tokenize("&#x21;"), [.characterSequence("!")])
        XCTAssertEqual(tokenize("&#33;"), [.characterSequence("!")])
        // U+004A with lowercase and uppercase hex digits
        XCTAssertEqual(tokenize("&#x4a;"), [.characterSequence("J")])
        XCTAssertEqual(tokenize("&#x4A;"), [.characterSequence("J")])
    }

    /// Start tags: plain, self-closing, and with each attribute quoting style.
    func testStartTag() {
        XCTAssertEqual(tokenize("<asdf>"), [.startTag("asdf", selfClosing: false, attributes: [])])
        XCTAssertEqual(tokenize("<asdf/>"), [.startTag("asdf", selfClosing: true, attributes: [])])
        XCTAssertEqual(tokenize("<asdf />"), [.startTag("asdf", selfClosing: true, attributes: [])])

        // double-quoted attributes
        XCTAssertEqual(tokenize("<asdf a=\"b\" />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])])
        XCTAssertEqual(tokenize("<asdf a=\"&amp;\" />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "&")])])

        // single-quoted attributes
        XCTAssertEqual(tokenize("<asdf a='b' />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])])
        XCTAssertEqual(tokenize("<asdf a='&nbsp;' />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "\u{A0}")])])

        // unquoted attributes
        XCTAssertEqual(tokenize("<asdf a=b />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])])
        XCTAssertEqual(tokenize("<asdf a=&nbsp; />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "\u{A0}")])])
    }

    /// End tags; anything written after the tag name is expected to be dropped.
    func testEndTag() {
        XCTAssertEqual(tokenize("</asdf>"), [.endTag("asdf")])
        XCTAssertEqual(tokenize("</asdf a b='c'>"), [.endTag("asdf")])
    }

    /// Comments, including a malformed opener (`<!-`) and a malformed closer (`->`).
    func testComment() {
        XCTAssertEqual(tokenize("<!-- hello -->"), [.comment(" hello ")])
        XCTAssertEqual(tokenize("<!- hello -->"), [.comment("- hello --")])
        XCTAssertEqual(tokenize("<!-- hello ->"), [.comment(" hello ->")])
    }

    /// Doctypes with no identifiers, a public identifier, and public + system identifiers.
    func testDoctype() {
        XCTAssertEqual(tokenize("<!DOCTYPE html>"), [.doctype("html", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)])
        XCTAssertEqual(tokenize(#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">"#), [.doctype("html", forceQuirks: false, publicIdentifier: "-//W3C//DTD HTML 4.01//EN", systemIdentifier: nil)])
        XCTAssertEqual(tokenize(#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"#), [.doctype("html", forceQuirks: false, publicIdentifier: "-//W3C//DTD HTML 4.01//EN", systemIdentifier: "http://www.w3.org/TR/html4/strict.dtd")])
    }

    /// A single visible character made of two Unicode scalars (a regional-indicator
    /// pair) is expected to come out as one character sequence holding both scalars.
    func testMultiScalar() {
        XCTAssertEqual(tokenize("🇺🇸"), [.characterSequence("\u{1F1FA}\u{1F1F8}")])
    }

    /// Whitespace between an attribute name and `>` yields an attribute with an empty value.
    func testWhitespaceAfterAttributeName() {
        XCTAssertEqual(tokenize("<a foo >"), [.startTag("a", selfClosing: false, attributes: [.init(name: "foo", value: "")])])
    }

}
|
|
|
|
/// Debugging wrapper around another iterator: every call to `next()` is
/// forwarded to `inner`, the result is printed, and then handed back unchanged.
private struct PrintIterator<Inner: IteratorProtocol>: IteratorProtocol {
    typealias Element = Inner.Element

    /// The iterator whose output is being traced.
    var inner: Inner

    /// Advances `inner`, prints what it produced (including the final `nil`),
    /// and returns that same value.
    mutating func next() -> Inner.Element? {
        let produced: Inner.Element? = inner.next()
        let description = String(describing: produced)
        print(description)
        return produced
    }
}
|