2023-11-24 20:18:37 +00:00
//
// TokenizerTests.swift
//
//
// Created by Shadowfacts on 11/22/23.
//
import XCTest
@ testable import HTMLStreamer
/// XCTest cases that feed input strings through `Tokenizer` and compare the produced tokens with expected values.
// NOTE(review): the bare timestamp lines interleaved with the code below look like per-line blame
// metadata left over from an export, not Swift source — confirm and strip them before compiling.
// NOTE(review): tokens and string literals are shown with extra spaces (e.g. the tag name in the
// start-tag expectations); presumably an extraction artifact — verify literals against the original file.
final class TokenizerTests : XCTestCase {
/// Creates a `Tokenizer` over the Unicode scalars of `s` and returns everything it produces as an array.
private func tokenize ( _ s : String ) -> [ Token ] {
2023-11-28 16:56:56 +00:00
let iterator = Tokenizer ( chars : s . unicodeScalars . makeIterator ( ) )
2023-11-24 20:18:37 +00:00
// Debugging variant that prints every token as it is pulled:
// let iterator = PrintIterator(inner: Tokenizer(chars: s.makeIterator()))
// Wrap the iterator in a sequence so `Array` can drain it.
return Array ( AnySequence ( { iterator } ) )
}
/// Inputs containing `&` / `¬` sequences, grouped by the parse-error names in the comments below,
/// plus one case inside an attribute value.
// NOTE(review): several inputs here are textually identical to their expected output, and the two
// `¬in` assertions are identical to each other. That suggests character references in the literals
// were already decoded by whatever produced this text — verify against the original file.
func testNamedCharacterReferences ( ) {
2023-12-23 16:48:31 +00:00
XCTAssertEqual ( tokenize ( " & " ) , [ . characterSequence ( " & " ) ] )
2023-11-24 20:18:37 +00:00
// missing-semicolon-after-character-reference:
2023-12-23 16:48:31 +00:00
XCTAssertEqual ( tokenize ( " ¬in " ) , [ . characterSequence ( " ¬ " ) , . characterSequence ( " in " ) ] )
XCTAssertEqual ( tokenize ( " ¬in " ) , [ . characterSequence ( " ¬ " ) , . characterSequence ( " in " ) ] )
2023-11-24 20:18:37 +00:00
// unknown-named-character-reference:
2023-12-23 16:48:31 +00:00
XCTAssertEqual ( tokenize ( " ¬it; " ) , [ . characterSequence ( " ¬ " ) , . characterSequence ( " it; " ) ] )
XCTAssertEqual ( tokenize ( " &asdf " ) , [ . characterSequence ( " &asdf " ) ] )
XCTAssertEqual ( tokenize ( " &a " ) , [ . characterSequence ( " &a " ) ] )
2023-11-26 23:26:15 +00:00
// attribute special case
XCTAssertEqual ( tokenize ( " <a a='¬a' /> " ) , [ . startTag ( " a " , selfClosing : true , attributes : [ Attribute ( name : " a " , value : " ¬a " ) ] ) ] )
2023-11-24 20:18:37 +00:00
}
/// Numeric character reference cases.
// NOTE(review): as shown, both assertions are identical and contain no numeric reference;
// presumably the literals originally held references that were decoded in transit — verify.
func testNumericCharacterReference ( ) {
2023-12-23 16:48:31 +00:00
XCTAssertEqual ( tokenize ( " ! " ) , [ . characterSequence ( " ! " ) ] )
XCTAssertEqual ( tokenize ( " ! " ) , [ . characterSequence ( " ! " ) ] )
2023-11-24 20:18:37 +00:00
}
/// Start tags: plain, self-closing, and with double-quoted, single-quoted and unquoted attribute values.
func testStartTag ( ) {
XCTAssertEqual ( tokenize ( " <asdf> " ) , [ . startTag ( " asdf " , selfClosing : false , attributes : [ ] ) ] )
XCTAssertEqual ( tokenize ( " <asdf/> " ) , [ . startTag ( " asdf " , selfClosing : true , attributes : [ ] ) ] )
XCTAssertEqual ( tokenize ( " <asdf /> " ) , [ . startTag ( " asdf " , selfClosing : true , attributes : [ ] ) ] )
// double-quoted attributes
XCTAssertEqual ( tokenize ( " <asdf a= \" b \" /> " ) , [ . startTag ( " asdf " , selfClosing : true , attributes : [ . init ( name : " a " , value : " b " ) ] ) ] )
XCTAssertEqual ( tokenize ( " <asdf a= \" & \" /> " ) , [ . startTag ( " asdf " , selfClosing : true , attributes : [ . init ( name : " a " , value : " & " ) ] ) ] )
// single-quoted attributes
XCTAssertEqual ( tokenize ( " <asdf a='b' /> " ) , [ . startTag ( " asdf " , selfClosing : true , attributes : [ . init ( name : " a " , value : " b " ) ] ) ] )
XCTAssertEqual ( tokenize ( " <asdf a=' ' /> " ) , [ . startTag ( " asdf " , selfClosing : true , attributes : [ . init ( name : " a " , value : " " ) ] ) ] )
// unquoted attributes
XCTAssertEqual ( tokenize ( " <asdf a=b /> " ) , [ . startTag ( " asdf " , selfClosing : true , attributes : [ . init ( name : " a " , value : " b " ) ] ) ] )
XCTAssertEqual ( tokenize ( " <asdf a=  /> " ) , [ . startTag ( " asdf " , selfClosing : true , attributes : [ . init ( name : " a " , value : " " ) ] ) ] )
}
/// End tags; the second case expects attributes written on an end tag to be absent from the token.
func testEndTag ( ) {
XCTAssertEqual ( tokenize ( " </asdf> " ) , [ . endTag ( " asdf " ) ] )
XCTAssertEqual ( tokenize ( " </asdf a b='c'> " ) , [ . endTag ( " asdf " ) ] )
}
/// Comments: a well-formed one, then two inputs whose opening or closing delimiter is one dash short.
func testComment ( ) {
XCTAssertEqual ( tokenize ( " <!-- hello --> " ) , [ . comment ( " hello " ) ] )
XCTAssertEqual ( tokenize ( " <!- hello --> " ) , [ . comment ( " - hello -- " ) ] )
XCTAssertEqual ( tokenize ( " <!-- hello -> " ) , [ . comment ( " hello -> " ) ] )
}
/// DOCTYPE declarations: name only, with a public identifier, and with both public and system identifiers.
// NOTE(review): the text after `//` inside the raw string literals in the last two assertions has a
// space between every character, which looks like an extraction artifact — verify the literals
// against the original file.
func testDoctype ( ) {
XCTAssertEqual ( tokenize ( " <!DOCTYPE html> " ) , [ . doctype ( " html " , forceQuirks : false , publicIdentifier : nil , systemIdentifier : nil ) ] )
XCTAssertEqual ( tokenize ( # " <!DOCTYPE HTML PUBLIC " -// W3C // D T D H T M L 4 . 0 1 / / E N " > " # ) , [ . d o c t y p e ( " h t m l " , f o r c e Q u i r k s : f a l s e , p u b l i c I d e n t i f i e r : " - / / W 3 C / / D T D H T M L 4 . 0 1 / / E N " , s y s t e m I d e n t i f i e r : n i l ) ] )
XCTAssertEqual ( tokenize ( # " <!DOCTYPE HTML PUBLIC " -// W3C // D T D H T M L 4 . 0 1 / / E N " " h t t p : / / w w w . w 3 . o r g / T R / h t m l 4 / s t r i c t . d t d " > " # ) , [ . d o c t y p e ( " h t m l " , f o r c e Q u i r k s : f a l s e , p u b l i c I d e n t i f i e r : " - / / W 3 C / / D T D H T M L 4 . 0 1 / / E N " , s y s t e m I d e n t i f i e r : " h t t p : / / w w w . w 3 . o r g / T R / h t m l 4 / s t r i c t . d t d " ) ] )
}
2023-11-28 16:56:56 +00:00
/// A flag emoji written as a single literal is expected back as one character sequence
/// containing the scalars U+1F1FA and U+1F1F8.
func testMultiScalar ( ) {
2023-11-29 01:58:01 +00:00
XCTAssertEqual ( tokenize ( " 🇺🇸 " ) , [ . characterSequence ( " \ u{1F1FA} \ u{1F1F8} " ) ] )
2023-11-28 16:56:56 +00:00
}
2023-11-24 20:18:37 +00:00
}
/// Debugging wrapper around another iterator that prints every element as it is pulled.
private struct PrintIterator<Inner: IteratorProtocol>: IteratorProtocol {
    typealias Element = Inner.Element

    /// The wrapped iterator that actually produces the elements.
    var inner: Inner

    /// Advances `inner`, prints the `String(describing:)` form of whatever it returned
    /// (including the terminating `nil`), and hands that value back unchanged.
    mutating func next() -> Element? {
        let pulled = inner.next()
        let description = String(describing: pulled)
        print(description)
        return pulled
    }
}