Gemini/Gemini-iOS/Vendor/URIFixup.swift
2021-08-30 20:53:57 -04:00

226 lines
8.0 KiB
Swift

//
// URIFixup.swift
// Gemini
//
// Created by Shadowfacts on 8/30/21.
//
// This file is based on URIFixup & co. from the firefox-ios project, licensed under MPLv2.
// https://github.com/mozilla-mobile/firefox-ios/blob/8e796aa8ed70395f104ed83ac72c32fc2aba54ea/Client/Frontend/Browser/URIFixup.swift
import Foundation
/// Turns free-form address-bar text into a URL where possible.
///
/// All members are static; the type only serves as a namespace.
class URIFixup {
    /// Interprets `entry` as a URL.
    ///
    /// The entry is trimmed of surrounding whitespace and percent-encoded. If the
    /// result parses as a URL whose scheme passes `schemeIsValid`, that URL is
    /// returned. Otherwise, provided the trimmed text contains a "." and no space,
    /// "gemini://" is prepended and the result is returned if it has a host.
    ///
    /// - Parameter entry: The raw text typed by the user.
    /// - Returns: The fixed-up URL, or `nil` when the text does not look like a URL.
    static func getURL(_ entry: String) -> URL? {
        let trimmed = entry.trimmingCharacters(in: .whitespacesAndNewlines)
        guard let percentEncoded = trimmed.addingPercentEncoding(withAllowedCharacters: .URLAllowed) else {
            return nil
        }
        let escaped = replaceBrackets(url: percentEncoded)

        // Then check if the URL includes a scheme. This will handle
        // all valid requests starting with "http://", "about:", etc.
        // However, we ensure that the scheme is one that is listed in
        // the official URI scheme list, so that other such search phrases
        // like "filetype:" are recognised as searches rather than URLs.
        if let url = punycodedURL(escaped), url.schemeIsValid {
            return url
        }

        // If there's no scheme, we're going to prepend "gemini://". First,
        // make sure there's at least one "." in the host. This means
        // we'll allow single-word searches (e.g., "foo") at the expense
        // of breaking single-word hosts without a scheme (e.g., "localhost").
        // Text with an interior space is never treated as a host either.
        guard trimmed.range(of: ".") != nil, trimmed.range(of: " ") == nil else {
            return nil
        }

        // If there is a ".", prepend "gemini://" and try again. Since this
        // is strictly a "gemini://" URL, we also require a host.
        if let url = punycodedURL("gemini://\(escaped)"), url.host != nil {
            return url
        }
        return nil
    }

    /// Parses `string` as a URL and rewrites its host through `utf8HostToAscii()`.
    ///
    /// When the string contains more than one "#", every "#" after the first is
    /// percent-encoded before parsing.
    ///
    /// - Parameter string: The (already percent-encoded) URL text.
    /// - Returns: The URL with its host converted, or `nil` if the text cannot be
    ///   parsed or the components cannot be reassembled into a URL.
    static func punycodedURL(_ string: String) -> URL? {
        let hashCount = string.filter({ $0 == "#" }).count
        let candidate = hashCount > 1 ? replaceHashMarks(url: string) : string

        guard let url = URL(string: candidate),
              var components = URLComponents(url: url, resolvingAgainstBaseURL: false) else {
            return nil
        }
        components.host = components.host?.utf8HostToAscii()
        return components.url
    }

    /// Percent-encodes every "[" and "]" in `url`.
    static func replaceBrackets(url: String) -> String {
        return url
            .replacingOccurrences(of: "[", with: "%5B")
            .replacingOccurrences(of: "]", with: "%5D")
    }

    /// Percent-encodes every "#" in `url` except the first one.
    ///
    /// - Returns: The rewritten string. A string without any "#" is returned
    ///   unchanged (previously an empty string was returned, discarding the input).
    static func replaceHashMarks(url: String) -> String {
        guard let firstHash = url.firstIndex(of: "#") else { return url }
        let afterFirstHash = url.index(after: firstHash)
        return url.replacingOccurrences(of: "#", with: "%23", range: afterFirstHash..<url.endIndex)
    }
}
private extension CharacterSet {
    /// Characters left untouched when percent-encoding user input: ASCII letters
    /// and digits, the RFC 3986 unreserved marks, the gen-delims and sub-delims,
    /// and "%" itself so that existing percent-escapes are not encoded again.
    static let URLAllowed: CharacterSet = {
        let letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz"
        let digits = "0123456789"
        let unreservedMarks = "-._~"
        let genDelims = ":/?#[]@"
        let subDelims = "!$&'()*+,;="
        let percent = "%"
        return CharacterSet(
            charactersIn: letters + digits + unreservedMarks + genDelims + subDelims + percent
        )
    }()
}
// The list of permanent URI schemes has been taken from http://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
// Stored as a Set because it is only ever used for membership checks.
// Note that "gemini" is not in this list.
private let permanentURISchemes: Set<String> = [
    "aaa", "aaas", "about", "acap", "acct", "cap", "cid", "coap", "coaps", "crid",
    "data", "dav", "dict", "dns", "example", "file", "ftp", "geo", "go", "gopher",
    "h323", "http", "https", "iax", "icap", "im", "imap", "info", "ipp", "ipps",
    "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs", "jabber", "javascript",
    "ldap", "mailto", "mid", "msrp", "msrps", "mtqp", "mupdate", "news", "nfs", "ni",
    "nih", "nntp", "opaquelocktoken", "pkcs11", "pop", "pres", "reload", "rtsp",
    "rtsps", "rtspu", "service", "session", "shttp", "sieve", "sip", "sips", "sms",
    "snmp", "soap.beep", "soap.beeps", "stun", "stuns", "tag", "tel", "telnet", "tftp",
    "thismessage", "tip", "tn3270", "turn", "turns", "tv", "urn", "vemmi", "vnc", "ws",
    "wss", "xcon", "xcon-userid", "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "z39.50r",
    "z39.50s",
]
private extension URL {
    /// Whether this URL has a recognised scheme and is more than a bare "scheme:".
    ///
    /// A scheme is recognised when, compared case-insensitively, it is "gemini"
    /// or appears in `permanentURISchemes`. A URL that consists only of the
    /// scheme followed by ":" is rejected regardless of the scheme's letter case.
    var schemeIsValid: Bool {
        guard let lowercasedScheme = scheme?.lowercased() else { return false }
        // we care about gemini, unlike firefox
        let schemeMatches = lowercasedScheme == "gemini" || permanentURISchemes.contains(lowercasedScheme)
        // Both sides are lowercased, so that e.g. "HTTP:" is rejected just like "http:".
        let isBareScheme = absoluteURL.absoluteString.lowercased() == lowercasedScheme + ":"
        return schemeMatches && !isBareScheme
    }
}
// Parameters of the Punycode encoder below. The values match the parameter
// values given for Punycode in RFC 3492, section 5.

/// Number base of the variable-length integer digits.
private let base = 36
/// Smallest digit threshold.
private let tMin = 1
/// Largest digit threshold.
private let tMax = 26
/// Bias in effect before the first code point has been encoded.
private let initialBias = 72
/// First non-ASCII code point (0x80); everything below it is copied verbatim.
private let initialN = 0x80
/// Separates the copied ASCII characters from the encoded part ('\x2D').
private let delimiter: Character = "-"
/// Prefix placed in front of every encoded label.
private let prefixPunycode = "xn--"
/// Digit alphabet: the index is the digit value (0...35).
private let asciiPunycode: [Character] = Array("abcdefghijklmnopqrstuvwxyz0123456789")
private extension String {
    /// Returns the Punycode digit character for the digit value `index`
    /// (a position in `asciiPunycode`).
    func toValue(_ index: Int) -> Character {
        return asciiPunycode[index]
    }

    /// Computes the next bias from the delta that was just encoded.
    ///
    /// - Parameters:
    ///   - delta: The delta value that has just been written out.
    ///   - numPoints: Number of code points handled so far, including the current one.
    ///   - firstTime: `true` for the first encoded code point, which uses the
    ///     larger damping divisor (700 instead of 2).
    /// - Returns: The new bias.
    func adapt(_ delta: Int, numPoints: Int, firstTime: Bool) -> Int {
        let skew = 38
        let damp = firstTime ? 700 : 2
        let threshold = ((base - tMin) * tMax) / 2

        var scaled = delta / damp
        scaled += scaled / numPoints

        var k = 0
        while scaled > threshold {
            scaled /= (base - tMin)
            k += base
        }
        return k + ((base - tMin + 1) * scaled) / (scaled + skew)
    }

    /// Punycode-encodes `input` (without the "xn--" prefix).
    ///
    /// Scalars below `initialN` are copied to the output first. If there are no
    /// other scalars the copy is returned as is; otherwise a delimiter follows
    /// the copied part (when it is non-empty) and the remaining scalars are
    /// encoded as variable-length integers.
    ///
    /// - Returns: The encoded string, or an empty string if `delta` becomes negative.
    func encode(_ input: String) -> String {
        let codePoints = input.unicodeScalars.map { Int($0.value) }

        // Copy the basic (ASCII) scalars verbatim and count them.
        var output = ""
        var basicCount = 0
        for scalar in input.unicodeScalars where Int(scalar.value) < initialN {
            basicCount += 1
            output.append(String(scalar))
        }

        // Nothing to encode: the input was entirely basic.
        guard codePoints.contains(where: { $0 >= initialN }) else {
            return output
        }
        if basicCount > 0 {
            output.append(delimiter)
        }

        var n = initialN
        var delta = 0
        var bias = initialBias
        // Number of code points already represented in the output.
        var handled = basicCount

        while handled < codePoints.count {
            // Find the smallest code point that has not been handled yet (>= n).
            var smallest = Int(0x7fffffff)
            for codePoint in codePoints where codePoint >= n && codePoint < smallest {
                smallest = codePoint
            }

            delta += (smallest - n) * (handled + 1)
            guard delta >= 0 else {
                print("error: invalid char:")
                return ""
            }
            n = smallest

            for codePoint in codePoints {
                // Code points that are already handled only advance delta.
                guard codePoint >= n, codePoint >= initialN else {
                    delta += 1
                    continue
                }
                // Larger code points are encoded in a later pass.
                guard codePoint == n else {
                    continue
                }

                // Write delta as a variable-length integer.
                var q = delta
                var k = base
                var t = max(min(k - bias, tMax), tMin)
                while q >= t {
                    output.append(toValue(t + ((q - t) % (base - t))))
                    q = (q - t) / (base - t)
                    k += base
                    t = max(min(k - bias, tMax), tMin)
                }
                output.append(toValue(q))

                bias = adapt(delta, numPoints: handled + 1, firstTime: handled == basicCount)
                delta = 0
                handled += 1
            }

            delta += 1
            n += 1
        }
        return output
    }

    /// Returns `true` when every Unicode scalar of `s` is below `initialN`,
    /// i.e. when `s` is plain ASCII.
    func isValidUnicodeScala(_ s: String) -> Bool {
        return s.unicodeScalars.allSatisfy { Int($0.value) < initialN }
    }

    /// Converts a host name to its ASCII form.
    ///
    /// An all-ASCII host is returned unchanged. Otherwise the host is split on
    /// ".", and every label containing non-ASCII scalars is replaced by "xn--"
    /// followed by its Punycode encoding; ASCII labels are kept as they are.
    func utf8HostToAscii() -> String {
        guard !isValidUnicodeScala(self) else {
            return self
        }
        let asciiLabels = components(separatedBy: ".").map { label -> String in
            return isValidUnicodeScala(label) ? label : prefixPunycode + encode(label)
        }
        return asciiLabels.joined(separator: ".")
    }
}