Improve URL entry heuristics, add punycode encoding for UTF-8 hosts

Uses URI fixup code from firefox-ios
This commit is contained in:
Shadowfacts 2021-08-30 20:53:57 -04:00
parent 3055cc339f
commit 66f318f0e7
Signed by: shadowfacts
GPG Key ID: 94A5AB95422746E5
3 changed files with 239 additions and 5 deletions

View File

@ -80,11 +80,8 @@ class NavigationBarView: UIView {
textField.resignFirstResponder() textField.resignFirstResponder()
if let text = textField.text, if let text = textField.text,
!text.isEmpty, !text.isEmpty,
var components = URLComponents(string: text) { let url = URIFixup.getURL(text) {
if components.scheme == nil { navigator.changeURL(url)
components.scheme = "gemini"
}
navigator.changeURL(components.url!)
} else { } else {
textField.text = navigator.displayURL textField.text = navigator.displayURL
} }

225
Gemini-iOS/Vendor/URIFixup.swift vendored Normal file
View File

@ -0,0 +1,225 @@
//
// URIFixup.swift
// Gemini
//
// Created by Shadowfacts on 8/30/21.
//
// This file is based on URIFixup & co. from the firefox-ios project, licensed under MPLv2.
// https://github.com/mozilla-mobile/firefox-ios/blob/8e796aa8ed70395f104ed83ac72c32fc2aba54ea/Client/Frontend/Browser/URIFixup.swift
import Foundation
/// Turns free-form text typed into the URL field into a navigable URL.
///
/// Input that already carries a recognised scheme is used as-is; input that
/// looks like a bare host is treated as a "gemini://" URL. Anything else is
/// rejected by returning `nil`.
class URIFixup {
    /// Interprets `entry` as a URL.
    ///
    /// - Parameter entry: Raw user input. Leading and trailing whitespace and
    ///   newlines are ignored.
    /// - Returns: The resulting URL, or `nil` when the input can neither be
    ///   parsed with a valid scheme nor be turned into a "gemini://" URL
    ///   with a host.
    static func getURL(_ entry: String) -> URL? {
        let trimmed = entry.trimmingCharacters(in: .whitespacesAndNewlines)
        guard let percentEncoded = trimmed.addingPercentEncoding(withAllowedCharacters: .URLAllowed) else {
            return nil
        }
        let escaped = replaceBrackets(url: percentEncoded)

        // First check if the URL includes a scheme. This will handle
        // all valid requests starting with "http://", "about:", etc.
        // However, we ensure that the scheme is one that is listed in
        // the official URI scheme list, so that other such search phrases
        // like "filetype:" are recognised as searches rather than URLs.
        if let url = punycodedURL(escaped), url.schemeIsValid {
            return url
        }

        // If there's no scheme, we're going to prepend "gemini://". First,
        // make sure there's at least one "." in the host. This means
        // we'll allow single-word searches (e.g., "foo") at the expense
        // of breaking single-word hosts without a scheme (e.g., "localhost").
        if trimmed.range(of: ".") == nil {
            return nil
        }

        // Input with an interior space is not treated as a host.
        if trimmed.range(of: " ") != nil {
            return nil
        }

        // If there is a ".", prepend "gemini://" and try again. Since this
        // is strictly a "gemini://" URL, we also require a host.
        if let url = punycodedURL("gemini://\(escaped)"), url.host != nil {
            return url
        }

        return nil
    }

    /// Parses `string` as a URL and rewrites its host through `utf8HostToAscii()`.
    ///
    /// When the string contains more than one "#", every "#" after the first
    /// is percent-encoded before parsing.
    ///
    /// - Returns: The rebuilt URL, or `nil` if the string cannot be parsed or
    ///   the components cannot be turned back into a URL.
    static func punycodedURL(_ string: String) -> URL? {
        var candidate = string
        if candidate.filter({ $0 == "#" }).count > 1 {
            candidate = replaceHashMarks(url: candidate)
        }

        guard let url = URL(string: candidate) else { return nil }

        var components = URLComponents(url: url, resolvingAgainstBaseURL: false)
        let asciiHost = components?.host?.utf8HostToAscii()
        components?.host = asciiHost
        return components?.url
    }

    /// Percent-encodes every "[" and "]" in `url`.
    static func replaceBrackets(url: String) -> String {
        return url
            .replacingOccurrences(of: "[", with: "%5B")
            .replacingOccurrences(of: "]", with: "%5D")
    }

    /// Percent-encodes every "#" after the first one, leaving the first "#"
    /// in place.
    ///
    /// - Returns: The rewritten string. A string without any "#" is returned
    ///   unchanged rather than being discarded.
    static func replaceHashMarks(url: String) -> String {
        guard let firstIndex = url.firstIndex(of: "#") else { return url }
        let afterFirstHash = url.index(after: firstIndex)
        return url.replacingOccurrences(of: "#", with: "%23", range: afterFirstHash..<url.endIndex)
    }
}
private extension CharacterSet {
    /// Characters that are left untouched when user input is percent-encoded:
    /// ASCII letters and digits, "-._~", ":/?#[]@", "!$&'()*+,;=" and "%".
    static let URLAllowed: CharacterSet = {
        let unreserved = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
        let generalDelimiters = ":/?#[]@"
        let subDelimiters = "!$&'()*+,;="
        let percentSign = "%"
        return CharacterSet(charactersIn: unreserved + generalDelimiters + subDelimiters + percentSign)
    }()
}
// The list of permanent URI schemes has been taken from http://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
private let permanentURISchemes = ["aaa", "aaas", "about", "acap", "acct", "cap", "cid", "coap", "coaps", "crid", "data", "dav", "dict", "dns", "example", "file", "ftp", "geo", "go", "gopher", "h323", "http", "https", "iax", "icap", "im", "imap", "info", "ipp", "ipps", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs", "jabber", "javascript", "ldap", "mailto", "mid", "msrp", "msrps", "mtqp", "mupdate", "news", "nfs", "ni", "nih", "nntp", "opaquelocktoken", "pkcs11", "pop", "pres", "reload", "rtsp", "rtsps", "rtspu", "service", "session", "shttp", "sieve", "sip", "sips", "sms", "snmp", "soap.beep", "soap.beeps", "stun", "stuns", "tag", "tel", "telnet", "tftp", "thismessage", "tip", "tn3270", "turn", "turns", "tv", "urn", "vemmi", "vnc", "ws", "wss", "xcon", "xcon-userid", "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "z39.50r", "z39.50s"]

private extension URL {
    /// Whether this URL has a scheme that should be treated as a real URL
    /// scheme rather than as part of a search phrase.
    ///
    /// The scheme is compared case-insensitively against "gemini" and the
    /// permanent URI scheme list. A URL that consists of nothing but the
    /// scheme and a colon (e.g. "http:" or "HTTP:") is not considered valid.
    var schemeIsValid: Bool {
        guard let lowercasedScheme = scheme?.lowercased() else { return false }

        // we care about gemini, unlike firefox
        let schemeMatches = lowercasedScheme == "gemini" || permanentURISchemes.contains(lowercasedScheme)

        // Both sides of the comparison must be lowercased; comparing against
        // the original-case scheme would let "HTTP:" slip through.
        let isBareScheme = self.absoluteURL.absoluteString.lowercased() == lowercasedScheme + ":"
        return schemeMatches && !isBareScheme
    }
}
// MARK: - Punycode encoding of host labels

// Parameter values used by the encoder below; they match the Punycode
// parameters given in RFC 3492.
private let base = 36
private let tMin = 1
private let tMax = 26
private let initialBias = 72
private let initialN: Int = 128 // 0x80
private let delimiter: Character = "-" // '\x2D'
private let prefixPunycode = "xn--"
// Digit alphabet: indices 0-25 map to "a"-"z", 26-35 map to "0"-"9".
private let asciiPunycode = Array("abcdefghijklmnopqrstuvwxyz0123456789")

private extension String {
    /// Returns the Punycode digit character for a digit value in `0..<base`.
    func toValue(_ index: Int) -> Character {
        return asciiPunycode[index]
    }

    /// Computes the next bias from the delta that was just encoded.
    ///
    /// - Parameters:
    ///   - delta: The delta that was just written out.
    ///   - numPoints: Number of code points handled so far, including the
    ///     one just encoded.
    ///   - firstTime: `true` for the first non-ASCII code point, which is
    ///     damped more strongly.
    func adapt(_ delta: Int, numPoints: Int, firstTime: Bool) -> Int {
        let skew = 38
        let damp = firstTime ? 700 : 2

        var scaled = delta / damp
        scaled += scaled / numPoints

        var k = 0
        while scaled > ((base - tMin) * tMax) / 2 {
            scaled /= (base - tMin)
            k += base
        }
        return k + ((base - tMin + 1) * scaled) / (scaled + skew)
    }

    /// Punycode-encodes a single label, without the "xn--" prefix.
    ///
    /// ASCII scalars are copied to the front of the output in their original
    /// order. If the label contains only ASCII, that copy is returned as-is.
    /// Otherwise a "-" delimiter follows the ASCII part (when there is one)
    /// and the remaining scalars are encoded as variable-length integers.
    func encode(_ input: String) -> String {
        let scalars = Array(input.unicodeScalars)

        var output = ""
        for scalar in scalars where scalar.isASCII {
            output.unicodeScalars.append(scalar)
        }

        let basicCount = output.unicodeScalars.count
        if basicCount == scalars.count {
            // Nothing outside ASCII: there is nothing to encode.
            return output
        }
        if basicCount > 0 {
            output.append(delimiter)
        }

        var n = initialN
        var delta = 0
        var bias = initialBias
        var handled = basicCount

        while handled < scalars.count {
            // Smallest code point that has not been encoded yet. One always
            // exists while `handled < scalars.count`; the guard only protects
            // against looping forever should that ever stop holding.
            guard let next = scalars.lazy.map({ Int($0.value) }).filter({ $0 >= n }).min() else {
                break
            }

            delta += (next - n) * (handled + 1)
            n = next

            for scalar in scalars {
                let codePoint = Int(scalar.value)
                if codePoint < n {
                    // Already emitted (ASCII or a smaller code point): it
                    // only advances the position counter.
                    delta += 1
                } else if codePoint == n {
                    // Write `delta` as a variable-length integer.
                    var q = delta
                    var k = base
                    while true {
                        let t = max(min(k - bias, tMax), tMin)
                        if q < t {
                            break
                        }
                        output.append(toValue(t + ((q - t) % (base - t))))
                        q = (q - t) / (base - t)
                        k += base
                    }
                    output.append(toValue(q))

                    bias = adapt(delta, numPoints: handled + 1, firstTime: handled == basicCount)
                    delta = 0
                    handled += 1
                }
            }

            delta += 1
            n += 1
        }
        return output
    }

    /// Whether every Unicode scalar of `s` is in the ASCII range.
    func containsOnlyASCII(_ s: String) -> Bool {
        return s.unicodeScalars.allSatisfy { $0.isASCII }
    }

    /// Converts a host name to its ASCII form.
    ///
    /// The host is split on "."; each label that contains a non-ASCII scalar
    /// is replaced by "xn--" followed by its Punycode encoding. Labels that
    /// are already ASCII are kept as they are, and a host that is entirely
    /// ASCII is returned unchanged.
    func utf8HostToAscii() -> String {
        if containsOnlyASCII(self) {
            return self
        }

        let asciiLabels = components(separatedBy: ".").map { label -> String in
            containsOnlyASCII(label) ? label : prefixPunycode + encode(label)
        }
        return asciiLabels.joined(separator: ".")
    }
}

View File

@ -36,6 +36,7 @@
D62664EE24BC0BCE00DF9B88 /* MaybeLazyVStack.swift in Sources */ = {isa = PBXBuildFile; fileRef = D62664ED24BC0BCE00DF9B88 /* MaybeLazyVStack.swift */; }; D62664EE24BC0BCE00DF9B88 /* MaybeLazyVStack.swift in Sources */ = {isa = PBXBuildFile; fileRef = D62664ED24BC0BCE00DF9B88 /* MaybeLazyVStack.swift */; };
D62664F024BC0D7700DF9B88 /* GeminiFormat.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D62664A824BBF26A00DF9B88 /* GeminiFormat.framework */; }; D62664F024BC0D7700DF9B88 /* GeminiFormat.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D62664A824BBF26A00DF9B88 /* GeminiFormat.framework */; };
D62664FA24BC12BC00DF9B88 /* DocumentTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D62664F924BC12BC00DF9B88 /* DocumentTests.swift */; }; D62664FA24BC12BC00DF9B88 /* DocumentTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D62664F924BC12BC00DF9B88 /* DocumentTests.swift */; };
D6376A7026DDAF65005AD89C /* URIFixup.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6376A6F26DDAF65005AD89C /* URIFixup.swift */; };
D653F40B267996FF004E32B1 /* ActivityItemSource.swift in Sources */ = {isa = PBXBuildFile; fileRef = D653F40A267996FF004E32B1 /* ActivityItemSource.swift */; }; D653F40B267996FF004E32B1 /* ActivityItemSource.swift in Sources */ = {isa = PBXBuildFile; fileRef = D653F40A267996FF004E32B1 /* ActivityItemSource.swift */; };
D653F40D26799F2F004E32B1 /* HomepagePrefView.swift in Sources */ = {isa = PBXBuildFile; fileRef = D653F40C26799F2F004E32B1 /* HomepagePrefView.swift */; }; D653F40D26799F2F004E32B1 /* HomepagePrefView.swift in Sources */ = {isa = PBXBuildFile; fileRef = D653F40C26799F2F004E32B1 /* HomepagePrefView.swift */; };
D653F40F2679A0AB004E32B1 /* SetHomepageActivity.swift in Sources */ = {isa = PBXBuildFile; fileRef = D653F40E2679A0AB004E32B1 /* SetHomepageActivity.swift */; }; D653F40F2679A0AB004E32B1 /* SetHomepageActivity.swift in Sources */ = {isa = PBXBuildFile; fileRef = D653F40E2679A0AB004E32B1 /* SetHomepageActivity.swift */; };
@ -306,6 +307,7 @@
D62664EB24BC0B4D00DF9B88 /* DocumentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DocumentView.swift; sourceTree = "<group>"; }; D62664EB24BC0B4D00DF9B88 /* DocumentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DocumentView.swift; sourceTree = "<group>"; };
D62664ED24BC0BCE00DF9B88 /* MaybeLazyVStack.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MaybeLazyVStack.swift; sourceTree = "<group>"; }; D62664ED24BC0BCE00DF9B88 /* MaybeLazyVStack.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MaybeLazyVStack.swift; sourceTree = "<group>"; };
D62664F924BC12BC00DF9B88 /* DocumentTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DocumentTests.swift; sourceTree = "<group>"; }; D62664F924BC12BC00DF9B88 /* DocumentTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DocumentTests.swift; sourceTree = "<group>"; };
D6376A6F26DDAF65005AD89C /* URIFixup.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = URIFixup.swift; sourceTree = "<group>"; };
D653F40A267996FF004E32B1 /* ActivityItemSource.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ActivityItemSource.swift; sourceTree = "<group>"; }; D653F40A267996FF004E32B1 /* ActivityItemSource.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ActivityItemSource.swift; sourceTree = "<group>"; };
D653F40C26799F2F004E32B1 /* HomepagePrefView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HomepagePrefView.swift; sourceTree = "<group>"; }; D653F40C26799F2F004E32B1 /* HomepagePrefView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HomepagePrefView.swift; sourceTree = "<group>"; };
D653F40E2679A0AB004E32B1 /* SetHomepageActivity.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SetHomepageActivity.swift; sourceTree = "<group>"; }; D653F40E2679A0AB004E32B1 /* SetHomepageActivity.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SetHomepageActivity.swift; sourceTree = "<group>"; };
@ -576,6 +578,14 @@
name = Frameworks; name = Frameworks;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
D6376A6E26DDAF57005AD89C /* Vendor */ = {
isa = PBXGroup;
children = (
D6376A6F26DDAF65005AD89C /* URIFixup.swift */,
);
path = Vendor;
sourceTree = "<group>";
};
D688F618258AD231003A0A73 /* Resources */ = { D688F618258AD231003A0A73 /* Resources */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
@ -603,6 +613,7 @@
D653F40A267996FF004E32B1 /* ActivityItemSource.swift */, D653F40A267996FF004E32B1 /* ActivityItemSource.swift */,
D653F40E2679A0AB004E32B1 /* SetHomepageActivity.swift */, D653F40E2679A0AB004E32B1 /* SetHomepageActivity.swift */,
D688F618258AD231003A0A73 /* Resources */, D688F618258AD231003A0A73 /* Resources */,
D6376A6E26DDAF57005AD89C /* Vendor */,
D6E152AA24BFFDF600FDF9D3 /* Assets.xcassets */, D6E152AA24BFFDF600FDF9D3 /* Assets.xcassets */,
D6E152AF24BFFDF600FDF9D3 /* LaunchScreen.storyboard */, D6E152AF24BFFDF600FDF9D3 /* LaunchScreen.storyboard */,
D6E152B224BFFDF600FDF9D3 /* Info.plist */, D6E152B224BFFDF600FDF9D3 /* Info.plist */,
@ -1120,6 +1131,7 @@
D688F633258B09BB003A0A73 /* TrackpadScrollGestureRecognizer.swift in Sources */, D688F633258B09BB003A0A73 /* TrackpadScrollGestureRecognizer.swift in Sources */,
D6E152A524BFFDF500FDF9D3 /* AppDelegate.swift in Sources */, D6E152A524BFFDF500FDF9D3 /* AppDelegate.swift in Sources */,
D6E152A724BFFDF500FDF9D3 /* SceneDelegate.swift in Sources */, D6E152A724BFFDF500FDF9D3 /* SceneDelegate.swift in Sources */,
D6376A7026DDAF65005AD89C /* URIFixup.swift in Sources */,
D653F40B267996FF004E32B1 /* ActivityItemSource.swift in Sources */, D653F40B267996FF004E32B1 /* ActivityItemSource.swift in Sources */,
D6BC9AB3258E8E13008652BC /* ToolbarView.swift in Sources */, D6BC9AB3258E8E13008652BC /* ToolbarView.swift in Sources */,
D688F64A258C17F3003A0A73 /* SymbolCache.swift in Sources */, D688F64A258C17F3003A0A73 /* SymbolCache.swift in Sources */,