From 66f318f0e721e91bfa9ef578c5e4f0e9b6c72537 Mon Sep 17 00:00:00 2001 From: Shadowfacts Date: Mon, 30 Aug 2021 20:53:57 -0400 Subject: [PATCH] Improve URL entry heuristics, add punycode encoding for UTF-8 hosts Uses URI fixup code from firefox-ios --- Gemini-iOS/NavigationBarView.swift | 7 +- Gemini-iOS/Vendor/URIFixup.swift | 225 +++++++++++++++++++++++++++++ Gemini.xcodeproj/project.pbxproj | 12 ++ 3 files changed, 239 insertions(+), 5 deletions(-) create mode 100644 Gemini-iOS/Vendor/URIFixup.swift diff --git a/Gemini-iOS/NavigationBarView.swift b/Gemini-iOS/NavigationBarView.swift index 8030106..16cb627 100644 --- a/Gemini-iOS/NavigationBarView.swift +++ b/Gemini-iOS/NavigationBarView.swift @@ -80,11 +80,8 @@ class NavigationBarView: UIView { textField.resignFirstResponder() if let text = textField.text, !text.isEmpty, - var components = URLComponents(string: text) { - if components.scheme == nil { - components.scheme = "gemini" - } - navigator.changeURL(components.url!) + let url = URIFixup.getURL(text) { + navigator.changeURL(url) } else { textField.text = navigator.displayURL } diff --git a/Gemini-iOS/Vendor/URIFixup.swift b/Gemini-iOS/Vendor/URIFixup.swift new file mode 100644 index 0000000..f076744 --- /dev/null +++ b/Gemini-iOS/Vendor/URIFixup.swift @@ -0,0 +1,225 @@ +// +// URIFixup.swift +// Gemini +// +// Created by Shadowfacts on 8/30/21. +// +// This file is based on URIFixup & co. from the firefox-ios project, licensed under MPLv2. +// https://github.com/mozilla-mobile/firefox-ios/blob/8e796aa8ed70395f104ed83ac72c32fc2aba54ea/Client/Frontend/Browser/URIFixup.swift + +import Foundation + +class URIFixup { + static func getURL(_ entry: String) -> URL? { + + let trimmed = entry.trimmingCharacters(in: .whitespacesAndNewlines) + guard var escaped = trimmed.addingPercentEncoding(withAllowedCharacters: .URLAllowed) else { + return nil + } + escaped = replaceBrackets(url: escaped) + + // Then check if the URL includes a scheme. This will handle + // all valid requests starting with "http://", "about:", etc. + // However, we ensure that the scheme is one that is listed in + // the official URI scheme list, so that other such search phrases + // like "filetype:" are recognised as searches rather than URLs. + if let url = punycodedURL(escaped), url.schemeIsValid { + return url + } + + // If there's no scheme, we're going to prepend "gemini://". First, + // make sure there's at least one "." in the host. This means + // we'll allow single-word searches (e.g., "foo") at the expense + // of breaking single-word hosts without a scheme (e.g., "localhost"). + if trimmed.range(of: ".") == nil { + return nil + } + + if trimmed.range(of: " ") != nil { + return nil + } + + // If there is a ".", prepend "gemini://" and try again. Since this + // is strictly an "gemini://" URL, we also require a host. + if let url = punycodedURL("gemini://\(escaped)"), url.host != nil { + return url + } + + return nil + } + + static func punycodedURL(_ string: String) -> URL? { + var string = string + if string.filter({ $0 == "#" }).count > 1 { + string = replaceHashMarks(url: string) + } + + guard let url = URL(string: string) else { return nil } + + var components = URLComponents(url: url, resolvingAgainstBaseURL: false) + let host = components?.host?.utf8HostToAscii() + components?.host = host + return components?.url + } + + static func replaceBrackets(url: String) -> String { + return url.replacingOccurrences(of: "[", with: "%5B").replacingOccurrences(of: "]", with: "%5D") + } + + static func replaceHashMarks(url: String) -> String { + guard let firstIndex = url.firstIndex(of: "#") else { return String() } + let start = url.index(firstIndex, offsetBy: 1) + return url.replacingOccurrences(of: "#", with: "%23", range: start.. Character { + return asciiPunycode[index] + } + + func adapt(_ delta: Int, numPoints: Int, firstTime: Bool) -> Int { + let skew = 38 + let damp = firstTime ? 700 : 2 + var delta = delta + delta = delta / damp + delta += delta / numPoints + var k = 0 + while delta > ((base - tMin) * tMax) / 2 { + delta /= (base - tMin) + k += base + } + return k + ((base - tMin + 1) * delta) / (delta + skew) + } + + func encode(_ input: String) -> String { + var output = "" + var d: Int = 0 + var extendedChars = [Int]() + for c in input.unicodeScalars { + if Int(c.value) < initialN { + d += 1 + output.append(String(c)) + } else { + extendedChars.append(Int(c.value)) + } + } + if extendedChars.count == 0 { + return output + } + if d > 0 { + output.append(delimiter) + } + + var n = initialN + var delta = 0 + var bias = initialBias + var h: Int = 0 + var b: Int = 0 + + if d > 0 { + h = output.unicodeScalars.count - 1 + b = output.unicodeScalars.count - 1 + } else { + h = output.unicodeScalars.count + b = output.unicodeScalars.count + } + + while h < input.unicodeScalars.count { + var char = Int(0x7fffffff) + for c in input.unicodeScalars { + let ci = Int(c.value) + if char > ci && ci >= n { + char = ci + } + } + delta = delta + (char - n) * (h + 1) + if delta < 0 { + print("error: invalid char:") + output = "" + return output + } + n = char + for c in input.unicodeScalars { + let ci = Int(c.value) + if ci < n || ci < initialN { + delta += 1 + continue + } + if ci > n { + continue + } + var q = delta + var k = base + while true { + let t = max(min(k - bias, tMax), tMin) + if q < t { + break + } + let code = t + ((q - t) % (base - t)) + output.append(toValue(code)) + q = (q - t) / (base - t) + k += base + } + output.append(toValue(q)) + bias = self.adapt(delta, numPoints: h + 1, firstTime: h == b) + delta = 0 + h += 1 + } + delta += 1 + n += 1 + } + return output + } + + func isValidUnicodeScala(_ s: String) -> Bool { + for c in s.unicodeScalars { + let ci = Int(c.value) + if ci >= initialN { + return false + } + } + return true + } + + func utf8HostToAscii() -> String { + if isValidUnicodeScala(self) { + return self + } + var labels = self.components(separatedBy: ".") + for (i, part) in labels.enumerated() { + if !isValidUnicodeScala(part) { + let a = encode(part) + labels[i] = prefixPunycode + a + } + } + let resultString = labels.joined(separator: ".") + return resultString + } +} diff --git a/Gemini.xcodeproj/project.pbxproj b/Gemini.xcodeproj/project.pbxproj index a665513..c2a1cb1 100644 --- a/Gemini.xcodeproj/project.pbxproj +++ b/Gemini.xcodeproj/project.pbxproj @@ -36,6 +36,7 @@ D62664EE24BC0BCE00DF9B88 /* MaybeLazyVStack.swift in Sources */ = {isa = PBXBuildFile; fileRef = D62664ED24BC0BCE00DF9B88 /* MaybeLazyVStack.swift */; }; D62664F024BC0D7700DF9B88 /* GeminiFormat.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D62664A824BBF26A00DF9B88 /* GeminiFormat.framework */; }; D62664FA24BC12BC00DF9B88 /* DocumentTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D62664F924BC12BC00DF9B88 /* DocumentTests.swift */; }; + D6376A7026DDAF65005AD89C /* URIFixup.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6376A6F26DDAF65005AD89C /* URIFixup.swift */; }; D653F40B267996FF004E32B1 /* ActivityItemSource.swift in Sources */ = {isa = PBXBuildFile; fileRef = D653F40A267996FF004E32B1 /* ActivityItemSource.swift */; }; D653F40D26799F2F004E32B1 /* HomepagePrefView.swift in Sources */ = {isa = PBXBuildFile; fileRef = D653F40C26799F2F004E32B1 /* HomepagePrefView.swift */; }; D653F40F2679A0AB004E32B1 /* SetHomepageActivity.swift in Sources */ = {isa = PBXBuildFile; fileRef = D653F40E2679A0AB004E32B1 /* SetHomepageActivity.swift */; }; @@ -306,6 +307,7 @@ D62664EB24BC0B4D00DF9B88 /* DocumentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DocumentView.swift; sourceTree = ""; }; D62664ED24BC0BCE00DF9B88 /* MaybeLazyVStack.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MaybeLazyVStack.swift; sourceTree = ""; }; D62664F924BC12BC00DF9B88 /* DocumentTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DocumentTests.swift; sourceTree = ""; }; + D6376A6F26DDAF65005AD89C /* URIFixup.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = URIFixup.swift; sourceTree = ""; }; D653F40A267996FF004E32B1 /* ActivityItemSource.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ActivityItemSource.swift; sourceTree = ""; }; D653F40C26799F2F004E32B1 /* HomepagePrefView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HomepagePrefView.swift; sourceTree = ""; }; D653F40E2679A0AB004E32B1 /* SetHomepageActivity.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SetHomepageActivity.swift; sourceTree = ""; }; @@ -576,6 +578,14 @@ name = Frameworks; sourceTree = ""; }; + D6376A6E26DDAF57005AD89C /* Vendor */ = { + isa = PBXGroup; + children = ( + D6376A6F26DDAF65005AD89C /* URIFixup.swift */, + ); + path = Vendor; + sourceTree = ""; + }; D688F618258AD231003A0A73 /* Resources */ = { isa = PBXGroup; children = ( @@ -603,6 +613,7 @@ D653F40A267996FF004E32B1 /* ActivityItemSource.swift */, D653F40E2679A0AB004E32B1 /* SetHomepageActivity.swift */, D688F618258AD231003A0A73 /* Resources */, + D6376A6E26DDAF57005AD89C /* Vendor */, D6E152AA24BFFDF600FDF9D3 /* Assets.xcassets */, D6E152AF24BFFDF600FDF9D3 /* LaunchScreen.storyboard */, D6E152B224BFFDF600FDF9D3 /* Info.plist */, @@ -1120,6 +1131,7 @@ D688F633258B09BB003A0A73 /* TrackpadScrollGestureRecognizer.swift in Sources */, D6E152A524BFFDF500FDF9D3 /* AppDelegate.swift in Sources */, D6E152A724BFFDF500FDF9D3 /* SceneDelegate.swift in Sources */, + D6376A7026DDAF65005AD89C /* URIFixup.swift in Sources */, D653F40B267996FF004E32B1 /* ActivityItemSource.swift in Sources */, D6BC9AB3258E8E13008652BC /* ToolbarView.swift in Sources */, D688F64A258C17F3003A0A73 /* SymbolCache.swift in Sources */,