diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..bb614d6 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "lol-html"] + path = lol-html + url = https://github.com/cloudflare/lol-html.git diff --git a/README.md b/README.md new file mode 100644 index 0000000..bd85a70 --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +# Reader + +In order to build reader you need the appropriate targets added to your Rust toolchain. + +```sh +$ rustup target add aarch64-apple-ios aarch64-apple-ios-sim x86_64-apple-ios +``` + +x86_64-apple-ios is only necessary if you're on an Intel Mac, and aarch64-apple-ios-sim if you're on Apple Silicon. + +The Xcode build script will take care of actually building the Rust code. diff --git a/Reader.xcodeproj/project.pbxproj b/Reader.xcodeproj/project.pbxproj index 1a32989..fa2c8d9 100644 --- a/Reader.xcodeproj/project.pbxproj +++ b/Reader.xcodeproj/project.pbxproj @@ -15,6 +15,7 @@ D65B18BC27504FE7004A9448 /* Token.swift in Sources */ = {isa = PBXBuildFile; fileRef = D65B18BB27504FE7004A9448 /* Token.swift */; }; D65B18BE275051A1004A9448 /* LocalData.swift in Sources */ = {isa = PBXBuildFile; fileRef = D65B18BD275051A1004A9448 /* LocalData.swift */; }; D65B18C127505348004A9448 /* HomeViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = D65B18C027505348004A9448 /* HomeViewController.swift */; }; + D68B303627907D9200E8B3FA /* ExcerptGenerator.swift in Sources */ = {isa = PBXBuildFile; fileRef = D68B303527907D9200E8B3FA /* ExcerptGenerator.swift */; }; D6A8A33427766C2800CCEC72 /* PersistentContainer.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6A8A33327766C2800CCEC72 /* PersistentContainer.swift */; }; D6C687EC272CD27600874C10 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6C687EB272CD27600874C10 /* AppDelegate.swift */; }; D6C687EE272CD27600874C10 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6C687ED272CD27600874C10 /* SceneDelegate.swift */; }; 
@@ -41,7 +42,6 @@ D6E2435F278B97240005E546 /* Group+CoreDataClass.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6E2435B278B97240005E546 /* Group+CoreDataClass.swift */; }; D6E24360278B97240005E546 /* Group+CoreDataProperties.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6E2435C278B97240005E546 /* Group+CoreDataProperties.swift */; }; D6E24363278BA1410005E546 /* ItemCollectionViewCell.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6E24361278BA1410005E546 /* ItemCollectionViewCell.swift */; }; - D6E24367278BA2660005E546 /* SwiftSoup in Frameworks */ = {isa = PBXBuildFile; productRef = D6E24366278BA2660005E546 /* SwiftSoup */; }; D6E24369278BABB40005E546 /* UIColor+App.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6E24368278BABB40005E546 /* UIColor+App.swift */; }; D6E2436B278BB1880005E546 /* HomeCollectionViewCell.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6E2436A278BB1880005E546 /* HomeCollectionViewCell.swift */; }; D6E2436E278BD8160005E546 /* ReadViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = D6E2436D278BD8160005E546 /* ReadViewController.swift */; }; @@ -98,6 +98,9 @@ D65B18BB27504FE7004A9448 /* Token.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Token.swift; sourceTree = ""; }; D65B18BD275051A1004A9448 /* LocalData.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocalData.swift; sourceTree = ""; }; D65B18C027505348004A9448 /* HomeViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HomeViewController.swift; sourceTree = ""; }; + D68B3032278FDD1A00E8B3FA /* Reader-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "Reader-Bridging-Header.h"; sourceTree = ""; }; + D68B303527907D9200E8B3FA /* ExcerptGenerator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ExcerptGenerator.swift; sourceTree = ""; }; + 
D68B3037279099FD00E8B3FA /* liblolhtml.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = liblolhtml.a; path = "lol-html/c-api/target/aarch64-apple-ios-sim/release/liblolhtml.a"; sourceTree = ""; }; D6A8A33327766C2800CCEC72 /* PersistentContainer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PersistentContainer.swift; sourceTree = ""; }; D6C687E8272CD27600874C10 /* Reader.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Reader.app; sourceTree = BUILT_PRODUCTS_DIR; }; D6C687EB272CD27600874C10 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; @@ -141,7 +144,6 @@ buildActionMask = 2147483647; files = ( D6C68829272CD2BA00874C10 /* Fervor.framework in Frameworks */, - D6E24367278BA2660005E546 /* SwiftSoup in Frameworks */, D6E24371278BE1250005E546 /* HTMLEntities in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; @@ -199,6 +201,14 @@ path = Home; sourceTree = ""; }; + D68B302E278FDCE200E8B3FA /* Frameworks */ = { + isa = PBXGroup; + children = ( + D68B3037279099FD00E8B3FA /* liblolhtml.a */, + ); + name = Frameworks; + sourceTree = ""; + }; D6A8A33527766E9300CCEC72 /* CoreData */ = { isa = PBXGroup; children = ( @@ -222,6 +232,7 @@ D6C6880E272CD27700874C10 /* ReaderUITests */, D6C68824272CD2BA00874C10 /* Fervor */, D6C687E9272CD27600874C10 /* Products */, + D68B302E278FDCE200E8B3FA /* Frameworks */, ); sourceTree = ""; }; @@ -239,12 +250,14 @@ D6C687EA272CD27600874C10 /* Reader */ = { isa = PBXGroup; children = ( + D68B3032278FDD1A00E8B3FA /* Reader-Bridging-Header.h */, D6C687EB272CD27600874C10 /* AppDelegate.swift */, D6C687ED272CD27600874C10 /* SceneDelegate.swift */, D65B18B527504920004A9448 /* FervorController.swift */, D65B18BD275051A1004A9448 /* LocalData.swift */, D6E24368278BABB40005E546 /* UIColor+App.swift */, D6EB531E278E4A7500AD2E61 /* 
StretchyMenuInteraction.swift */, + D68B303527907D9200E8B3FA /* ExcerptGenerator.swift */, D6A8A33527766E9300CCEC72 /* CoreData */, D65B18AF2750468B004A9448 /* Screens */, D6C687F7272CD27700874C10 /* Assets.xcassets */, @@ -325,6 +338,7 @@ isa = PBXNativeTarget; buildConfigurationList = D6C68815272CD27700874C10 /* Build configuration list for PBXNativeTarget "Reader" */; buildPhases = ( + D68B303B2791D2A900E8B3FA /* Compile lol-html c-api */, D6C687E4272CD27600874C10 /* Sources */, D6C687E5272CD27600874C10 /* Frameworks */, D6C687E6272CD27600874C10 /* Resources */, @@ -337,7 +351,6 @@ ); name = Reader; packageProductDependencies = ( - D6E24366278BA2660005E546 /* SwiftSoup */, D6E24370278BE1250005E546 /* HTMLEntities */, ); productName = Reader; @@ -435,7 +448,6 @@ ); mainGroup = D6C687DF272CD27600874C10; packageReferences = ( - D6E24365278BA2660005E546 /* XCRemoteSwiftPackageReference "SwiftSoup" */, D6E2436F278BE1250005E546 /* XCRemoteSwiftPackageReference "swift-html-entities" */, ); productRefGroup = D6C687E9272CD27600874C10 /* Products */; @@ -484,6 +496,28 @@ }; /* End PBXResourcesBuildPhase section */ +/* Begin PBXShellScriptBuildPhase section */ + D68B303B2791D2A900E8B3FA /* Compile lol-html c-api */ = { + isa = PBXShellScriptBuildPhase; + alwaysOutOfDate = 1; + buildActionMask = 2147483647; + files = ( + ); + inputFileListPaths = ( + ); + inputPaths = ( + ); + name = "Compile lol-html c-api"; + outputFileListPaths = ( + ); + outputPaths = ( + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/bash; + shellScript = "pushd \"$PROJECT_DIR/lol-html/c-api/\"\n\nif [ \"$PLATFORM_NAME\" == \"iphonesimulator\" ]; then\n if [ \"$ARCHS\" == \"arm64\" ]; then\n export CARGO_TARGET=\"aarch64-apple-ios-sim\"\n elif [ \"$ARCHS\" == \"x86_64\" ]; then\n export CARGO_TARGET=\"x86_64-apple-ios\"\n else\n echo \"error: unknown value for \\$ARCHS\"\n fi\nelif [ \"$PLATFORM_NAME\" == \"iphoneos\" ]; then\n export CARGO_TARGET=\"aarch64-apple-ios\"\nfi\n\necho 
\"Building lol-html with CARGO_TARGET: $CARGO_TARGET\"\n\n~/.cargo/bin/cargo build --release --target $CARGO_TARGET\n"; + }; +/* End PBXShellScriptBuildPhase section */ + /* Begin PBXSourcesBuildPhase section */ D6C687E4272CD27600874C10 /* Sources */ = { isa = PBXSourcesBuildPhase; @@ -506,6 +540,7 @@ D6E24358278B96E40005E546 /* Feed+CoreDataProperties.swift in Sources */, D65B18BE275051A1004A9448 /* LocalData.swift in Sources */, D65B18B22750469D004A9448 /* LoginViewController.swift in Sources */, + D68B303627907D9200E8B3FA /* ExcerptGenerator.swift in Sources */, D6E24363278BA1410005E546 /* ItemCollectionViewCell.swift in Sources */, D6E2436E278BD8160005E546 /* ReadViewController.swift in Sources */, D65B18C127505348004A9448 /* HomeViewController.swift in Sources */, @@ -630,6 +665,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ""; IPHONEOS_DEPLOYMENT_TARGET = 15.2; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_FAST_MATH = YES; @@ -685,6 +721,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ""; IPHONEOS_DEPLOYMENT_TARGET = 15.2; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; @@ -705,6 +742,7 @@ CURRENT_PROJECT_VERSION = 1; DEVELOPMENT_TEAM = ZPBBSK8L8B; GENERATE_INFOPLIST_FILE = YES; + HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/lol-html/c-api/include/"; INFOPLIST_FILE = Reader/Info.plist; INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen; @@ -715,10 +753,19 @@ "$(inherited)", "@executable_path/Frameworks", ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/lol-html/c-api/target/aarch64-apple-ios-sim/release", + ); + "LIBRARY_SEARCH_PATHS[sdk=iphoneos*]" = "$(PROJECT_DIR)/lol-html/c-api/target/aarch64-apple-ios/release/"; + "LIBRARY_SEARCH_PATHS[sdk=iphonesimulator*][arch=arm64]" = 
"$(PROJECT_DIR)/lol-html/c-api/target/aarch64-apple-ios-sim/release/"; + "LIBRARY_SEARCH_PATHS[sdk=iphonesimulator*][arch=x86_64]" = "$(PROJECT_DIR)/lol-html/c-api/target/x86_64-apple-ios/release/"; MARKETING_VERSION = 1.0; + OTHER_LDFLAGS = "-llolhtml"; PRODUCT_BUNDLE_IDENTIFIER = net.shadowfacts.Reader; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_OBJC_BRIDGING_HEADER = "Reader/Reader-Bridging-Header.h"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -734,6 +781,7 @@ CURRENT_PROJECT_VERSION = 1; DEVELOPMENT_TEAM = ZPBBSK8L8B; GENERATE_INFOPLIST_FILE = YES; + HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/lol-html/c-api/include/"; INFOPLIST_FILE = Reader/Info.plist; INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen; @@ -744,10 +792,19 @@ "$(inherited)", "@executable_path/Frameworks", ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/lol-html/c-api/target/aarch64-apple-ios-sim/release", + ); + "LIBRARY_SEARCH_PATHS[sdk=iphoneos*]" = "$(PROJECT_DIR)/lol-html/c-api/target/aarch64-apple-ios/release/"; + "LIBRARY_SEARCH_PATHS[sdk=iphonesimulator*][arch=arm64]" = "$(PROJECT_DIR)/lol-html/c-api/target/aarch64-apple-ios-sim/release/"; + "LIBRARY_SEARCH_PATHS[sdk=iphonesimulator*][arch=x86_64]" = "$(PROJECT_DIR)/lol-html/c-api/target/x86_64-apple-ios/release/"; MARKETING_VERSION = 1.0; + OTHER_LDFLAGS = "-llolhtml"; PRODUCT_BUNDLE_IDENTIFIER = net.shadowfacts.Reader; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_OBJC_BRIDGING_HEADER = "Reader/Reader-Bridging-Header.h"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -762,6 +819,7 @@ CURRENT_PROJECT_VERSION = 1; DEVELOPMENT_TEAM = ZPBBSK8L8B; GENERATE_INFOPLIST_FILE = YES; + HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/lol-html/c-api/include/"; IPHONEOS_DEPLOYMENT_TARGET = 15.2; MARKETING_VERSION = 1.0; PRODUCT_BUNDLE_IDENTIFIER = net.shadowfacts.ReaderTests; @@ -782,6 +840,7 @@ 
CURRENT_PROJECT_VERSION = 1; DEVELOPMENT_TEAM = ZPBBSK8L8B; GENERATE_INFOPLIST_FILE = YES; + HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/lol-html/c-api/include/"; IPHONEOS_DEPLOYMENT_TARGET = 15.2; MARKETING_VERSION = 1.0; PRODUCT_BUNDLE_IDENTIFIER = net.shadowfacts.ReaderTests; @@ -945,14 +1004,6 @@ /* End XCConfigurationList section */ /* Begin XCRemoteSwiftPackageReference section */ - D6E24365278BA2660005E546 /* XCRemoteSwiftPackageReference "SwiftSoup" */ = { - isa = XCRemoteSwiftPackageReference; - repositoryURL = "https://github.com/scinfu/SwiftSoup"; - requirement = { - kind = upToNextMinorVersion; - minimumVersion = 2.3.0; - }; - }; D6E2436F278BE1250005E546 /* XCRemoteSwiftPackageReference "swift-html-entities" */ = { isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/Kitura/swift-html-entities.git"; @@ -964,11 +1015,6 @@ /* End XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ - D6E24366278BA2660005E546 /* SwiftSoup */ = { - isa = XCSwiftPackageProductDependency; - package = D6E24365278BA2660005E546 /* XCRemoteSwiftPackageReference "SwiftSoup" */; - productName = SwiftSoup; - }; D6E24370278BE1250005E546 /* HTMLEntities */ = { isa = XCSwiftPackageProductDependency; package = D6E2436F278BE1250005E546 /* XCRemoteSwiftPackageReference "swift-html-entities" */; diff --git a/Reader/CoreData/Item+CoreDataProperties.swift b/Reader/CoreData/Item+CoreDataProperties.swift index 16c93ab..1a1961e 100644 --- a/Reader/CoreData/Item+CoreDataProperties.swift +++ b/Reader/CoreData/Item+CoreDataProperties.swift @@ -18,6 +18,8 @@ extension Item { @NSManaged public var author: String? @NSManaged public var content: String? + @NSManaged public var excerpt: String? + @NSManaged public var generatedExcerpt: Bool @NSManaged public var id: String? @NSManaged public var needsReadStateSync: Bool @NSManaged public var published: Date? 
diff --git a/Reader/CoreData/Reader.xcdatamodeld/Reader.xcdatamodel/contents b/Reader/CoreData/Reader.xcdatamodeld/Reader.xcdatamodel/contents index 280cb74..3698692 100644 --- a/Reader/CoreData/Reader.xcdatamodeld/Reader.xcdatamodel/contents +++ b/Reader/CoreData/Reader.xcdatamodeld/Reader.xcdatamodel/contents @@ -16,6 +16,8 @@ + + @@ -30,7 +32,7 @@ - + \ No newline at end of file
diff --git a/Reader/ExcerptGenerator.swift b/Reader/ExcerptGenerator.swift
new file mode 100644
index 0000000..9aa681a
--- /dev/null
+++ b/Reader/ExcerptGenerator.swift
@@ -0,0 +1,150 @@
+//
+//  ExcerptGenerator.swift
+//  Reader
+//
+//  Created by Shadowfacts on 1/13/22.
+//
+
+import Foundation
+import OSLog
+import CoreData
+
+// public so that it can be imported in ReaderTests even when Reader is compiled in release mode (w/ testing disabled)
+public struct ExcerptGenerator {
+    private init() {}
+
+    private static let logger = Logger(subsystem: Bundle.main.bundleIdentifier!, category: "ExcerptGenerator")
+
+    /// Generates and stores excerpts for all items whose `generatedExcerpt` flag is not set.
+    /// The work runs in a background context; saved changes are then merged into the view context.
+    static func generateAll(_ fervorController: FervorController) {
+        let req = Item.fetchRequest()
+        req.predicate = NSPredicate(format: "generatedExcerpt = NO")
+        req.sortDescriptors = [NSSortDescriptor(key: "published", ascending: false)]
+        req.fetchBatchSize = 50
+        fervorController.persistentContainer.performBackgroundTask { ctx in
+            let items: [Item]
+            do {
+                items = try ctx.fetch(req)
+            } catch {
+                // report fetch failures the same way save failures are reported below, rather than bailing silently
+                logger.error("Unable to fetch items for excerpt generation: \(error.localizedDescription, privacy: .public)")
+                return
+            }
+            var count = 0
+            for item in items {
+                if let excerpt = excerpt(for: item) {
+                    item.excerpt = excerpt
+                    count += 1
+                    if count % 50 == 0 {
+                        logger.debug("Generated \(count, privacy: .public) excerpts")
+                    }
+                }
+                item.generatedExcerpt = true
+            }
+            logger.log("Generated excerpts for \(count, privacy: .public) items")
+            if ctx.hasChanges {
+                do {
+                    // get the updated objects now, because this set is empty after .save is called
+                    let updated = ctx.updatedObjects
+                    try ctx.save()
+
+                    // make sure the view context has the newly added excerpts
+                    // (the change dictionary is documented to carry object IDs rather than the objects themselves)
+                    NSManagedObjectContext.mergeChanges(fromRemoteContextSave: [
+                        NSUpdatedObjectsKey: updated.map { $0.objectID }
+                    ], into: [fervorController.persistentContainer.viewContext])
+                } catch {
+                    logger.error("Unable to save context: \(error.localizedDescription, privacy: .public)")
+                }
+            }
+        }
+    }
+
+    /// Returns the excerpt generated from the item's HTML `content`, or `nil` if the item has no content.
+    public static func excerpt(for item: Item) -> String? {
+        guard let content = item.content else {
+            return nil
+        }
+        return excerpt(from: content)
+    }
+
+    /// Runs `html` through a lol-html rewriter with handlers on the `p` selector and returns the collected
+    /// paragraph text, HTML-unescaped and trimmed, or `nil` if no `p` element was encountered.
+    public static func excerpt(from html: String) -> String? {
+        var html = html
+
+        let builder = lol_html_rewriter_builder_new()!
+        let pSelector = lol_html_selector_parse("p", 1)!
+        var userData = UserData()
+        withUnsafeMutablePointer(to: &userData) { userDataPtr in
+            let rawPtr = UnsafeMutableRawPointer(userDataPtr)
+            let res = lol_html_rewriter_builder_add_element_content_handlers(builder, pSelector, elementHandler, rawPtr, nil, nil, textHandler, rawPtr)
+            guard res == 0 else {
+                lolHtmlError()
+            }
+            let memSettings = lol_html_memory_settings_t(preallocated_parsing_buffer_size: 1024, max_allowed_memory_usage: .max)
+            let rewriter = lol_html_rewriter_build(builder, "utf-8", 5, memSettings, outputSink, nil, true)
+            lol_html_rewriter_builder_free(builder)
+            lol_html_selector_free(pSelector)
+
+            guard let rewriter = rewriter else {
+                lolHtmlError()
+            }
+
+            _ = html.withUTF8 { buffer in
+                buffer.withMemoryRebound(to: CChar.self) { buffer in
+                    lol_html_rewriter_write(rewriter, buffer.baseAddress!, buffer.count)
+                }
+            }
+            // the builder and selector are freed above, but the rewriter itself also has to be released
+            lol_html_rewriter_free(rewriter)
+        }
+        if userData.isInParagraph {
+            return userData.paragraphText.htmlUnescape().trimmingCharacters(in: .whitespacesAndNewlines)
+            // todo: steal css whitespace collapsing from tusker
+        } else {
+            return nil
+        }
+    }
+
+    /// Takes lol-html's last error message and aborts with it via `fatalError`.
+    private static func lolHtmlError() -> Never {
+        let lastError = lol_html_take_last_error()
+        let message = String(bytesNoCopy: UnsafeMutableRawPointer(mutating: lastError.data!), length: lastError.len, encoding: .utf8, freeWhenDone: false)
+        fatalError(message ?? "Unknown lol-html error")
+    }
+}
+
+/// Mutable state shared with the lol-html handlers through their `userData` pointer.
+private struct UserData {
+    var isInParagraph = false
+    var paragraphText = ""
+}
+
+/// Element handler registered for the `p` selector: returns `LOL_HTML_STOP` if a paragraph was already seen,
+/// otherwise records whether the matched element is a paragraph and continues.
+private func elementHandler(element: OpaquePointer!, userData: UnsafeMutableRawPointer!) -> lol_html_rewriter_directive_t {
+    let userDataPtr = userData.assumingMemoryBound(to: UserData.self)
+    if userDataPtr.pointee.isInParagraph {
+        return LOL_HTML_STOP
+    } else {
+        let s = lol_html_element_tag_name_get(element)
+        let tagName = String(bytesNoCopy: UnsafeMutableRawPointer(mutating: s.data), length: s.len, encoding: .utf8, freeWhenDone: false)!
+ userDataPtr.pointee.isInParagraph = tagName == "p" || tagName == "P" + lol_html_str_free(s) + return LOL_HTML_CONTINUE + } +} + +private func textHandler(chunk: OpaquePointer!, userData: UnsafeMutableRawPointer!) -> lol_html_rewriter_directive_t { + let userDataPtr = userData.assumingMemoryBound(to: UserData.self) + let s = lol_html_text_chunk_content_get(chunk) + let content = String(bytesNoCopy: UnsafeMutableRawPointer(mutating: s.data), length: s.len, encoding: .utf8, freeWhenDone: false)! + userDataPtr.pointee.paragraphText += content + return LOL_HTML_CONTINUE +} + +private func outputSink(chunk: UnsafePointer!, chunkLen: Int, userData: UnsafeMutableRawPointer!) { + // no-op +} diff --git a/Reader/Reader-Bridging-Header.h b/Reader/Reader-Bridging-Header.h new file mode 100644 index 0000000..fb9b6b2 --- /dev/null +++ b/Reader/Reader-Bridging-Header.h @@ -0,0 +1,13 @@ +// +// Reader-Bridging-Header.h +// Reader +// +// Created by Shadowfacts on 1/12/22. +// + +#ifndef Reader_Bridging_Header_h +#define Reader_Bridging_Header_h + +#import "lol_html.h" + +#endif /* Reader_Bridging_Header_h */ diff --git a/Reader/SceneDelegate.swift b/Reader/SceneDelegate.swift index 7b1d60c..1f12f57 100644 --- a/Reader/SceneDelegate.swift +++ b/Reader/SceneDelegate.swift @@ -86,6 +86,8 @@ class SceneDelegate: UIResponder, UIWindowSceneDelegate { } catch { logger.error("Unable to sync from server: \(error.localizedDescription, privacy: .public)") } + + ExcerptGenerator.generateAll(fervorController) } } diff --git a/Reader/Screens/Items/ItemCollectionViewCell.swift b/Reader/Screens/Items/ItemCollectionViewCell.swift index a41ae08..b26633f 100644 --- a/Reader/Screens/Items/ItemCollectionViewCell.swift +++ b/Reader/Screens/Items/ItemCollectionViewCell.swift @@ -6,7 +6,6 @@ // import UIKit -import SwiftSoup protocol ItemCollectionViewCellDelegate: AnyObject { func itemCellSelected(cell: ItemCollectionViewCell, item: Item) @@ -74,13 +73,12 @@ class ItemCollectionViewCell: 
UICollectionViewListCell { titleLabel.text = item.title feedTitleLabel.text = item.feed!.title ?? item.feed!.url?.host - if let content = item.content { - let doc = try! SwiftSoup.parse(content) - contentLabel.text = try! doc.select("p").first()?.text() + if let excerpt = item.excerpt { + contentLabel.text = excerpt + contentLabel.isHidden = false } else { - contentLabel.text = "" + contentLabel.isHidden = true } - contentLabel.isHidden = contentLabel.text?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ?? true updateColors() } diff --git a/ReaderTests/ReaderTests.swift b/ReaderTests/ReaderTests.swift index 841902f..ce302a7 100644 --- a/ReaderTests/ReaderTests.swift +++ b/ReaderTests/ReaderTests.swift @@ -6,7 +6,8 @@ // import XCTest -@testable import Reader +import Reader +//import SwiftSoup class ReaderTests: XCTestCase { @@ -22,12 +23,164 @@ class ReaderTests: XCTestCase { // This is an example of a functional test case. // Use XCTAssert and related functions to verify your tests produce the correct results. } + + // HTML parsing comparison conducted on iPhone 12 Pro + // SwiftSoup: approx 0.53 sec + // lol-html: approx 0.003 sec - func testPerformanceExample() throws { - // This is an example of a performance test case. + // note: when testing this, make sure to set the Reader scheme to build in release mode for fair comparison +// func testSwiftSoupPerformance() { +// self.measure { +// for i in 0..<100 { +// if i % 10 == 0 { +// print(i) +// } +// let doc = try! SwiftSoup.parseBodyFragment(html) +// let excerpt = try! doc.text() +// } +// } +// } + + func testLolHtmlPerformance() { self.measure { - // Put the code you want to measure the time of here. + for i in 0..<100 { + if i % 10 == 0 { + print(i) + } + let excerpt = ExcerptGenerator.excerpt(from: html) + } } } + + let html = """ +

On Tuesday, Republicans on the House Committee on Oversight and Reform released a letter that + paints a damning picture of U.S. government officials wrestling with + whether the novel coronavirus may have leaked out of a lab they were + funding, acknowledging that it may have, and then keeping the discussion + from spilling out into public view.

The letter, signed by James Comer, R-Ky., and Jim Jordan, R-Ohio, was followed by pages of notes on emails that were first obtained through the Freedom of Information Act by BuzzFeed News and the Washington Post, + but were heavily redacted when published in June 2021. The redacted + emails included the agenda for a February 1, 2020, telephone conference + between National Institute of Allergy and Infectious Diseases director + Anthony Fauci; his then-boss, former National Institutes of Health + director Francis Collins; and several of the world’s leading + virologists. The communications contained extensive notes summarizing + what was said during the call, but their substance was hidden at the + time.

Oversight Committee staff were able to view the full emails + “in camera,” which means they could physically look at them and take + notes but couldn’t take copies with them. The information released + Tuesday for the first time reveals the content of notes taken on the + February 1 call.

On + that call, virologists Michael Farzan and Robert Garry told Fauci and + Collins the virus might have leaked from the Wuhan lab. It might have + been genetically engineered, the transcription of Garry’s notes + suggests, but this now seems unlikely. Another possibility, put forward + by Farzan, was that it could have been evolved in the lab through a + process known as serial passage.

“The email is out-of-context,” + Garry wrote Wednesday in an email to The Intercept. “This was one email + among many I was sharing with my colleagues.”

The + two methods represent two different ideas behind the so-called lab-leak + hypothesis. The one that gained notoriety early in the pandemic is + genetic engineering, where scientists insert and delete nucleotides in + the virus’s genetic code, in this case viral RNA, to turn it into + something new. This version forms the basis of accusations that the + virus was intentionally created as a bioweapon — which practically every + credible scientist has dismissed as an illogical conspiracy, but was + quickly embraced by former President Donald Trump and much of the + American right wing, souring scientists, liberals, and the mainstream on + the possibility of lab origin. The less lurid but seemingly more + plausible version is the idea of evolution through serial passage, in + which scientists allow a virus to jump between host species or cell + cultures, spurring new mutations.

The day before the call, Scripps + Research infectious disease expert Kristian Andersen had warned Fauci + that the virus may have been engineered in a lab, noting that he and + several other high-profile scientists “all find the genome inconsistent + with expectations from evolutionary theory.” The scientists agreed to + have a conference call the next day. “It was a very productive + back-and-forth conversation where some on the call felt it could + possibly be an engineered virus,” Fauci told Alison Young, writing for USA Today, in June 2021.

Not long after the call, Andersen was the lead author on a paper in Nature Medicine + titled “The Proximal Origin of SARS-CoV-2.” The paper proposed “two + scenarios that can plausibly explain the origin of SARS-CoV-2: (i) + natural selection in an animal host before zoonotic transfer; and (ii) + natural selection in humans following zoonotic transfer.” For the + scientists and pundits who sought to discount the emerging lab-leak + hypothesis, it offered the authoritative proof they needed. The paper + has since been accessed more than 5.6 million times, with over 2,000 + citations.

The authors acknowledged a third scenario, “selection + during passage,” but they discussed it briefly and presented it as by + far the least plausible. The newly released notes from the call, + however, suggest that the scientists Fauci consulted initially + considered that possibility to be much more serious than the paper let + on.

On February 2, Jeremy Farrar, an + infectious disease expert and the director of Wellcome, sent around + notes, including to Fauci and Collins, summarizing what some of the + scientists had said on the call. Farzan, a Scripps professor who studied + the spike protein on the 2003 SARS virus, “is bothered by the furin + site and has a hard time explain that as an event outside the lab + (though, there are possible ways in nature, but highly unlikely),” + Farrar’s note reads, referring to a spike protein feature that aids + interaction with furin, a common enzyme in human lung cells. Farzan + didn’t think the site was the product of “directed engineering,” but + found that the changes would be “highly compatible with the idea of + continued passage of the virus in tissue culture.”

According to + the transcribed notes, Garry, a professor at the Tulane University + School of Medicine, said on the call that he had aligned the SARS-CoV-2 + genome with that of RaTG13, a 96-percent similar virus isolated from + bats at the Wuhan Institute of Virology that was long regarded as the + new virus’s closest known relative — though a closer one has since been identified. + Garry found that the spike proteins of RaTG13 and SARS-CoV-2, which + makes the latter so infectious, were nearly identical. The key + distinction was in the ability of the new virus’s spike protein to + interact with furin, which Garry found too perfect to make natural + sense. “I just can’t figure out how this gets accomplished in nature,” + he said.

“My initial impression and that of others about the + [furin cleavage site] was wrong. I changed my mind with new + information/new data,” Garry wrote to The Intercept. “That’s how science + works. No one was trying to mislead the public. What was in the + Proximal Origins paper was our best analysis — it’s held up extremely + well.”

As they discussed what to + present to the public, the scientists determined that questions of + potential lab origin might prove more trouble than they’re worth. “Given + the evidence presented and the discussions around it, I would conclude + that a follow-up discussion on the possible origin of 2019-nCoV would be + of much interest,” wrote Ron Fouchier, a virologist at the Erasmus MC + Center for Viroscience in the Netherlands, on February 2. Years earlier, + Fouchier’s gain-of-function research had brought the discipline under + fire for a 2011 experiment + in which he infected ferrets in adjacent cages with the avian influenza + virus, allowing it to become airborne and infect mammals. “However, + further debate about such accusations would unnecessarily distract top + researchers from their active duties and do unnecessary harm to science + in general and science in China in particular,” Fouchier wrote.

Farzan, Fauci, and Fouchier did not immediately respond to The Intercept’s requests for comment.

Several + of the scientists on the email chain ended up co-authoring the Nature + Medicine paper with Andersen and Garry. In a February 4 email, which + House Republicans presented as a response to a first copy of the draft, + Fauci wrote: “?? Serial passage in ACE2-transgenic mice.”

The + early draft has not been made public, so we don’t know what, exactly, + sparked Fauci’s reaction. But his words, which refer to the process of + passaging a virus in “humanized” laboratory mice — or mice that have + been genetically modified to express receptors for human ACE2, an enzyme + that occurs in the lungs — do not appear in the published paper.

“Neither + Drs. Fauci or Collins edited our Proximal Origins paper in any way. The + major feedback we got from the Feb 1 teleconference was: 1. Don’t try + to write a paper at all — it’s unnecessary or 2. If you do write it + don’t mention a lab origin as that will just add fuel to the + conspiracists,” Garry wrote on Wednesday.

When the paper appeared + in Nature Medicine on March 17, 2020, it noted near the end that in + order for the novel coronavirus to have emerged in a lab via serial + passage, scientists would have to conduct those experiments using a + relative with very high genetic similarity, but there was no evidence + that such experiments had been done. The authors added, “Subsequent + generation of a polybasic cleavage site,” which lets the virus process + furin, “would have then required repeated passage in cell culture or + animals with ACE2 receptors similar to those of humans, but such work + has also not previously been described.”

Though the paper was + publicly embraced by the scientific community and the mainstream media, + Collins worried that its impact wasn’t sufficient. “Wondering if there + is something NIH can do to help put down this very destructive + conspiracy,” Collins wrote on April 16, 2020, in reference to a Fox News + segment on the lab-leak theory. “I hoped the Nature Medicine article on + the genomic sequence of SARS-CoV-2 would settle this. But probably + didn’t get much visibility. Anything more we can do?”

“I would not do anything about this right now,” Fauci replied. “It is a shiny object that will go away in times.”

+ """ } diff --git a/lol-html b/lol-html new file mode 160000 index 0000000..f32bd14 --- /dev/null +++ b/lol-html @@ -0,0 +1 @@ +Subproject commit f32bd14b229ed1088c25725cce242817ea2fe43a