//
//  ImportController.swift
//  MastoSearch
//
//  Created by Shadowfacts on 12/10/21.
//

import Foundation
import TabularData
import Accelerate
import OSLog

/*
 imports from pleroma csv dumps generated with the following psql command:
 \copy (select a.id, a.data as activity_data, o.data as object_data from activities as a left join objects as o on o.data->>'id' = a.data->>'object' where a.data->>'actor'='https://social.shadowfacts.net/users/shadowfacts' and a.data->>'type'='Create' and (o.data->>'type'='Note' or a.data->'object'->>'type'='Note')) to '/home/pleroma/shadowfacts.csv' csv header;
 */
/// Reads a CSV dump with `id`, `activity_data` and `object_data` columns, converts each
/// row into a `Status`, and hands the results to `DatabaseController.shared`.
public class ImportController {

    public static let shared = ImportController()

    /// Reasons a single CSV row cannot be converted into a `Status`.
    private enum RowError: Error {
        /// The named CSV column is absent or does not hold the expected type.
        case missingColumn(String)
        /// The named JSON field is absent or does not hold the expected type.
        case missingField(String)
        /// The JSON in the named column is not a top-level object.
        case notAJSONObject(String)
        /// The `id` column is not a parseable UUID string.
        case invalidUUID(String)
        /// The `published` string could not be parsed by `dateFormatter`.
        case invalidDate(String)
    }

    // Fall back to a fixed subsystem name rather than crashing when the process has no
    // bundle identifier.
    private let logger = Logger(subsystem: Bundle.main.bundleIdentifier ?? "MastoSearch", category: "ImportController")

    /// Parses the `published` timestamp of an activity.
    private let dateFormatter: DateFormatter = {
        let f = DateFormatter()
        f.dateFormat = "yyyy-MM-dd'T'HH:mm:ss.SZ"
        f.timeZone = TimeZone(abbreviation: "UTC")
        f.locale = Locale(identifier: "en_US_POSIX")
        return f
    }()

    private init() {}

    /// Imports every convertible row of the CSV file at `url` into the database.
    ///
    /// If the file cannot be read, the error is logged and nothing is imported. Rows that
    /// cannot be converted are logged and skipped; the remaining rows are still imported.
    ///
    /// - Parameter url: Location of a CSV file with a header row and the columns
    ///   `id`, `activity_data` and `object_data`.
    public func importCSV(url: URL) {
        var opts = CSVReadingOptions()
        opts.usesQuoting = true
        opts.addDateParseStrategy(Date.ISO8601FormatStyle(includingFractionalSeconds: true))

        let dataFrame: DataFrame
        do {
            dataFrame = try DataFrame(contentsOfCSVFile: url, columns: ["id", "activity_data", "object_data"], types: [
                "id": .string,
                "activity_data": .data,
                "object_data": .data,
            ], options: opts)
        } catch {
            let path = url.path
            let reason = String(describing: error)
            logger.error("Unable to read CSV at \(path, privacy: .public): \(reason, privacy: .public)")
            return
        }

        let statuses = dataFrame.rows.lazy.enumerated().compactMap { (index, row) -> Status? in
            if index % 100 == 0 {
                self.logger.debug("Parsing row \(index, privacy: .public)")
            }
            do {
                return try self.makeStatus(from: row)
            } catch {
                // One malformed row should not abort the whole import.
                let reason = String(describing: error)
                self.logger.error("Skipping row \(index, privacy: .public): \(reason, privacy: .public)")
                return nil
            }
        }

        DatabaseController.shared.addStatuses(statuses)
    }

    /// Builds a `Status` from one CSV row.
    ///
    /// - Throws: `RowError` when a column or JSON field is missing or malformed, or the
    ///   error thrown by `JSONSerialization` when a column does not contain valid JSON.
    private func makeStatus(from row: DataFrame.Row) throws -> Status {
        guard let uuid = row["id"] as? String else {
            throw RowError.missingColumn("id")
        }
        guard let activityData = row["activity_data"] as? Data else {
            throw RowError.missingColumn("activity_data")
        }
        let activity = try jsonObject(from: activityData, column: "activity_data")

        // Use the separate object column when present, otherwise the object embedded in
        // the activity itself.
        let object: [String: Any]
        if let objectData = row["object_data"] as? Data {
            object = try jsonObject(from: objectData, column: "object_data")
        } else if let embedded = activity["object"] as? [String: Any] {
            object = embedded
        } else {
            throw RowError.missingField("activity.object")
        }

        guard let id = flakeIdString(fromUUIDString: uuid) else {
            throw RowError.invalidUUID(uuid)
        }
        guard let url = activity["id"] as? String else {
            throw RowError.missingField("activity.id")
        }
        guard let content = object["content"] as? String else {
            throw RowError.missingField("object.content")
        }
        guard let publishedString = activity["published"] as? String else {
            throw RowError.missingField("activity.published")
        }
        guard let published = dateFormatter.date(from: publishedString) else {
            throw RowError.invalidDate(publishedString)
        }

        // A summary consisting only of whitespace is treated as absent.
        var summary = object["summary"] as? String
        if let s = summary, s.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
            summary = nil
        }

        return Status(id: id, url: url, summary: summary, content: content, published: published)
    }

    /// Decodes `data` as JSON and requires the top-level value to be an object.
    private func jsonObject(from data: Data, column: String) throws -> [String: Any] {
        guard let dict = try JSONSerialization.jsonObject(with: data, options: []) as? [String: Any] else {
            throw RowError.notAJSONObject(column)
        }
        return dict
    }

    // https://git.pleroma.social/pleroma/elixir-libraries/flake_id/-/blob/master/lib/flake_id/ecto/compat_type.ex
    /// Converts a UUID string into the corresponding flake ID string.
    ///
    /// - Parameter uuidStr: A string accepted by `UUID(uuidString:)`.
    /// - Returns: The decimal description of the 128-bit value when its upper 64 bits are
    ///   zero, otherwise its base-62 encoding.
    /// - Precondition: `uuidStr` must be a valid UUID string; the call traps otherwise.
    func uuidToFlakeIdStr(_ uuidStr: String) -> String {
        guard let id = flakeIdString(fromUUIDString: uuidStr) else {
            preconditionFailure("Invalid UUID string: \(uuidStr)")
        }
        return id
    }

    /// Non-trapping core of `uuidToFlakeIdStr(_:)`; returns `nil` for an unparseable UUID.
    private func flakeIdString(fromUUIDString uuidStr: String) -> String? {
        guard let uuid = UUID(uuidString: uuidStr) else {
            return nil
        }
        // Fold the 16 UUID bytes into two big-endian 64-bit halves. This avoids rebinding
        // byte storage to a wider integer type.
        let bytes = withUnsafeBytes(of: uuid.uuid) { Array($0) }
        let upper = bytes[0..<8].reduce(UInt64(0)) { ($0 << 8) | UInt64($1) }
        let lower = bytes[8..<16].reduce(UInt64(0)) { ($0 << 8) | UInt64($1) }
        let num = UInt128(upperBits: upper, lowerBits: lower)
        if num.leadingZeroBitCount >= 64 {
            return num.description
        } else {
            return encodeBase62(num)
        }
    }

    private let base62Alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

    /// Encodes `value` in base 62 using the digits `0-9`, `A-Z`, `a-z`, most significant
    /// digit first.
    ///
    /// - Returns: The encoded string; zero encodes as `"0"`.
    func encodeBase62(_ value: UInt128) -> String {
        // Random access into an array instead of walking the string for every digit.
        let alphabet = Array(base62Alphabet)
        guard value != .zero else {
            return String(alphabet[0])
        }
        let radix = UInt128(alphabet.count)
        var digits: [Character] = []
        var cur = value
        while cur != .zero {
            let (q, r) = cur.quotientAndRemainder(dividingBy: radix)
            digits.append(alphabet[Int(r)])
            cur = q
        }
        // Digits were produced least significant first.
        return String(digits.reversed())
    }
}