113 lines
4.2 KiB
Swift
113 lines
4.2 KiB
Swift
|
//
|
||
|
// ImportController.swift
|
||
|
// MastoSearch
|
||
|
//
|
||
|
// Created by Shadowfacts on 12/10/21.
|
||
|
//
|
||
|
|
||
|
import Foundation
|
||
|
import TabularData
|
||
|
import Accelerate
|
||
|
import OSLog
|
||
|
|
||
|
/*
|
||
|
|
||
|
Imports from Pleroma CSV dumps generated with the following psql command:
|
||
|
|
||
|
\copy (select a.id, a.data as activity_data, o.data as object_data from activities as a left join objects as o on o.data->>'id' = a.data->>'object' where a.data->>'actor'='https://social.shadowfacts.net/users/shadowfacts' and a.data->>'type'='Create' and (o.data->>'type'='Note' or a.data->'object'->>'type'='Note')) to '/home/pleroma/shadowfacts.csv' csv header;
|
||
|
|
||
|
*/
|
||
|
|
||
|
/// Imports statuses from a Pleroma CSV dump and hands them to `DatabaseController`.
///
/// The CSV is expected to have three columns: `id` (a UUID string),
/// `activity_data` (JSON) and `object_data` (JSON, may be empty when the object
/// is embedded in the activity).
class ImportController {
    static let shared = ImportController()

    // Fall back to a fixed subsystem name so that creating the logger cannot crash
    // in processes that have no bundle identifier.
    private let logger = Logger(subsystem: Bundle.main.bundleIdentifier ?? "MastoSearch", category: "ImportController")

    /// Parses the `published` timestamp of an activity.
    private let dateFormatter: DateFormatter = {
        let f = DateFormatter()
        f.dateFormat = "yyyy-MM-dd'T'HH:mm:ss.SZ"
        f.timeZone = TimeZone(abbreviation: "UTC")
        // Fixed-format parsing must not depend on the user's locale settings.
        f.locale = Locale(identifier: "en_US_POSIX")
        return f
    }()

    /// The 62 digits used by `encodeBase62`, ordered by digit value.
    private let base62Alphabet: [Character] = Array("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")

    private init() {}

    /// Reads the CSV file at `url`, converts every well-formed row into a `Status`
    /// and passes the resulting statuses to `DatabaseController.shared`.
    ///
    /// If the file cannot be read, the error is logged and nothing is imported.
    /// Rows that cannot be converted are logged and skipped.
    ///
    /// - Parameter url: Location of the CSV file to import.
    func importCSV(url: URL) {
        var opts = CSVReadingOptions()
        opts.usesQuoting = true
        opts.addDateParseStrategy(Date.ISO8601FormatStyle(includingFractionalSeconds: true))

        let dataFrame: DataFrame
        do {
            dataFrame = try DataFrame(contentsOfCSVFile: url, columns: ["id", "activity_data", "object_data"], types: [
                "id": .string,
                "activity_data": .data,
                "object_data": .data,
            ], options: opts)
        } catch {
            logger.error("Unable to read CSV file: \(String(describing: error), privacy: .public)")
            return
        }

        let statuses = dataFrame.rows.enumerated().compactMap { (index, row) -> Status? in
            if index % 100 == 0 {
                logger.debug("Parsing row \(index, privacy: .public)")
            }
            return parseStatus(from: row, at: index)
        }
        DatabaseController.shared.addStatuses(statuses)
    }

    /// Converts a single CSV row into a `Status`.
    ///
    /// - Parameters:
    ///   - row: The row to convert.
    ///   - index: Position of the row in the file, used only for log messages.
    /// - Returns: The parsed status, or `nil` (after logging the reason) when a
    ///   required field is missing or malformed.
    private func parseStatus(from row: DataFrame.Row, at index: Int) -> Status? {
        guard let uuidString = row["id"] as? String,
              let uuid = UUID(uuidString: uuidString) else {
            logger.error("Skipping row \(index, privacy: .public): missing or invalid id")
            return nil
        }

        guard let activityData = row["activity_data"] as? Data,
              let activity = (try? JSONSerialization.jsonObject(with: activityData, options: [])) as? [String: Any] else {
            logger.error("Skipping row \(index, privacy: .public): missing or invalid activity_data")
            return nil
        }

        // The object either comes from its own column or is embedded in the activity.
        let parsedObject: [String: Any]?
        if let objectData = row["object_data"] as? Data {
            parsedObject = (try? JSONSerialization.jsonObject(with: objectData, options: [])) as? [String: Any]
        } else {
            parsedObject = activity["object"] as? [String: Any]
        }
        guard let object = parsedObject else {
            logger.error("Skipping row \(index, privacy: .public): missing or invalid object")
            return nil
        }

        guard let url = activity["id"] as? String,
              let content = object["content"] as? String,
              let publishedString = activity["published"] as? String,
              let published = dateFormatter.date(from: publishedString) else {
            logger.error("Skipping row \(index, privacy: .public): missing or invalid id, content or published field")
            return nil
        }

        // A summary consisting only of whitespace is treated as no summary at all.
        var summary = object["summary"] as? String
        if summary?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == true {
            summary = nil
        }

        return Status(id: flakeIdString(from: uuid), url: url, summary: summary, content: content, published: published)
    }

    // https://git.pleroma.social/pleroma/elixir-libraries/flake_id/-/blob/master/lib/flake_id/ecto/compat_type.ex
    /// Converts the textual UUID stored in the database into the ID string exposed
    /// by the API.
    ///
    /// - Parameter uuidStr: A UUID in its standard string form. Passing anything
    ///   else is a programmer error and traps.
    /// - Returns: The decimal representation when the upper 64 bits are all zero,
    ///   otherwise the Base62 encoding of the full 128-bit value.
    func uuidToFlakeIdStr(_ uuidStr: String) -> String {
        guard let uuid = UUID(uuidString: uuidStr) else {
            preconditionFailure("Invalid UUID string: \(uuidStr)")
        }
        return flakeIdString(from: uuid)
    }

    /// Interprets the 16 bytes of `uuid` as a big-endian 128-bit integer and
    /// formats it as described on `uuidToFlakeIdStr(_:)`.
    private func flakeIdString(from uuid: UUID) -> String {
        // Copy the bytes out and assemble the two halves arithmetically instead of
        // rebinding the memory to a wider integer type.
        let bytes = withUnsafeBytes(of: uuid.uuid) { Array($0) }
        let upper = bytes[..<8].reduce(UInt64(0)) { ($0 << 8) | UInt64($1) }
        let lower = bytes[8...].reduce(UInt64(0)) { ($0 << 8) | UInt64($1) }
        let num = UInt128(upperBits: upper, lowerBits: lower)

        if num.leadingZeroBitCount >= 64 {
            return num.description
        } else {
            return encodeBase62(num)
        }
    }

    /// Encodes `value` in Base62 using the digits `0-9`, `A-Z`, `a-z`.
    ///
    /// - Parameter value: The number to encode.
    /// - Returns: The Base62 digits, most significant first. Zero encodes as `"0"`.
    func encodeBase62(_ value: UInt128) -> String {
        guard value != .zero else {
            return String(base62Alphabet[0])
        }

        let radix = UInt128(base62Alphabet.count)
        var digits: [Character] = []
        var cur = value

        // Digits are produced least significant first and reversed once at the end.
        while cur != .zero {
            let (q, r) = cur.quotientAndRemainder(dividingBy: radix)
            digits.append(base62Alphabet[Int(r)])
            cur = q
        }

        return String(digits.reversed())
    }
}
|