# frenzy/lib/frenzy/update_feeds.ex

defmodule Frenzy.UpdateFeeds do
  use GenServer
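
  @moduledoc """
  GenServer that periodically refreshes every feed that is due for an update
  and prunes old items down to tombstones. Individual feeds can also be
  refreshed on demand via `refresh/2`.
  """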

  alias Frenzy.{Network, Repo, Feed, Item}
  alias Frenzy.Task.{CreateItem, FetchFavicon}

  import Ecto.Query
  require Logger
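
  # Note: `state` is forwarded to GenServer.start_link/3 as the options list;
  # the server's internal state is just :ok.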
  def start_link(state) do
    GenServer.start_link(__MODULE__, :ok, state)
  end
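
  # Asynchronously refreshes a single feed, outside of the regular schedule.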
  def refresh(pid, feed) do
    GenServer.cast(pid, {:refresh, feed})
  end
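
  # Performs an initial refresh of all due feeds and schedules the next run.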
  def init(state) do
    update_feeds()
    schedule_update()
    {:ok, state}
  end
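
  # Handles on-demand refreshes requested through refresh/2.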
  def handle_cast({:refresh, feed}, state) do
    update_feed(feed)
    {:noreply, state}
  end
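
  # Periodic tick: refresh everything that's due, then schedule the next tick.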
  def handle_info(:update_feeds, state) do
    update_feeds()
    schedule_update()
    {:noreply, state}
  end

  # Workaround for an unhandled {:ssl_closed, {:sslsocket, {:gen_tcp, ...}}}
  # message that arrives when the Gemini module's connection closes.
  def handle_info({:ssl_closed, _}, state), do: {:noreply, state}

  defp schedule_update() do
    # 30 minutes
    Process.send_after(self(), :update_feeds, 30 * 60 * 1000)
    # 1 minute, for testing:
    # Process.send_after(self(), :update_feeds, 60 * 1000)
  end
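
  # Claims every feed that's due for a refresh, updates each one, and then
  # prunes old items.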
  defp update_feeds() do
    # Select all feeds that have never been refreshed or whose refresh interval
    # has elapsed, stamping them in the same query so they aren't picked up twice.
    {count, feeds} =
      Feed
      |> where(
        [f],
        is_nil(f.last_refreshed_at) or
          f.last_refreshed_at <= from_now(-1 * f.refresh_frequency, "second")
      )
      |> select([f], f)
      |> Repo.update_all(set: [last_refreshed_at: DateTime.utc_now()])

    Logger.info("Updating #{count} feeds")

    Enum.each(feeds, fn feed ->
      try do
        update_feed(feed)
      rescue
        error ->
          Logger.warn(
            "Encountered error updating feed #{feed.id} #{feed.feed_url}: #{inspect(error)}"
          )
      end
    end)

    prune_old_items()
  end
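
  # Strips the content out of old items, leaving empty tombstones. Keeping the
  # rows (and their GUIDs) around means already-seen entries aren't re-created
  # on the next fetch.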
  defp prune_old_items() do
    {count, _} =
      from(i in Item,
        where: not i.tombstone,
        # todo: these time intervals should be configurable by the admin
        where:
          (i.read and i.read_date <= from_now(-1, "week")) or
            (not i.read and i.inserted_at <= from_now(-2, "week")),
        update: [
          set: [tombstone: true, content: nil, creator: nil, date: nil, url: nil, title: nil]
        ]
      )
      |> Repo.update_all([])

    Logger.info("Converted #{count} read items to tombstones")
  end
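
  # Dispatches to the appropriate fetcher based on the feed URL's scheme.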
  defp update_feed(feed) do
    Logger.debug("Updating #{feed.feed_url}")

    case URI.parse(feed.feed_url) do
      %URI{scheme: "gemini"} = uri ->
        update_feed_gemini(feed, uri)

      %URI{scheme: scheme} when scheme in ["http", "https"] ->
        update_feed_http(feed)

      %URI{scheme: scheme} ->
        Logger.warn("Unhandled scheme for feed: #{scheme}")
    end
  end
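
  # Fetches a feed over HTTP(S) and hands the body off to the parser along
  # with the response's MIME type.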
  defp update_feed_http(feed) do
    case Network.http_get(feed.feed_url) do
      {:ok,
       %Mojito.Response{
         status_code: 200,
         body: body,
         headers: headers
       }} ->
        {_, content_type} =
          headers
          |> Enum.find(fn {k, _v} -> k == "content-type" end)

        # Take the bare MIME type, discarding parameters like "charset=utf-8".
        content_type =
          content_type
          |> String.split(";")
          |> Enum.map(&String.trim/1)
          |> Enum.find(fn s -> !String.contains?(s, "=") end)

        do_update_feed(feed, content_type, body)

      {:error, reason} ->
        Logger.error("Couldn't load feed #{feed.feed_url}: #{inspect(reason)}")
    end
  end
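
  # Fetches a feed over Gemini; the response meta field carries the MIME type.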
  defp update_feed_gemini(feed, feed_uri) do
    case Network.gemini_request(feed_uri) do
      {:ok, %Gemini.Response{meta: content_type, body: body}} ->
        do_update_feed(feed, content_type, body)

      {:error, reason} ->
        Logger.error("Couldn't load feed #{feed.feed_url}: #{inspect(reason)}")
    end
  end
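
  # Parses the fetched document and, on success, applies it to the feed.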
  defp do_update_feed(feed, content_type, data) do
    case FeedParser.parse(data, content_type) do
      {:ok, rss} ->
        update_feed_from_rss(feed, rss)

      {:error, reason} ->
        Logger.error("Unable to parse feed at '#{feed.feed_url}': #{inspect(reason)}")
    end
  end
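
  # Updates the feed's metadata from the parsed document and creates items
  # for any entries we haven't seen before.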
  defp update_feed_from_rss(feed, %FeedParser.Feed{} = rss) do
    changeset =
      Feed.changeset(feed, %{
        title: rss.title,
        site_url: rss.site_url,
        last_updated: (rss.last_updated || DateTime.utc_now()) |> Timex.Timezone.convert(:utc)
      })

    {:ok, feed} = Repo.update(changeset)

    if is_nil(feed.favicon) do
      FetchFavicon.run(feed)
    end

    feed = Repo.preload(feed, [:items])

    Enum.each(rss.items, fn entry ->
      # todo: use Repo.exists for this
      # New entries are detected by GUID against the feed's existing items.
      if !Enum.any?(feed.items, fn item -> item.guid == entry.guid end) do
        CreateItem.start_link(feed, entry)
      end
    end)
  end
end