frenzy/lib/frenzy/update_feeds.ex

141 lines
3.7 KiB
Elixir

defmodule Frenzy.UpdateFeeds do
use GenServer
alias Frenzy.{HTTP, Repo, Feed, Item}
alias Frenzy.Task.{CreateItem, FetchFavicon}
import Ecto.Query
require Logger
@doc """
Starts the feed updater process.

Despite its name, `state` is forwarded as the third argument of
`GenServer.start_link/3`, i.e. it is used as the GenServer options
(for example `[name: ...]`). The server's init argument is always `:ok`.
"""
def start_link(state), do: GenServer.start_link(__MODULE__, :ok, state)
@doc """
Synchronously refreshes a single `feed` through the updater process `pid`.

Sends a `{:refresh, feed}` call to the server and returns whatever the
server replies with. The call uses `GenServer.call/2`'s default timeout.
"""
def refresh(pid, feed) do
  request = {:refresh, feed}
  GenServer.call(pid, request)
end
# GenServer callback: initialises the updater.
#
# The first refresh is NOT performed here. `init/1` runs inside
# `start_link`, so doing network and database work in it would block the
# caller (usually the supervisor) until every feed has been fetched, and a
# failure while fetching would abort startup. Instead the process sends
# itself the `:update_feeds` message; the `handle_info(:update_feeds, _)`
# clause then updates all feeds and schedules the next periodic run.
#
# Returns `{:ok, state}` with the state unchanged.
def init(state) do
  send(self(), :update_feeds)
  {:ok, state}
end
# GenServer callback for `{:refresh, feed}` calls.
#
# Updates the given feed, then re-reads it from the database by id (so the
# reply reflects whatever the update persisted) with its items preloaded,
# and replies with that record. The server state is left untouched.
def handle_call({:refresh, feed}, _from, state) do
  update_feed(feed)

  refreshed = Repo.preload(Repo.get(Feed, feed.id), :items)

  {:reply, refreshed, state}
end
# GenServer callback for the periodic `:update_feeds` timer message:
# refreshes every feed and then arms the timer for the next run.
def handle_info(:update_feeds, state) do
  update_feeds()
  schedule_update()
  {:noreply, state}
end

# Catch-all for any other message. Defining `handle_info/2` above replaces
# the default implementation injected by `use GenServer`, so without this
# clause an unexpected message would raise a FunctionClauseError and take
# the updater down. Log the message (as the default implementation would)
# and carry on.
def handle_info(message, state) do
  Logger.error("#{inspect(__MODULE__)} received unexpected message: #{inspect(message)}")
  {:noreply, state}
end
# Arms a one-shot timer that delivers `:update_feeds` to this process in
# 30 minutes. Returns the timer reference from `Process.send_after/3`.
defp schedule_update() do
  # For local debugging a shorter delay (e.g. one minute) can be used here.
  delay_ms = :timer.minutes(30)
  Process.send_after(self(), :update_feeds, delay_ms)
end
# Refreshes every feed stored in the database, one after another, and then
# tombstones old items. Returns the result of `prune_old_items/0`.
defp update_feeds() do
  Logger.info("Updating all feeds")

  for feed <- Repo.all(from(Feed)) do
    update_feed(feed)
  end

  prune_old_items()
end
# Turns old items into tombstones with a single UPDATE statement.
#
# An item that is not yet a tombstone is selected when it was read at least
# one week ago, or when it is unread and was inserted at least two weeks
# ago. Selected rows get `tombstone: true` and have their content, creator,
# date, url and title cleared. The number of affected rows is logged.
defp prune_old_items() do
  # todo: these time intervals should be configurable by the admin
  stale_items =
    Item
    |> where([i], not i.tombstone)
    |> where(
      [i],
      (i.read and i.read_date <= from_now(-1, "week")) or
        (not i.read and i.inserted_at <= from_now(-2, "week"))
    )
    |> update(
      set: [tombstone: true, content: nil, creator: nil, date: nil, url: nil, title: nil]
    )

  {count, _} = Repo.update_all(stale_items, [])

  Logger.info("Converted #{count} read items to tombstones")
end
# Fetches `feed.feed_url` over HTTP and acts on the response:
#
#   * 200        - the body is parsed and the feed/items are updated
#   * 404        - a warning is logged
#   * 301 / 302  - the feed's URL is replaced with the redirect target,
#                  persisted, and the fetch is retried against the new URL
#   * otherwise  - the response or transport error is logged
#
# `redirects_left` bounds how many redirects are followed for one update so
# that a redirect loop cannot keep the updater busy forever. Callers use the
# one-argument form. The return value is not meaningful.
defp update_feed(feed, redirects_left \\ 5) do
  Logger.debug("Updating #{feed.feed_url}")

  case HTTPoison.get(feed.feed_url) do
    {:ok, %HTTPoison.Response{status_code: 200, body: body, headers: headers}} ->
      parse_feed_response(feed, body, headers)

    {:ok, %HTTPoison.Response{status_code: 404}} ->
      Logger.warn("RSS feed #{feed.feed_url} not found")

    {:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
    when status_code in [301, 302] ->
      follow_redirect(feed, status_code, headers, redirects_left)

    {:ok, %HTTPoison.Response{} = response} ->
      Logger.error(
        "Couldn't load RSS feed #{feed.feed_url}, got unexpected response: #{inspect(response)}"
      )

    {:error, %HTTPoison.Error{reason: reason}} ->
      Logger.error("Couldn't load RSS feed #{feed.feed_url}: #{inspect(reason)}")
  end
end

# Parses a successful response body and hands the result to
# `update_feed_from_rss/2`. A missing Content-Type header or a parse result
# other than `{:ok, rss}` is logged instead of crashing the updater.
defp parse_feed_response(feed, body, headers) do
  case find_header(headers, "content-type") do
    nil ->
      Logger.error("RSS feed #{feed.feed_url} response has no Content-Type header")

    raw_content_type ->
      # Keep only the media type, dropping parameters such as "charset=utf-8".
      content_type =
        raw_content_type
        |> String.split(";")
        |> Enum.map(&String.trim/1)
        |> Enum.find(fn part -> !String.contains?(part, "=") end)

      case FeedParser.parse(body, content_type) do
        {:ok, rss} ->
          update_feed_from_rss(feed, rss)

        other ->
          Logger.error("Couldn't parse RSS feed #{feed.feed_url}: #{inspect(other)}")
      end
  end
end

# Handles a 301/302 response: stores the redirect target as the feed's new
# URL and retries the update, unless the redirect budget is used up or the
# response carries no Location header.
defp follow_redirect(feed, _status_code, _headers, redirects_left) when redirects_left <= 0 do
  Logger.error("Too many redirects while loading RSS feed #{feed.feed_url}, giving up")
end

defp follow_redirect(feed, status_code, headers, redirects_left) do
  case find_header(headers, "location") do
    nil ->
      Logger.error(
        "Got #{status_code} redirect from #{feed.feed_url} without a Location header"
      )

    location ->
      # Location may be relative; resolve it against the URL just requested
      # so that an absolute URL is what gets stored and fetched.
      new_url = feed.feed_url |> URI.merge(location) |> URI.to_string()

      Logger.debug(
        "Got #{status_code} redirect from #{feed.feed_url} to #{new_url}, updating feed URL"
      )

      changeset = Feed.changeset(feed, %{feed_url: new_url})
      {:ok, feed} = Repo.update(changeset)
      update_feed(feed, redirects_left - 1)
  end
end

# Returns the value of the first header whose name equals `name`
# (given in lower case), compared case-insensitively because HTTP header
# names are case-insensitive. Returns nil when the header is absent.
defp find_header(headers, name) do
  Enum.find_value(headers, fn {key, value} ->
    if String.downcase(key) == name, do: value
  end)
end
# Applies a parsed feed (`rss`) to the stored `feed`:
#
#   1. persists the title, site URL and last-updated time (falling back to
#      the current time when the parsed feed has none, converted to UTC);
#   2. fetches a favicon when the feed has none yet;
#   3. starts a `CreateItem` task for every parsed entry whose guid is not
#      already among the feed's stored items.
#
# Raises a MatchError if the feed update is rejected. Returns `:ok`.
defp update_feed_from_rss(feed, %FeedParser.Feed{} = rss) do
  last_updated = Timex.Timezone.convert(rss.last_updated || DateTime.utc_now(), :utc)

  changeset =
    Feed.changeset(feed, %{
      title: rss.title,
      site_url: rss.site_url,
      last_updated: last_updated
    })

  {:ok, feed} = Repo.update(changeset)

  if is_nil(feed.favicon) do
    FetchFavicon.run(feed)
  end

  feed = Repo.preload(feed, [:items])

  # Build the set of known guids once so that each entry is checked in
  # constant time instead of rescanning every stored item per entry.
  # todo: query only the guids instead of preloading all items
  known_guids = MapSet.new(feed.items, & &1.guid)

  rss.items
  |> Enum.reject(&MapSet.member?(known_guids, &1.guid))
  |> Enum.each(&CreateItem.start_link(feed, &1))
end
end