frenzy/lib/frenzy/update_feeds.ex

214 lines
5.2 KiB
Elixir
Raw Normal View History

2019-02-11 22:22:35 +00:00
defmodule Frenzy.UpdateFeeds do
use GenServer
2019-03-15 01:42:02 +00:00
alias Frenzy.{Repo, Feed, Item, FilterEngine}
2019-02-11 22:22:35 +00:00
import Ecto.Query
require Logger
@doc """
Starts the feed updater process.

Despite its name, `state` is forwarded as the third argument of
`GenServer.start_link/3` (the options, e.g. `name: ...`); `init/1` always
receives `:ok` as its argument.
"""
def start_link(state), do: GenServer.start_link(__MODULE__, :ok, state)
@doc """
Synchronously asks the updater process `pid` to refresh `feed`.

The reply is whatever the `{:refresh, feed}` call handler returns. The call
uses the default `GenServer.call/2` timeout.
"""
def refresh(pid, feed) do
  request = {:refresh, feed}
  GenServer.call(pid, request)
end
# GenServer callback.
#
# The first refresh is not run inline: fetching every feed over HTTP inside
# init/1 would block start_link/1 (and therefore the supervisor starting this
# process) until all feeds have been downloaded. Instead the process sends
# itself the same `:update_feeds` message the periodic timer uses; the
# `handle_info(:update_feeds, ...)` clause performs the update and schedules
# the next one.
def init(state) do
  send(self(), :update_feeds)
  {:ok, state}
end
# GenServer callback for `{:refresh, feed}`.
#
# Updates the given feed, then re-reads it from the database (by id) with its
# items preloaded and replies with that fresh copy. The server state is left
# untouched.
def handle_call({:refresh, feed}, _from, state) do
  update_feed(feed)

  reloaded = Repo.get(Feed, feed.id)
  reloaded_with_items = Repo.preload(reloaded, :items)

  {:reply, reloaded_with_items, state}
end
# GenServer callback for the periodic `:update_feeds` timer message: refreshes
# every feed and arms the timer for the next run.
def handle_info(:update_feeds, state) do
  update_feeds()
  schedule_update()
  {:noreply, state}
end

# Catch-all for any other message. Defining handle_info/2 in this module
# replaces the default implementation provided by `use GenServer`, so without
# this clause an unexpected message would crash the server with a
# FunctionClauseError. Such messages are logged and dropped.
def handle_info(message, state) do
  Logger.debug("Ignoring unexpected message: #{inspect(message)}")
  {:noreply, state}
end
# Arms a timer that delivers `:update_feeds` to this process in 15 minutes.
# (A 1 minute interval, `60 * 1000`, was used here as a commented-out
# alternative.)
defp schedule_update() do
  interval_ms = :timer.minutes(15)
  Process.send_after(self(), :update_feeds, interval_ms)
end
# Refreshes every feed in the database (loaded with its filter preloaded),
# then converts old read items into tombstones.
defp update_feeds() do
  Logger.info("Updating all feeds")

  all_feeds = Repo.all(from(Feed, preload: [:filter]))
  # Run purely for side effects; the per-feed results are not used.
  Enum.each(all_feeds, &update_feed/1)

  prune_old_items()
end
# Turns read, not-yet-tombstoned items whose read_date is at least one minute
# old into tombstones: the row is kept (flagged `tombstone: true`) but its
# content, creator, date, url and title are cleared. Logs how many rows were
# converted.
defp prune_old_items() do
  # The cutoff was previously written as from_now(-1, "week") (left commented
  # out in the original query).
  stale_read_items =
    Item
    |> where([i], i.read and not i.tombstone)
    |> where([i], i.read_date <= from_now(-1, "minute"))
    |> update(
      set: [tombstone: true, content: nil, creator: nil, date: nil, url: nil, title: nil]
    )

  {count, _} = Repo.update_all(stale_read_items, [])

  Logger.info("Converted #{count} read items to tombstones")
end
# Downloads `feed.feed_url` and, on success, hands the parsed document to
# update_feed_from_rss/2.
#
# Every failure mode is logged instead of raising, so that one broken feed
# cannot abort a whole update run:
#   * 404                       -> warning
#   * 301 / 302                 -> the feed URL is updated to the redirect
#                                  target and the fetch is retried
#   * any other HTTP status     -> warning
#   * unparseable body          -> error
#   * transport error           -> error
#
# `redirects_left` bounds how many consecutive redirects are followed, so a
# redirect loop cannot recurse forever. Callers normally omit it.
defp update_feed(feed, redirects_left \\ 5) do
  Logger.debug("Updating #{feed.feed_url}")

  case HTTPoison.get(feed.feed_url) do
    {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
      case Fiet.parse(body) do
        {:ok, rss} ->
          update_feed_from_rss(feed, rss)

        other ->
          Logger.error("Couldn't parse RSS feed #{feed.feed_url}: #{inspect(other)}")
      end

    {:ok, %HTTPoison.Response{status_code: 404}} ->
      Logger.warn("RSS feed #{feed.feed_url} not found")

    {:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
    when status_code in [301, 302] ->
      follow_redirect(feed, status_code, headers, redirects_left)

    {:ok, %HTTPoison.Response{status_code: status_code}} ->
      Logger.warn("Unexpected status #{status_code} for RSS feed #{feed.feed_url}")

    {:error, %HTTPoison.Error{reason: reason}} ->
      # `reason` is not guaranteed to be a string or atom, so inspect it
      # rather than interpolating it directly.
      Logger.error("Couldn't load RSS feed: #{inspect(reason)}")
  end
end

# Handles a 301/302 response: persists the redirect target as the feed's new
# URL and retries the fetch against it.
defp follow_redirect(feed, status_code, _headers, redirects_left) when redirects_left <= 0 do
  Logger.error("Too many redirects for #{feed.feed_url} (last status #{status_code}), giving up")
end

defp follow_redirect(feed, status_code, headers, redirects_left) do
  # HTTP header names are case-insensitive, so do not rely on the exact
  # spelling "Location".
  location =
    Enum.find_value(headers, fn {name, value} ->
      if String.downcase(name) == "location", do: value
    end)

  case location do
    nil ->
      Logger.warn("Got #{status_code} redirect from #{feed.feed_url} without a Location header")

    new_url ->
      Logger.debug(
        "Got #{status_code} redirect from #{feed.feed_url} to #{new_url}, updating feed URL"
      )

      case Repo.update(Feed.changeset(feed, %{feed_url: new_url})) do
        {:ok, updated_feed} ->
          update_feed(updated_feed, redirects_left - 1)

        {:error, changeset} ->
          Logger.error(
            "Couldn't update URL of feed #{feed.feed_url}: #{inspect(changeset.errors)}"
          )
      end
  end
end
# Applies a parsed feed document to the stored feed:
#
#   1. updates the feed's title, site URL and last-updated timestamp
#      (falling back to the current UTC time when the document carries no
#      usable `updated_at`), and
#   2. creates an item for every entry whose id is not already present among
#      the feed's stored item guids.
#
# Run for its side effects; returns `:ok`.
defp update_feed_from_rss(feed, rss) do
  last_updated = (rss.updated_at && parse_date(rss.updated_at)) || DateTime.utc_now()

  changeset =
    Feed.changeset(feed, %{
      title: rss.title,
      site_url: rss.link.href,
      last_updated: last_updated
    })

  # A failed metadata update should not prevent new items from being stored,
  # but it must not go unnoticed either.
  case Repo.update(changeset) do
    {:ok, _updated} ->
      :ok

    {:error, failed} ->
      Logger.error("Couldn't update feed #{feed.feed_url}: #{inspect(failed.errors)}")
  end

  feed = Repo.preload(feed, items: [], filter: [:rules])

  # Build the set of known guids once instead of scanning all stored items
  # for every entry.
  known_guids = MapSet.new(feed.items, & &1.guid)

  rss.items
  |> Enum.reject(&MapSet.member?(known_guids, &1.id))
  |> Enum.each(&create_item(feed, &1))
end
# Creates and stores an item for a feed entry.
#
# The entry is turned into a params map and passed through the feed's
# pipeline stages in ascending `index` order. Each stage module's `apply/2`
# receives the stage options and the current params and returns one of:
#
#   * `{:ok, params}`    - continue with the (possibly modified) params
#   * `:tombstone`       - stop; only a tombstone row is stored for the entry
#   * `{:error, reason}` - stop; nothing is stored and the error is logged
#
# Returns the inserted item on success, or the result of the logging call
# when the pipeline or the insert fails.
defp create_item(feed, entry) do
  url = get_real_url(entry)
  Logger.debug("Creating item for #{url}")

  item_params = %{
    guid: entry.id,
    title: entry.title,
    url: url,
    date: parse_date(entry.published_at),
    creator: "",
    content: entry.description
  }

  feed = Repo.preload(feed, :pipeline_stages)

  result =
    feed.pipeline_stages
    |> Enum.sort_by(& &1.index)
    |> Enum.reduce({:ok, item_params}, fn
      stage, {:ok, params} ->
        # to_existing_atom: only modules that are already loaded can be
        # named by a stage; no new atoms are created from stored strings.
        stage_module = String.to_existing_atom("Elixir." <> stage.module_name)
        apply(stage_module, :apply, [stage.options, params])

      _stage, :tombstone ->
        :tombstone

      _stage, {:error, _reason} = error ->
        error
    end)

  case result do
    # The reduce above yields `{:error, _}`; matching on `{:err, _}` here
    # would never succeed and a failed stage would raise CaseClauseError.
    {:error, reason} ->
      Logger.error("Error running pipeline for item #{entry.id}: #{inspect(reason)}")

    {:ok, final_params} ->
      insert_item(feed, final_params, "Error inserting item #{entry.id}")

    :tombstone ->
      insert_item(
        feed,
        %{guid: entry.id, tombstone: true},
        "Error inserting tombstone for #{entry.id}"
      )
  end
end

# Inserts an item built from `params` as a child of `feed`. Returns the item,
# or logs `failure_message` together with the failed changeset.
defp insert_item(feed, params, failure_message) do
  case feed |> Ecto.build_assoc(:items, params) |> Repo.insert() do
    {:ok, item} ->
      item

    {:error, changeset} ->
      Logger.error(failure_message)
      # A changeset is not chardata; inspect it before logging.
      Logger.error(inspect(changeset))
  end
end
# Parses a feed date string into a UTC datetime.
#
# RFC 1123 is tried first, then ISO 8601. Returns `nil` when `str` is `nil`
# or matches neither format (a warning is logged in the latter case), so a
# single entry with a missing or malformed date does not raise.
defp parse_date(nil), do: nil

defp parse_date(str) do
  case Timex.parse(str, "{RFC1123}") do
    {:ok, date} ->
      Timex.Timezone.convert(date, :utc)

    _ ->
      case DateTime.from_iso8601(str) do
        {:ok, date, _offset} ->
          Timex.Timezone.convert(date, :utc)

        {:error, reason} ->
          Logger.warn("Couldn't parse date #{inspect(str)}: #{inspect(reason)}")
          nil
      end
  end
end
# Picks the URL to store for a feed entry.
#
# Links with rel "shorturl" are never considered. Among the remaining links
# the first with rel "related" wins, then the first with rel "alternate",
# then simply the first link. Raises (via Enum.fetch!/2) when no link is left.
defp get_real_url(entry) do
  candidates = Enum.reject(entry.links, &(&1.rel == "shorturl"))

  chosen =
    Enum.find(candidates, &(&1.rel == "related")) ||
      Enum.find(candidates, &(&1.rel == "alternate")) ||
      Enum.fetch!(candidates, 0)

  chosen.href
end
2019-03-09 15:59:08 +00:00
end