# frenzy/lib/frenzy/update_feeds.ex
defmodule Frenzy.UpdateFeeds do
  @moduledoc """
  GenServer that keeps all feeds up to date.

  On startup, and then every 15 minutes, it re-fetches every `Frenzy.Feed`,
  inserts any entries not yet stored (matched by GUID), and prunes read items
  older than one week. A single feed can also be refreshed on demand via
  `refresh/2`.
  """
  use GenServer

  alias Frenzy.{Repo, Feed, Item}
  import Ecto.Query
  require Logger

  # Interval between automatic refresh passes: 15 minutes.
  @update_interval 15 * 60 * 1000

  ## Client API

  def start_link(state) do
    # NOTE(review): `state` is passed as the GenServer *options* list and `:ok`
    # becomes the init argument — callers appear to rely on this shape, so it
    # is preserved as-is.
    GenServer.start_link(__MODULE__, :ok, state)
  end

  @doc """
  Synchronously refreshes `feed`, then returns it reloaded from the database
  with its `:items` preloaded.
  """
  def refresh(pid, feed) do
    GenServer.call(pid, {:refresh, feed})
  end

  ## Server callbacks

  @impl true
  def init(state) do
    update_feeds()
    schedule_update()
    {:ok, state}
  end

  @impl true
  def handle_call({:refresh, feed}, _from, state) do
    update_feed(feed)
    new_feed = Feed |> Repo.get(feed.id) |> Repo.preload(:items)
    {:reply, new_feed, state}
  end

  @impl true
  def handle_info(:update_feeds, state) do
    update_feeds()
    schedule_update()
    {:noreply, state}
  end

  ## Internals

  # Schedules the next periodic `:update_feeds` message to this process.
  defp schedule_update() do
    Process.send_after(self(), :update_feeds, @update_interval)
  end

  # Refreshes every feed in the database, then deletes stale read items.
  defp update_feeds() do
    Logger.info("Updating all feeds")
    # Enum.each, not Enum.map: the results are discarded.
    Enum.each(Repo.all(Feed), &update_feed/1)
    prune_old_items()
  end

  # Deletes items that are read and whose read date is at least a week old.
  defp prune_old_items() do
    {count, _} =
      Repo.delete_all(from i in Item, where: i.read, where: i.read_date <= from_now(-1, "week"))

    Logger.info("Removed #{count} read items")
  end

  # Fetches a single feed's XML and merges its entries into the database.
  # Best-effort: failures are logged and skipped so one bad feed cannot crash
  # a whole refresh pass.
  defp update_feed(feed) do
    Logger.debug("Updating #{feed.feed_url}")

    case HTTPoison.get(feed.feed_url) do
      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
        case Fiet.parse(body) do
          {:ok, rss} ->
            update_feed_from_rss(feed, rss)

          # Previously unhandled: a parse failure raised CaseClauseError and
          # crashed the server.
          {:error, reason} ->
            Logger.error("Couldn't parse RSS feed #{feed.feed_url}: #{inspect(reason)}")
        end

      {:ok, %HTTPoison.Response{status_code: 404}} ->
        Logger.warn("RSS feed #{feed.feed_url} not found")

      # Previously unhandled: any other status (e.g. a 301 redirect or a 500)
      # raised CaseClauseError.
      {:ok, %HTTPoison.Response{status_code: code}} ->
        Logger.error("Unexpected HTTP #{code} loading RSS feed #{feed.feed_url}")

      {:error, %HTTPoison.Error{reason: reason}} ->
        # `reason` may be a non-atom term such as `{:tls_alert, _}`, so
        # inspect it instead of interpolating (which requires String.Chars).
        Logger.error("Couldn't load RSS feed: #{inspect(reason)}")
    end
  end

  # Applies feed-level metadata from the parsed document and creates items for
  # any entries whose GUID is not already stored.
  defp update_feed_from_rss(feed, rss) do
    changeset =
      Feed.changeset(feed, %{
        title: rss.title,
        # NOTE(review): assumes the parsed document always carries a top-level
        # link — a feed without one would crash here. TODO confirm with Fiet.
        site_url: rss.link.href,
        last_updated: parse_date(rss.updated_at)
      })

    Repo.update(changeset)

    feed = Repo.preload(feed, :items)

    # Set lookup instead of the previous per-entry Enum.any? linear scan.
    existing_guids = MapSet.new(feed.items, fn item -> item.guid end)

    Enum.each(rss.items, fn entry ->
      if !MapSet.member?(existing_guids, entry.id) do
        create_item(feed, entry)
      end
    end)
  end

  # Inserts a new item for `entry`, scraping the article body from the entry's
  # resolved URL.
  defp create_item(feed, entry) do
    # Debug leftover `IO.inspect(entry)` folded into the log statement.
    Logger.debug("Creating item for: #{inspect(entry)}")

    url = get_real_url(entry)

    changeset =
      Ecto.build_assoc(feed, :items, %{
        guid: entry.id,
        title: entry.title,
        url: url,
        date: parse_date(entry.published_at),
        creator: "",
        content: get_article_content(url)
      })

    Repo.insert(changeset)
  end

  # Parses an RFC 1123 or ISO 8601 timestamp into a UTC datetime.
  # Returns nil for nil input or an unrecognized format (previously this
  # raised MatchError and aborted the whole refresh pass).
  defp parse_date(nil), do: nil

  defp parse_date(str) do
    case Timex.parse(str, "{RFC1123}") do
      {:ok, date} ->
        Timex.Timezone.convert(date, :utc)

      _ ->
        case DateTime.from_iso8601(str) do
          {:ok, date, _offset} -> Timex.Timezone.convert(date, :utc)
          _ -> nil
        end
    end
  end

  # Picks the best link for an entry: prefer rel="related", then
  # rel="alternate", then the first remaining link. "shorturl" links are
  # never used.
  defp get_real_url(entry) do
    links = Enum.reject(entry.links, fn l -> l.rel == "shorturl" end)

    case Enum.find(links, fn l -> l.rel == "related" end) do
      nil ->
        case Enum.find(links, fn l -> l.rel == "alternate" end) do
          nil -> Enum.fetch!(links, 0).href
          link -> link.href
        end

      link ->
        link.href
    end
  end

  # Downloads `url` and extracts the readable article HTML.
  # Returns nil when the article can't be fetched or parsed (previously the
  # error branches leaked the Logger macro's return value — `:ok` — into the
  # item's `content` field).
  defp get_article_content(url) do
    Logger.debug("Getting article from #{url}")

    case HTTPoison.get(url) do
      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
        body
        |> Readability.article()
        |> Readability.readable_html()

      {:ok, %HTTPoison.Response{status_code: 404}} ->
        Logger.warn("Article #{url} not found")
        nil

      # Previously unhandled: any other status raised CaseClauseError.
      {:ok, %HTTPoison.Response{status_code: code}} ->
        Logger.error("Unexpected HTTP #{code} loading article #{url}")
        nil

      {:error, %HTTPoison.Error{reason: reason}} ->
        Logger.error("Couldn't load article: #{inspect(reason)}")
        nil
    end
  end
end