defmodule Frenzy.UpdateFeeds do
  @moduledoc """
  GenServer that periodically refreshes every stored feed.

  On `init/1` and then every `@update_interval` milliseconds it re-fetches all
  feeds, creates items for entries not seen before (matched by guid), and
  prunes read items older than one week. `refresh/2` forces a synchronous
  refresh of a single feed and returns the reloaded feed with items preloaded.
  """

  use GenServer

  alias Frenzy.{Repo, Feed, Item}
  import Ecto.Query
  require Logger

  # Refresh cadence: 15 minutes.
  @update_interval 15 * 60 * 1000

  ## Client API

  def start_link(state) do
    GenServer.start_link(__MODULE__, :ok, state)
  end

  @doc """
  Synchronously refreshes `feed` via the server at `pid` and returns the
  freshly reloaded feed with `:items` preloaded.
  """
  def refresh(pid, feed) do
    GenServer.call(pid, {:refresh, feed})
  end

  ## Server callbacks

  @impl true
  def init(state) do
    # Do an initial pass immediately, then fall into the timer loop.
    update_feeds()
    schedule_update()
    {:ok, state}
  end

  @impl true
  def handle_call({:refresh, feed}, _from, state) do
    update_feed(feed)
    new_feed = Feed |> Repo.get(feed.id) |> Repo.preload(:items)
    {:reply, new_feed, state}
  end

  @impl true
  def handle_info(:update_feeds, state) do
    update_feeds()
    schedule_update()
    {:noreply, state}
  end

  ## Internals

  defp schedule_update do
    Process.send_after(self(), :update_feeds, @update_interval)
  end

  defp update_feeds do
    Logger.info("Updating all feeds")
    Feed |> Repo.all() |> Enum.each(&update_feed/1)
    prune_old_items()
  end

  # Deletes read items whose read_date is more than a week old.
  defp prune_old_items do
    {count, _} =
      Repo.delete_all(
        from i in Item,
          where: i.read,
          where: i.read_date <= from_now(-1, "week")
      )

    Logger.info("Removed #{count} read items")
  end

  # Fetches and parses a single feed. All failure paths are logged and
  # swallowed so one bad feed cannot crash the whole update pass.
  defp update_feed(feed) do
    Logger.debug("Updating #{feed.feed_url}")

    case HTTPoison.get(feed.feed_url) do
      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
        case Fiet.parse(body) do
          {:ok, rss} ->
            update_feed_from_rss(feed, rss)

          # Previously unhandled: a parse failure raised CaseClauseError and
          # took down the GenServer.
          {:error, reason} ->
            Logger.error("Couldn't parse RSS feed #{feed.feed_url}: #{inspect(reason)}")
        end

      {:ok, %HTTPoison.Response{status_code: 404}} ->
        Logger.warn("RSS feed #{feed.feed_url} not found")

      # Previously unhandled: redirects/5xx raised CaseClauseError.
      {:ok, %HTTPoison.Response{status_code: status}} ->
        Logger.warn("RSS feed #{feed.feed_url} returned unexpected status #{status}")

      {:error, %HTTPoison.Error{reason: reason}} ->
        # reason may be a tuple; inspect/1 avoids Protocol.UndefinedError.
        Logger.error("Couldn't load RSS feed: #{inspect(reason)}")
    end
  end

  # Updates feed metadata from the parsed document and creates items for
  # entries whose guid has not been seen before.
  defp update_feed_from_rss(feed, rss) do
    changeset =
      Feed.changeset(feed, %{
        title: rss.title,
        site_url: rss.link.href,
        last_updated: parse_date(rss.updated_at)
      })

    case Repo.update(changeset) do
      {:ok, _feed} -> :ok
      {:error, changeset} -> Logger.error("Couldn't update feed: #{inspect(changeset.errors)}")
    end

    feed = Repo.preload(feed, :items)

    # Precompute known guids once instead of scanning feed.items per entry
    # (was accidentally O(entries × items)).
    existing_guids = MapSet.new(feed.items, & &1.guid)

    Enum.each(rss.items, fn entry ->
      if not MapSet.member?(existing_guids, entry.id) do
        create_item(feed, entry)
      end
    end)
  end

  defp create_item(feed, entry) do
    Logger.debug("Creating item for: #{inspect(entry)}")

    url = get_real_url(entry)

    changeset =
      Ecto.build_assoc(feed, :items, %{
        guid: entry.id,
        title: entry.title,
        url: url,
        date: parse_date(entry.published_at),
        creator: "",
        content: get_article_content(url)
      })

    case Repo.insert(changeset) do
      {:ok, _item} -> :ok
      {:error, changeset} -> Logger.error("Couldn't create item: #{inspect(changeset.errors)}")
    end
  end

  # Parses RFC1123 first, then ISO8601, always converting to UTC. Falls back
  # to "now" for unparseable dates instead of crashing (was a MatchError).
  defp parse_date(str) do
    case Timex.parse(str, "{RFC1123}") do
      {:ok, date} ->
        Timex.Timezone.convert(date, :utc)

      _ ->
        case DateTime.from_iso8601(str) do
          {:ok, date, _offset} ->
            Timex.Timezone.convert(date, :utc)

          _ ->
            Logger.warn("Unable to parse date #{inspect(str)}; using current time")
            DateTime.utc_now()
        end
    end
  end

  # Picks the best link for an entry: prefer rel="related", then
  # rel="alternate", then the first remaining link; "shorturl" links are
  # always ignored. Returns nil when the entry has no usable links
  # (was Enum.EmptyError).
  defp get_real_url(entry) do
    links = Enum.reject(entry.links, &(&1.rel == "shorturl"))

    link =
      Enum.find(links, &(&1.rel == "related")) ||
        Enum.find(links, &(&1.rel == "alternate")) ||
        List.first(links)

    link && link.href
  end

  # Fetches the article at `url` and extracts readable HTML. Returns "" on
  # any failure (previously returned :ok — the Logger result — which was then
  # stored as item content).
  defp get_article_content(nil), do: ""

  defp get_article_content(url) do
    Logger.debug("Getting article from #{url}")

    case HTTPoison.get(url) do
      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
        body |> Readability.article() |> Readability.readable_html()

      {:ok, %HTTPoison.Response{status_code: 404}} ->
        Logger.warn("Article #{url} not found")
        ""

      {:ok, %HTTPoison.Response{status_code: status}} ->
        Logger.warn("Article #{url} returned unexpected status #{status}")
        ""

      {:error, %HTTPoison.Error{reason: reason}} ->
        Logger.error("Couldn't load article: #{inspect(reason)}")
        ""
    end
  end
end