Prevent unnecessary refetching of favicons

This commit is contained in:
Shadowfacts 2020-05-29 19:47:14 -04:00
parent d3f8c95d35
commit b0d9189399
Signed by: shadowfacts
GPG Key ID: 94A5AB95422746E5
3 changed files with 60 additions and 48 deletions

View File

@ -35,6 +35,7 @@ defmodule Frenzy.Feed do
field :site_url, :string field :site_url, :string
field :title, :string field :title, :string
field :favicon, :string field :favicon, :string
field :favicon_url, :string
belongs_to :group, Frenzy.Group belongs_to :group, Frenzy.Group
belongs_to :pipeline, Frenzy.Pipeline belongs_to :pipeline, Frenzy.Pipeline
@ -52,6 +53,7 @@ defmodule Frenzy.Feed do
site_url: String.t() | nil, site_url: String.t() | nil,
title: String.t() | nil, title: String.t() | nil,
favicon: String.t() | nil, favicon: String.t() | nil,
favicon_url: String.t() | nil,
group: Frenzy.Group.t() | Ecto.Association.NotLoaded.t(), group: Frenzy.Group.t() | Ecto.Association.NotLoaded.t(),
pipeline: Frenzy.Pipeline.t() | nil | Ecto.Association.NotLoaded.t(), pipeline: Frenzy.Pipeline.t() | nil | Ecto.Association.NotLoaded.t(),
items: [Frenzy.Item.t()] | Ecto.Association.NotLoaded.t(), items: [Frenzy.Item.t()] | Ecto.Association.NotLoaded.t(),
@ -68,7 +70,8 @@ defmodule Frenzy.Feed do
:site_url, :site_url,
:last_updated, :last_updated,
:pipeline_id, :pipeline_id,
:favicon :favicon,
:favicon_url
]) ])
|> validate_required([:feed_url]) |> validate_required([:feed_url])
end end

View File

@ -21,44 +21,40 @@ defmodule Frenzy.Task.FetchFavicon do
Logger.debug("Fetching favicon for #{site_url}") Logger.debug("Fetching favicon for #{site_url}")
case fetch_favicon_from_webpage(site_url) do favicon_url = fetch_favicon_url_from_webpage(site_url) || URI.merge(site_url, "/favicon.ico")
{:ok, favicon_data} ->
changeset = Feed.changeset(feed, %{favicon: favicon_data}) with %Feed{favicon_url: old_url} when old_url != favicon_url <- feed,
{:ok, favicon_data} <- fetch_favicon_data(favicon_url) do
IO.inspect(favicon_url)
changeset = Feed.changeset(feed, %{favicon: favicon_data, favicon_url: favicon_url})
{:ok, _feed} = Repo.update(changeset) {:ok, _feed} = Repo.update(changeset)
else
{:error, reason} -> _ ->
Logger.info("Couldn't fetch favicon for #{site_url}: #{reason}") :ok
favicon_uri =
%{URI.parse(site_url) | path: "/favicon.ico", query: nil, fragment: nil}
|> URI.to_string()
Logger.info("Trying default path: #{favicon_uri}")
case fetch_favicon_data(favicon_uri, site_url) do
{:ok, favicon_data} ->
changeset = Feed.changeset(feed, %{favicon: favicon_data})
{:ok, _feed} = Repo.update(changeset)
{:error, reason} ->
Logger.info("Couldn't fetch default /favicon.ico for #{site_url}: #{reason}")
end
end end
end end
defp fetch_favicon_from_webpage(url) when is_binary(url) do @spec fetch_favicon_url_from_webpage(url :: String.t()) :: String.t()
defp fetch_favicon_url_from_webpage(url) when is_binary(url) do
case HTTP.get(url) do case HTTP.get(url) do
{:ok, %HTTPoison.Response{body: body}} -> {:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
extract_favicon(url, body) extract_favicon_url(url, body)
{:error, _reason} = err -> {:ok, %HTTPoison.Response{status_code: code}} ->
err Logger.debug("Unhandled HTTP code #{code} for '#{url}'")
nil
{:error, reason} ->
Logger.debug("Error fetching webpage for favicon: #{inspect(reason)}")
nil
end end
end end
defp fetch_favicon_from_webpage(_), do: {:error, "URL must be a string"} defp fetch_favicon_url_from_webpage(_), do: {:error, "URL must be a string"}
defp extract_favicon(page_url, body) do @spec extract_favicon_url(page_url :: String.t(), body :: term()) :: String.t()
defp extract_favicon_url(page_url, body) do
html_tree = Floki.parse(body) html_tree = Floki.parse(body)
case Floki.find(html_tree, "link[rel=icon]") do case Floki.find(html_tree, "link[rel=icon]") do
@ -89,34 +85,38 @@ defmodule Frenzy.Task.FetchFavicon do
nil -> nil ->
{:error, "No link[rel=icon] with type of image/png"} {:error, "No link[rel=icon] with type of image/png"}
# todo: try requesting /favicon.ico
link -> link ->
link link
|> Floki.attribute("href") |> Floki.attribute("href")
|> List.first() |> List.first()
|> fetch_favicon_data(page_url) |> case do
href when is_binary(href) ->
URI.merge(page_url, href) |> to_string()
_ ->
nil
end
end end
end end
end end
defp fetch_favicon_data(favicon_url, site_url) when is_binary(favicon_url) do @spec fetch_favicon_data(favicon_url :: String.t()) :: {:ok, String.t()} | :error
# handle relative URIs, set default scheme if not provided defp fetch_favicon_data(favicon_url) do
absolute_url = Logger.debug("Fetching favicon from: '#{favicon_url}'")
favicon_url
|> URI.parse()
|> HTTP.resolve_uri(URI.parse(site_url))
case HTTP.get(absolute_url) do case HTTP.get(favicon_url) do
{:ok, %HTTPoison.Response{body: body}} -> {:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
{:ok, "data:image/png;base64,#{Base.encode64(body)}"} {:ok, "data:image/png;base64,#{Base.encode64(body)}"}
{:error, _reason} = err -> {:ok, %HTTPoison.Response{status_code: code}} ->
err Logger.debug("Unhandled HTTP code #{code} for '#{favicon_url}'")
end :error
end
defp fetch_favicon_data(_, _), do: {:error, "No or invalid href for link"} {:error, reason} ->
Logger.debug("Error fetching favicon: #{inspect(reason)}")
:error
end
end
# from https://github.com/elixir-plug/plug/blob/v1.8.3/lib/plug/request_id.ex#L60 # from https://github.com/elixir-plug/plug/blob/v1.8.3/lib/plug/request_id.ex#L60
defp generate_task_id() do defp generate_task_id() do

View File

@ -0,0 +1,9 @@
defmodule Frenzy.Repo.Migrations.FeedsAddFaviconUrl do
  # Migration that adds a `favicon_url` string column to the `feeds` table.
  use Ecto.Migration

  # Single schema change: one new column on `feeds`, declared with an
  # explicit `nil` default. No existing column is modified.
  def change do
    alter(table(:feeds), do: add(:favicon_url, :string, default: nil))
  end
end