defmodule Frenzy.Task.FetchFavicon do
  @moduledoc """
  Task that downloads a favicon for a feed's site and stores it on the feed as a
  base64-encoded `data:` URI.

  The site's HTML is searched for a PNG `link[rel=icon]` element first. When that
  does not produce an icon, `/favicon.ico` on the site's host is tried instead.
  """

  require Logger
  use Task
  alias Frenzy.{HTTP, Repo, Feed}

  @doc """
  Starts a linked task that runs `run/1` for `feed`.
  """
  @spec start_link(map()) :: {:ok, pid()}
  def start_link(feed) do
    Task.start_link(__MODULE__, :run, [feed])
  end

  @doc """
  Fetches the favicon for `feed.site_url` and saves it in the feed's `favicon` field.

  Returns `{:ok, feed}` when a favicon was stored, or `:ok` (the result of the final
  log call) when neither the page icon nor `/favicon.ico` could be fetched. Raises a
  `MatchError` if the repository update fails.
  """
  @spec run(map()) :: {:ok, term()} | :ok
  def run(feed) do
    Logger.metadata(favicon_task_id: generate_task_id())

    case fetch_favicon_from_webpage(feed.site_url) do
      {:ok, favicon_data} ->
        save_favicon(feed, favicon_data)

      {:error, reason} ->
        Logger.info("Couldn't fetch favicon for #{feed.site_url}: #{format_reason(reason)}")
        fetch_default_favicon(feed)
    end
  end

  # Falls back to `/favicon.ico` at the root of the site's host.
  defp fetch_default_favicon(feed) do
    site_uri = URI.parse(feed.site_url)

    favicon_uri =
      URI.to_string(%URI{site_uri | path: "/favicon.ico", query: nil, fragment: nil})

    Logger.info("Trying default path: #{favicon_uri}")

    case fetch_favicon_data(favicon_uri) do
      {:ok, favicon_data} ->
        save_favicon(feed, favicon_data)

      {:error, reason} ->
        Logger.info(
          "Couldn't fetch default /favicon.ico for #{feed.site_url}: #{format_reason(reason)}"
        )
    end
  end

  # Persists the favicon data URI on the feed; a failed update crashes the task.
  defp save_favicon(feed, favicon_data) do
    changeset = Feed.changeset(feed, %{favicon: favicon_data})
    {:ok, _feed} = Repo.update(changeset)
  end

  # Error reasons are not guaranteed to implement String.Chars (e.g. tuples or
  # structs), so anything that is not a binary or atom is rendered with inspect/1
  # instead of raising inside the log call.
  defp format_reason(reason) when is_binary(reason) or is_atom(reason), do: to_string(reason)
  defp format_reason(reason), do: inspect(reason)

  # Downloads the page at `url` and looks for a usable icon link in its HTML.
  defp fetch_favicon_from_webpage(url) do
    case HTTP.get(url) do
      {:ok, %HTTPoison.Response{body: body}} -> extract_favicon(body, url)
      {:error, _reason} = err -> err
    end
  end

  # Finds the first PNG `link[rel=icon]` in `body` and fetches the image it points
  # to, resolving the href against `page_url`.
  defp extract_favicon(body, page_url) do
    html_tree = Floki.parse(body)

    case Floki.find(html_tree, "link[rel=icon]") do
      [] ->
        {:error, "No element matching link[rel=icon]"}

      links ->
        # todo: support more image types
        case Enum.find(links, &png_icon?/1) do
          nil ->
            {:error, "No link[rel=icon] with type of image/png"}

          link ->
            link
            |> Floki.attribute("href")
            |> List.first()
            |> resolve_href(page_url)
            |> fetch_favicon_data()
        end
    end
  end

  # A link counts as a PNG icon when its `type` is image/png or, as a fallback for
  # a missing type attribute, when its href contains ".png".
  defp png_icon?(link) do
    declared_png? =
      link
      |> Floki.attribute("type")
      |> Enum.any?(&(String.downcase(&1) == "image/png"))

    declared_png? or
      link
      |> Floki.attribute("href")
      |> Enum.any?(&String.contains?(&1, ".png"))
  end

  # Turns a possibly relative href into an absolute URL. Returns nil for a missing
  # or blank href so the caller reports "No href for link".
  defp resolve_href(nil, _page_url), do: nil

  defp resolve_href(href, page_url) do
    case String.trim(href) do
      "" ->
        nil

      trimmed ->
        case URI.parse(page_url) do
          # URI.merge/2 raises unless the base is absolute, so only merge then.
          %URI{scheme: scheme, host: host} = base when is_binary(scheme) and is_binary(host) ->
            base |> URI.merge(trimmed) |> URI.to_string()

          _not_absolute ->
            trimmed
        end
    end
  end

  # Downloads `url` and wraps the response body in a base64 data URI.
  # NOTE(review): the MIME type is always image/png, including for /favicon.ico
  # responses — confirm that consumers tolerate this.
  defp fetch_favicon_data(nil), do: {:error, "No href for link"}

  defp fetch_favicon_data(url) do
    case HTTP.get(url) do
      {:ok, %HTTPoison.Response{body: body}} ->
        {:ok, "data:image/png;base64,#{Base.encode64(body)}"}

      {:error, _reason} = err ->
        err
    end
  end

  # from https://github.com/elixir-plug/plug/blob/v1.8.3/lib/plug/request_id.ex#L60
  defp generate_task_id() do
    binary = <<
      System.system_time(:nanosecond)::64,
      :erlang.phash2({node(), self()}, 16_777_216)::24,
      :erlang.unique_integer()::32
    >>

    Base.url_encode64(binary)
  end
end