defmodule Frenzy.Task.FetchFavicon do
  @moduledoc """
  Task that fetches a favicon for a feed and stores it on the feed as a base64 `data:` URI.

  The site's web page is fetched first and searched for a PNG `link[rel=icon]` element.
  If that fails for any reason, `/favicon.ico` on the same host is tried instead.
  """

  require Logger
  use Task
  alias Frenzy.{HTTP, Repo, Feed}

  @doc """
  Starts a linked task that executes `run/1` for `feed`.
  """
  def start_link(feed) do
    Task.start_link(__MODULE__, :run, [feed])
  end

  @doc """
  Fetches the favicon for `feed` and, on success, saves it in the feed's `favicon` field.

  `feed.site_url` is used when it is a string; otherwise the site root is derived from
  `feed.feed_url`. Fetch failures are logged rather than raised. A failing `Repo.update/1`
  is not handled and raises a `MatchError`.
  """
  def run(feed) do
    Logger.metadata(favicon_task_id: generate_task_id())

    site_url = resolve_site_url(feed)
    Logger.debug("Fetching favicon for #{site_url}")

    case fetch_favicon_from_webpage(site_url) do
      {:ok, favicon_data} ->
        save_favicon(feed, favicon_data)

      {:error, reason} ->
        Logger.info("Couldn't fetch favicon for #{site_url}: #{format_reason(reason)}")
        fetch_default_favicon(feed, site_url)
    end
  end

  # Picks the URL of the site the feed belongs to: the explicit site URL if it is a
  # string, otherwise the feed URL stripped of its path, query and fragment.
  defp resolve_site_url(feed) do
    case feed.site_url do
      url when is_binary(url) ->
        url

      _ ->
        %URI{URI.parse(feed.feed_url) | path: nil, query: nil, fragment: nil}
        |> URI.to_string()
    end
  end

  # Fallback used when the web page yields no usable icon: request /favicon.ico
  # on the site's host and save it if the request succeeds.
  defp fetch_default_favicon(feed, site_url) do
    favicon_uri =
      %{URI.parse(site_url) | path: "/favicon.ico", query: nil, fragment: nil}
      |> URI.to_string()

    Logger.info("Trying default path: #{favicon_uri}")

    case fetch_favicon_data(favicon_uri, site_url) do
      {:ok, favicon_data} ->
        save_favicon(feed, favicon_data)

      {:error, reason} ->
        Logger.info(
          "Couldn't fetch default /favicon.ico for #{site_url}: #{format_reason(reason)}"
        )
    end
  end

  # Persists the favicon data URI on the feed. The match is deliberately assertive:
  # a failed update crashes the task.
  defp save_favicon(feed, favicon_data) do
    changeset = Feed.changeset(feed, %{favicon: favicon_data})
    {:ok, _feed} = Repo.update(changeset)
  end

  # Renders an error reason for log output. Binaries are logged verbatim; anything
  # else goes through inspect/1 because interpolating a term without a String.Chars
  # implementation (e.g. a tuple or struct) raises and would abort the task before
  # the /favicon.ico fallback runs.
  defp format_reason(reason) when is_binary(reason), do: reason
  defp format_reason(reason), do: inspect(reason)

  # Downloads the page at `url` and looks for a favicon link in its HTML.
  defp fetch_favicon_from_webpage(url) when is_binary(url) do
    case HTTP.get(url) do
      {:ok, %HTTPoison.Response{body: body}} ->
        extract_favicon(url, body)

      {:error, _reason} = err ->
        err
    end
  end

  defp fetch_favicon_from_webpage(_), do: {:error, "URL must be a string"}

  # Finds the first PNG `link[rel=icon]` in the page body and fetches the image it
  # points at. Returns `{:ok, data_uri}` or `{:error, reason}`.
  defp extract_favicon(page_url, body) do
    icon_links =
      body
      |> Floki.parse()
      |> Floki.find("link[rel=icon]")

    case icon_links do
      [] ->
        {:error, "No element matching link[rel=icon]"}

      links ->
        # todo: support more image types
        case Enum.find(links, &png_link?/1) do
          nil ->
            # run/1 reacts to this error by trying /favicon.ico
            {:error, "No link[rel=icon] with type of image/png"}

          link ->
            # A link without an href yields nil here, which the fallback clause of
            # fetch_favicon_data/2 turns into an error tuple.
            link
            |> Floki.attribute("href")
            |> List.first()
            |> fetch_favicon_data(page_url)
        end
    end
  end

  # True when the link declares a type of image/png (case-insensitive) or, failing
  # that, when its href contains ".png".
  defp png_link?(link) do
    declared_png? =
      link
      |> Floki.attribute("type")
      |> Enum.any?(&(String.downcase(&1) == "image/png"))

    # bad hack for missing type attr
    declared_png? or
      link
      |> Floki.attribute("href")
      |> Enum.any?(&String.contains?(&1, ".png"))
  end

  # Fetches the image at `favicon_url` (resolved against `site_url`) and encodes the
  # response body as a base64 data URI.
  defp fetch_favicon_data(favicon_url, site_url) when is_binary(favicon_url) do
    # handle relative URIs, set default scheme if not provided
    absolute_url =
      favicon_url
      |> URI.parse()
      |> HTTP.resolve_uri(URI.parse(site_url))

    case HTTP.get(absolute_url) do
      {:ok, %HTTPoison.Response{body: body}} ->
        # NOTE(review): the data URI is always labelled image/png, including for the
        # /favicon.ico fallback - confirm consumers tolerate that.
        {:ok, "data:image/png;base64,#{Base.encode64(body)}"}

      {:error, _reason} = err ->
        err
    end
  end

  defp fetch_favicon_data(_, _), do: {:error, "No or invalid href for link"}

  # Builds a URL-safe identifier for this task run, used as Logger metadata.
  # from https://github.com/elixir-plug/plug/blob/v1.8.3/lib/plug/request_id.ex#L60
  defp generate_task_id() do
    binary = <<
      System.system_time(:nanosecond)::64,
      :erlang.phash2({node(), self()}, 16_777_216)::24,
      :erlang.unique_integer()::32
    >>

    Base.url_encode64(binary)
  end
end