# Source: frenzy/lib/frenzy/task/fetch_favicon.ex
# (listing metadata from the code viewer: 132 lines, 3.5 KiB, Elixir)

defmodule Frenzy.Task.FetchFavicon do
  @moduledoc """
  One-off task that downloads a feed's favicon and stores it on the feed as a
  base64 `data:` URI (written to the feed's `favicon` attribute).

  The task first loads the feed's site page and looks for a PNG
  `link[rel=icon]` element. If that step returns an error, it falls back to
  requesting `/favicon.ico` from the same host.
  """

  require Logger
  use Task
  alias Frenzy.{HTTP, Repo, Feed}

  # MIME type used when the server does not report a usable image content type.
  # This matches what the task has always stored for PNG icons.
  @default_mime_type "image/png"

  @doc """
  Starts a linked task that runs `run/1` for `feed`.
  """
  def start_link(feed) do
    Task.start_link(__MODULE__, :run, [feed])
  end

  @doc """
  Fetches the favicon for `feed` and persists it.

  `feed.site_url` is used when it is a string; otherwise the site URL is
  derived from `feed.feed_url` by dropping its path, query and fragment.

  Returns the `{:ok, feed}` tuple from `Repo.update/1` when a favicon was
  stored, or the result of the final `Logger.info/1` call when neither the
  page nor `/favicon.ico` yielded one. A failed `Repo.update/1` raises a
  `MatchError` (the task crashes).
  """
  def run(feed) do
    Logger.metadata(favicon_task_id: generate_task_id())

    site_url = resolve_site_url(feed)
    Logger.debug("Fetching favicon for #{site_url}")

    case fetch_favicon_from_webpage(site_url) do
      {:ok, favicon_data} ->
        store_favicon(feed, favicon_data)

      {:error, reason} ->
        Logger.info("Couldn't fetch favicon for #{site_url}: #{format_reason(reason)}")
        fetch_default_favicon(feed, site_url)
    end
  end

  # Picks the URL of the site the feed belongs to, falling back to the root of
  # the feed URL when the feed has no usable `site_url`.
  defp resolve_site_url(feed) do
    case feed.site_url do
      url when is_binary(url) ->
        url

      _ ->
        %URI{URI.parse(feed.feed_url) | path: nil, query: nil, fragment: nil}
        |> URI.to_string()
    end
  end

  # Last-resort attempt: request `/favicon.ico` on the site's host.
  defp fetch_default_favicon(feed, site_url) do
    favicon_uri =
      %{URI.parse(site_url) | path: "/favicon.ico", query: nil, fragment: nil}
      |> URI.to_string()

    Logger.info("Trying default path: #{favicon_uri}")

    case fetch_favicon_data(favicon_uri, site_url) do
      {:ok, favicon_data} ->
        store_favicon(feed, favicon_data)

      {:error, reason} ->
        Logger.info(
          "Couldn't fetch default /favicon.ico for #{site_url}: #{format_reason(reason)}"
        )
    end
  end

  # Writes the data URI to the feed. The assertive match is deliberate: a
  # failed update is unexpected and should crash the task.
  defp store_favicon(feed, favicon_data) do
    changeset = Feed.changeset(feed, %{favicon: favicon_data})
    {:ok, _feed} = Repo.update(changeset)
  end

  # Error reasons are not guaranteed to implement `String.Chars` (they may be
  # tuples or structs), so anything that is not already a string is inspected
  # instead of interpolated directly, which would raise inside the log call.
  defp format_reason(reason) when is_binary(reason), do: reason
  defp format_reason(reason), do: inspect(reason)

  # Downloads the page at `url` and tries to extract a favicon from its markup.
  defp fetch_favicon_from_webpage(url) when is_binary(url) do
    case HTTP.get(url) do
      {:ok, %HTTPoison.Response{body: body}} ->
        extract_favicon(url, body)

      {:error, _reason} = err ->
        err
    end
  end

  defp fetch_favicon_from_webpage(_), do: {:error, "URL must be a string"}

  # Finds the first `link[rel=icon]` element that looks like a PNG and fetches
  # the image its `href` points to.
  # NOTE(review): `Floki.parse/1` is kept as in the original; newer Floki
  # releases may prefer `Floki.parse_document/1` - confirm against the
  # project's Floki version before switching.
  defp extract_favicon(page_url, body) do
    html_tree = Floki.parse(body)

    case Floki.find(html_tree, "link[rel=icon]") do
      [] ->
        {:error, "No element matching link[rel=icon]"}

      links ->
        # todo: support more image types
        case Enum.find(links, &png_icon_link?/1) do
          nil ->
            {:error, "No link[rel=icon] with type of image/png"}

          link ->
            # A link without an href yields `nil` here, which the catch-all
            # clause of `fetch_favicon_data/2` turns into an error tuple.
            link
            |> Floki.attribute("href")
            |> List.first()
            |> fetch_favicon_data(page_url)
        end
    end
  end

  # True when the link declares `type="image/png"` (case-insensitive) or,
  # as a heuristic for links without a type attribute, when its href contains
  # ".png".
  defp png_icon_link?(link) do
    declared_png? =
      link
      |> Floki.attribute("type")
      |> Enum.any?(&(String.downcase(&1) == "image/png"))

    declared_png? or
      link
      |> Floki.attribute("href")
      |> Enum.any?(&String.contains?(&1, ".png"))
  end

  # Fetches the image at `favicon_url` (resolved against `site_url`) and
  # returns `{:ok, data_uri}` or `{:error, reason}`.
  defp fetch_favicon_data(favicon_url, site_url) when is_binary(favicon_url) do
    # Per the original author's note, this handles relative URIs and sets a
    # default scheme if none is provided (behaviour of `HTTP.resolve_uri/2`
    # is defined outside this file).
    absolute_url =
      favicon_url
      |> URI.parse()
      |> HTTP.resolve_uri(URI.parse(site_url))

    case HTTP.get(absolute_url) do
      {:ok, %HTTPoison.Response{body: ""}} ->
        # An empty payload would be stored as a blank data URI; report it so
        # the caller can fall back or log instead.
        {:error, "Empty response body for favicon"}

      {:ok, %HTTPoison.Response{body: body, headers: headers}} ->
        {:ok, "data:#{favicon_mime_type(headers)};base64,#{Base.encode64(body)}"}

      {:error, _reason} = err ->
        err
    end
  end

  defp fetch_favicon_data(_, _), do: {:error, "No or invalid href for link"}

  # Derives the data URI's MIME type from the response's Content-Type header.
  # Only a plain `image/<subtype>` token is accepted (parameters such as
  # `; charset=...` are dropped); anything else falls back to the default so
  # that server-controlled text never ends up verbatim in the stored URI.
  defp favicon_mime_type(headers) when is_list(headers) do
    mime =
      headers
      |> Enum.find_value("", fn
        {name, value} when is_binary(name) and is_binary(value) ->
          if String.downcase(name) == "content-type", do: value

        _other ->
          nil
      end)
      |> String.split(";", parts: 2)
      |> hd()
      |> String.trim()
      |> String.downcase()

    if Regex.match?(~r/\Aimage\/[a-z0-9][a-z0-9.+-]*\z/, mime) do
      mime
    else
      @default_mime_type
    end
  end

  defp favicon_mime_type(_headers), do: @default_mime_type

  # Builds a URL-safe base64 identifier for log metadata from the current
  # system time (64 bits), a hash of `{node(), self()}` (24 bits) and a
  # unique integer (32 bits).
  # from https://github.com/elixir-plug/plug/blob/v1.8.3/lib/plug/request_id.ex#L60
  defp generate_task_id() do
    binary = <<
      System.system_time(:nanosecond)::64,
      :erlang.phash2({node(), self()}, 16_777_216)::24,
      :erlang.unique_integer()::32
    >>

    Base.url_encode64(binary)
  end
end