139 lines
3.7 KiB
Elixir
139 lines
3.7 KiB
Elixir
defmodule Frenzy.Task.FetchFavicon do
|
|
require Logger
|
|
use Task
|
|
alias Frenzy.{Network, Repo, Feed}
|
|
|
|
@doc """
Starts a linked task process that invokes `run/1` with `feed`.

Returns whatever `Task.start_link/3` returns.
"""
def start_link(feed), do: Task.start_link(__MODULE__, :run, [feed])
|
|
|
|
@doc """
Looks up the favicon for `feed` and stores it on the feed record.

The site URL is taken from `feed.site_url` when it is a string; otherwise it
is derived from `feed.feed_url` with the path, query and fragment removed.
Nothing is done (and `nil` is returned) unless the site URL scheme is
`http` or `https`.

The favicon URL is the one advertised by the site's web page, falling back
to `/favicon.ico` on the same host. The favicon is only downloaded and
persisted when that URL differs from the feed's current `favicon_url`.
Raises a `MatchError` if `Repo.update/1` does not return `{:ok, _}`.
"""
def run(feed) do
  Logger.metadata(favicon_task_id: generate_task_id())

  site_uri =
    case feed.site_url do
      url when is_binary(url) ->
        URI.parse(url)

      _ ->
        %URI{URI.parse(feed.feed_url) | path: nil, query: nil, fragment: nil}
    end

  if site_uri.scheme in ["http", "https"] do
    # The helpers below work on string URLs, so convert the struct once here.
    site_url = URI.to_string(site_uri)
    Logger.debug("Fetching favicon for #{site_url}")

    favicon_url =
      case fetch_favicon_url_from_webpage(site_url) do
        url when is_binary(url) ->
          url

        # Anything that is not a URL string (nil, an error tuple, ...) means
        # the page did not give us a usable icon: use the conventional path.
        _ ->
          site_uri |> URI.merge("/favicon.ico") |> URI.to_string()
      end

    # favicon_url is always a string here, so it compares correctly with the
    # stored value and unchanged favicons are not downloaded again.
    with %Feed{favicon_url: old_url} when old_url != favicon_url <- feed,
         {:ok, favicon_data} <- fetch_favicon_data(favicon_url) do
      changeset =
        Feed.changeset(feed, %{
          favicon: favicon_data,
          favicon_url: favicon_url
        })

      {:ok, _feed} = Repo.update(changeset)
    else
      _ ->
        :ok
    end
  end
end
|
|
|
|
# Fetches the web page at `url` and returns the favicon URL it advertises.
#
# `url` may be a string or a `%URI{}` (which is converted to a string first).
# Returns the favicon URL as a string, or `nil` when the page cannot be
# fetched, responds with a non-2xx status, advertises no usable icon, or the
# argument is not a URL. Every failure path yields `nil` so that callers can
# chain a fallback with `||`.
@spec fetch_favicon_url_from_webpage(url :: String.t() | URI.t()) :: String.t() | nil
defp fetch_favicon_url_from_webpage(%URI{} = url) do
  url |> URI.to_string() |> fetch_favicon_url_from_webpage()
end

defp fetch_favicon_url_from_webpage(url) when is_binary(url) do
  case Network.http_get(url) do
    {:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
      extract_favicon_url(url, body)

    {:ok, %HTTPoison.Response{status_code: code}} ->
      Logger.debug("Unhandled HTTP code #{code} for '#{url}'")
      nil

    {:error, reason} ->
      Logger.debug("Error fetching webpage for favicon: #{inspect(reason)}")
      nil
  end
end

defp fetch_favicon_url_from_webpage(other) do
  # A truthy error tuple here would be mistaken for a favicon URL by callers
  # using `||`, so report the problem and return nil like the other failures.
  Logger.debug("Cannot fetch webpage for favicon, URL must be a string: #{inspect(other)}")
  nil
end
|
|
|
|
# Finds a PNG favicon advertised in the HTML `body` of the page at `page_url`.
#
# Only elements matching the selector `link[rel=icon]` are considered, and of
# those the first one that looks like a PNG (see `png_icon_link?/1`) is used.
# Its first `href` is resolved against `page_url` and returned as a string.
# Returns `nil` when no such link exists or the chosen link has no `href`.
@spec extract_favicon_url(page_url :: String.t(), body :: term()) :: String.t() | nil
defp extract_favicon_url(page_url, body) do
  png_link =
    body
    |> Floki.parse()
    |> Floki.find("link[rel=icon]")
    |> Enum.find(&png_icon_link?/1)

  # todo: support more image types
  with link when not is_nil(link) <- png_link,
       [href | _] when is_binary(href) <- Floki.attribute(link, "href") do
    page_url |> URI.merge(href) |> to_string()
  else
    _ -> nil
  end
end

# Returns true when `link` declares a PNG icon: either its `type` attribute is
# "image/png" (compared case-insensitively) or, failing that, its `href`
# contains ".png".
defp png_icon_link?(link) do
  declared_png? =
    link
    |> Floki.attribute("type")
    |> Enum.any?(&(String.downcase(&1) == "image/png"))

  # bad hack for missing type attr
  declared_png? or
    link |> Floki.attribute("href") |> Enum.any?(&String.contains?(&1, ".png"))
end
|
|
|
|
# Downloads the favicon at `favicon_url` and wraps it in a base64 data URI.
#
# Returns `{:ok, data_uri}` for a 2xx response. Any other status code or a
# request error is logged at debug level and reported as `:error`.
@spec fetch_favicon_data(favicon_url :: String.t()) :: {:ok, String.t()} | :error
defp fetch_favicon_data(favicon_url) do
  Logger.debug("Fetching favicon from: '#{favicon_url}'")

  favicon_url
  |> Network.http_get()
  |> case do
    {:ok, %HTTPoison.Response{status_code: status, body: image}} when status in 200..299 ->
      # The data URI is always labelled image/png; the response's own content
      # type is not consulted.
      {:ok, "data:image/png;base64," <> Base.encode64(image)}

    {:ok, %HTTPoison.Response{status_code: status}} ->
      Logger.debug("Unhandled HTTP code #{status} for '#{favicon_url}'")
      :error

    {:error, cause} ->
      Logger.debug("Error fetching favicon: #{inspect(cause)}")
      :error
  end
end
|
|
|
|
# from https://github.com/elixir-plug/plug/blob/v1.8.3/lib/plug/request_id.ex#L60
#
# Builds an identifier for this task run, used as Logger metadata. The id is
# the URL-safe base64 encoding of 15 bytes: a 64-bit nanosecond timestamp, a
# 24-bit hash of the current node and pid, and a 32-bit unique integer.
defp generate_task_id do
  timestamp = System.system_time(:nanosecond)
  process_hash = :erlang.phash2({node(), self()}, 16_777_216)
  unique = :erlang.unique_integer()

  Base.url_encode64(<<timestamp::64, process_hash::24, unique::32>>)
end
|
|
end
|