Fix incorrect handling of relative favicon links

This commit is contained in:
Shadowfacts 2019-11-10 15:27:40 -05:00
parent 8d790b8af0
commit 2c06b785c9
Signed by: shadowfacts
GPG Key ID: 94A5AB95422746E5
1 changed file with 17 additions and 7 deletions

View File

@ -35,7 +35,7 @@ defmodule Frenzy.Task.FetchFavicon do
Logger.info("Trying default path: #{favicon_uri}")
case fetch_favicon_data(favicon_uri) do
case fetch_favicon_data(favicon_uri, site_url) do
{:ok, favicon_data} ->
changeset = Feed.changeset(feed, %{favicon: favicon_data})
{:ok, _feed} = Repo.update(changeset)
@ -49,7 +49,7 @@ defmodule Frenzy.Task.FetchFavicon do
defp fetch_favicon_from_webpage(url) when is_binary(url) do
case HTTP.get(url) do
{:ok, %HTTPoison.Response{body: body}} ->
extract_favicon(body)
extract_favicon(url, body)
{:error, _reason} = err ->
err
@ -58,7 +58,7 @@ defmodule Frenzy.Task.FetchFavicon do
defp fetch_favicon_from_webpage(_), do: {:error, "URL must be a string"}
defp extract_favicon(body) do
defp extract_favicon(page_url, body) do
html_tree = Floki.parse(body)
case Floki.find(html_tree, "link[rel=icon]") do
@ -95,13 +95,23 @@ defmodule Frenzy.Task.FetchFavicon do
link
|> Floki.attribute("href")
|> List.first()
|> fetch_favicon_data()
|> fetch_favicon_data(page_url)
end
end
end
defp fetch_favicon_data(url) when is_binary(url) do
case HTTP.get(url) do
defp fetch_favicon_data(favicon_url, site_url) when is_binary(favicon_url) do
# Turn the href taken from the page into a URL that can be fetched directly.
# Resolution is delegated to URI.merge/2 (RFC 3986 reference resolution)
# instead of overwriting the site URL's :path, because a plain path swap
#   * glues path-relative hrefs onto the host ("favicon.ico" has no leading "/"),
#   * drops the href's own query string and keeps the page's query/fragment,
#   * leaves protocol-relative hrefs ("//cdn.example.com/icon.png") without a scheme.
absolute_url =
  case URI.parse(favicon_url) do
    %URI{scheme: nil} ->
      # No scheme: the href is a relative reference, so resolve it against the
      # URL of the page it was found on.
      # NOTE: URI.merge/2 raises ArgumentError when site_url is not an absolute URI.
      site_url |> URI.merge(favicon_url) |> URI.to_string()

    _ ->
      # Already absolute; pass through unchanged.
      favicon_url
  end
case HTTP.get(absolute_url) do
{:ok, %HTTPoison.Response{body: body}} ->
{:ok, "data:image/png;base64,#{Base.encode64(body)}"}
@ -110,7 +120,7 @@ defmodule Frenzy.Task.FetchFavicon do
end
end
defp fetch_favicon_data(_), do: {:error, "No or invalid href for link"}
defp fetch_favicon_data(_, _), do: {:error, "No or invalid href for link"}
# from https://github.com/elixir-plug/plug/blob/v1.8.3/lib/plug/request_id.ex#L60
defp generate_task_id() do