Switch to hackney via Tesla

This commit is contained in:
Shadowfacts 2021-03-31 19:28:25 -04:00
parent 33d1cac5e1
commit 0593fcdb9a
Signed by: shadowfacts
GPG Key ID: 94A5AB95422746E5
7 changed files with 94 additions and 55 deletions

View File

@ -74,4 +74,6 @@ config :frenzy, Frenzy.Repo,
hostname: "localhost", hostname: "localhost",
pool_size: 10 pool_size: 10
config :tesla, Tesla.Middleware.Logger, debug: false
import_config "dev.secret.exs" import_config "dev.secret.exs"

View File

@ -1,50 +1,75 @@
defmodule Frenzy.Network do defmodule Frenzy.Network do
require Logger require Logger
@http_redirect_codes [301, 302] defmodule HTTP do
use Tesla
@spec http_get(String.t()) :: {:ok, HTTPoison.Response.t()} | {:error, term()} adapter(Tesla.Adapter.Hackney)
plug Tesla.Middleware.Logger, log_level: &log_level/1
plug Tesla.Middleware.FollowRedirects
# can't use JSON middleware currently, because feed_parser expects to parse the raw body data itself
# plug Tesla.Middleware.JSON
plug Tesla.Middleware.Timeout, timeout: 10_000
@doc """
Chooses the Logger level for a finished request based on its HTTP status.

Returns `:warn` when `env.status` is 400 or above and `:debug` otherwise.
This function is wired in as the `log_level` option of
`Tesla.Middleware.Logger` in this module.
"""
def log_level(env) do
  status = env.status

  # NOTE(review): `>=` follows Erlang term ordering, so a non-integer status
  # (e.g. `nil`) would also compare as >= 400 — presumably the adapter always
  # sets an integer status here; confirm.
  if status >= 400, do: :warn, else: :debug
end
end
@spec http_get(String.t()) :: Tesla.Env.result()
def http_get(url) do def http_get(url) do
case HTTPoison.get(url) do HTTP.get(url)
{:ok, %HTTPoison.Response{status_code: 200} = response} ->
{:ok, response}
{:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
when status_code in @http_redirect_codes ->
headers
|> Enum.find(fn {name, _value} -> String.downcase(name) == "location" end)
|> case do
{_, new_url} ->
new_url =
case URI.parse(new_url) do
%URI{host: nil, path: path} ->
# relative path
%URI{URI.parse(url) | path: path} |> URI.to_string()
uri ->
uri
end end
Logger.debug("Got 301 redirect from #{url} to #{new_url}") # @http_redirect_codes [301, 302]
http_get(new_url)
_ -> # @spec http_get(String.t()) :: {:ok, HTTPoison.Response.t()} | {:error, term()}
{:error, "Missing Location header for redirect"} # def http_get(url) do
end # case HTTPoison.get(url) do
# {:ok, %HTTPoison.Response{status_code: 200} = response} ->
# {:ok, response}
{:ok, %HTTPoison.Response{status_code: 403}} -> # {:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
{:error, "403 Forbidden"} # when status_code in @http_redirect_codes ->
# headers
# |> Enum.find(fn {name, _value} -> String.downcase(name) == "location" end)
# |> case do
# {_, new_url} ->
# new_url =
# case URI.parse(new_url) do
# %URI{host: nil, path: path} ->
# # relative path
# %URI{URI.parse(url) | path: path} |> URI.to_string()
{:ok, %HTTPoison.Response{status_code: 404}} -> # uri ->
{:error, "404 Not Found"} # uri
# end
{:ok, %HTTPoison.Response{status_code: status_code}} -> # Logger.debug("Got 301 redirect from #{url} to #{new_url}")
{:error, "HTTP #{status_code}"} # http_get(new_url)
{:error, error} -> # _ ->
{:error, error} # {:error, "Missing Location header for redirect"}
end # end
end
# {:ok, %HTTPoison.Response{status_code: 403}} ->
# {:error, "403 Forbidden"}
# {:ok, %HTTPoison.Response{status_code: 404}} ->
# {:error, "404 Not Found"}
# {:ok, %HTTPoison.Response{status_code: status_code}} ->
# {:error, "HTTP #{status_code}"}
# {:error, error} ->
# {:error, error}
# end
# end
@gemini_success_codes 20..29 @gemini_success_codes 20..29
@gemini_redirect_codes 30..39 @gemini_redirect_codes 30..39

View File

@ -70,7 +70,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
url url
|> Network.http_get() |> Network.http_get()
|> case do |> case do
{:ok, response} -> {:ok, %Tesla.Env{status: code} = response} when code in 200..299 ->
handle_response(url, response, opts) handle_response(url, response, opts)
{:error, reason} -> {:error, reason} ->
@ -80,9 +80,9 @@ defmodule Frenzy.Pipeline.ScrapeStage do
defp get_article_content(_url, _opts), do: {:error, "URL must be a non-empty string"} defp get_article_content(_url, _opts), do: {:error, "URL must be a non-empty string"}
@spec handle_response(String.t(), HTTPoison.Response.t(), map()) :: @spec handle_response(String.t(), Tesla.Env.t(), map()) ::
{:ok, String.t()} | {:error, String.t()} {:ok, String.t()} | {:error, String.t()}
defp handle_response(url, %HTTPoison.Response{body: body}, opts) do defp handle_response(url, %Tesla.Env{body: body}, opts) do
case opts["extractor"] do case opts["extractor"] do
"builtin" -> "builtin" ->
{:ok, Readability.article(body)} {:ok, Readability.article(body)}
@ -155,10 +155,10 @@ defmodule Frenzy.Pipeline.ScrapeStage do
absolute_url = URI.merge(site_uri, src) |> to_string() absolute_url = URI.merge(site_uri, src) |> to_string()
case Network.http_get(absolute_url) do case Network.http_get(absolute_url) do
{:ok, %HTTPoison.Response{body: body, headers: headers}} -> {:ok, %Tesla.Env{body: body, headers: headers}} ->
Enum.find(headers, fn {header, _value} -> header == "Content-Type" end) Enum.find(headers, fn {header, _value} -> String.downcase(header) == "content-type" end)
|> case do |> case do
{"Content-Type", content_type} when content_type in @content_type_allowlist -> {_, content_type} when content_type in @content_type_allowlist ->
"data:#{content_type};base64,#{Base.encode64(body)}" "data:#{content_type};base64,#{Base.encode64(body)}"
_ -> _ ->

View File

@ -46,7 +46,7 @@ defmodule Frenzy.Task.FetchFavicon do
defp fetch_favicon_url_from_webpage(url) when is_binary(url) do defp fetch_favicon_url_from_webpage(url) when is_binary(url) do
case Network.http_get(url) do case Network.http_get(url) do
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 -> {:ok, %Tesla.Env{body: body, status: code}} when code in 200..299 ->
extract_favicon_url(url, body) extract_favicon_url(url, body)
{:error, reason} -> {:error, reason} ->
@ -109,7 +109,7 @@ defmodule Frenzy.Task.FetchFavicon do
Logger.debug("Fetching favicon from: '#{favicon_url}'") Logger.debug("Fetching favicon from: '#{favicon_url}'")
case Network.http_get(favicon_url) do case Network.http_get(favicon_url) do
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 -> {:ok, %Tesla.Env{body: body, status: code}} when code in 200..299 ->
{:ok, "data:image/png;base64,#{Base.encode64(body)}"} {:ok, "data:image/png;base64,#{Base.encode64(body)}"}
{:error, reason} -> {:error, reason} ->

View File

@ -104,8 +104,8 @@ defmodule Frenzy.UpdateFeeds do
defp update_feed_http(feed) do defp update_feed_http(feed) do
case Network.http_get(feed.feed_url) do case Network.http_get(feed.feed_url) do
{:ok, {:ok,
%HTTPoison.Response{ %Tesla.Env{
status_code: 200, status: 200,
body: body, body: body,
headers: headers headers: headers
}} -> }} ->

View File

@ -20,7 +20,7 @@ defmodule Frenzy.MixProject do
def application do def application do
[ [
mod: {Frenzy.Application, []}, mod: {Frenzy.Application, []},
extra_applications: [:logger, :runtime_tools, :readability, :httpoison] extra_applications: [:logger, :runtime_tools, :readability]
] ]
end end
@ -43,8 +43,8 @@ defmodule Frenzy.MixProject do
{:gettext, "~> 0.11"}, {:gettext, "~> 0.11"},
{:jason, "~> 1.0"}, {:jason, "~> 1.0"},
{:plug_cowboy, "~> 2.3"}, {:plug_cowboy, "~> 2.3"},
{:httpoison, "~> 1.8.0"},
{:hackney, "1.17.4"}, {:hackney, "1.17.4"},
{:tesla, "~> 1.4.0"},
{:feed_parser, {:feed_parser,
git: "https://git.shadowfacts.net/shadowfacts/feed_parser.git", branch: "master"}, git: "https://git.shadowfacts.net/shadowfacts/feed_parser.git", branch: "master"},
{:timex, "~> 3.6"}, {:timex, "~> 3.6"},

View File

@ -110,11 +110,6 @@
{:hex, :html_entities, "0.4.0", {:hex, :html_entities, "0.4.0",
"f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm",
"3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"}, "3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"},
httpoison:
{:hex, :httpoison, "1.8.0",
"6b85dea15820b7804ef607ff78406ab449dd78bed923a49c7160e1886e987a3d", [:mix],
[{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm",
"28089eaa98cf90c66265b6b5ad87c59a3729bea2e74e9d08f9b51eb9729b3c3a"},
idna: idna:
{:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d",
[:rebar3], [:rebar3],
@ -236,6 +231,23 @@
{:hex, :telemetry, "0.4.2", {:hex, :telemetry, "0.4.2",
"2808c992455e08d6177322f14d3bdb6b625fbcfd233a73505870d8738a2f4599", [:rebar3], [], "hexpm", "2808c992455e08d6177322f14d3bdb6b625fbcfd233a73505870d8738a2f4599", [:rebar3], [], "hexpm",
"2d1419bd9dda6a206d7b5852179511722e2b18812310d304620c7bd92a13fcef"}, "2d1419bd9dda6a206d7b5852179511722e2b18812310d304620c7bd92a13fcef"},
tesla:
{:hex, :tesla, "1.4.0", "1081bef0124b8bdec1c3d330bbe91956648fb008cf0d3950a369cda466a31a87",
[:mix],
[
{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: true]},
{:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]},
{:finch, "~> 0.3", [hex: :finch, repo: "hexpm", optional: true]},
{:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]},
{:gun, "~> 1.3", [hex: :gun, repo: "hexpm", optional: true]},
{:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]},
{:ibrowse, "~> 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]},
{:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]},
{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]},
{:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]},
{:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]},
{:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}
], "hexpm", "bf1374a5569f5fca8e641363b63f7347d680d91388880979a33bc12a6eb3e0aa"},
timex: timex:
{:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56",
[:mix], [:mix],