From 40984b419d4c386f7228b992c55a67322479b9fb Mon Sep 17 00:00:00 2001 From: Shadowfacts Date: Wed, 31 Mar 2021 19:28:25 -0400 Subject: [PATCH] Switch to hackney via Tesla --- config/dev.exs | 2 + lib/frenzy/network.ex | 101 +++++++++++++++++----------- lib/frenzy/pipeline/scrape_stage.ex | 12 ++-- lib/frenzy/task/fetch_favicon.ex | 4 +- lib/frenzy/update_feeds.ex | 4 +- mix.exs | 2 +- mix.lock | 22 ++++-- 7 files changed, 93 insertions(+), 54 deletions(-) diff --git a/config/dev.exs b/config/dev.exs index c3bcee1..0a2618a 100644 --- a/config/dev.exs +++ b/config/dev.exs @@ -74,4 +74,6 @@ config :frenzy, Frenzy.Repo, hostname: "localhost", pool_size: 10 +config :tesla, Tesla.Middleware.Logger, debug: false + import_config "dev.secret.exs" diff --git a/lib/frenzy/network.ex b/lib/frenzy/network.ex index 6edaf62..9c8f756 100644 --- a/lib/frenzy/network.ex +++ b/lib/frenzy/network.ex @@ -1,51 +1,76 @@ defmodule Frenzy.Network do require Logger - @http_redirect_codes [301, 302] + defmodule HTTP do + use Tesla - @spec http_get(String.t()) :: {:ok, HTTPoison.Response.t()} | {:error, term()} - def http_get(url) do - case HTTPoison.get(url) do - {:ok, %HTTPoison.Response{status_code: 200} = response} -> - {:ok, response} + adapter(Tesla.Adapter.Hackney) - {:ok, %HTTPoison.Response{status_code: status_code, headers: headers}} - when status_code in @http_redirect_codes -> - headers - |> Enum.find(fn {name, _value} -> String.downcase(name) == "location" end) - |> case do - {_, new_url} -> - new_url = - case URI.parse(new_url) do - %URI{host: nil, path: path} -> - # relative path - %URI{URI.parse(url) | path: path} |> URI.to_string() + plug Tesla.Middleware.Logger, log_level: &log_level/1 + plug Tesla.Middleware.FollowRedirects - uri -> - uri - end + # can't use JSON middleware currently, because feed_parser expects to parse the raw body data itself + # plug Tesla.Middleware.JSON + plug Tesla.Middleware.Timeout, timeout: 10_000 - Logger.debug("Got 301 redirect from #{url} to #{new_url}") - http_get(new_url) - - _ -> - {:error, "Missing Location header for redirect"} - end - - {:ok, %HTTPoison.Response{status_code: 403}} -> - {:error, "403 Forbidden"} - - {:ok, %HTTPoison.Response{status_code: 404}} -> - {:error, "404 Not Found"} - - {:ok, %HTTPoison.Response{status_code: status_code}} -> - {:error, "HTTP #{status_code}"} - - {:error, error} -> - {:error, error} + def log_level(env) do + case env.status do + code when code >= 400 -> :warn + _ -> :debug + end end end + @spec http_get(String.t()) :: Tesla.Env.result() + def http_get(url) do + HTTP.get(url) + end + + # @http_redirect_codes [301, 302] + + # @spec http_get(String.t()) :: {:ok, HTTPoison.Response.t()} | {:error, term()} + # def http_get(url) do + # case HTTPoison.get(url) do + # {:ok, %HTTPoison.Response{status_code: 200} = response} -> + # {:ok, response} + + # {:ok, %HTTPoison.Response{status_code: status_code, headers: headers}} + # when status_code in @http_redirect_codes -> + # headers + # |> Enum.find(fn {name, _value} -> String.downcase(name) == "location" end) + # |> case do + # {_, new_url} -> + # new_url = + # case URI.parse(new_url) do + # %URI{host: nil, path: path} -> + # # relative path + # %URI{URI.parse(url) | path: path} |> URI.to_string() + + # uri -> + # uri + # end + + # Logger.debug("Got 301 redirect from #{url} to #{new_url}") + # http_get(new_url) + + # _ -> + # {:error, "Missing Location header for redirect"} + # end + + # {:ok, %HTTPoison.Response{status_code: 403}} -> + # {:error, "403 Forbidden"} + + # {:ok, %HTTPoison.Response{status_code: 404}} -> + # {:error, "404 Not Found"} + + # {:ok, %HTTPoison.Response{status_code: status_code}} -> + # {:error, "HTTP #{status_code}"} + + # {:error, error} -> + # {:error, error} + # end + # end + @gemini_success_codes 20..29 @gemini_redirect_codes 30..39 diff --git a/lib/frenzy/pipeline/scrape_stage.ex b/lib/frenzy/pipeline/scrape_stage.ex index e147868..a5830d3 100644 --- a/lib/frenzy/pipeline/scrape_stage.ex +++ b/lib/frenzy/pipeline/scrape_stage.ex @@ -70,7 +70,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do url |> Network.http_get() |> case do - {:ok, response} -> + {:ok, %Tesla.Env{status: code} = response} when code in 200..299 -> handle_response(url, response, opts) {:error, reason} -> @@ -80,9 +80,9 @@ defmodule Frenzy.Pipeline.ScrapeStage do defp get_article_content(_url, _opts), do: {:error, "URL must be a non-empty string"} - @spec handle_response(String.t(), HTTPoison.Response.t(), map()) :: + @spec handle_response(String.t(), Tesla.Env.t(), map()) :: {:ok, String.t()} | {:error, String.t()} - defp handle_response(url, %HTTPoison.Response{body: body}, opts) do + defp handle_response(url, %Tesla.Env{body: body}, opts) do case opts["extractor"] do "builtin" -> {:ok, Readability.article(body)} @@ -155,10 +155,10 @@ defmodule Frenzy.Pipeline.ScrapeStage do absolute_url = URI.merge(site_uri, src) |> to_string() case Network.http_get(absolute_url) do - {:ok, %HTTPoison.Response{body: body, headers: headers}} -> - Enum.find(headers, fn {header, _value} -> header == "Content-Type" end) + {:ok, %Tesla.Env{body: body, headers: headers}} -> + Enum.find(headers, fn {header, _value} -> String.downcase(header) == "content-type" end) |> case do - {"Content-Type", content_type} when content_type in @content_type_allowlist -> + {_, content_type} when content_type in @content_type_allowlist -> "data:#{content_type};base64,#{Base.encode64(body)}" _ -> diff --git a/lib/frenzy/task/fetch_favicon.ex b/lib/frenzy/task/fetch_favicon.ex index 31a43c8..6327252 100644 --- a/lib/frenzy/task/fetch_favicon.ex +++ b/lib/frenzy/task/fetch_favicon.ex @@ -46,7 +46,7 @@ defmodule Frenzy.Task.FetchFavicon do defp fetch_favicon_url_from_webpage(url) when is_binary(url) do case Network.http_get(url) do - {:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 -> + {:ok, %Tesla.Env{body: body, status: code}} when code in 200..299 -> extract_favicon_url(url, body) {:error, reason} -> @@ -109,7 +109,7 @@ defmodule Frenzy.Task.FetchFavicon do Logger.debug("Fetching favicon from: '#{favicon_url}'") case Network.http_get(favicon_url) do - {:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 -> + {:ok, %Tesla.Env{body: body, status: code}} when code in 200..299 -> {:ok, "data:image/png;base64,#{Base.encode64(body)}"} {:error, reason} -> diff --git a/lib/frenzy/update_feeds.ex b/lib/frenzy/update_feeds.ex index d430521..88e8773 100644 --- a/lib/frenzy/update_feeds.ex +++ b/lib/frenzy/update_feeds.ex @@ -104,8 +104,8 @@ defmodule Frenzy.UpdateFeeds do defp update_feed_http(feed) do case Network.http_get(feed.feed_url) do {:ok, - %HTTPoison.Response{ - status_code: 200, + %Tesla.Env{ + status: 200, body: body, headers: headers }} -> diff --git a/mix.exs b/mix.exs index 22090f6..f5a3ced 100644 --- a/mix.exs +++ b/mix.exs @@ -43,8 +43,8 @@ defmodule Frenzy.MixProject do {:gettext, "~> 0.11"}, {:jason, "~> 1.0"}, {:plug_cowboy, "~> 2.3"}, - {:httpoison, "~> 1.8.0"}, {:hackney, "1.17.4"}, + {:tesla, "~> 1.4.0"}, {:feed_parser, git: "https://git.shadowfacts.net/shadowfacts/feed_parser.git", branch: "master"}, {:timex, "~> 3.6"}, diff --git a/mix.lock b/mix.lock index 989e4bc..592d669 100644 --- a/mix.lock +++ b/mix.lock @@ -110,11 +110,6 @@ {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm", "3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"}, - httpoison: - {:hex, :httpoison, "1.8.0", - "6b85dea15820b7804ef607ff78406ab449dd78bed923a49c7160e1886e987a3d", [:mix], - [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", - "28089eaa98cf90c66265b6b5ad87c59a3729bea2e74e9d08f9b51eb9729b3c3a"}, idna: {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], @@ -236,6 +231,23 @@ {:hex, :telemetry, "0.4.2", "2808c992455e08d6177322f14d3bdb6b625fbcfd233a73505870d8738a2f4599", [:rebar3], [], "hexpm", "2d1419bd9dda6a206d7b5852179511722e2b18812310d304620c7bd92a13fcef"}, + tesla: + {:hex, :tesla, "1.4.0", "1081bef0124b8bdec1c3d330bbe91956648fb008cf0d3950a369cda466a31a87", + [:mix], + [ + {:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: true]}, + {:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, + {:finch, "~> 0.3", [hex: :finch, repo: "hexpm", optional: true]}, + {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, + {:gun, "~> 1.3", [hex: :gun, repo: "hexpm", optional: true]}, + {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, + {:ibrowse, "~> 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]}, + {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, + {:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, + {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]}, + {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}, + {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]} + ], "hexpm", "bf1374a5569f5fca8e641363b63f7347d680d91388880979a33bc12a6eb3e0aa"}, timex: {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix],