Compare commits
1 Commits
0593fcdb9a
...
40984b419d
Author | SHA1 | Date |
---|---|---|
Shadowfacts | 40984b419d |
|
@ -74,4 +74,6 @@ config :frenzy, Frenzy.Repo,
|
|||
hostname: "localhost",
|
||||
pool_size: 10
|
||||
|
||||
config :tesla, Tesla.Middleware.Logger, debug: false
|
||||
|
||||
import_config "dev.secret.exs"
|
||||
|
|
|
@ -1,51 +1,76 @@
|
|||
defmodule Frenzy.Network do
|
||||
require Logger
|
||||
|
||||
@http_redirect_codes [301, 302]
|
||||
defmodule HTTP do
|
||||
use Tesla
|
||||
|
||||
@spec http_get(String.t()) :: {:ok, HTTPoison.Response.t()} | {:error, term()}
|
||||
def http_get(url) do
|
||||
case HTTPoison.get(url) do
|
||||
{:ok, %HTTPoison.Response{status_code: 200} = response} ->
|
||||
{:ok, response}
|
||||
adapter(Tesla.Adapter.Hackney)
|
||||
|
||||
{:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
|
||||
when status_code in @http_redirect_codes ->
|
||||
headers
|
||||
|> Enum.find(fn {name, _value} -> String.downcase(name) == "location" end)
|
||||
|> case do
|
||||
{_, new_url} ->
|
||||
new_url =
|
||||
case URI.parse(new_url) do
|
||||
%URI{host: nil, path: path} ->
|
||||
# relative path
|
||||
%URI{URI.parse(url) | path: path} |> URI.to_string()
|
||||
plug Tesla.Middleware.Logger, log_level: &log_level/1
|
||||
plug Tesla.Middleware.FollowRedirects
|
||||
|
||||
uri ->
|
||||
uri
|
||||
end
|
||||
# can't use JSON middleware currently, because feed_parser expects to parse the raw body data itself
|
||||
# plug Tesla.Middleware.JSON
|
||||
plug Tesla.Middleware.Timeout, timeout: 10_000
|
||||
|
||||
Logger.debug("Got 301 redirect from #{url} to #{new_url}")
|
||||
http_get(new_url)
|
||||
|
||||
_ ->
|
||||
{:error, "Missing Location header for redirect"}
|
||||
end
|
||||
|
||||
{:ok, %HTTPoison.Response{status_code: 403}} ->
|
||||
{:error, "403 Forbidden"}
|
||||
|
||||
{:ok, %HTTPoison.Response{status_code: 404}} ->
|
||||
{:error, "404 Not Found"}
|
||||
|
||||
{:ok, %HTTPoison.Response{status_code: status_code}} ->
|
||||
{:error, "HTTP #{status_code}"}
|
||||
|
||||
{:error, error} ->
|
||||
{:error, error}
|
||||
def log_level(env) do
|
||||
case env.status do
|
||||
code when code >= 400 -> :warn
|
||||
_ -> :debug
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@spec http_get(String.t()) :: Tesla.Env.result()
|
||||
def http_get(url) do
|
||||
HTTP.get(url)
|
||||
end
|
||||
|
||||
# @http_redirect_codes [301, 302]
|
||||
|
||||
# @spec http_get(String.t()) :: {:ok, HTTPoison.Response.t()} | {:error, term()}
|
||||
# def http_get(url) do
|
||||
# case HTTPoison.get(url) do
|
||||
# {:ok, %HTTPoison.Response{status_code: 200} = response} ->
|
||||
# {:ok, response}
|
||||
|
||||
# {:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
|
||||
# when status_code in @http_redirect_codes ->
|
||||
# headers
|
||||
# |> Enum.find(fn {name, _value} -> String.downcase(name) == "location" end)
|
||||
# |> case do
|
||||
# {_, new_url} ->
|
||||
# new_url =
|
||||
# case URI.parse(new_url) do
|
||||
# %URI{host: nil, path: path} ->
|
||||
# # relative path
|
||||
# %URI{URI.parse(url) | path: path} |> URI.to_string()
|
||||
|
||||
# uri ->
|
||||
# uri
|
||||
# end
|
||||
|
||||
# Logger.debug("Got 301 redirect from #{url} to #{new_url}")
|
||||
# http_get(new_url)
|
||||
|
||||
# _ ->
|
||||
# {:error, "Missing Location header for redirect"}
|
||||
# end
|
||||
|
||||
# {:ok, %HTTPoison.Response{status_code: 403}} ->
|
||||
# {:error, "403 Forbidden"}
|
||||
|
||||
# {:ok, %HTTPoison.Response{status_code: 404}} ->
|
||||
# {:error, "404 Not Found"}
|
||||
|
||||
# {:ok, %HTTPoison.Response{status_code: status_code}} ->
|
||||
# {:error, "HTTP #{status_code}"}
|
||||
|
||||
# {:error, error} ->
|
||||
# {:error, error}
|
||||
# end
|
||||
# end
|
||||
|
||||
@gemini_success_codes 20..29
|
||||
@gemini_redirect_codes 30..39
|
||||
|
||||
|
|
|
@ -70,7 +70,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
|||
url
|
||||
|> Network.http_get()
|
||||
|> case do
|
||||
{:ok, response} ->
|
||||
{:ok, %Tesla.Env{status: code} = response} when code in 200..299 ->
|
||||
handle_response(url, response, opts)
|
||||
|
||||
{:error, reason} ->
|
||||
|
@ -80,9 +80,9 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
|||
|
||||
defp get_article_content(_url, _opts), do: {:error, "URL must be a non-empty string"}
|
||||
|
||||
@spec handle_response(String.t(), HTTPoison.Response.t(), map()) ::
|
||||
@spec handle_response(String.t(), Tesla.Env.t(), map()) ::
|
||||
{:ok, String.t()} | {:error, String.t()}
|
||||
defp handle_response(url, %HTTPoison.Response{body: body}, opts) do
|
||||
defp handle_response(url, %Tesla.Env{body: body}, opts) do
|
||||
case opts["extractor"] do
|
||||
"builtin" ->
|
||||
{:ok, Readability.article(body)}
|
||||
|
@ -155,10 +155,10 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
|||
absolute_url = URI.merge(site_uri, src) |> to_string()
|
||||
|
||||
case Network.http_get(absolute_url) do
|
||||
{:ok, %HTTPoison.Response{body: body, headers: headers}} ->
|
||||
Enum.find(headers, fn {header, _value} -> header == "Content-Type" end)
|
||||
{:ok, %Tesla.Env{body: body, headers: headers}} ->
|
||||
Enum.find(headers, fn {header, _value} -> String.downcase(header) == "content-type" end)
|
||||
|> case do
|
||||
{"Content-Type", content_type} when content_type in @content_type_allowlist ->
|
||||
{_, content_type} when content_type in @content_type_allowlist ->
|
||||
"data:#{content_type};base64,#{Base.encode64(body)}"
|
||||
|
||||
_ ->
|
||||
|
|
|
@ -46,7 +46,7 @@ defmodule Frenzy.Task.FetchFavicon do
|
|||
|
||||
defp fetch_favicon_url_from_webpage(url) when is_binary(url) do
|
||||
case Network.http_get(url) do
|
||||
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
|
||||
{:ok, %Tesla.Env{body: body, status: code}} when code in 200..299 ->
|
||||
extract_favicon_url(url, body)
|
||||
|
||||
{:error, reason} ->
|
||||
|
@ -109,7 +109,7 @@ defmodule Frenzy.Task.FetchFavicon do
|
|||
Logger.debug("Fetching favicon from: '#{favicon_url}'")
|
||||
|
||||
case Network.http_get(favicon_url) do
|
||||
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
|
||||
{:ok, %Tesla.Env{body: body, status: code}} when code in 200..299 ->
|
||||
{:ok, "data:image/png;base64,#{Base.encode64(body)}"}
|
||||
|
||||
{:error, reason} ->
|
||||
|
|
|
@ -104,8 +104,8 @@ defmodule Frenzy.UpdateFeeds do
|
|||
defp update_feed_http(feed) do
|
||||
case Network.http_get(feed.feed_url) do
|
||||
{:ok,
|
||||
%HTTPoison.Response{
|
||||
status_code: 200,
|
||||
%Tesla.Env{
|
||||
status: 200,
|
||||
body: body,
|
||||
headers: headers
|
||||
}} ->
|
||||
|
|
2
mix.exs
2
mix.exs
|
@ -43,8 +43,8 @@ defmodule Frenzy.MixProject do
|
|||
{:gettext, "~> 0.11"},
|
||||
{:jason, "~> 1.0"},
|
||||
{:plug_cowboy, "~> 2.3"},
|
||||
{:httpoison, "~> 1.8.0"},
|
||||
{:hackney, "1.17.4"},
|
||||
{:tesla, "~> 1.4.0"},
|
||||
{:feed_parser,
|
||||
git: "https://git.shadowfacts.net/shadowfacts/feed_parser.git", branch: "master"},
|
||||
{:timex, "~> 3.6"},
|
||||
|
|
22
mix.lock
22
mix.lock
|
@ -110,11 +110,6 @@
|
|||
{:hex, :html_entities, "0.4.0",
|
||||
"f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm",
|
||||
"3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"},
|
||||
httpoison:
|
||||
{:hex, :httpoison, "1.8.0",
|
||||
"6b85dea15820b7804ef607ff78406ab449dd78bed923a49c7160e1886e987a3d", [:mix],
|
||||
[{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm",
|
||||
"28089eaa98cf90c66265b6b5ad87c59a3729bea2e74e9d08f9b51eb9729b3c3a"},
|
||||
idna:
|
||||
{:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d",
|
||||
[:rebar3],
|
||||
|
@ -236,6 +231,23 @@
|
|||
{:hex, :telemetry, "0.4.2",
|
||||
"2808c992455e08d6177322f14d3bdb6b625fbcfd233a73505870d8738a2f4599", [:rebar3], [], "hexpm",
|
||||
"2d1419bd9dda6a206d7b5852179511722e2b18812310d304620c7bd92a13fcef"},
|
||||
tesla:
|
||||
{:hex, :tesla, "1.4.0", "1081bef0124b8bdec1c3d330bbe91956648fb008cf0d3950a369cda466a31a87",
|
||||
[:mix],
|
||||
[
|
||||
{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: true]},
|
||||
{:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]},
|
||||
{:finch, "~> 0.3", [hex: :finch, repo: "hexpm", optional: true]},
|
||||
{:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]},
|
||||
{:gun, "~> 1.3", [hex: :gun, repo: "hexpm", optional: true]},
|
||||
{:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]},
|
||||
{:ibrowse, "~> 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]},
|
||||
{:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]},
|
||||
{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]},
|
||||
{:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]},
|
||||
{:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]},
|
||||
{:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}
|
||||
], "hexpm", "bf1374a5569f5fca8e641363b63f7347d680d91388880979a33bc12a6eb3e0aa"},
|
||||
timex:
|
||||
{:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56",
|
||||
[:mix],
|
||||
|
|
Loading…
Reference in New Issue