Add gemini protocol feed fetching
This commit is contained in:
parent
537fcc62f8
commit
4f16933198
@ -1,14 +1,16 @@
|
||||
defmodule Frenzy.HTTP do
|
||||
defmodule Frenzy.Network do
|
||||
require Logger
|
||||
@redirect_codes [301, 302]
|
||||
|
||||
def get(url, opts \\ []) do
|
||||
@http_redirect_codes [301, 302]
|
||||
|
||||
@spec http_get(String.t(), Keyword.t()) :: {:ok, HTTPoison.Response.t()} | {:error, term()}
|
||||
def http_get(url, opts \\ []) do
|
||||
case HTTPoison.get(url, opts) do
|
||||
{:ok, %HTTPoison.Response{status_code: 200} = response} ->
|
||||
{:ok, response}
|
||||
|
||||
{:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
|
||||
when status_code in @redirect_codes ->
|
||||
when status_code in @http_redirect_codes ->
|
||||
headers
|
||||
|> Enum.find(fn {name, _value} -> name == "Location" end)
|
||||
|> case do
|
||||
@ -24,7 +26,7 @@ defmodule Frenzy.HTTP do
|
||||
end
|
||||
|
||||
Logger.debug("Got 301 redirect from #{url} to #{new_url}")
|
||||
get(new_url, opts)
|
||||
http_get(new_url, opts)
|
||||
|
||||
_ ->
|
||||
{:error, "Missing Location header for redirect"}
|
||||
@ -43,4 +45,26 @@ defmodule Frenzy.HTTP do
|
||||
{:error, reason}
|
||||
end
|
||||
end
|
||||
|
||||
@gemini_success_codes 20..29
|
||||
@gemini_redirect_codes 30..39
|
||||
|
||||
# Upper bound on how many consecutive redirects a single gemini_request/1
# call will follow before giving up. Without a bound, a server that
# redirects in a cycle (or forever) would make the request recurse endlessly.
@gemini_max_redirects 5

@doc """
Performs a Gemini request for `uri`, following redirects.

Returns `{:ok, response}` when the response status is one of the success
codes (`@gemini_success_codes`). When the status is one of the redirect
codes (`@gemini_redirect_codes`), the response's `meta` value is resolved
against the current URI with `URI.merge/2` and the request is retried, up
to `@gemini_max_redirects` times.

Returns `{:error, reason}` when the underlying request fails, when the
status code is neither a success nor a redirect, or when the redirect
limit is exceeded.
"""
@spec gemini_request(String.t() | URI.t()) :: {:ok, Gemini.Response.t()} | {:error, term()}
def gemini_request(uri) do
  do_gemini_request(uri, @gemini_max_redirects)
end

# Performs the request, tracking how many more redirects may still be followed.
defp do_gemini_request(uri, redirects_left) do
  case Gemini.request(uri) do
    {:ok, %Gemini.Response{status: code} = response} when code in @gemini_success_codes ->
      {:ok, response}

    # Redirect budget exhausted: stop instead of recursing again.
    {:ok, %Gemini.Response{status: code}}
    when code in @gemini_redirect_codes and redirects_left <= 0 ->
      {:error, "Too many Gemini redirects"}

    {:ok, %Gemini.Response{status: code, meta: new_url}}
    when code in @gemini_redirect_codes ->
      # The redirect target may be relative, so resolve it against the
      # URI that produced this response.
      do_gemini_request(URI.merge(uri, new_url), redirects_left - 1)

    {:ok, %Gemini.Response{status: code}} ->
      {:error, "Unhandled Gemini status code: #{code}"}

    {:error, reason} ->
      {:error, reason}
  end
end
|
||||
end
|
@ -1,6 +1,6 @@
|
||||
defmodule Frenzy.Pipeline.ScrapeStage do
|
||||
require Logger
|
||||
alias Frenzy.HTTP
|
||||
alias Frenzy.Network
|
||||
alias Frenzy.Pipeline.Stage
|
||||
@behaviour Stage
|
||||
|
||||
@ -68,7 +68,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
||||
Logger.debug("Getting article from #{url}")
|
||||
|
||||
url
|
||||
|> HTTP.get()
|
||||
|> Network.http_get()
|
||||
|> case do
|
||||
{:ok, response} ->
|
||||
handle_response(url, response, opts)
|
||||
@ -142,7 +142,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
||||
defp image_to_data_uri(src, site_uri, true) do
|
||||
absolute_url = URI.merge(site_uri, src) |> to_string()
|
||||
|
||||
case HTTP.get(absolute_url) do
|
||||
case Network.http_get(absolute_url) do
|
||||
{:ok, %HTTPoison.Response{body: body, headers: headers}} ->
|
||||
{"Content-Type", content_type} =
|
||||
Enum.find(headers, fn {header, _value} -> header == "Content-Type" end)
|
||||
|
@ -1,7 +1,7 @@
|
||||
defmodule Frenzy.Task.FetchFavicon do
|
||||
require Logger
|
||||
use Task
|
||||
alias Frenzy.{HTTP, Repo, Feed}
|
||||
alias Frenzy.{Network, Repo, Feed}
|
||||
|
||||
def start_link(feed) do
|
||||
Task.start_link(__MODULE__, :run, [feed])
|
||||
@ -41,7 +41,7 @@ defmodule Frenzy.Task.FetchFavicon do
|
||||
@spec fetch_favicon_url_from_webpage(url :: String.t()) :: String.t()
|
||||
|
||||
defp fetch_favicon_url_from_webpage(url) when is_binary(url) do
|
||||
case HTTP.get(url) do
|
||||
case Network.http_get(url) do
|
||||
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
|
||||
extract_favicon_url(url, body)
|
||||
|
||||
@ -108,7 +108,7 @@ defmodule Frenzy.Task.FetchFavicon do
|
||||
defp fetch_favicon_data(favicon_url) do
|
||||
Logger.debug("Fetching favicon from: '#{favicon_url}'")
|
||||
|
||||
case HTTP.get(favicon_url) do
|
||||
case Network.http_get(favicon_url) do
|
||||
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
|
||||
{:ok, "data:image/png;base64,#{Base.encode64(body)}"}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
defmodule Frenzy.UpdateFeeds do
|
||||
use GenServer
|
||||
alias Frenzy.{HTTP, Repo, Feed, Item}
|
||||
alias Frenzy.{Network, Repo, Feed, Item}
|
||||
alias Frenzy.Task.{CreateItem, FetchFavicon}
|
||||
import Ecto.Query
|
||||
require Logger
|
||||
@ -86,7 +86,20 @@ defmodule Frenzy.UpdateFeeds do
|
||||
defp update_feed(feed) do
|
||||
Logger.debug("Updating #{feed.feed_url}")
|
||||
|
||||
case HTTP.get(feed.feed_url) do
|
||||
case URI.parse(feed.feed_url) do
|
||||
%URI{scheme: "gemini"} = uri ->
|
||||
update_feed_gemini(feed, uri)
|
||||
|
||||
%URI{scheme: scheme} when scheme in ["http", "https"] ->
|
||||
update_feed_http(feed)
|
||||
|
||||
%URI{scheme: scheme} ->
|
||||
Logger.warn("Unhandled scheme for feed: #{scheme}")
|
||||
end
|
||||
end
|
||||
|
||||
defp update_feed_http(feed) do
|
||||
case Network.http_get(feed.feed_url) do
|
||||
{:ok,
|
||||
%HTTPoison.Response{
|
||||
status_code: 200,
|
||||
@ -103,36 +116,30 @@ defmodule Frenzy.UpdateFeeds do
|
||||
|> Enum.map(&String.trim/1)
|
||||
|> Enum.find(fn s -> !String.contains?(s, "=") end)
|
||||
|
||||
case FeedParser.parse(body, content_type) do
|
||||
{:ok, rss} ->
|
||||
update_feed_from_rss(feed, rss)
|
||||
do_update_feed(feed, content_type, body)
|
||||
|
||||
{:error, reason} ->
|
||||
Logger.error("Unable to parse feed at '#{feed.feed_url}': #{inspect(reason)}")
|
||||
end
|
||||
{:error, reason} ->
|
||||
Logger.error("Couldn't load feed #{feed.feed_url}: #{inspect(reason)}")
|
||||
end
|
||||
end
|
||||
|
||||
{:ok, %HTTPoison.Response{status_code: 404}} ->
|
||||
Logger.warn("RSS feed #{feed.feed_url} not found")
|
||||
defp update_feed_gemini(feed, feed_uri) do
|
||||
case Network.gemini_request(feed_uri) do
|
||||
{:ok, %Gemini.Response{meta: content_type, body: body}} ->
|
||||
do_update_feed(feed, content_type, body)
|
||||
|
||||
{:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
|
||||
when status_code in [301, 302] ->
|
||||
{"Location", new_url} =
|
||||
Enum.find(headers, fn {name, _value} ->
|
||||
name == "Location"
|
||||
end)
|
||||
{:error, reason} ->
|
||||
Logger.error("Couldn't load feed #{feed.feed_url}: #{inspect(reason)}")
|
||||
end
|
||||
end
|
||||
|
||||
Logger.debug("Got 301 redirect from #{feed.feed_url} to #{new_url}, updating feed URL")
|
||||
changeset = Feed.changeset(feed, %{feed_url: new_url})
|
||||
{:ok, feed} = Repo.update(changeset)
|
||||
update_feed(feed)
|
||||
defp do_update_feed(feed, content_type, data) do
|
||||
case FeedParser.parse(data, content_type) do
|
||||
{:ok, rss} ->
|
||||
update_feed_from_rss(feed, rss)
|
||||
|
||||
{:ok, %HTTPoison.Response{} = response} ->
|
||||
Logger.error(
|
||||
"Couldn't load RSS feed #{feed.feed_url}, got unexpected response: #{inspect(response)}"
|
||||
)
|
||||
|
||||
{:error, %HTTPoison.Error{reason: reason}} ->
|
||||
Logger.error("Couldn't load RSS feed #{feed.feed_url}: #{inspect(reason)}")
|
||||
{:error, reason} ->
|
||||
Logger.error("Unable to parse feed at '#{feed.feed_url}': #{inspect(reason)}")
|
||||
end
|
||||
end
|
||||
|
||||
|
3
mix.exs
3
mix.exs
@ -54,7 +54,8 @@ defmodule Frenzy.MixProject do
|
||||
{:xml_builder, "~> 2.1.1"},
|
||||
{:floki, "~> 0.23"},
|
||||
{:phoenix_live_view,
|
||||
git: "https://github.com/phoenixframework/phoenix_live_view", branch: "master"}
|
||||
git: "https://github.com/phoenixframework/phoenix_live_view", branch: "master"},
|
||||
{:gemini, git: "https://git.shadowfacts.net/shadowfacts/gemini-ex.git", branch: "main"}
|
||||
]
|
||||
end
|
||||
|
||||
|
2
mix.lock
2
mix.lock
@ -18,6 +18,7 @@
|
||||
"fiet": {:git, "https://github.com/shadowfacts/fiet.git", "bf117bc30a6355a189d05a562127cfaf9e0187ae", [branch: "master"]},
|
||||
"file_system": {:hex, :file_system, "0.2.6", "fd4dc3af89b9ab1dc8ccbcc214a0e60c41f34be251d9307920748a14bf41f1d3", [:mix], [], "hexpm", "0d50da6b04c58e101a3793b1600f9a03b86e3a8057b192ac1766013d35706fa6"},
|
||||
"floki": {:hex, :floki, "0.23.0", "956ab6dba828c96e732454809fb0bd8d43ce0979b75f34de6322e73d4c917829", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "e680b5ef0b61ce02faa7137db8d1714903a5552be4c89fb57293b8770e7f49c2"},
|
||||
"gemini": {:git, "https://git.shadowfacts.net/shadowfacts/gemini-ex.git", "37864e9f1196eb0efa71427d76a9279cee84ef19", [branch: "main"]},
|
||||
"gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm", "e0b8598e802676c81e66b061a2148c37c03886b24a3ca86a1f98ed40693b94b3"},
|
||||
"hackney": {:hex, :hackney, "1.16.0", "5096ac8e823e3a441477b2d187e30dd3fff1a82991a806b2003845ce72ce2d84", [:rebar3], [{:certifi, "2.5.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.1", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.0", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.6", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "3bf0bebbd5d3092a3543b783bf065165fa5d3ad4b899b836810e513064134e18"},
|
||||
"html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm", "3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"},
|
||||
@ -43,6 +44,7 @@
|
||||
"ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm", "451d8527787df716d99dc36162fca05934915db0b6141bbdac2ea8d3c7afc7d7"},
|
||||
"readability": {:git, "https://github.com/shadowfacts/readability.git", "71fa17caaf8103ef213e2c7dde4b447a48669122", [branch: "master"]},
|
||||
"saxy": {:hex, :saxy, "0.6.0", "cdb2f2fcd8133d1f3f8b0cf6a131ee1ca348dca613de266e9a239db850c4a093", [:mix], [], "hexpm"},
|
||||
"socket": {:hex, :socket, "0.3.13", "98a2ab20ce17f95fb512c5cadddba32b57273e0d2dba2d2e5f976c5969d0c632", [:mix], [], "hexpm", "f82ea9833ef49dde272e6568ab8aac657a636acb4cf44a7de8a935acb8957c2e"},
|
||||
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"},
|
||||
"telemetry": {:hex, :telemetry, "0.4.1", "ae2718484892448a24470e6aa341bc847c3277bfb8d4e9289f7474d752c09c7f", [:rebar3], [], "hexpm", "4738382e36a0a9a2b6e25d67c960e40e1a2c95560b9f936d8e29de8cd858480f"},
|
||||
"timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "f354efb2400dd7a80fd9eb6c8419068c4f632da4ac47f3d8822d6e33f08bc852"},
|
||||
|
Loading…
x
Reference in New Issue
Block a user