diff --git a/lib/frenzy/http.ex b/lib/frenzy/network.ex similarity index 55% rename from lib/frenzy/http.ex rename to lib/frenzy/network.ex index c5dfcd9..c97a324 100644 --- a/lib/frenzy/http.ex +++ b/lib/frenzy/network.ex @@ -1,14 +1,16 @@ -defmodule Frenzy.HTTP do +defmodule Frenzy.Network do require Logger - @redirect_codes [301, 302] - def get(url, opts \\ []) do + @http_redirect_codes [301, 302] + + @spec http_get(String.t(), Keyword.t()) :: {:ok, HTTPoison.Response.t()} | {:error, term()} + def http_get(url, opts \\ []) do case HTTPoison.get(url, opts) do {:ok, %HTTPoison.Response{status_code: 200} = response} -> {:ok, response} {:ok, %HTTPoison.Response{status_code: status_code, headers: headers}} - when status_code in @redirect_codes -> + when status_code in @http_redirect_codes -> headers |> Enum.find(fn {name, _value} -> name == "Location" end) |> case do @@ -24,7 +26,7 @@ defmodule Frenzy.HTTP do end Logger.debug("Got 301 redirect from #{url} to #{new_url}") - get(new_url, opts) + http_get(new_url, opts) _ -> {:error, "Missing Location header for redirect"} @@ -43,4 +45,26 @@ defmodule Frenzy.HTTP do {:error, reason} end end + + @gemini_success_codes 20..29 + @gemini_redirect_codes 30..39 + + @spec gemini_request(String.t() | URI.t()) :: {:ok, Gemini.Response.t()} | {:error, term()} + + def gemini_request(uri) do + case Gemini.request(uri) do + {:ok, %Gemini.Response{status: code} = response} when code in @gemini_success_codes -> + {:ok, response} + + {:ok, %Gemini.Response{status: code, meta: new_url}} + when code in @gemini_redirect_codes -> + gemini_request(URI.merge(uri, new_url)) + + {:ok, %Gemini.Response{status: code}} -> + {:error, "Unhandled Gemini status code: #{code}"} + + {:error, reason} -> + {:error, reason} + end + end end diff --git a/lib/frenzy/pipeline/scrape_stage.ex b/lib/frenzy/pipeline/scrape_stage.ex index e072438..c261b2f 100644 --- a/lib/frenzy/pipeline/scrape_stage.ex +++ b/lib/frenzy/pipeline/scrape_stage.ex @@ -1,6 +1,6 @@ defmodule Frenzy.Pipeline.ScrapeStage do require Logger - alias Frenzy.HTTP + alias Frenzy.Network alias Frenzy.Pipeline.Stage @behaviour Stage @@ -68,7 +68,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do Logger.debug("Getting article from #{url}") url - |> HTTP.get() + |> Network.http_get() |> case do {:ok, response} -> handle_response(url, response, opts) @@ -142,7 +142,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do defp image_to_data_uri(src, site_uri, true) do absolute_url = URI.merge(site_uri, src) |> to_string() - case HTTP.get(absolute_url) do + case Network.http_get(absolute_url) do {:ok, %HTTPoison.Response{body: body, headers: headers}} -> {"Content-Type", content_type} = Enum.find(headers, fn {header, _value} -> header == "Content-Type" end) diff --git a/lib/frenzy/task/fetch_favicon.ex b/lib/frenzy/task/fetch_favicon.ex index 62a4614..4ed2ad4 100644 --- a/lib/frenzy/task/fetch_favicon.ex +++ b/lib/frenzy/task/fetch_favicon.ex @@ -1,7 +1,7 @@ defmodule Frenzy.Task.FetchFavicon do require Logger use Task - alias Frenzy.{HTTP, Repo, Feed} + alias Frenzy.{Network, Repo, Feed} def start_link(feed) do Task.start_link(__MODULE__, :run, [feed]) @@ -41,7 +41,7 @@ defmodule Frenzy.Task.FetchFavicon do @spec fetch_favicon_url_from_webpage(url :: String.t()) :: String.t() defp fetch_favicon_url_from_webpage(url) when is_binary(url) do - case HTTP.get(url) do + case Network.http_get(url) do {:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 -> extract_favicon_url(url, body) @@ -108,7 +108,7 @@ defmodule Frenzy.Task.FetchFavicon do defp fetch_favicon_data(favicon_url) do Logger.debug("Fetching favicon from: '#{favicon_url}'") - case HTTP.get(favicon_url) do + case Network.http_get(favicon_url) do {:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 -> {:ok, "data:image/png;base64,#{Base.encode64(body)}"} diff --git a/lib/frenzy/update_feeds.ex b/lib/frenzy/update_feeds.ex index d28c0b5..3b6c172 100644 --- a/lib/frenzy/update_feeds.ex +++ b/lib/frenzy/update_feeds.ex @@ -1,6 +1,6 @@ defmodule Frenzy.UpdateFeeds do use GenServer - alias Frenzy.{HTTP, Repo, Feed, Item} + alias Frenzy.{Network, Repo, Feed, Item} alias Frenzy.Task.{CreateItem, FetchFavicon} import Ecto.Query require Logger @@ -86,7 +86,20 @@ defmodule Frenzy.UpdateFeeds do defp update_feed(feed) do Logger.debug("Updating #{feed.feed_url}") - case HTTP.get(feed.feed_url) do + case URI.parse(feed.feed_url) do + %URI{scheme: "gemini"} = uri -> + update_feed_gemini(feed, uri) + + %URI{scheme: scheme} when scheme in ["http", "https"] -> + update_feed_http(feed) + + %URI{scheme: scheme} -> + Logger.warn("Unhandled scheme for feed: #{scheme}") + end + end + + defp update_feed_http(feed) do + case Network.http_get(feed.feed_url) do {:ok, %HTTPoison.Response{ status_code: 200, @@ -103,36 +116,30 @@ defmodule Frenzy.UpdateFeeds do |> Enum.map(&String.trim/1) |> Enum.find(fn s -> !String.contains?(s, "=") end) - case FeedParser.parse(body, content_type) do - {:ok, rss} -> - update_feed_from_rss(feed, rss) + do_update_feed(feed, content_type, body) - {:error, reason} -> - Logger.error("Unable to parse feed at '#{feed.feed_url}': #{inspect(reason)}") - end + {:error, reason} -> + Logger.error("Couldn't load feed #{feed.feed_url}: #{inspect(reason)}") + end + end - {:ok, %HTTPoison.Response{status_code: 404}} -> - Logger.warn("RSS feed #{feed.feed_url} not found") + defp update_feed_gemini(feed, feed_uri) do + case Network.gemini_request(feed_uri) do + {:ok, %Gemini.Response{meta: content_type, body: body}} -> + do_update_feed(feed, content_type, body) - {:ok, %HTTPoison.Response{status_code: status_code, headers: headers}} - when status_code in [301, 302] -> - {"Location", new_url} = - Enum.find(headers, fn {name, _value} -> - name == "Location" - end) + {:error, reason} -> + Logger.error("Couldn't load feed #{feed.feed_url}: #{inspect(reason)}") + end + end - Logger.debug("Got 301 redirect from #{feed.feed_url} to #{new_url}, updating feed URL") - changeset = Feed.changeset(feed, %{feed_url: new_url}) - {:ok, feed} = Repo.update(changeset) - update_feed(feed) + defp do_update_feed(feed, content_type, data) do + case FeedParser.parse(data, content_type) do + {:ok, rss} -> + update_feed_from_rss(feed, rss) - {:ok, %HTTPoison.Response{} = response} -> - Logger.error( - "Couldn't load RSS feed #{feed.feed_url}, got unexpected response: #{inspect(response)}" - ) - - {:error, %HTTPoison.Error{reason: reason}} -> - Logger.error("Couldn't load RSS feed #{feed.feed_url}: #{inspect(reason)}") + {:error, reason} -> + Logger.error("Unable to parse feed at '#{feed.feed_url}': #{inspect(reason)}") end end diff --git a/mix.exs b/mix.exs index 02e3fa5..1f99e5f 100644 --- a/mix.exs +++ b/mix.exs @@ -54,7 +54,8 @@ defmodule Frenzy.MixProject do {:xml_builder, "~> 2.1.1"}, {:floki, "~> 0.23"}, {:phoenix_live_view, - git: "https://github.com/phoenixframework/phoenix_live_view", branch: "master"} + git: "https://github.com/phoenixframework/phoenix_live_view", branch: "master"}, + {:gemini, git: "https://git.shadowfacts.net/shadowfacts/gemini-ex.git", branch: "main"} ] end diff --git a/mix.lock b/mix.lock index 3b1d0e2..9758e96 100644 --- a/mix.lock +++ b/mix.lock @@ -18,6 +18,7 @@ "fiet": {:git, "https://github.com/shadowfacts/fiet.git", "bf117bc30a6355a189d05a562127cfaf9e0187ae", [branch: "master"]}, "file_system": {:hex, :file_system, "0.2.6", "fd4dc3af89b9ab1dc8ccbcc214a0e60c41f34be251d9307920748a14bf41f1d3", [:mix], [], "hexpm", "0d50da6b04c58e101a3793b1600f9a03b86e3a8057b192ac1766013d35706fa6"}, "floki": {:hex, :floki, "0.23.0", "956ab6dba828c96e732454809fb0bd8d43ce0979b75f34de6322e73d4c917829", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "e680b5ef0b61ce02faa7137db8d1714903a5552be4c89fb57293b8770e7f49c2"}, + "gemini": {:git, "https://git.shadowfacts.net/shadowfacts/gemini-ex.git", "37864e9f1196eb0efa71427d76a9279cee84ef19", [branch: "main"]}, "gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm", "e0b8598e802676c81e66b061a2148c37c03886b24a3ca86a1f98ed40693b94b3"}, "hackney": {:hex, :hackney, "1.16.0", "5096ac8e823e3a441477b2d187e30dd3fff1a82991a806b2003845ce72ce2d84", [:rebar3], [{:certifi, "2.5.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.1", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.0", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.6", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "3bf0bebbd5d3092a3543b783bf065165fa5d3ad4b899b836810e513064134e18"}, "html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm", "3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"}, @@ -43,6 +44,7 @@ "ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm", "451d8527787df716d99dc36162fca05934915db0b6141bbdac2ea8d3c7afc7d7"}, "readability": {:git, "https://github.com/shadowfacts/readability.git", "71fa17caaf8103ef213e2c7dde4b447a48669122", [branch: "master"]}, "saxy": {:hex, :saxy, "0.6.0", "cdb2f2fcd8133d1f3f8b0cf6a131ee1ca348dca613de266e9a239db850c4a093", [:mix], [], "hexpm"}, + "socket": {:hex, :socket, "0.3.13", "98a2ab20ce17f95fb512c5cadddba32b57273e0d2dba2d2e5f976c5969d0c632", [:mix], [], "hexpm", "f82ea9833ef49dde272e6568ab8aac657a636acb4cf44a7de8a935acb8957c2e"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"}, "telemetry": {:hex, :telemetry, "0.4.1", "ae2718484892448a24470e6aa341bc847c3277bfb8d4e9289f7474d752c09c7f", [:rebar3], [], "hexpm", "4738382e36a0a9a2b6e25d67c960e40e1a2c95560b9f936d8e29de8cd858480f"}, "timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "f354efb2400dd7a80fd9eb6c8419068c4f632da4ac47f3d8822d6e33f08bc852"},