Add gemini protocol feed fetching

This commit is contained in:
Shadowfacts 2020-07-18 19:27:53 -04:00
parent 537fcc62f8
commit 4f16933198
Signed by: shadowfacts
GPG Key ID: 94A5AB95422746E5
6 changed files with 73 additions and 39 deletions

View File

@ -1,14 +1,16 @@
defmodule Frenzy.HTTP do
defmodule Frenzy.Network do
require Logger
@redirect_codes [301, 302]
def get(url, opts \\ []) do
@http_redirect_codes [301, 302]
@spec http_get(String.t(), Keyword.t()) :: {:ok, HTTPoison.Response.t()} | {:error, term()}
def http_get(url, opts \\ []) do
case HTTPoison.get(url, opts) do
{:ok, %HTTPoison.Response{status_code: 200} = response} ->
{:ok, response}
{:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
when status_code in @redirect_codes ->
when status_code in @http_redirect_codes ->
headers
|> Enum.find(fn {name, _value} -> name == "Location" end)
|> case do
@ -24,7 +26,7 @@ defmodule Frenzy.HTTP do
end
Logger.debug("Got 301 redirect from #{url} to #{new_url}")
get(new_url, opts)
http_get(new_url, opts)
_ ->
{:error, "Missing Location header for redirect"}
@ -43,4 +45,26 @@ defmodule Frenzy.HTTP do
{:error, reason}
end
end
@gemini_success_codes 20..29
@gemini_redirect_codes 30..39
# Upper bound on redirects followed for a single request, so that a server
# answering with a redirect cycle cannot make the fetch recurse forever.
@gemini_max_redirects 5

@doc """
Performs a Gemini request for `uri`, following redirects.

Returns `{:ok, response}` when the response status is in the 20..29 range.
A status in the 30..39 range is treated as a redirect: the response `meta`
is resolved against the current `uri` with `URI.merge/2` and requested in turn.

At most `redirects_remaining` redirects are followed (default
#{@gemini_max_redirects}); once the budget is used up,
`{:error, "Too many Gemini redirects"}` is returned instead of recursing.

Any other status yields `{:error, "Unhandled Gemini status code: <code>"}`,
and an `{:error, reason}` from `Gemini.request/1` is passed through unchanged.
"""
@spec gemini_request(String.t() | URI.t(), non_neg_integer()) ::
        {:ok, Gemini.Response.t()} | {:error, term()}
def gemini_request(uri, redirects_remaining \\ @gemini_max_redirects) do
  case Gemini.request(uri) do
    {:ok, %Gemini.Response{status: code} = response} when code in @gemini_success_codes ->
      {:ok, response}

    {:ok, %Gemini.Response{status: code, meta: new_url}}
    when code in @gemini_redirect_codes and redirects_remaining > 0 ->
      # The redirect target may be relative, so resolve it against the URI just requested.
      gemini_request(URI.merge(uri, new_url), redirects_remaining - 1)

    {:ok, %Gemini.Response{status: code}} when code in @gemini_redirect_codes ->
      # Redirect budget exhausted: stop here rather than loop indefinitely.
      {:error, "Too many Gemini redirects"}

    {:ok, %Gemini.Response{status: code}} ->
      {:error, "Unhandled Gemini status code: #{code}"}

    {:error, reason} ->
      {:error, reason}
  end
end
end

View File

@ -1,6 +1,6 @@
defmodule Frenzy.Pipeline.ScrapeStage do
require Logger
alias Frenzy.HTTP
alias Frenzy.Network
alias Frenzy.Pipeline.Stage
@behaviour Stage
@ -68,7 +68,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
Logger.debug("Getting article from #{url}")
url
|> HTTP.get()
|> Network.http_get()
|> case do
{:ok, response} ->
handle_response(url, response, opts)
@ -142,7 +142,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
defp image_to_data_uri(src, site_uri, true) do
absolute_url = URI.merge(site_uri, src) |> to_string()
case HTTP.get(absolute_url) do
case Network.http_get(absolute_url) do
{:ok, %HTTPoison.Response{body: body, headers: headers}} ->
{"Content-Type", content_type} =
Enum.find(headers, fn {header, _value} -> header == "Content-Type" end)

View File

@ -1,7 +1,7 @@
defmodule Frenzy.Task.FetchFavicon do
require Logger
use Task
alias Frenzy.{HTTP, Repo, Feed}
alias Frenzy.{Network, Repo, Feed}
def start_link(feed) do
Task.start_link(__MODULE__, :run, [feed])
@ -41,7 +41,7 @@ defmodule Frenzy.Task.FetchFavicon do
@spec fetch_favicon_url_from_webpage(url :: String.t()) :: String.t()
defp fetch_favicon_url_from_webpage(url) when is_binary(url) do
case HTTP.get(url) do
case Network.http_get(url) do
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
extract_favicon_url(url, body)
@ -108,7 +108,7 @@ defmodule Frenzy.Task.FetchFavicon do
defp fetch_favicon_data(favicon_url) do
Logger.debug("Fetching favicon from: '#{favicon_url}'")
case HTTP.get(favicon_url) do
case Network.http_get(favicon_url) do
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
{:ok, "data:image/png;base64,#{Base.encode64(body)}"}

View File

@ -1,6 +1,6 @@
defmodule Frenzy.UpdateFeeds do
use GenServer
alias Frenzy.{HTTP, Repo, Feed, Item}
alias Frenzy.{Network, Repo, Feed, Item}
alias Frenzy.Task.{CreateItem, FetchFavicon}
import Ecto.Query
require Logger
@ -86,7 +86,20 @@ defmodule Frenzy.UpdateFeeds do
# Refreshes a single feed, picking the fetcher from the scheme of `feed.feed_url`.
defp update_feed(feed) do
Logger.debug("Updating #{feed.feed_url}")
# NOTE(review): the `HTTP.get` line below looks like the pre-change line kept by the
# diff view; the `URI.parse` line after it appears to be its replacement — confirm.
case HTTP.get(feed.feed_url) do
case URI.parse(feed.feed_url) do
# Gemini feeds are handed the already-parsed URI.
%URI{scheme: "gemini"} = uri ->
update_feed_gemini(feed, uri)
# HTTP(S) feeds are fetched from `feed.feed_url` by update_feed_http/1.
%URI{scheme: scheme} when scheme in ["http", "https"] ->
update_feed_http(feed)
# Any other scheme is only logged; no fetch is attempted.
%URI{scheme: scheme} ->
Logger.warn("Unhandled scheme for feed: #{scheme}")
end
end
defp update_feed_http(feed) do
case Network.http_get(feed.feed_url) do
{:ok,
%HTTPoison.Response{
status_code: 200,
@ -103,36 +116,30 @@ defmodule Frenzy.UpdateFeeds do
|> Enum.map(&String.trim/1)
|> Enum.find(fn s -> !String.contains?(s, "=") end)
case FeedParser.parse(body, content_type) do
{:ok, rss} ->
update_feed_from_rss(feed, rss)
do_update_feed(feed, content_type, body)
{:error, reason} ->
Logger.error("Unable to parse feed at '#{feed.feed_url}': #{inspect(reason)}")
end
{:error, reason} ->
Logger.error("Couldn't load feed #{feed.feed_url}: #{inspect(reason)}")
end
end
{:ok, %HTTPoison.Response{status_code: 404}} ->
Logger.warn("RSS feed #{feed.feed_url} not found")
# Fetches a feed over Gemini via Network.gemini_request/1 and passes the response
# body to do_update_feed/3, using the response `meta` as the content type.
defp update_feed_gemini(feed, feed_uri) do
case Network.gemini_request(feed_uri) do
# NOTE(review): `meta` is forwarded unmodified as the content type; the HTTP path
# appears to drop `key=value` segments first — confirm FeedParser.parse accepts
# a value that still carries parameters.
{:ok, %Gemini.Response{meta: content_type, body: body}} ->
do_update_feed(feed, content_type, body)
# NOTE(review): the HTTPoison redirect lines below look like removed lines from the
# old HTTP implementation that the diff view interleaved here — confirm.
{:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
when status_code in [301, 302] ->
{"Location", new_url} =
Enum.find(headers, fn {name, _value} ->
name == "Location"
end)
# Request failures are logged; nothing is raised to the caller.
{:error, reason} ->
Logger.error("Couldn't load feed #{feed.feed_url}: #{inspect(reason)}")
end
end
Logger.debug("Got 301 redirect from #{feed.feed_url} to #{new_url}, updating feed URL")
changeset = Feed.changeset(feed, %{feed_url: new_url})
{:ok, feed} = Repo.update(changeset)
update_feed(feed)
# Parses already-fetched feed `data` with FeedParser.parse/2 using `content_type`
# and, on success, hands the parsed result to update_feed_from_rss/2.
defp do_update_feed(feed, content_type, data) do
case FeedParser.parse(data, content_type) do
{:ok, rss} ->
update_feed_from_rss(feed, rss)
# NOTE(review): the two HTTPoison clauses below look like removed lines from the
# old HTTP implementation that the diff view interleaved here — confirm.
{:ok, %HTTPoison.Response{} = response} ->
Logger.error(
"Couldn't load RSS feed #{feed.feed_url}, got unexpected response: #{inspect(response)}"
)
{:error, %HTTPoison.Error{reason: reason}} ->
Logger.error("Couldn't load RSS feed #{feed.feed_url}: #{inspect(reason)}")
# A parse failure is logged with the feed URL; nothing is raised.
{:error, reason} ->
Logger.error("Unable to parse feed at '#{feed.feed_url}': #{inspect(reason)}")
end
end

View File

@ -54,7 +54,8 @@ defmodule Frenzy.MixProject do
{:xml_builder, "~> 2.1.1"},
{:floki, "~> 0.23"},
{:phoenix_live_view,
git: "https://github.com/phoenixframework/phoenix_live_view", branch: "master"}
git: "https://github.com/phoenixframework/phoenix_live_view", branch: "master"},
{:gemini, git: "https://git.shadowfacts.net/shadowfacts/gemini-ex.git", branch: "main"}
]
end

View File

@ -18,6 +18,7 @@
"fiet": {:git, "https://github.com/shadowfacts/fiet.git", "bf117bc30a6355a189d05a562127cfaf9e0187ae", [branch: "master"]},
"file_system": {:hex, :file_system, "0.2.6", "fd4dc3af89b9ab1dc8ccbcc214a0e60c41f34be251d9307920748a14bf41f1d3", [:mix], [], "hexpm", "0d50da6b04c58e101a3793b1600f9a03b86e3a8057b192ac1766013d35706fa6"},
"floki": {:hex, :floki, "0.23.0", "956ab6dba828c96e732454809fb0bd8d43ce0979b75f34de6322e73d4c917829", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "e680b5ef0b61ce02faa7137db8d1714903a5552be4c89fb57293b8770e7f49c2"},
"gemini": {:git, "https://git.shadowfacts.net/shadowfacts/gemini-ex.git", "37864e9f1196eb0efa71427d76a9279cee84ef19", [branch: "main"]},
"gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm", "e0b8598e802676c81e66b061a2148c37c03886b24a3ca86a1f98ed40693b94b3"},
"hackney": {:hex, :hackney, "1.16.0", "5096ac8e823e3a441477b2d187e30dd3fff1a82991a806b2003845ce72ce2d84", [:rebar3], [{:certifi, "2.5.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.1", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.0", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.6", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "3bf0bebbd5d3092a3543b783bf065165fa5d3ad4b899b836810e513064134e18"},
"html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm", "3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"},
@ -43,6 +44,7 @@
"ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm", "451d8527787df716d99dc36162fca05934915db0b6141bbdac2ea8d3c7afc7d7"},
"readability": {:git, "https://github.com/shadowfacts/readability.git", "71fa17caaf8103ef213e2c7dde4b447a48669122", [branch: "master"]},
"saxy": {:hex, :saxy, "0.6.0", "cdb2f2fcd8133d1f3f8b0cf6a131ee1ca348dca613de266e9a239db850c4a093", [:mix], [], "hexpm"},
"socket": {:hex, :socket, "0.3.13", "98a2ab20ce17f95fb512c5cadddba32b57273e0d2dba2d2e5f976c5969d0c632", [:mix], [], "hexpm", "f82ea9833ef49dde272e6568ab8aac657a636acb4cf44a7de8a935acb8957c2e"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"},
"telemetry": {:hex, :telemetry, "0.4.1", "ae2718484892448a24470e6aa341bc847c3277bfb8d4e9289f7474d752c09c7f", [:rebar3], [], "hexpm", "4738382e36a0a9a2b6e25d67c960e40e1a2c95560b9f936d8e29de8cd858480f"},
"timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "f354efb2400dd7a80fd9eb6c8419068c4f632da4ac47f3d8822d6e33f08bc852"},