Add Gemini protocol feed fetching
This commit is contained in:
parent
537fcc62f8
commit
4f16933198
|
@ -1,14 +1,16 @@
|
||||||
defmodule Frenzy.HTTP do
|
defmodule Frenzy.Network do
|
||||||
require Logger
|
require Logger
|
||||||
@redirect_codes [301, 302]
|
|
||||||
|
|
||||||
def get(url, opts \\ []) do
|
@http_redirect_codes [301, 302]
|
||||||
|
|
||||||
|
@spec http_get(String.t(), Keyword.t()) :: {:ok, HTTPoison.Response.t()} | {:error, term()}
|
||||||
|
def http_get(url, opts \\ []) do
|
||||||
case HTTPoison.get(url, opts) do
|
case HTTPoison.get(url, opts) do
|
||||||
{:ok, %HTTPoison.Response{status_code: 200} = response} ->
|
{:ok, %HTTPoison.Response{status_code: 200} = response} ->
|
||||||
{:ok, response}
|
{:ok, response}
|
||||||
|
|
||||||
{:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
|
{:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
|
||||||
when status_code in @redirect_codes ->
|
when status_code in @http_redirect_codes ->
|
||||||
headers
|
headers
|
||||||
|> Enum.find(fn {name, _value} -> name == "Location" end)
|
|> Enum.find(fn {name, _value} -> name == "Location" end)
|
||||||
|> case do
|
|> case do
|
||||||
|
@ -24,7 +26,7 @@ defmodule Frenzy.HTTP do
|
||||||
end
|
end
|
||||||
|
|
||||||
Logger.debug("Got 301 redirect from #{url} to #{new_url}")
|
Logger.debug("Got 301 redirect from #{url} to #{new_url}")
|
||||||
get(new_url, opts)
|
http_get(new_url, opts)
|
||||||
|
|
||||||
_ ->
|
_ ->
|
||||||
{:error, "Missing Location header for redirect"}
|
{:error, "Missing Location header for redirect"}
|
||||||
|
@ -43,4 +45,26 @@ defmodule Frenzy.HTTP do
|
||||||
{:error, reason}
|
{:error, reason}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@gemini_success_codes 20..29
|
||||||
|
@gemini_redirect_codes 30..39
|
||||||
|
|
||||||
|
# Upper bound on consecutive redirects followed for a single request, so a
# redirect cycle (or an endless redirect chain) cannot recurse forever.
@gemini_max_redirects 5

@doc """
Fetches `uri` over the Gemini protocol, following redirects.

  * A response whose status is in `@gemini_success_codes` (20..29) is returned
    as `{:ok, response}`.
  * A response whose status is in `@gemini_redirect_codes` (30..39) is treated
    as a redirect: its `meta` is resolved against the current URI with
    `URI.merge/2` and the result is requested in turn. At most
    `@gemini_max_redirects` consecutive redirects are followed; after that
    `{:error, "Too many Gemini redirects"}` is returned.
  * Any other status yields `{:error, "Unhandled Gemini status code: <code>"}`.
  * An `{:error, reason}` from `Gemini.request/1` is passed through unchanged.
"""
@spec gemini_request(String.t() | URI.t()) :: {:ok, Gemini.Response.t()} | {:error, term()}
def gemini_request(uri), do: do_gemini_request(uri, @gemini_max_redirects)

# Performs the request for `uri`; `redirects_left` is how many more redirects
# may still be followed before giving up.
defp do_gemini_request(uri, redirects_left) do
  case Gemini.request(uri) do
    {:ok, %Gemini.Response{status: code} = response} when code in @gemini_success_codes ->
      {:ok, response}

    # Redirect budget exhausted: stop instead of recursing again.
    {:ok, %Gemini.Response{status: code}}
    when code in @gemini_redirect_codes and redirects_left <= 0 ->
      {:error, "Too many Gemini redirects"}

    # `meta` holds the redirect target; it may be relative, hence the merge
    # against the URI that was just requested.
    {:ok, %Gemini.Response{status: code, meta: new_url}}
    when code in @gemini_redirect_codes ->
      do_gemini_request(URI.merge(uri, new_url), redirects_left - 1)

    {:ok, %Gemini.Response{status: code}} ->
      {:error, "Unhandled Gemini status code: #{code}"}

    {:error, reason} ->
      {:error, reason}
  end
end
|
||||||
end
|
end
|
|
@ -1,6 +1,6 @@
|
||||||
defmodule Frenzy.Pipeline.ScrapeStage do
|
defmodule Frenzy.Pipeline.ScrapeStage do
|
||||||
require Logger
|
require Logger
|
||||||
alias Frenzy.HTTP
|
alias Frenzy.Network
|
||||||
alias Frenzy.Pipeline.Stage
|
alias Frenzy.Pipeline.Stage
|
||||||
@behaviour Stage
|
@behaviour Stage
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
||||||
Logger.debug("Getting article from #{url}")
|
Logger.debug("Getting article from #{url}")
|
||||||
|
|
||||||
url
|
url
|
||||||
|> HTTP.get()
|
|> Network.http_get()
|
||||||
|> case do
|
|> case do
|
||||||
{:ok, response} ->
|
{:ok, response} ->
|
||||||
handle_response(url, response, opts)
|
handle_response(url, response, opts)
|
||||||
|
@ -142,7 +142,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
||||||
defp image_to_data_uri(src, site_uri, true) do
|
defp image_to_data_uri(src, site_uri, true) do
|
||||||
absolute_url = URI.merge(site_uri, src) |> to_string()
|
absolute_url = URI.merge(site_uri, src) |> to_string()
|
||||||
|
|
||||||
case HTTP.get(absolute_url) do
|
case Network.http_get(absolute_url) do
|
||||||
{:ok, %HTTPoison.Response{body: body, headers: headers}} ->
|
{:ok, %HTTPoison.Response{body: body, headers: headers}} ->
|
||||||
{"Content-Type", content_type} =
|
{"Content-Type", content_type} =
|
||||||
Enum.find(headers, fn {header, _value} -> header == "Content-Type" end)
|
Enum.find(headers, fn {header, _value} -> header == "Content-Type" end)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
defmodule Frenzy.Task.FetchFavicon do
|
defmodule Frenzy.Task.FetchFavicon do
|
||||||
require Logger
|
require Logger
|
||||||
use Task
|
use Task
|
||||||
alias Frenzy.{HTTP, Repo, Feed}
|
alias Frenzy.{Network, Repo, Feed}
|
||||||
|
|
||||||
def start_link(feed) do
|
def start_link(feed) do
|
||||||
Task.start_link(__MODULE__, :run, [feed])
|
Task.start_link(__MODULE__, :run, [feed])
|
||||||
|
@ -41,7 +41,7 @@ defmodule Frenzy.Task.FetchFavicon do
|
||||||
@spec fetch_favicon_url_from_webpage(url :: String.t()) :: String.t()
|
@spec fetch_favicon_url_from_webpage(url :: String.t()) :: String.t()
|
||||||
|
|
||||||
defp fetch_favicon_url_from_webpage(url) when is_binary(url) do
|
defp fetch_favicon_url_from_webpage(url) when is_binary(url) do
|
||||||
case HTTP.get(url) do
|
case Network.http_get(url) do
|
||||||
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
|
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
|
||||||
extract_favicon_url(url, body)
|
extract_favicon_url(url, body)
|
||||||
|
|
||||||
|
@ -108,7 +108,7 @@ defmodule Frenzy.Task.FetchFavicon do
|
||||||
defp fetch_favicon_data(favicon_url) do
|
defp fetch_favicon_data(favicon_url) do
|
||||||
Logger.debug("Fetching favicon from: '#{favicon_url}'")
|
Logger.debug("Fetching favicon from: '#{favicon_url}'")
|
||||||
|
|
||||||
case HTTP.get(favicon_url) do
|
case Network.http_get(favicon_url) do
|
||||||
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
|
{:ok, %HTTPoison.Response{body: body, status_code: code}} when code in 200..299 ->
|
||||||
{:ok, "data:image/png;base64,#{Base.encode64(body)}"}
|
{:ok, "data:image/png;base64,#{Base.encode64(body)}"}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
defmodule Frenzy.UpdateFeeds do
|
defmodule Frenzy.UpdateFeeds do
|
||||||
use GenServer
|
use GenServer
|
||||||
alias Frenzy.{HTTP, Repo, Feed, Item}
|
alias Frenzy.{Network, Repo, Feed, Item}
|
||||||
alias Frenzy.Task.{CreateItem, FetchFavicon}
|
alias Frenzy.Task.{CreateItem, FetchFavicon}
|
||||||
import Ecto.Query
|
import Ecto.Query
|
||||||
require Logger
|
require Logger
|
||||||
|
@ -86,7 +86,20 @@ defmodule Frenzy.UpdateFeeds do
|
||||||
defp update_feed(feed) do
|
defp update_feed(feed) do
|
||||||
Logger.debug("Updating #{feed.feed_url}")
|
Logger.debug("Updating #{feed.feed_url}")
|
||||||
|
|
||||||
case HTTP.get(feed.feed_url) do
|
case URI.parse(feed.feed_url) do
|
||||||
|
%URI{scheme: "gemini"} = uri ->
|
||||||
|
update_feed_gemini(feed, uri)
|
||||||
|
|
||||||
|
%URI{scheme: scheme} when scheme in ["http", "https"] ->
|
||||||
|
update_feed_http(feed)
|
||||||
|
|
||||||
|
%URI{scheme: scheme} ->
|
||||||
|
Logger.warn("Unhandled scheme for feed: #{scheme}")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
defp update_feed_http(feed) do
|
||||||
|
case Network.http_get(feed.feed_url) do
|
||||||
{:ok,
|
{:ok,
|
||||||
%HTTPoison.Response{
|
%HTTPoison.Response{
|
||||||
status_code: 200,
|
status_code: 200,
|
||||||
|
@ -103,37 +116,31 @@ defmodule Frenzy.UpdateFeeds do
|
||||||
|> Enum.map(&String.trim/1)
|
|> Enum.map(&String.trim/1)
|
||||||
|> Enum.find(fn s -> !String.contains?(s, "=") end)
|
|> Enum.find(fn s -> !String.contains?(s, "=") end)
|
||||||
|
|
||||||
case FeedParser.parse(body, content_type) do
|
do_update_feed(feed, content_type, body)
|
||||||
|
|
||||||
|
{:error, reason} ->
|
||||||
|
Logger.error("Couldn't load feed #{feed.feed_url}: #{inspect(reason)}")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Fetches the feed at `feed_uri` via `Network.gemini_request/1` and hands the
# response body to `do_update_feed/3`; a failed request is logged as an error.
#
# The response's `meta` is used as the content type. It is reduced to the bare
# media type first (see `gemini_content_type/1`), mirroring the trim /
# drop-`key=value` steps that `update_feed_http/1` applies to its content type
# before calling `do_update_feed/3`.
defp update_feed_gemini(feed, feed_uri) do
  case Network.gemini_request(feed_uri) do
    {:ok, %Gemini.Response{meta: meta, body: body}} ->
      do_update_feed(feed, gemini_content_type(meta), body)

    {:error, reason} ->
      Logger.error("Couldn't load feed #{feed.feed_url}: #{inspect(reason)}")
  end
end

# Strips `;`-separated parameters (e.g. `; charset=utf-8`) from a Gemini
# `meta` string, keeping the first segment that is not a `key=value` pair.
# NOTE(review): assumes a success `meta` is a MIME type with optional
# parameters, per the Gemini specification — confirm against `Gemini.Response`.
defp gemini_content_type(meta) when is_binary(meta) do
  meta
  |> String.split(";")
  |> Enum.map(&String.trim/1)
  |> Enum.find(fn part -> !String.contains?(part, "=") end)
end

# Anything that is not a binary is passed through untouched, as before.
defp gemini_content_type(meta), do: meta
|
||||||
|
|
||||||
|
defp do_update_feed(feed, content_type, data) do
|
||||||
|
case FeedParser.parse(data, content_type) do
|
||||||
{:ok, rss} ->
|
{:ok, rss} ->
|
||||||
update_feed_from_rss(feed, rss)
|
update_feed_from_rss(feed, rss)
|
||||||
|
|
||||||
{:error, reason} ->
|
{:error, reason} ->
|
||||||
Logger.error("Unable to parse feed at '#{feed.feed_url}': #{inspect(reason)}")
|
Logger.error("Unable to parse feed at '#{feed.feed_url}': #{inspect(reason)}")
|
||||||
end
|
end
|
||||||
|
|
||||||
{:ok, %HTTPoison.Response{status_code: 404}} ->
|
|
||||||
Logger.warn("RSS feed #{feed.feed_url} not found")
|
|
||||||
|
|
||||||
{:ok, %HTTPoison.Response{status_code: status_code, headers: headers}}
|
|
||||||
when status_code in [301, 302] ->
|
|
||||||
{"Location", new_url} =
|
|
||||||
Enum.find(headers, fn {name, _value} ->
|
|
||||||
name == "Location"
|
|
||||||
end)
|
|
||||||
|
|
||||||
Logger.debug("Got 301 redirect from #{feed.feed_url} to #{new_url}, updating feed URL")
|
|
||||||
changeset = Feed.changeset(feed, %{feed_url: new_url})
|
|
||||||
{:ok, feed} = Repo.update(changeset)
|
|
||||||
update_feed(feed)
|
|
||||||
|
|
||||||
{:ok, %HTTPoison.Response{} = response} ->
|
|
||||||
Logger.error(
|
|
||||||
"Couldn't load RSS feed #{feed.feed_url}, got unexpected response: #{inspect(response)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
{:error, %HTTPoison.Error{reason: reason}} ->
|
|
||||||
Logger.error("Couldn't load RSS feed #{feed.feed_url}: #{inspect(reason)}")
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
defp update_feed_from_rss(feed, %FeedParser.Feed{} = rss) do
|
defp update_feed_from_rss(feed, %FeedParser.Feed{} = rss) do
|
||||||
|
|
3
mix.exs
3
mix.exs
|
@ -54,7 +54,8 @@ defmodule Frenzy.MixProject do
|
||||||
{:xml_builder, "~> 2.1.1"},
|
{:xml_builder, "~> 2.1.1"},
|
||||||
{:floki, "~> 0.23"},
|
{:floki, "~> 0.23"},
|
||||||
{:phoenix_live_view,
|
{:phoenix_live_view,
|
||||||
git: "https://github.com/phoenixframework/phoenix_live_view", branch: "master"}
|
git: "https://github.com/phoenixframework/phoenix_live_view", branch: "master"},
|
||||||
|
{:gemini, git: "https://git.shadowfacts.net/shadowfacts/gemini-ex.git", branch: "main"}
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
2
mix.lock
2
mix.lock
|
@ -18,6 +18,7 @@
|
||||||
"fiet": {:git, "https://github.com/shadowfacts/fiet.git", "bf117bc30a6355a189d05a562127cfaf9e0187ae", [branch: "master"]},
|
"fiet": {:git, "https://github.com/shadowfacts/fiet.git", "bf117bc30a6355a189d05a562127cfaf9e0187ae", [branch: "master"]},
|
||||||
"file_system": {:hex, :file_system, "0.2.6", "fd4dc3af89b9ab1dc8ccbcc214a0e60c41f34be251d9307920748a14bf41f1d3", [:mix], [], "hexpm", "0d50da6b04c58e101a3793b1600f9a03b86e3a8057b192ac1766013d35706fa6"},
|
"file_system": {:hex, :file_system, "0.2.6", "fd4dc3af89b9ab1dc8ccbcc214a0e60c41f34be251d9307920748a14bf41f1d3", [:mix], [], "hexpm", "0d50da6b04c58e101a3793b1600f9a03b86e3a8057b192ac1766013d35706fa6"},
|
||||||
"floki": {:hex, :floki, "0.23.0", "956ab6dba828c96e732454809fb0bd8d43ce0979b75f34de6322e73d4c917829", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "e680b5ef0b61ce02faa7137db8d1714903a5552be4c89fb57293b8770e7f49c2"},
|
"floki": {:hex, :floki, "0.23.0", "956ab6dba828c96e732454809fb0bd8d43ce0979b75f34de6322e73d4c917829", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "e680b5ef0b61ce02faa7137db8d1714903a5552be4c89fb57293b8770e7f49c2"},
|
||||||
|
"gemini": {:git, "https://git.shadowfacts.net/shadowfacts/gemini-ex.git", "37864e9f1196eb0efa71427d76a9279cee84ef19", [branch: "main"]},
|
||||||
"gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm", "e0b8598e802676c81e66b061a2148c37c03886b24a3ca86a1f98ed40693b94b3"},
|
"gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm", "e0b8598e802676c81e66b061a2148c37c03886b24a3ca86a1f98ed40693b94b3"},
|
||||||
"hackney": {:hex, :hackney, "1.16.0", "5096ac8e823e3a441477b2d187e30dd3fff1a82991a806b2003845ce72ce2d84", [:rebar3], [{:certifi, "2.5.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.1", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.0", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.6", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "3bf0bebbd5d3092a3543b783bf065165fa5d3ad4b899b836810e513064134e18"},
|
"hackney": {:hex, :hackney, "1.16.0", "5096ac8e823e3a441477b2d187e30dd3fff1a82991a806b2003845ce72ce2d84", [:rebar3], [{:certifi, "2.5.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.1", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.0", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.6", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "3bf0bebbd5d3092a3543b783bf065165fa5d3ad4b899b836810e513064134e18"},
|
||||||
"html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm", "3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"},
|
"html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm", "3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"},
|
||||||
|
@ -43,6 +44,7 @@
|
||||||
"ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm", "451d8527787df716d99dc36162fca05934915db0b6141bbdac2ea8d3c7afc7d7"},
|
"ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm", "451d8527787df716d99dc36162fca05934915db0b6141bbdac2ea8d3c7afc7d7"},
|
||||||
"readability": {:git, "https://github.com/shadowfacts/readability.git", "71fa17caaf8103ef213e2c7dde4b447a48669122", [branch: "master"]},
|
"readability": {:git, "https://github.com/shadowfacts/readability.git", "71fa17caaf8103ef213e2c7dde4b447a48669122", [branch: "master"]},
|
||||||
"saxy": {:hex, :saxy, "0.6.0", "cdb2f2fcd8133d1f3f8b0cf6a131ee1ca348dca613de266e9a239db850c4a093", [:mix], [], "hexpm"},
|
"saxy": {:hex, :saxy, "0.6.0", "cdb2f2fcd8133d1f3f8b0cf6a131ee1ca348dca613de266e9a239db850c4a093", [:mix], [], "hexpm"},
|
||||||
|
"socket": {:hex, :socket, "0.3.13", "98a2ab20ce17f95fb512c5cadddba32b57273e0d2dba2d2e5f976c5969d0c632", [:mix], [], "hexpm", "f82ea9833ef49dde272e6568ab8aac657a636acb4cf44a7de8a935acb8957c2e"},
|
||||||
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"},
|
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"},
|
||||||
"telemetry": {:hex, :telemetry, "0.4.1", "ae2718484892448a24470e6aa341bc847c3277bfb8d4e9289f7474d752c09c7f", [:rebar3], [], "hexpm", "4738382e36a0a9a2b6e25d67c960e40e1a2c95560b9f936d8e29de8cd858480f"},
|
"telemetry": {:hex, :telemetry, "0.4.1", "ae2718484892448a24470e6aa341bc847c3277bfb8d4e9289f7474d752c09c7f", [:rebar3], [], "hexpm", "4738382e36a0a9a2b6e25d67c960e40e1a2c95560b9f936d8e29de8cd858480f"},
|
||||||
"timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "f354efb2400dd7a80fd9eb6c8419068c4f632da4ac47f3d8822d6e33f08bc852"},
|
"timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "f354efb2400dd7a80fd9eb6c8419068c4f632da4ac47f3d8822d6e33f08bc852"},
|
||||||
|
|
Loading…
Reference in New Issue