frenzy/lib/frenzy/builtin_extractor.ex

77 lines
2.2 KiB
Elixir

defmodule Frenzy.BuiltinExtractor do
  @moduledoc """
  GenServer that extracts the readable article content from a page's HTML.

  On startup the server decides once whether an external readability service
  should be used: the `:external_readability` setting of the `:frenzy`
  application must be truthy and a GET request to the `/status` path of
  `:external_readability_url` must answer with HTTP 200. The outcome is kept
  as the server state (a boolean) for the lifetime of the process.

  When the external service is enabled, `article/2` POSTs the HTML to the
  service's `/readability` path and parses the response body with
  `Floki.parse/1`. When the service is disabled, or a request to it fails,
  extraction falls back to `Readability.article/1`.
  """

  use GenServer
  alias Frenzy.Network
  require Logger

  @doc """
  Starts the extractor process.

  `opts` is the keyword list of options handed to `GenServer.start_link/3`.
  Because `article/2` always addresses the process registered under this
  module's name, `:name` defaults to `#{inspect(__MODULE__)}` when the caller
  does not supply one; an explicitly given `:name` is left untouched.
  """
  def start_link(opts) do
    GenServer.start_link(__MODULE__, :ok, Keyword.put_new(opts, :name, __MODULE__))
  end

  @doc """
  Extracts the article content from `html`, which was fetched from `url`.

  This is a synchronous call to the process registered as
  `#{inspect(__MODULE__)}` and uses the default `GenServer.call/2` timeout.
  """
  @spec article(String.t(), String.t()) :: Floki.html_tree()
  def article(url, html) do
    GenServer.call(__MODULE__, {:article, url, html})
  end

  @impl true
  def init(_state) do
    # The state is a boolean telling us whether to use the external readability.
    {:ok, external_available?()}
  end

  @impl true
  def handle_call({:article, url, html}, _from, true = state) do
    {:reply, extract_external(url, html), state}
  end

  def handle_call({:article, _url, html}, _from, state) do
    {:reply, Readability.article(html), state}
  end

  # Whether the external service is enabled in the config and passes its healthcheck.
  defp external_available? do
    if Application.get_env(:frenzy, :external_readability) do
      healthcheck(external_url())
    else
      false
    end
  end

  # Probes the service's /status endpoint; anything other than HTTP 200 disables it.
  defp healthcheck(base_url) when is_binary(base_url) do
    case Network.http_get(status_uri(base_url)) do
      {:ok, %Tesla.Env{status: 200}} ->
        true

      _ ->
        Logger.warn("Could not reach external readability for healthcheck, disabling")
        false
    end
  end

  # No usable URL configured: disable the service instead of crashing in URI.parse/1.
  defp healthcheck(_missing) do
    Logger.warn("External readability is enabled but no URL is configured, disabling")
    false
  end

  # Asks the external service for the article, falling back to the built-in
  # Readability extraction when the request fails or returns a non-200 status.
  defp extract_external(url, html) do
    uri = readability_uri(external_url(), url)
    Logger.debug("Sending external readability request: #{uri}")

    case Network.http_post(uri, html, headers: [{"content-type", "text/html"}]) do
      {:ok, %Tesla.Env{status: 200, body: body}} ->
        Floki.parse(body)

      {:ok, %Tesla.Env{status: status}} ->
        report_failure("External readability failed, got HTTP #{status}")
        Readability.article(html)

      {:error, reason} ->
        report_failure("External readability failed: #{inspect(reason)}")
        Readability.article(html)
    end
  end

  # Logs the failure and, when Sentry is enabled, forwards the same message to it.
  defp report_failure(message) do
    Logger.error(message)

    if Frenzy.sentry_enabled?() do
      Sentry.capture_message(message)
    end

    :ok
  end

  # Read at runtime (not into a module attribute) so that configuration supplied
  # after compilation, e.g. by a release's runtime config, is honoured.
  defp external_url do
    Application.get_env(:frenzy, :external_readability_url)
  end

  # URL of the service's healthcheck endpoint.
  defp status_uri(base_url) do
    uri = URI.parse(base_url)
    URI.to_string(%URI{uri | path: "/status"})
  end

  # URL of the extraction endpoint, carrying the page's own URL as the `url` query parameter.
  defp readability_uri(base_url, url) do
    uri = URI.parse(base_url)
    URI.to_string(%URI{uri | path: "/readability", query: URI.encode_query(url: url)})
  end
end