Add Webmention endpoint discovery

This commit is contained in:
Shadowfacts 2020-05-20 22:10:50 -04:00
parent f93afdbe5f
commit 1e984ba30e
Signed by: shadowfacts
GPG Key ID: 94A5AB95422746E5
5 changed files with 127 additions and 10 deletions

View File

@ -40,6 +40,8 @@ config :clacks, Oban,
prune: {:maxlen, 10_000}, prune: {:maxlen, 10_000},
queues: [federate: 10] queues: [federate: 10]
config :floki, :html_parser, Floki.HTMLParser.FastHtml
# Import environment specific config. This must remain at the bottom # Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above. # of this file so it overrides the configuration defined above.
import_config "#{Mix.env()}.exs" import_config "#{Mix.env()}.exs"

View File

@ -27,15 +27,7 @@ defmodule Clacks.HTTP do
|> Enum.find(fn {name, _value} -> String.downcase(name) == "location" end) |> Enum.find(fn {name, _value} -> String.downcase(name) == "location" end)
|> case do |> case do
{_, new_url} -> {_, new_url} ->
new_url = new_url = URI.merge(URI.parse(url), URI.parse(new_url))
case URI.parse(new_url) do
%URI{host: nil, path: path} ->
# relative path
%URI{URI.parse(url) | path: path} |> URI.to_string()
uri ->
uri
end
Logger.debug("Got 301 redirect from #{url} to #{new_url}") Logger.debug("Got 301 redirect from #{url} to #{new_url}")
fetch(method, new_url, headers) fetch(method, new_url, headers)

View File

@ -0,0 +1,119 @@
defmodule Clacks.Webmention.Endpoint do
  @moduledoc """
  Discovers the Webmention endpoint advertised by a URL.

  Discovery first issues a HEAD request and inspects the `Link` response
  headers for a link whose `rel` contains `webmention`. If none is found, it
  issues a GET request and looks for a `<link>` or `<a>` element whose `rel`
  contains `webmention`. Endpoint references are resolved against the request
  URL recorded in the HTTP response.
  """

  require Logger

  @doc """
  Finds the Webmention endpoint for `url`.

  Returns the endpoint as a `URI` struct, `nil` when the resource does not
  advertise an endpoint, or `:error` when a request (or parsing the HTML body)
  failed. Failures are logged as warnings.
  """
  @spec find_endpoint(url :: String.t()) :: URI.t() | nil | :error
  def find_endpoint(url) do
    case find_endpoint_by_header(url) do
      # Only fall back to the HTML body when the headers had no endpoint;
      # a failed HEAD request (:error) is returned as-is.
      nil -> find_endpoint_by_html(url)
      result -> result
    end
  end

  # Looks for the endpoint in the `Link` headers of a HEAD response.
  defp find_endpoint_by_header(url) do
    case Clacks.HTTP.head(url) do
      {:ok, %HTTPoison.Response{headers: headers, request: %HTTPoison.Request{url: final_url}}} ->
        headers
        |> Enum.filter(fn {name, _} -> String.downcase(name) == "link" end)
        |> webmention_link()
        |> resolve(final_url)

      {:error, reason} ->
        log_failure(url, reason)
        :error
    end
  end

  # Returns the target of the first link-value whose rel list contains
  # "webmention", searching the given `Link` headers in order, or nil.
  defp webmention_link([]), do: nil

  defp webmention_link([{_, value} | rest]) do
    # Split only on commas that are followed by the "<" opening the next
    # link-value, so commas inside a target URI do not break it apart.
    value
    |> String.split(~r/,(?=\s*<)/)
    |> Enum.map(&parse_link_header/1)
    |> Enum.find(fn {rels, _} -> "webmention" in rels end)
    |> case do
      nil -> webmention_link(rest)
      {_, reference} -> reference
    end
  end

  # Parses one link-value (`<uri-reference>; param=value; ...`) into
  # `{rels, uri_reference}`. Values that do not start with a bracketed URI
  # reference yield `{[], nil}` so they can never match.
  defp parse_link_header(value) do
    case value |> String.trim_leading() |> String.split(">", parts: 2) do
      ["<" <> uri_reference, params] -> {rel_values(params), uri_reference}
      _ -> {[], nil}
    end
  end

  # Extracts the lowercased relation types of the first `rel` parameter, or
  # an empty list when there is no `rel` parameter.
  defp rel_values(params) do
    params
    |> String.split(";")
    |> Enum.find_value([], fn param ->
      case String.split(param, "=", parts: 2) do
        [name, value] ->
          if String.downcase(String.trim(name)) == "rel" do
            value
            |> String.trim()
            |> strip_quotes()
            |> String.downcase()
            |> String.split()
          end

        _ ->
          nil
      end
    end)
  end

  # Removes the leading double quote and, if present, one trailing double quote.
  defp strip_quotes("\"" <> rest), do: String.replace_suffix(rest, "\"", "")
  defp strip_quotes(value), do: value

  # Looks for the endpoint in `<link>`/`<a>` elements of the response body.
  defp find_endpoint_by_html(url) do
    with {:ok, %HTTPoison.Response{body: body, request: %HTTPoison.Request{url: final_url}}} <-
           Clacks.HTTP.get(url),
         {:ok, doc} <- Floki.parse_document(body) do
      doc
      |> Floki.find("link[rel~=webmention], a[rel~=webmention]")
      |> Enum.find_value(&single_href/1)
      |> resolve(final_url)
    else
      {:error, reason} ->
        log_failure(url, reason)
        :error
    end
  end

  # Returns the element's href when it has exactly one, otherwise nil. An
  # empty href is kept: "" is truthy in Elixir and is a valid reference.
  defp single_href(element) do
    case Floki.attribute(element, "href") do
      [href] when is_binary(href) -> href
      _ -> nil
    end
  end

  # Resolves a discovered reference against the request URL.
  defp resolve(nil, _base), do: nil
  defp resolve(reference, base) when is_binary(reference), do: URI.merge(base, reference)

  defp log_failure(url, reason) do
    Logger.warn("Unable to find Webmention endpoint for '#{url}': #{format_reason(reason)}")
  end

  # Interpolating an arbitrary term raises when it does not implement
  # String.Chars (e.g. an error struct or tuple), so inspect non-strings.
  defp format_reason(reason) when is_binary(reason), do: reason
  defp format_reason(reason), do: inspect(reason)
end

View File

@ -52,7 +52,9 @@ defmodule Clacks.MixProject do
{:bcrypt_elixir, "~> 2.0"}, {:bcrypt_elixir, "~> 2.0"},
{:oban, "~> 1.2.0"}, {:oban, "~> 1.2.0"},
{:fast_sanitize, "~> 0.1.7"}, {:fast_sanitize, "~> 0.1.7"},
{:dialyxir, "~> 1.0", only: [:dev], runtime: false} {:fast_html, "~> 1.0.3"},
{:dialyxir, "~> 1.0", only: [:dev], runtime: false},
{:floki, "~> 0.26.0"}
] ]
end end

View File

@ -19,8 +19,10 @@
"fast_sanitize": {:hex, :fast_sanitize, "0.1.7", "2a7cd8734c88a2de6de55022104f8a3b87f1fdbe8bbf131d9049764b53d50d0d", [:mix], [{:fast_html, "~> 1.0", [hex: :fast_html, repo: "hexpm", optional: false]}, {:plug, "~> 1.8", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "f39fe8ea08fbac17487c30bf09b7d9f3e12472e51fb07a88ffeb8fd17da8ab67"}, "fast_sanitize": {:hex, :fast_sanitize, "0.1.7", "2a7cd8734c88a2de6de55022104f8a3b87f1fdbe8bbf131d9049764b53d50d0d", [:mix], [{:fast_html, "~> 1.0", [hex: :fast_html, repo: "hexpm", optional: false]}, {:plug, "~> 1.8", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "f39fe8ea08fbac17487c30bf09b7d9f3e12472e51fb07a88ffeb8fd17da8ab67"},
"file_system": {:hex, :file_system, "0.2.7", "e6f7f155970975789f26e77b8b8d8ab084c59844d8ecfaf58cbda31c494d14aa", [:mix], [], "hexpm", "b4cfa2d69c7f0b18fd06db222b2398abeef743a72504e6bd7df9c52f171b047f"}, "file_system": {:hex, :file_system, "0.2.7", "e6f7f155970975789f26e77b8b8d8ab084c59844d8ecfaf58cbda31c494d14aa", [:mix], [], "hexpm", "b4cfa2d69c7f0b18fd06db222b2398abeef743a72504e6bd7df9c52f171b047f"},
"flake_id": {:hex, :flake_id, "0.1.0", "7716b086d2e405d09b647121a166498a0d93d1a623bead243e1f74216079ccb3", [:mix], [{:base62, "~> 1.2", [hex: :base62, repo: "hexpm", optional: false]}, {:ecto, ">= 2.0.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm", "31fc8090fde1acd267c07c36ea7365b8604055f897d3a53dd967658c691bd827"}, "flake_id": {:hex, :flake_id, "0.1.0", "7716b086d2e405d09b647121a166498a0d93d1a623bead243e1f74216079ccb3", [:mix], [{:base62, "~> 1.2", [hex: :base62, repo: "hexpm", optional: false]}, {:ecto, ">= 2.0.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm", "31fc8090fde1acd267c07c36ea7365b8604055f897d3a53dd967658c691bd827"},
"floki": {:hex, :floki, "0.26.0", "4df88977e2e357c6720e1b650f613444bfb48c5acfc6a0c646ab007d08ad13bf", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "e7b66ce7feef5518a9cd9fc7b52dd62a64028bd9cb6d6ad282a0f0fc90a4ae52"},
"gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm", "e0b8598e802676c81e66b061a2148c37c03886b24a3ca86a1f98ed40693b94b3"}, "gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm", "e0b8598e802676c81e66b061a2148c37c03886b24a3ca86a1f98ed40693b94b3"},
"hackney": {:hex, :hackney, "1.15.2", "07e33c794f8f8964ee86cebec1a8ed88db5070e52e904b8f12209773c1036085", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.5", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "e0100f8ef7d1124222c11ad362c857d3df7cb5f4204054f9f0f4a728666591fc"}, "hackney": {:hex, :hackney, "1.15.2", "07e33c794f8f8964ee86cebec1a8ed88db5070e52e904b8f12209773c1036085", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.5", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "e0100f8ef7d1124222c11ad362c857d3df7cb5f4204054f9f0f4a728666591fc"},
"html_entities": {:hex, :html_entities, "0.5.1", "1c9715058b42c35a2ab65edc5b36d0ea66dd083767bef6e3edb57870ef556549", [:mix], [], "hexpm", "30efab070904eb897ff05cd52fa61c1025d7f8ef3a9ca250bc4e6513d16c32de"},
"http_signatures": {:git, "https://git.pleroma.social/pleroma/http_signatures.git", "293d77bb6f4a67ac8bde1428735c3b42f22cbb30", [ref: "293d77bb6f4a67ac8bde1428735c3b42f22cbb30"]}, "http_signatures": {:git, "https://git.pleroma.social/pleroma/http_signatures.git", "293d77bb6f4a67ac8bde1428735c3b42f22cbb30", [ref: "293d77bb6f4a67ac8bde1428735c3b42f22cbb30"]},
"httpoison": {:hex, :httpoison, "1.5.1", "0f55b5b673b03c5c327dac7015a67cb571b99b631acc0bc1b0b98dcd6b9f2104", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "191a3b6329c917de4e7ca68431919a59bf19e60694b313a69bc1f56a4cb160bf"}, "httpoison": {:hex, :httpoison, "1.5.1", "0f55b5b673b03c5c327dac7015a67cb571b99b631acc0bc1b0b98dcd6b9f2104", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "191a3b6329c917de4e7ca68431919a59bf19e60694b313a69bc1f56a4cb160bf"},
"idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "4bdd305eb64e18b0273864920695cb18d7a2021f31a11b9c5fbcd9a253f936e2"}, "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "4bdd305eb64e18b0273864920695cb18d7a2021f31a11b9c5fbcd9a253f936e2"},