Add Election Law Blog (ELB) extractor

This commit is contained in:
Shadowfacts 2023-06-25 14:06:18 -07:00
parent 53cbe0a7e9
commit 6dd4f3ca82
2 changed files with 21 additions and 0 deletions

View File

@ -0,0 +1,20 @@
defmodule Frenzy.Pipeline.Extractor.ElectionLawBlog do
  @moduledoc """
  Extractor for pages from https://electionlawblog.org.

  The article body is taken from the first `div.entry-content` element found
  in the parsed document, after stripping any descendants that match
  `.addtoany_share_save_container`.
  """

  alias Frenzy.Pipeline.Extractor

  @behaviour Extractor

  @doc """
  Extracts the post content from a parsed HTML tree.

  Returns `{:ok, content}` where `content` is the first `div.entry-content`
  element with every `.addtoany_share_save_container` node filtered out, or
  `{:error, "no matching elements"}` when no such element is found.
  """
  @impl Extractor
  def extract(html_tree) do
    html_tree
    |> Floki.find("div.entry-content")
    |> strip_first_match()
  end

  # Only the first match is used; any further matches are ignored.
  # The filtered-out class is presumably the AddToAny share-button wrapper
  # (inferred from the class name).
  defp strip_first_match([entry | _others]) do
    {:ok, Floki.filter_out(entry, ".addtoany_share_save_container")}
  end

  # Anything other than a non-empty list means nothing was found.
  defp strip_first_match(_no_match) do
    {:error, "no matching elements"}
  end
end

View File

@ -8,6 +8,7 @@ defmodule FrenzyWeb.ConfigureStage.ScrapeStageLive do
{"beckyhansmeyer.com", Frenzy.Pipeline.Extractor.BeckyHansmeyer},
{"birchtree.me", Frenzy.Pipeline.Extractor.Birchtree},
{"daringfireball.net", Frenzy.Pipeline.Extractor.DaringFireball},
{"Election Law Blog", Frenzy.Pipeline.Extractor.ElectionLawBlog},
{"ericasadun.com", Frenzy.Pipeline.Extractor.EricaSadun},
{"finertech.com", Frenzy.Pipeline.Extractor.FinerTech},
{"macstories.net", Frenzy.Pipeline.Extractor.MacStories},