From 6dd4f3ca82556baebadd77812f16d787ae779f93 Mon Sep 17 00:00:00 2001
From: Shadowfacts
Date: Sun, 25 Jun 2023 14:06:18 -0700
Subject: [PATCH] Add ELB extractor

---
Notes (ignored by `git am`; everything between the `---` line and the
diffstat is free-form commentary):

* Adds Frenzy.Pipeline.Extractor.ElectionLawBlog, an implementation of the
  Frenzy.Pipeline.Extractor behaviour. extract/1 takes the first
  `div.entry-content` match from the parsed HTML tree, filters out
  `.addtoany_share_save_container`, and returns `{:ok, filtered}`. When
  nothing matches it returns `{:error, "no matching elements"}`.
* Adds a `{label, module}` tuple for the new extractor to the list in
  FrenzyWeb.ConfigureStage.ScrapeStageLive, between the daringfireball.net
  and ericasadun.com entries.
* NOTE(review): the neighbouring labels are domain names, while the new
  label is the site name "Election Law Blog" -- confirm this is intended.
* NOTE(review): line breaks and indentation in this patch were restored
  from a copy that had been collapsed onto one line; verify whitespace in
  the context lines against the target tree before applying.

 .../pipeline/extractor/election_law_blog.ex   | 20 +++++++++++++++++++
 .../live/configure_stage/scrape_stage_live.ex |  1 +
 2 files changed, 21 insertions(+)
 create mode 100644 lib/frenzy/pipeline/extractor/election_law_blog.ex

diff --git a/lib/frenzy/pipeline/extractor/election_law_blog.ex b/lib/frenzy/pipeline/extractor/election_law_blog.ex
new file mode 100644
index 0000000..43829e0
--- /dev/null
+++ b/lib/frenzy/pipeline/extractor/election_law_blog.ex
@@ -0,0 +1,20 @@
+defmodule Frenzy.Pipeline.Extractor.ElectionLawBlog do
+  @moduledoc """
+  Extractor for https://electionlawblog.org
+  """
+
+  alias Frenzy.Pipeline.Extractor
+  @behaviour Extractor
+
+  @impl Extractor
+  def extract(html_tree) do
+    case Floki.find(html_tree, "div.entry-content") do
+      [content_elem | _] ->
+        filtered = Floki.filter_out(content_elem, ".addtoany_share_save_container")
+        {:ok, filtered}
+
+      _ ->
+        {:error, "no matching elements"}
+    end
+  end
+end
diff --git a/lib/frenzy_web/live/configure_stage/scrape_stage_live.ex b/lib/frenzy_web/live/configure_stage/scrape_stage_live.ex
index 4245928..6b54429 100644
--- a/lib/frenzy_web/live/configure_stage/scrape_stage_live.ex
+++ b/lib/frenzy_web/live/configure_stage/scrape_stage_live.ex
@@ -8,6 +8,7 @@ defmodule FrenzyWeb.ConfigureStage.ScrapeStageLive do
     {"beckyhansmeyer.com", Frenzy.Pipeline.Extractor.BeckyHansmeyer},
     {"birchtree.me", Frenzy.Pipeline.Extractor.Birchtree},
     {"daringfireball.net", Frenzy.Pipeline.Extractor.DaringFireball},
+    {"Election Law Blog", Frenzy.Pipeline.Extractor.ElectionLawBlog},
     {"ericasadun.com", Frenzy.Pipeline.Extractor.EricaSadun},
     {"finertech.com", Frenzy.Pipeline.Extractor.FinerTech},
     {"macstories.net", Frenzy.Pipeline.Extractor.MacStories},