# Provenance: introduced by commit d476839fce8472385c191c416ed250f5a1f9cb82
# (Shadowfacts, Thu, 31 Oct 2019 17:03:34 -0400), "Add extractor for ericasadun.com",
# as lib/frenzy/pipeline/extractor/erica_sadun.ex.
defmodule Frenzy.Pipeline.Extractor.EricaSadun do
  @moduledoc """
  Content extractor for posts published on https://ericasadun.com
  """

  alias Frenzy.Pipeline.Extractor
  @behaviour Extractor

  # The post container also holds social media buttons and a related-posts
  # widget; these selectors identify the nodes stripped from the output.
  @unwanted_selector "div.sharedaddy, div#jp-relatedposts"

  # Parses `body` as HTML and looks up the `.post-content` elements.
  #
  # Returns `{:ok, html}` with the raw HTML of the first match (minus the
  # unwanted widgets), or `{:error, "no matching elements"}` when nothing matches.
  @impl Extractor
  def extract(body) do
    body
    |> Floki.parse()
    |> Floki.find(".post-content")
    |> render_first_match()
  end

  # Only the first matching element is used; any further matches are ignored.
  defp render_first_match([post_elem | _]) do
    cleaned = Floki.filter_out(post_elem, @unwanted_selector)
    {:ok, Floki.raw_html(cleaned)}
  end

  # Anything that is not a non-empty list of matches counts as "nothing found".
  defp render_first_match(_), do: {:error, "no matching elements"}
end