From 6f568a03e15d4631bb23e49813256f59ec4bfb22 Mon Sep 17 00:00:00 2001
From: Shadowfacts
Date: Thu, 31 Oct 2019 16:52:01 -0400
Subject: [PATCH] Add whatever.scalzi.com extractor

---
 .../pipeline/extractor/daring_fireball.ex     |  4 +++
 .../pipeline/extractor/whatever_scalzi.ex     | 32 +++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 lib/frenzy/pipeline/extractor/whatever_scalzi.ex

diff --git a/lib/frenzy/pipeline/extractor/daring_fireball.ex b/lib/frenzy/pipeline/extractor/daring_fireball.ex
index d668f6d..5daf6f5 100644
--- a/lib/frenzy/pipeline/extractor/daring_fireball.ex
+++ b/lib/frenzy/pipeline/extractor/daring_fireball.ex
@@ -1,4 +1,8 @@
 defmodule Frenzy.Pipeline.Extractor.DaringFireball do
+  @moduledoc """
+  Extractor for https://daringfireball.net
+  """
+
   alias Frenzy.Pipeline.Extractor
   @behaviour Extractor
 
diff --git a/lib/frenzy/pipeline/extractor/whatever_scalzi.ex b/lib/frenzy/pipeline/extractor/whatever_scalzi.ex
new file mode 100644
index 0000000..279124a
--- /dev/null
+++ b/lib/frenzy/pipeline/extractor/whatever_scalzi.ex
@@ -0,0 +1,32 @@
+defmodule Frenzy.Pipeline.Extractor.WhateverScalzi do
+  @moduledoc """
+  Extractor for https://whatever.scalzi.com
+  """
+
+  alias Frenzy.Pipeline.Extractor
+  @behaviour Extractor
+
+  @impl Extractor
+  def extract(body) do
+    html_tree = Floki.parse(body)
+
+    case get_article_content(html_tree) do
+      nil ->
+        {:error, "no matching elements"}
+
+      elem ->
+        {:ok, Floki.raw_html(elem)}
+    end
+  end
+
+  defp get_article_content(html_tree) do
+    case Floki.find(html_tree, "article.post > div.entry-content") do
+      [content_elem | _] ->
+        # remove social media buttons that are included in the .entry-content element
+        Floki.filter_out(content_elem, "div#jp-post-flair")
+
+      _ ->
+        nil
+    end
+  end
+end