# Source path: frenzy/lib/frenzy/pipeline/extractor/slate.ex
#
# File-viewer metadata from the original listing: 41 lines, 839 B, Elixir

defmodule Frenzy.Pipeline.Extractor.Slate do
  @moduledoc """
  Extractor for https://slate.com

  Looks up the article body (`.article__content`) in a parsed HTML tree,
  removes ad, recirculation, social-share and newsletter-signup elements
  from it, and puts the article's top image in front when one is present.
  """

  alias Frenzy.Pipeline.Extractor

  @behaviour Extractor

  # CSS selector for the article body; exactly one match is required.
  @content_selector ".article__content"

  # Elements inside the article body that are not part of the article itself.
  @clutter_selector ".slate-ad, .in-article-recirc, .social-share, .newsletter-signup"

  # The (optional) lead image is looked up in the whole document, not just
  # inside the article body.
  @top_image_selector ".article__top-image img"

  @doc """
  Extracts the article content from `html_tree`.

  Returns `{:ok, content}` when exactly one `.article__content` element is
  found. `content` is the result of filtering the clutter elements out of
  that element; if the document also contains a `.article__top-image img`
  match, `content` is instead the two-element list `[image, filtered_body]`
  using the first matching image.

  Returns `{:error, "no matching elements"}` when zero or more than one
  `.article__content` element is found.
  """
  @impl Extractor
  def extract(html_tree) do
    html_tree
    |> Floki.find(@content_selector)
    |> build_result(html_tree)
  end

  # Exactly one article body was matched: clean it up and attach the top image.
  defp build_result([body_node], html_tree) do
    cleaned_body = Floki.filter_out(body_node, @clutter_selector)
    top_images = Floki.find(html_tree, @top_image_selector)

    {:ok, with_top_image(top_images, cleaned_body)}
  end

  # Zero matches or an ambiguous (multiple) match is reported as an error.
  defp build_result(_matches, _html_tree), do: {:error, "no matching elements"}

  # No top image: the cleaned body is returned on its own.
  defp with_top_image([], cleaned_body), do: cleaned_body

  # Only the first matching image is used; it is placed before the body.
  defp with_top_image([first_image | _rest], cleaned_body), do: [first_image, cleaned_body]
end