defmodule Frenzy.Pipeline.Extractor.DaringFireball do
  @moduledoc """
  `Frenzy.Pipeline.Extractor` implementation that pulls the main content out
  of a page's HTML.

  Two page layouts are recognised, tried in this order:

    1. a `div.article` element, returned with its `h1`, `.dateline` and
       `#PreviousNext` descendants removed;
    2. the first `dd` element found under `dl.linkedlist`, returned as-is.
  """

  alias Frenzy.Pipeline.Extractor

  @behaviour Extractor

  # Parses `body` as HTML and returns `{:ok, html}` with the raw HTML of the
  # first layout that matches, or `{:error, "no matching elements"}` when
  # neither layout is present.
  @impl Extractor
  def extract(body) do
    document = Floki.parse(body)

    # The article layout takes precedence; the linked-list layout is only
    # consulted when no article element was found.
    content = article_content(document) || linked_item_content(document)

    to_result(content)
  end

  # Wraps the selected node (or its absence) in the extractor's result tuple.
  defp to_result(nil), do: {:error, "no matching elements"}
  defp to_result(node), do: {:ok, Floki.raw_html(node)}

  # Looks up `div.article` elements and cleans the first one, if any.
  defp article_content(document) do
    document
    |> Floki.find("div.article")
    |> strip_article_extras()
  end

  # The div.article element carries extra information besides the article
  # body, so those parts are filtered out of the first match.
  defp strip_article_extras([article | _]) do
    Floki.filter_out(article, "h1, .dateline, #PreviousNext")
  end

  defp strip_article_extras(_), do: nil

  # Looks up `dl.linkedlist dd` elements and keeps the first one, if any.
  defp linked_item_content(document) do
    document
    |> Floki.find("dl.linkedlist dd")
    |> first_match()
  end

  # Returns the head of a non-empty match list, `nil` for anything else.
  defp first_match([first | _]), do: first
  defp first_match(_), do: nil
end