frenzy/lib/frenzy/pipeline/extractor/daring_fireball.ex

41 lines
892 B
Elixir

defmodule Frenzy.Pipeline.Extractor.DaringFireball do
  @moduledoc """
  Extractor for https://daringfireball.net

  Two page layouts are tried, in this order:

    1. the first `div.article` element, with everything matching
       `h1, .dateline, #PreviousNext` filtered out of it;
    2. the children of the first `dd` matched by `dl.linkedlist dd`.

  The first layout that matches wins. When neither selector matches,
  extraction fails with an error tuple.
  """

  alias Frenzy.Pipeline.Extractor

  @behaviour Extractor

  @doc """
  Extracts the main content from `html_tree`.

  Returns `{:ok, content}` for the first layout that matched, or
  `{:error, "no matching elements"}` when neither layout is present.
  """
  @impl Extractor
  def extract(html_tree) do
    html_tree
    |> locate_content()
    |> to_result()
  end

  # `||` short-circuits, so the link lookup only runs when no article was found.
  defp locate_content(html_tree) do
    article_content(html_tree) || link_content(html_tree)
  end

  # Wraps the lookup outcome in the tagged tuple returned by `extract/1`.
  defp to_result(nil), do: {:error, "no matching elements"}
  defp to_result(content), do: {:ok, content}

  # Content of a full article page, or `nil` if there is no `div.article`.
  defp article_content(html_tree) do
    html_tree
    |> Floki.find("div.article")
    |> strip_article_extras()
  end

  # The div.article element carries extra information besides the article body
  # (title heading, dateline, previous/next navigation), so those are removed.
  defp strip_article_extras([article | _rest]) do
    Floki.filter_out(article, "h1, .dateline, #PreviousNext")
  end

  defp strip_article_extras(_no_match), do: nil

  # Content of a link entry, or `nil` if there is no `dl.linkedlist dd`.
  defp link_content(html_tree) do
    html_tree
    |> Floki.find("dl.linkedlist dd")
    |> first_node_children()
  end

  # Only the children of the first matched `dd` are kept; the `dd` wrapper
  # itself is dropped.
  defp first_node_children([{_tag, _attrs, children} | _rest]), do: children
  defp first_node_children(_no_match), do: nil
end