# Source listing metadata: 41 lines, 876 B, Elixir
defmodule Frenzy.Pipeline.Extractor.DaringFireball do
  @moduledoc """
  Extractor for https://daringfireball.net

  Two selectors are tried, in order:

    1. `div.article` - the first match is used, after removing any nodes
       matching `h1`, `.dateline` or `#PreviousNext` from it.
    2. `dl.linkedlist dd` - the first match is used unchanged.
  """

  alias Frenzy.Pipeline.Extractor

  @behaviour Extractor

  @doc """
  Extracts the main content element from a parsed page.

  Returns `{:ok, element}` for the first selector that matches (see the
  module documentation for the order), or `{:error, "no matching elements"}`
  when neither selector matches anything in `html_tree`.
  """
  @impl Extractor
  def extract(html_tree) do
    # `||` falls through to the linked-list lookup only when the article
    # lookup returned nil.
    case get_article_element(html_tree) || get_link_element(html_tree) do
      nil ->
        {:error, "no matching elements"}

      element ->
        {:ok, element}
    end
  end

  # Returns the first `div.article` element with the extra nodes stripped,
  # or nil when the page has no such element.
  defp get_article_element(html_tree) do
    case Floki.find(html_tree, "div.article") do
      [article | _] ->
        # articles include extra information in the div.article element
        Floki.filter_out(article, "h1, .dateline, #PreviousNext")

      _ ->
        nil
    end
  end

  # Returns the first `dd` inside `dl.linkedlist`, or nil when there is none.
  defp get_link_element(html_tree) do
    case Floki.find(html_tree, "dl.linkedlist dd") do
      [dd | _] ->
        dd

      _ ->
        nil
    end
  end
end
|