Add extractor for ericasadun.com

This commit is contained in:
Shadowfacts 2019-10-31 17:03:34 -04:00
parent 6f568a03e1
commit d476839fce
Signed by: shadowfacts
GPG Key ID: 94A5AB95422746E5
1 changed file with 27 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
defmodule Frenzy.Pipeline.Extractor.EricaSadun do
  @moduledoc """
  Content extractor for posts published on https://ericasadun.com.

  Implements the `Frenzy.Pipeline.Extractor` behaviour.
  """

  alias Frenzy.Pipeline.Extractor

  @behaviour Extractor

  @doc """
  Extracts the post content from the HTML in `body`.

  The document is parsed with Floki and the first element matching
  `.post-content` is rendered back to an HTML string, with the
  `div.sharedaddy` and `div#jp-relatedposts` elements removed.

  Returns `{:ok, html}` when a matching element is found, otherwise
  `{:error, "no matching elements"}`.
  """
  @impl Extractor
  def extract(body) do
    body
    |> Floki.parse()
    |> Floki.find(".post-content")
    |> render_first_match()
  end

  # Renders the first matched element. The post container also wraps the
  # social media buttons and the related-posts widget, so both are filtered
  # out before serializing.
  defp render_first_match([post | _rest]) do
    html =
      post
      |> Floki.filter_out("div.sharedaddy, div#jp-relatedposts")
      |> Floki.raw_html()

    {:ok, html}
  end

  # Anything other than a non-empty list of matches means nothing was found.
  defp render_first_match(_no_match), do: {:error, "no matching elements"}
end