Add whatever.scalzi.com extractor
This commit is contained in:
parent
3192969889
commit
6f568a03e1
@ -1,4 +1,8 @@
|
||||
defmodule Frenzy.Pipeline.Extractor.DaringFireball do
|
||||
@moduledoc """
|
||||
Extractor for https://daringfireball.net
|
||||
"""
|
||||
|
||||
alias Frenzy.Pipeline.Extractor
|
||||
@behaviour Extractor
|
||||
|
||||
|
32
lib/frenzy/pipeline/extractor/whatever_scalzi.ex
Normal file
32
lib/frenzy/pipeline/extractor/whatever_scalzi.ex
Normal file
@ -0,0 +1,32 @@
|
||||
defmodule Frenzy.Pipeline.Extractor.WhateverScale do
  @moduledoc """
  Extractor for https://whatever.scalzi.com

  Implements the `Frenzy.Pipeline.Extractor` behaviour. `extract/1` returns the
  raw HTML of the first `div.entry-content` that is a direct child of
  `article.post`, with the `div#jp-post-flair` element removed.
  """

  # NOTE(review): the module name ends in `WhateverScale` while the site is
  # whatever.scalzi.com — possibly a typo for `WhateverScalzi`; confirm before
  # renaming, since callers reference the module by this name.

  alias Frenzy.Pipeline.Extractor

  @behaviour Extractor

  @doc """
  Extracts the article content from the HTML `body` of a post page.

  Returns `{:ok, html}` when a matching content element is found, or
  `{:error, "no matching elements"}` otherwise.
  """
  @impl Extractor
  def extract(body) do
    body
    |> Floki.parse()
    |> find_entry_content()
    |> to_result()
  end

  # Looks up the post's content element in the parsed tree; yields nil when
  # the selector matches nothing.
  defp find_entry_content(document) do
    document
    |> Floki.find("article.post > div.entry-content")
    |> strip_post_flair()
  end

  # Only the first match is used. The social media buttons live inside the
  # .entry-content element, so they are filtered out of it here.
  defp strip_post_flair([entry | _]), do: Floki.filter_out(entry, "div#jp-post-flair")
  defp strip_post_flair(_), do: nil

  # Wraps the lookup outcome in the tagged tuple returned by extract/1.
  defp to_result(nil), do: {:error, "no matching elements"}
  defp to_result(content), do: {:ok, Floki.raw_html(content)}
end
|
Loading…
x
Reference in New Issue
Block a user