Add whatever.scalzi.com extractor
This commit is contained in:
parent
3192969889
commit
6f568a03e1
@ -1,4 +1,8 @@
|
|||||||
defmodule Frenzy.Pipeline.Extractor.DaringFireball do
|
defmodule Frenzy.Pipeline.Extractor.DaringFireball do
|
||||||
|
@moduledoc """
|
||||||
|
Extractor for https://daringfireball.net
|
||||||
|
"""
|
||||||
|
|
||||||
alias Frenzy.Pipeline.Extractor
|
alias Frenzy.Pipeline.Extractor
|
||||||
@behaviour Extractor
|
@behaviour Extractor
|
||||||
|
|
||||||
|
32
lib/frenzy/pipeline/extractor/whatever_scalzi.ex
Normal file
32
lib/frenzy/pipeline/extractor/whatever_scalzi.ex
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
defmodule Frenzy.Pipeline.Extractor.WhateverScale do
  @moduledoc """
  Extractor for https://whatever.scalzi.com

  Parses the page body, selects the first `div.entry-content` that is a direct
  child of `article.post`, removes the `div#jp-post-flair` element from it, and
  returns the remaining markup as an HTML string.
  """

  # NOTE(review): the file is named whatever_scalzi.ex while this module is
  # named WhateverScale. Confirm which name is intended before renaming, since
  # other code may refer to the module by its current name.

  alias Frenzy.Pipeline.Extractor

  @behaviour Extractor

  @entry_selector "article.post > div.entry-content"
  @flair_selector "div#jp-post-flair"

  # Returns `{:ok, html}` with the cleaned entry markup, or
  # `{:error, "no matching elements"}` when the entry selector matches nothing.
  @impl Extractor
  def extract(body) do
    body
    |> Floki.parse()
    |> Floki.find(@entry_selector)
    |> strip_post_flair()
    |> to_result()
  end

  # Only the first matching entry is used. The social media buttons live
  # inside the .entry-content element, so they are filtered out here.
  defp strip_post_flair([entry | _rest]), do: Floki.filter_out(entry, @flair_selector)
  defp strip_post_flair(_no_match), do: nil

  # Converts the (possibly missing) content element into the tagged result.
  defp to_result(nil), do: {:error, "no matching elements"}
  defp to_result(content), do: {:ok, Floki.raw_html(content)}
end
|
Loading…
x
Reference in New Issue
Block a user