Compare commits

...

2 Commits

Author SHA1 Message Date
Shadowfacts 9264c9a97d
Add extractor for om.co 2019-11-01 18:27:15 -04:00
Shadowfacts 5d38d9567e
Fix error while validating scrape stage options 2019-11-01 18:27:08 -04:00
2 changed files with 35 additions and 6 deletions

View File

@ -0,0 +1,23 @@
defmodule Frenzy.Pipeline.Extractor.OmMalik do
  @moduledoc """
  Extractor for https://om.co

  Takes the first `.entry-content` element found in the parsed page and
  returns it with any `.rpbt_shortcode` elements (the related posts list)
  removed.
  """

  alias Frenzy.Pipeline.Extractor

  @behaviour Extractor

  @impl Extractor
  def extract(html_tree) do
    html_tree
    |> Floki.find(".entry-content")
    |> strip_related_posts()
  end

  # Only the first matching element is used; any further matches are ignored.
  defp strip_related_posts([article | _rest]) do
    {:ok, Floki.filter_out(article, ".rpbt_shortcode")}
  end

  # Nothing matched the `.entry-content` selector.
  defp strip_related_posts(_no_match) do
    {:error, "no matching elements"}
  end
end

View File

@ -39,6 +39,8 @@ defmodule Frenzy.Pipeline.ScrapeStage do
end end
end end
case opts do
{:ok, opts} ->
case opts["convert_to_data_uris"] do case opts["convert_to_data_uris"] do
nil -> nil ->
{:ok, %{opts | convert_to_data_uris: true}} {:ok, %{opts | convert_to_data_uris: true}}
@ -49,6 +51,10 @@ defmodule Frenzy.Pipeline.ScrapeStage do
_ -> _ ->
{:error, "convert_to_data_uris must be a boolean"} {:error, "convert_to_data_uris must be a boolean"}
end end
_ ->
opts
end
end end
@impl Stage @impl Stage