defmodule Frenzy.Pipeline.Extractor.MacStories do
  @moduledoc """
  Extractor for https://macstories.net
  """

  alias Frenzy.Pipeline.Extractor
  @behaviour Extractor

  @doc """
  Extracts the article body from a parsed MacStories page.

  Takes the first element matching `.post-content`, removes the
  `a.view-full-size` links and rewrites the site's custom image/caption
  markup. Returns `{:ok, element}` on success, or
  `{:error, "no matching elements"}` when nothing matches `.post-content`.
  """
  @impl Extractor
  def extract(html_tree) do
    html_tree
    |> Floki.find(".post-content")
    |> build_result()
  end

  # Only the first match is used; any further `.post-content` matches are ignored.
  defp build_result([post | _]) do
    # some images are wrapped in links to the full-size version, drop those links
    without_links = Floki.filter_out(post, "a.view-full-size")

    # rewrite the non-standard image caption markup to <figure>/<figcaption>
    # NOTE(review): newer Floki releases may deprecate Floki.map/2 -- confirm
    # against the version pinned by this project before upgrading.
    {:ok, Floki.map(without_links, &to_standard_markup/1)}
  end

  defp build_result(_), do: {:error, "no matching elements"}

  # Receives a `{tag, attributes}` pair and returns its replacement. The two
  # site-specific wrappers get a standard tag name and an empty attribute list;
  # every other pair is returned unchanged.
  # NOTE(review): the patterns match only when `class` is the sole attribute and
  # holds exactly that one value -- verify this is what the site emits.
  defp to_standard_markup(node) do
    case node do
      {"div", [{"class", "media-wrapper"}]} -> {"figure", []}
      {"p", [{"class", "image-caption"}]} -> {"figcaption", []}
      other -> other
    end
  end
end