diff --git a/lib/frenzy/pipeline/extractor/slate.ex b/lib/frenzy/pipeline/extractor/slate.ex index ac22176..e3e18e9 100644 --- a/lib/frenzy/pipeline/extractor/slate.ex +++ b/lib/frenzy/pipeline/extractor/slate.ex @@ -21,7 +21,10 @@ defmodule Frenzy.Pipeline.Extractor.Slate do case Floki.find(html_tree, ".article__content") do [el] -> article_content = - Floki.filter_out(el, ".slate-ad, .in-article-recirc, .social-share, .newsletter-signup") + Floki.filter_out( + el, + ".slate-ad, .in-article-recirc, .social-share, .newsletter-signup, .recirc-line, .product" + ) image = Floki.find(html_tree, ".article__top-image img") diff --git a/lib/frenzy/pipeline/extractor/verge.ex b/lib/frenzy/pipeline/extractor/verge.ex index 32c152e..ef29053 100644 --- a/lib/frenzy/pipeline/extractor/verge.ex +++ b/lib/frenzy/pipeline/extractor/verge.ex @@ -16,7 +16,7 @@ defmodule Frenzy.Pipeline.Extractor.TheVerge do html_tree |> Floki.find("article#content > div:not(.duet--article--lede)") |> Floki.filter_out( - ".duet--layout--rail, .duet--article--article-pullquote, .duet--article--comments-join-the-conversation" + ".duet--layout--rail, .duet--article--article-pullquote, .duet--article--comments-join-the-conversation, .duet--recirculation--related-list, .duet--article--comments-button" ) {:ok, image ++ content}