Filter more things out of Slate and The Verge
This commit is contained in:
parent
6dd4f3ca82
commit
1f94e9080d
|
@@ -21,7 +21,10 @@ defmodule Frenzy.Pipeline.Extractor.Slate do
|
|||
case Floki.find(html_tree, ".article__content") do
|
||||
[el] ->
|
||||
article_content =
|
||||
Floki.filter_out(el, ".slate-ad, .in-article-recirc, .social-share, .newsletter-signup")
|
||||
Floki.filter_out(
|
||||
el,
|
||||
".slate-ad, .in-article-recirc, .social-share, .newsletter-signup, .recirc-line, .product"
|
||||
)
|
||||
|
||||
image = Floki.find(html_tree, ".article__top-image img")
|
||||
|
||||
|
|
|
@@ -16,7 +16,7 @@ defmodule Frenzy.Pipeline.Extractor.TheVerge do
|
|||
html_tree
|
||||
|> Floki.find("article#content > div:not(.duet--article--lede)")
|
||||
|> Floki.filter_out(
|
||||
".duet--layout--rail, .duet--article--article-pullquote, .duet--article--comments-join-the-conversation"
|
||||
".duet--layout--rail, .duet--article--article-pullquote, .duet--article--comments-join-the-conversation, .duet--recirculation--related-list, .duet--article--comments-button"
|
||||
)
|
||||
|
||||
{:ok, image ++ content}
|
||||
|
|
Loading…
Reference in New Issue