Generalize WP lazy-loading stripper

This commit is contained in:
Shadowfacts 2021-03-31 20:19:01 -04:00
parent 37dccdd4db
commit 5c8baa2057
Signed by: shadowfacts
GPG Key ID: 94A5AB95422746E5
3 changed files with 30 additions and 21 deletions

View File

@ -19,5 +19,6 @@ defmodule Frenzy.Pipeline.Extractor.OmMalik do
_ ->
{:error, "no matching elements"}
end
|> Extractor.Util.strip_wp_lazy_loading()
end
end

View File

@ -0,0 +1,28 @@
defmodule Frenzy.Pipeline.Extractor.Util do
  @moduledoc """
  Helpers shared between extractors.
  """

  @doc """
  Removes the `srcset` attribute from Jetpack lazy-loaded images in `tree`.

  WordPress Jetpack places a 1x1 pixel transparent gif in the `srcset` of an image, which
  overrides the `src` attribute and keeps browsers from loading the real image. Every `img`
  whose `class` attribute contains `jetpack-lazy-image` has its `srcset` attributes dropped so
  the image actually loads. All other nodes are returned untouched.
  """
  @spec strip_wp_lazy_loading(Floki.html_tree()) :: Floki.html_tree()
  def strip_wp_lazy_loading(tree) do
    Floki.map(tree, &strip_lazy_srcset/1)
  end

  # Rewrites a single `{tag, attributes}` pair as handed over by `Floki.map/2`.
  defp strip_lazy_srcset({"img", attrs} = node) do
    if jetpack_lazy?(attrs) do
      {"img", Enum.reject(attrs, &match?({"srcset", _}, &1))}
    else
      node
    end
  end

  defp strip_lazy_srcset(node), do: node

  # Only the first `class` attribute is inspected; an image without one is never lazy.
  defp jetpack_lazy?(attrs) do
    case Enum.find(attrs, fn {key, _value} -> key == "class" end) do
      nil -> false
      class_attr -> String.contains?(elem(class_attr, 1), "jetpack-lazy-image")
    end
  end
end

View File

@ -43,27 +43,7 @@ defmodule Frenzy.Pipeline.Extractor.WhateverScalzi do
true
end)
article_content
|> Floki.map(fn
{"img", attrs} = el ->
class = Enum.find(attrs, fn {k, _} -> k == "class" end)
class = if is_nil(class), do: nil, else: elem(class, 1)
if !is_nil(class) && String.contains?(class, "jetpack-lazy-image") do
{
"img",
Enum.filter(attrs, fn
{"srcset", _} -> false
_ -> true
end)
}
else
el
end
el ->
el
end)
Extractor.Util.strip_wp_lazy_loading(article_content)
_ ->
nil