From 5c8baa20573abebf5acab0ecabc068773a064049 Mon Sep 17 00:00:00 2001 From: Shadowfacts Date: Wed, 31 Mar 2021 20:19:01 -0400 Subject: [PATCH] Generalize WP lazy-loading stripper --- lib/frenzy/pipeline/extractor/om_malik.ex | 1 + lib/frenzy/pipeline/extractor/util.ex | 28 +++++++++++++++++++ .../pipeline/extractor/whatever_scalzi.ex | 22 +-------------- 3 files changed, 30 insertions(+), 21 deletions(-) create mode 100644 lib/frenzy/pipeline/extractor/util.ex diff --git a/lib/frenzy/pipeline/extractor/om_malik.ex b/lib/frenzy/pipeline/extractor/om_malik.ex index b4f8369..f678885 100644 --- a/lib/frenzy/pipeline/extractor/om_malik.ex +++ b/lib/frenzy/pipeline/extractor/om_malik.ex @@ -19,5 +19,6 @@ defmodule Frenzy.Pipeline.Extractor.OmMalik do _ -> {:error, "no matching elements"} end + |> Extractor.Util.strip_wp_lazy_loading() end end diff --git a/lib/frenzy/pipeline/extractor/util.ex b/lib/frenzy/pipeline/extractor/util.ex new file mode 100644 index 0000000..f257a83 --- /dev/null +++ b/lib/frenzy/pipeline/extractor/util.ex @@ -0,0 +1,28 @@ +defmodule Frenzy.Pipeline.Extractor.Util do + @doc """ + WordPress Jetpack uses a 1x1 pixel transparent gif in a srcset to keep browsers from loading images + by overriding the src attribute. We want to strip those so the images actually load. + """ + @spec strip_wp_lazy_loading(Floki.html_tree()) :: Floki.html_tree() + def strip_wp_lazy_loading(tree) do + Floki.map(tree, fn + {"img", attrs} = el -> + class = Enum.find(attrs, fn {k, _} -> k == "class" end) + + if !is_nil(class) && String.contains?(elem(class, 1), "jetpack-lazy-image") do + { + "img", + Enum.filter(attrs, fn + {"srcset", _} -> false + _ -> true + end) + } + else + el + end + + el -> + el + end) + end +end diff --git a/lib/frenzy/pipeline/extractor/whatever_scalzi.ex b/lib/frenzy/pipeline/extractor/whatever_scalzi.ex index ae8090e..b0cc8d1 100644 --- a/lib/frenzy/pipeline/extractor/whatever_scalzi.ex +++ b/lib/frenzy/pipeline/extractor/whatever_scalzi.ex @@ -43,27 +43,7 @@ defmodule Frenzy.Pipeline.Extractor.WhateverScalzi do true end) - article_content - |> Floki.map(fn - {"img", attrs} = el -> - class = Enum.find(attrs, fn {k, _} -> k == "class" end) - class = if is_nil(class), do: nil, else: elem(class, 1) - - if !is_nil(class) && String.contains?(class, "jetpack-lazy-image") do - { - "img", - Enum.filter(attrs, fn - {"srcset", _} -> false - _ -> true - end) - } - else - el - end - - el -> - el - end) + Extractor.Util.strip_wp_lazy_loading(article_content) _ -> nil