Ensure `remove_tag` returns a valid html_tree
If the entire input is stripped out, this used to return `nil`, which caused downstream parsing to fail. Instead, return `[]`, which is the Floki representation of an empty tree. Fixes #36.
This commit is contained in:
parent
b35746bfed
commit
5dd52d5698
|
@ -64,7 +64,7 @@ defmodule Readability.Helper do
|
||||||
def remove_tag([h | t], fun) do
|
def remove_tag([h | t], fun) do
|
||||||
node = remove_tag(h, fun)
|
node = remove_tag(h, fun)
|
||||||
|
|
||||||
if is_nil(node) do
|
if node == [] do
|
||||||
remove_tag(t, fun)
|
remove_tag(t, fun)
|
||||||
else
|
else
|
||||||
[node | remove_tag(t, fun)]
|
[node | remove_tag(t, fun)]
|
||||||
|
@ -73,7 +73,7 @@ defmodule Readability.Helper do
|
||||||
|
|
||||||
def remove_tag({tag, attrs, inner_tree} = html_tree, fun) do
|
def remove_tag({tag, attrs, inner_tree} = html_tree, fun) do
|
||||||
if fun.(html_tree) do
|
if fun.(html_tree) do
|
||||||
nil
|
[]
|
||||||
else
|
else
|
||||||
{tag, attrs, remove_tag(inner_tree, fun)}
|
{tag, attrs, remove_tag(inner_tree, fun)}
|
||||||
end
|
end
|
||||||
|
|
|
@ -45,6 +45,19 @@ defmodule Readability.HelperTest do
|
||||||
assert result == expected
|
assert result == expected
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "remove all tags", %{html_tree: html_tree} do
|
||||||
|
expected = "" |> parse
|
||||||
|
|
||||||
|
result =
|
||||||
|
html_tree
|
||||||
|
|> Helper.remove_tag(fn {tag, _, _} ->
|
||||||
|
tag == "html"
|
||||||
|
end)
|
||||||
|
|
||||||
|
assert result == expected
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
test "inner text length", %{html_tree: html_tree} do
|
test "inner text length", %{html_tree: html_tree} do
|
||||||
result = html_tree |> Helper.text_length()
|
result = html_tree |> Helper.text_length()
|
||||||
assert result == 5
|
assert result == 5
|
||||||
|
|
Loading…
Reference in New Issue