Ensure `remove_tag` returns a valid html_tree
If the entire input is stripped out, `remove_tag` used to return `nil`, which caused downstream parsing to fail. Instead, return `[]`, which is the Floki representation of an empty tree. Fixes #36
This commit is contained in:
parent
b35746bfed
commit
5dd52d5698
|
@ -64,7 +64,7 @@ defmodule Readability.Helper do
|
|||
def remove_tag([h | t], fun) do
|
||||
node = remove_tag(h, fun)
|
||||
|
||||
if is_nil(node) do
|
||||
if node == [] do
|
||||
remove_tag(t, fun)
|
||||
else
|
||||
[node | remove_tag(t, fun)]
|
||||
|
@ -73,7 +73,7 @@ defmodule Readability.Helper do
|
|||
|
||||
def remove_tag({tag, attrs, inner_tree} = html_tree, fun) do
|
||||
if fun.(html_tree) do
|
||||
nil
|
||||
[]
|
||||
else
|
||||
{tag, attrs, remove_tag(inner_tree, fun)}
|
||||
end
|
||||
|
|
|
@ -45,6 +45,19 @@ defmodule Readability.HelperTest do
|
|||
assert result == expected
|
||||
end
|
||||
|
||||
test "remove all tags", %{html_tree: html_tree} do
|
||||
expected = "" |> parse
|
||||
|
||||
result =
|
||||
html_tree
|
||||
|> Helper.remove_tag(fn {tag, _, _} ->
|
||||
tag == "html"
|
||||
end)
|
||||
|
||||
assert result == expected
|
||||
end
|
||||
|
||||
|
||||
test "inner text length", %{html_tree: html_tree} do
|
||||
result = html_tree |> Helper.text_length()
|
||||
assert result == 5
|
||||
|
|
Loading…
Reference in New Issue