Ensure `remove_tag` returns a valid html_tree

If the entire input is stripped out, this used to return `nil`, which
caused downstream parsing to fail. Instead, return `[]`, which is the
Floki representation of an empty tree.

Fixes #36
This commit is contained in:
Ben Olive 2018-10-10 20:29:48 -04:00 committed by Jaehyun Shin
parent b35746bfed
commit 5dd52d5698
2 changed files with 15 additions and 2 deletions

View File

@@ -64,7 +64,7 @@ defmodule Readability.Helper do
def remove_tag([h | t], fun) do def remove_tag([h | t], fun) do
node = remove_tag(h, fun) node = remove_tag(h, fun)
if is_nil(node) do if node == [] do
remove_tag(t, fun) remove_tag(t, fun)
else else
[node | remove_tag(t, fun)] [node | remove_tag(t, fun)]
@@ -73,7 +73,7 @@ defmodule Readability.Helper do
def remove_tag({tag, attrs, inner_tree} = html_tree, fun) do def remove_tag({tag, attrs, inner_tree} = html_tree, fun) do
if fun.(html_tree) do if fun.(html_tree) do
nil []
else else
{tag, attrs, remove_tag(inner_tree, fun)} {tag, attrs, remove_tag(inner_tree, fun)}
end end

View File

@@ -45,6 +45,19 @@ defmodule Readability.HelperTest do
assert result == expected assert result == expected
end end
test "remove all tags", %{html_tree: html_tree} do
expected = "" |> parse
result =
html_tree
|> Helper.remove_tag(fn {tag, _, _} ->
tag == "html"
end)
assert result == expected
end
test "inner text length", %{html_tree: html_tree} do test "inner text length", %{html_tree: html_tree} do
result = html_tree |> Helper.text_length() result = html_tree |> Helper.text_length()
assert result == 5 assert result == 5