From 5dd52d5698bf2f0727e26f9740dbf75c5e11d6c3 Mon Sep 17 00:00:00 2001 From: Ben Olive Date: Wed, 10 Oct 2018 20:29:48 -0400 Subject: [PATCH] Ensure `remove_tag` returns a valid html_tree If the entire input is stripped out, this used to return `nil`, which caused downstream parsing to fail. Instead, return `[]`, which is the Floki representation of an empty tree. Fixes #36 --- lib/readability/helper.ex | 4 ++-- test/readability/helper_test.exs | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/lib/readability/helper.ex b/lib/readability/helper.ex index 7562dbc..a30c755 100644 --- a/lib/readability/helper.ex +++ b/lib/readability/helper.ex @@ -64,7 +64,7 @@ defmodule Readability.Helper do def remove_tag([h | t], fun) do node = remove_tag(h, fun) - if is_nil(node) do + if node == [] do remove_tag(t, fun) else [node | remove_tag(t, fun)] @@ -73,7 +73,7 @@ defmodule Readability.Helper do def remove_tag({tag, attrs, inner_tree} = html_tree, fun) do if fun.(html_tree) do - nil + [] else {tag, attrs, remove_tag(inner_tree, fun)} end diff --git a/test/readability/helper_test.exs b/test/readability/helper_test.exs index dd85b59..3c57359 100644 --- a/test/readability/helper_test.exs +++ b/test/readability/helper_test.exs @@ -45,6 +45,19 @@ defmodule Readability.HelperTest do assert result == expected end + test "remove all tags", %{html_tree: html_tree} do + expected = "" |> parse + + result = + html_tree + |> Helper.remove_tag(fn {tag, _, _} -> + tag == "html" + end) + + assert result == expected + end + + test "inner text length", %{html_tree: html_tree} do result = html_tree |> Helper.text_length() assert result == 5