readability/test/readability/candidate/cleaner_test.exs

55 lines
1.5 KiB
Elixir

defmodule Readability.Candidate.CleanerTest do
  # Exercises Readability.Candidate.Cleaner against a small HTML fixture:
  # the div-to-p transformation and the removal of "unlikely" nodes.
  use ExUnit.Case, async: true
  doctest Readability.Candidate.Cleaner

  alias Readability.Candidate.Cleaner

  # Fixture document. Things the tests below rely on:
  #   * <body> and the <p> both carry class 'comment'
  #   * div#body holds only text (no block-level children)
  #   * div#contains_blockquote wraps a <blockquote>
  @sample """
  <html>
  <head>
  <title>title!</title>
  </head>
  <body class='comment'>
  <div>
  <p class='comment'>a comment</p>
  <div class='comment' id='body'>real content</div>
  <div id="contains_blockquote"><blockquote>something in a table</blockquote></div>
  </div>
  </body>
  </html>
  """

  # Parse the fixture once per test and expose it through the test context.
  setup do
    {:ok, html_tree: Readability.parse(@sample)}
  end

  ### Transform misused div

  test "transform divs containing no block elements", %{html_tree: tree} do
    # Only the first match is inspected; an empty result fails the match itself.
    [{tag_name, _attrs, _children} | _rest] =
      tree
      |> Cleaner.transform_misused_div_to_p()
      |> Floki.find("#body")

    assert tag_name == "p"
  end

  test "not transform divs that contain block elements", %{html_tree: tree} do
    [{tag_name, _attrs, _children} | _rest] =
      tree
      |> Cleaner.transform_misused_div_to_p()
      |> Floki.find("#contains_blockquote")

    assert tag_name == "div"
  end

  ### Remove unlikely tag

  test "remove things that have class comment", %{html_tree: tree} do
    remaining_text =
      tree
      |> Cleaner.remove_unlikely_tree()
      |> Floki.text()

    refute remaining_text =~ ~r/a comment/
  end

  test "not remove body tags", %{html_tree: tree} do
    # <body> itself has class 'comment' in the fixture, yet it must survive.
    body_nodes =
      tree
      |> Cleaner.remove_unlikely_tree()
      |> Floki.find("body")

    refute body_nodes == []
  end
end