# Source listing metadata: 66 lines, 1.5 KiB, Elixir
defmodule Readability.ContentFinderTest do
  # Tests for two tree-rewriting steps of Readability.ContentFinder:
  # `remove_unlikely_candidates/1` and `transform_misused_divs_into_paragraphs/1`.
  # Each test parses an inline HTML sample with `Readability.parse/1`, runs one
  # step, and compares the resulting `{tag, attrs, children}` tree with a
  # hand-written expectation.
  use ExUnit.Case, async: true

  doctest Readability.ContentFinder

  # Page whose <body> holds a header, a nav, an article and a "disqus" div.
  # The expectation in the test below keeps only the article.
  @unlikely_sample """
  <html>
  <body>
  <header>HEADER</header>
  <nav>NAV</nav>
  <article class="community">ARTICLE</article>
  <div class="disqus">SOCIAL</div>
  </body>
  </html>
  """

  test "remove unlikely tag nodes" do
    expected =
      {"html", [],
       [
         {"body", [],
          [
            {"article", [{"class", "community"}], ["ARTICLE"]}
          ]}
       ]}

    result =
      @unlikely_sample
      |> Readability.parse()
      |> Readability.ContentFinder.remove_unlikely_candidates()

    assert result == expected
  end

  # Page with two <div>s: the first wraps only a <span>, the second wraps a <p>.
  # The expectation in the test below turns the first into a <p> and leaves the
  # second as a <div>.
  @misused_sample """
  <html>
  <body>
  <div>
  <span>here</span>
  </div>
  <div>
  <p>not here</p>
  </div>
  </body>
  </html>
  """

  test "transform misused div tag" do
    expected =
      {"html", [],
       [
         {"body", [],
          [
            {"p", [], [{"span", [], ["here"]}]},
            {"div", [], [{"p", [], ["not here"]}]}
          ]}
       ]}

    result =
      @misused_sample
      |> Readability.parse()
      |> Readability.ContentFinder.transform_misused_divs_into_paragraphs()

    assert result == expected
  end

  # Returns the contents of `./test/fixtures/<name>.html`.
  #
  # The `{:ok, body}` match is deliberate: any other result from `File.read/1`
  # (for example a missing fixture) raises `MatchError` instead of being
  # silently passed on. No test in this module calls this helper; it stays
  # public so any external caller keeps working.
  def read_html(name) do
    {:ok, body} = File.read("./test/fixtures/#{name}.html")
    body
  end
end