readability/test/readability_test.exs

69 lines
2.4 KiB
Elixir
Raw Normal View History

2016-04-15 11:51:29 +00:00
defmodule ReadabilityTest do
  # End-to-end tests for article extraction. Each test reads a saved HTML
  # fixture through TestHelper.read_fixture/1, extracts the article with
  # Readability.content/1,2, and checks how the extracted HTML
  # (Readability.raw_html/1) and plain text (Readability.readable_text/1)
  # begin and end.
  use ExUnit.Case, async: true

  test "readability for NY Times" do
    html = TestHelper.read_fixture("nytimes.html")
    # This fixture is extracted with conditional cleaning switched off.
    opts = [clean_conditionally: false]
    nytimes = Readability.content(html, opts)

    nytimes_html = Readability.raw_html(nytimes)
    assert nytimes_html =~ ~r{^<div><div class="story-body">}
    assert nytimes_html =~ ~r{major priorities\.</p></div></div>$}

    nytimes_text = Readability.readable_text(nytimes)
    assert nytimes_text =~ ~r/^Buddhist monks performing as part of/
    assert nytimes_text =~ ~r/one of her major priorities\.$/
  end

  test "readability for BBC" do
    bbc =
      "bbc.html"
      |> TestHelper.read_fixture()
      |> Readability.content()

    bbc_html = Readability.raw_html(bbc)
    assert bbc_html =~ ~r{^<div><div class="story-body__inner" property="articleBody">}
    assert bbc_html =~ ~r{connected computing devices"\.</p></div></div>$}

    bbc_text = Readability.readable_text(bbc)

    # TODO: Remove image captions when extracting text only; until then the
    # start-of-text assertion below stays disabled.
    # assert bbc_text =~ ~r/^Microsoft's quarterly profit has missed analysts/
    assert bbc_text =~ ~r/connected computing devices"\.$/
  end

  test "readability for medium" do
    medium =
      "medium.html"
      |> TestHelper.read_fixture()
      |> Readability.content()

    medium_html = Readability.raw_html(medium)
    assert medium_html =~ ~r{^<div><div class="section-inner layoutSingleColumn">}
    assert medium_html =~ ~r{recommend button!</em></h3></div></div>$}

    medium_text = Readability.readable_text(medium)
    assert medium_text =~ ~r/^Background: Ive spent the past 6/
    assert medium_text =~ ~r/a lot to me if you hit the recommend button!$/
  end

  test "readability for buzzfeed" do
    buzzfeed =
      "buzzfeed.html"
      |> TestHelper.read_fixture()
      |> Readability.content()

    buzzfeed_html = Readability.raw_html(buzzfeed)
    assert buzzfeed_html =~ ~r{^<div><div class="buzz_superlist_item_text"><p>}
    assert buzzfeed_html =~ ~r{encrypted devices\.</p><hr/><hr/><hr/><hr/></div></div>$}

    buzzfeed_text = Readability.readable_text(buzzfeed)
    assert buzzfeed_text =~ ~r/^The FBI no longer needs Apples help/
    assert buzzfeed_text =~ ~r/issue of court orders and encrypted devices\.$/
  end

  test "readability elixir blog" do
    article =
      "elixir.html"
      |> TestHelper.read_fixture()
      |> Readability.content()

    # Smoke test only: no expected content is pinned for this fixture yet.
    # The other tests match readable_text/1 output against regexes, so a
    # binary is the expected result type here as well.
    # TODO: add start/end content assertions like the tests above.
    assert is_binary(Readability.readable_text(article))
  end
end