Compare commits
No commits in common. "75404b197d67e118a6575ee9b39a9ae2ac3c2dcc" and "1538ca2a8c3c3b6b041c9e86b560f368c4547896" have entirely different histories.
75404b197d
...
1538ca2a8c
|
@ -29,7 +29,6 @@ defmodule Readability.Helper do
|
|||
"""
|
||||
@spec remove_attrs(html_tree, String.t() | [String.t()] | Regex.t()) :: html_tree
|
||||
def remove_attrs(content, _) when is_binary(content), do: content
|
||||
def remove_attrs({:comment, _} = comment, _), do: comment
|
||||
def remove_attrs([], _), do: []
|
||||
|
||||
def remove_attrs([h | t], t_attrs) do
|
||||
|
|
1
mix.lock
1
mix.lock
|
@ -8,6 +8,7 @@
|
|||
"floki": {:hex, :floki, "0.20.3", "dfb3a71eb99938e330b4156433d55c6d0b188d936c9683d115a8540bac56e019", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}, {:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm", "77032ea4d961b7e0895e6b84ca4dae45671ae3aaec706db8614077a19bb62d6e"},
|
||||
"hackney": {:hex, :hackney, "1.9.0", "51c506afc0a365868469dcfc79a9d0b94d896ec741cfd5bd338f49a5ec515bfe", [:rebar3], [{:certifi, "2.0.0", [hex: :certifi, optional: false]}, {:idna, "5.1.0", [hex: :idna, optional: false]}, {:metrics, "1.0.1", [hex: :metrics, optional: false]}, {:mimerl, "1.0.2", [hex: :mimerl, optional: false]}, {:ssl_verify_fun, "1.1.1", [hex: :ssl_verify_fun, optional: false]}]},
|
||||
"html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm", "3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"},
|
||||
"httpoison": {:hex, :httpoison, "0.13.0", "bfaf44d9f133a6599886720f3937a7699466d23bb0cd7a88b6ba011f53c6f562", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, optional: false]}]},
|
||||
"idna": {:hex, :idna, "5.1.0", "d72b4effeb324ad5da3cab1767cb16b17939004e789d8c0ad5b70f3cea20c89a", [:rebar3], [{:unicode_util_compat, "0.3.1", [hex: :unicode_util_compat, optional: false]}]},
|
||||
"meck": {:hex, :meck, "0.8.7", "ebad16ca23f685b07aed3bc011efff65fbaf28881a8adf925428ef5472d390ee", [:rebar3], [], "hexpm", "51274d4b536dc7958eb4df3aefa5245f4a6df1d6198cb8f8b97d6747033597ca"},
|
||||
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], []},
|
||||
|
|
|
@ -57,6 +57,7 @@ defmodule Readability.HelperTest do
|
|||
assert result == expected
|
||||
end
|
||||
|
||||
|
||||
test "inner text length", %{html_tree: html_tree} do
|
||||
result = html_tree |> Helper.text_length()
|
||||
assert result == 5
|
||||
|
@ -92,10 +93,4 @@ defmodule Readability.HelperTest do
|
|||
assert result_with_scheme =~ foo_url
|
||||
assert result_with_scheme =~ bar_url_https
|
||||
end
|
||||
|
||||
# Stripping the `class` attribute must keep the HTML comment node inside the
# <span> exactly as parsed.
test "remove attrs with comments" do
with_class = "<div class=\"foo\">hello <span><!-- world --></span></div>"
without_class = "<div>hello <span><!-- world --></span></div>"

stripped =
with_class
|> Floki.parse()
|> Helper.remove_attrs(~w[class])

assert Floki.parse(without_class) == stripped
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
defmodule ReadabilityHttpTest do
  # Verifies how Readability.summarize/1 treats HTTP responses depending on
  # their Content-Type header. Every test stubs HTTPoison.get!/3 with a canned
  # %HTTPoison.Response{} built from a fixture file, so no network is touched.
  #
  # NOTE: `with_mock` swaps out the HTTPoison module while its block runs, so
  # this case deliberately does not opt into `async: true`.
  use ExUnit.Case
  import Mock

  @rfc_url "https://tools.ietf.org/rfc/rfc2616.txt"
  @bbc_url "https://news.bbc.co.uk/test.html"

  test "blank response is parsed as plain text" do
    summary = summarize_fixture(@rfc_url, "rfc2616.txt", [])

    assert summary.article_text =~ ~r/3 Protocol Parameters/
  end

  test "text/plain response is parsed as plain text" do
    summary = summarize_fixture(@rfc_url, "rfc2616.txt", [{"Content-Type", "text/plain"}])

    assert summary.article_text =~ ~r/3 Protocol Parameters/
  end

  test "*ml responses are parsed as markup" do
    for mime <- ["text/html", "application/xml", "application/xhtml+xml"] do
      summary = summarize_fixture(@bbc_url, "bbc.html", [{"Content-Type", mime}])

      assert summary.article_html =~ bbc_article_ending()
    end
  end

  test "response with charset is parsed correctly" do
    headers = [{"Content-Type", "text/html; charset=UTF-8"}]
    summary = summarize_fixture(@bbc_url, "bbc.html", headers)

    assert summary.article_html =~ bbc_article_ending()
  end

  test "response with content-type in different case is parsed correctly" do
    # HTTP header keys are case insensitive (RFC2616 - Section 4.2)
    headers = [{"content-Type", "text/html; charset=UTF-8"}]
    summary = summarize_fixture(@bbc_url, "bbc.html", headers)

    assert summary.article_html =~ bbc_article_ending()
  end

  # Summarizes `url` while HTTPoison.get!/3 is stubbed to answer with a 200
  # response carrying `headers` and the contents of fixture file `fixture`.
  # Returns the resulting %Readability.Summary{}; the match fails the test
  # if summarize/1 returns anything else.
  defp summarize_fixture(url, fixture, headers) do
    response = %HTTPoison.Response{
      status_code: 200,
      headers: headers,
      body: TestHelper.read_fixture(fixture)
    }

    with_mock HTTPoison, get!: fn _url, _headers, _opts -> response end do
      %Readability.Summary{} = summary = Readability.summarize(url)
      summary
    end
  end

  # Pattern the extracted BBC article HTML is expected to end with. Kept in a
  # function (not a module attribute) so the regex is built at runtime.
  defp bbc_article_ending do
    ~r/connected computing devices\".<\/p><\/div><\/div>$/
  end
end
|
Loading…
Reference in New Issue