Fix detection of title suffix
This commit is contained in:
parent
57d9c7bd5e
commit
747e0495ed
|
@ -3,7 +3,7 @@ defmodule Readability.TitleFinder do
|
||||||
The TitleFinder engine traverses the HTML tree to find the title.
|
The TitleFinder engine traverses the HTML tree to find the title.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@title_suffix ~r/(\-)|(\:\:)|(\|)/
|
@title_suffix ~r/\s(?:\-|\:\:|\|)\s/
|
||||||
@h_tag_selector "h1, h2, h3"
|
@h_tag_selector "h1, h2, h3"
|
||||||
|
|
||||||
@type html_tree :: tuple | list
|
@type html_tree :: tuple | list
|
||||||
|
|
|
@ -31,6 +31,46 @@ defmodule Readability.TitleFinderTest do
|
||||||
test "extract tag title" do
|
test "extract tag title" do
|
||||||
title = Readability.TitleFinder.tag_title(@html)
|
title = Readability.TitleFinder.tag_title(@html)
|
||||||
assert title == "Tag title"
|
assert title == "Tag title"
|
||||||
|
|
||||||
|
html = """
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Tag title :: test</title>
|
||||||
|
</head>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
title = Readability.TitleFinder.tag_title(html)
|
||||||
|
assert title == "Tag title"
|
||||||
|
|
||||||
|
html = """
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Tag title | test</title>
|
||||||
|
</head>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
title = Readability.TitleFinder.tag_title(html)
|
||||||
|
assert title == "Tag title"
|
||||||
|
|
||||||
|
html = """
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Tag title-tag</title>
|
||||||
|
</head>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
title = Readability.TitleFinder.tag_title(html)
|
||||||
|
assert title == "Tag title-tag"
|
||||||
|
|
||||||
|
html = """
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Tag title-tag-title - test</title>
|
||||||
|
</head>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
title = Readability.TitleFinder.tag_title(html)
|
||||||
|
assert title == "Tag title-tag-title"
|
||||||
end
|
end
|
||||||
|
|
||||||
test "extract h1 tag title" do
|
test "extract h1 tag title" do
|
||||||
|
|
Loading…
Reference in New Issue