Fix detection of title suffix
This commit is contained in:
parent
57d9c7bd5e
commit
747e0495ed
|
@ -3,7 +3,7 @@ defmodule Readability.TitleFinder do
|
|||
The TitleFinder engine traverses the HTML tree to find the title.
|
||||
"""
|
||||
|
||||
@title_suffix ~r/(\-)|(\:\:)|(\|)/
|
||||
@title_suffix ~r/\s(?:\-|\:\:|\|)\s/
|
||||
@h_tag_selector "h1, h2, h3"
|
||||
|
||||
@type html_tree :: tuple | list
|
||||
|
|
|
@ -31,6 +31,46 @@ defmodule Readability.TitleFinderTest do
|
|||
test "extract tag title" do
|
||||
title = Readability.TitleFinder.tag_title(@html)
|
||||
assert title == "Tag title"
|
||||
|
||||
html = """
|
||||
<html>
|
||||
<head>
|
||||
<title>Tag title :: test</title>
|
||||
</head>
|
||||
</html>
|
||||
"""
|
||||
title = Readability.TitleFinder.tag_title(html)
|
||||
assert title == "Tag title"
|
||||
|
||||
html = """
|
||||
<html>
|
||||
<head>
|
||||
<title>Tag title | test</title>
|
||||
</head>
|
||||
</html>
|
||||
"""
|
||||
title = Readability.TitleFinder.tag_title(html)
|
||||
assert title == "Tag title"
|
||||
|
||||
html = """
|
||||
<html>
|
||||
<head>
|
||||
<title>Tag title-tag</title>
|
||||
</head>
|
||||
</html>
|
||||
"""
|
||||
title = Readability.TitleFinder.tag_title(html)
|
||||
assert title == "Tag title-tag"
|
||||
|
||||
html = """
|
||||
<html>
|
||||
<head>
|
||||
<title>Tag title-tag-title - test</title>
|
||||
</head>
|
||||
</html>
|
||||
"""
|
||||
title = Readability.TitleFinder.tag_title(html)
|
||||
assert title == "Tag title-tag-title"
|
||||
end
|
||||
|
||||
test "extract h1 tag title" do
|
||||
|
|
Loading…
Reference in New Issue