Fix detection of title suffix

This commit is contained in:
Jeff Browning 2016-11-04 14:49:25 -04:00
parent 57d9c7bd5e
commit 747e0495ed
2 changed files with 41 additions and 1 deletions

View File

@ -3,7 +3,7 @@ defmodule Readability.TitleFinder do
The TitleFinder engine traverses the HTML tree to find the title. The TitleFinder engine traverses the HTML tree to find the title.
""" """
@title_suffix ~r/(\-)|(\:\:)|(\|)/ @title_suffix ~r/\s(?:\-|\:\:|\|)\s/
@h_tag_selector "h1, h2, h3" @h_tag_selector "h1, h2, h3"
@type html_tree :: tuple | list @type html_tree :: tuple | list

View File

@ -31,6 +31,46 @@ defmodule Readability.TitleFinderTest do
test "extract tag title" do test "extract tag title" do
title = Readability.TitleFinder.tag_title(@html) title = Readability.TitleFinder.tag_title(@html)
assert title == "Tag title" assert title == "Tag title"
html = """
<html>
<head>
<title>Tag title :: test</title>
</head>
</html>
"""
title = Readability.TitleFinder.tag_title(html)
assert title == "Tag title"
html = """
<html>
<head>
<title>Tag title | test</title>
</head>
</html>
"""
title = Readability.TitleFinder.tag_title(html)
assert title == "Tag title"
html = """
<html>
<head>
<title>Tag title-tag</title>
</head>
</html>
"""
title = Readability.TitleFinder.tag_title(html)
assert title == "Tag title-tag"
html = """
<html>
<head>
<title>Tag title-tag-title - test</title>
</head>
</html>
"""
title = Readability.TitleFinder.tag_title(html)
assert title == "Tag title-tag-title"
end end
test "extract h1 tag title" do test "extract h1 tag title" do