From 747e0495eda67811585245a0e9a8f6c882225472 Mon Sep 17 00:00:00 2001 From: Jeff Browning Date: Fri, 4 Nov 2016 14:49:25 -0400 Subject: [PATCH] Fix detection of title suffix --- lib/readability/title_finder.ex | 2 +- test/readability/title_finder_test.exs | 40 ++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/lib/readability/title_finder.ex b/lib/readability/title_finder.ex index 4cb0f64..4273da5 100644 --- a/lib/readability/title_finder.ex +++ b/lib/readability/title_finder.ex @@ -3,7 +3,7 @@ defmodule Readability.TitleFinder do The TitleFinder engine traverses HTML tree searching for finding title. """ - @title_suffix ~r/(\-)|(\:\:)|(\|)/ + @title_suffix ~r/\s(?:\-|\:\:|\|)\s/ @h_tag_selector "h1, h2, h3" @type html_tree :: tuple | list diff --git a/test/readability/title_finder_test.exs b/test/readability/title_finder_test.exs index af724ba..7ba6f87 100644 --- a/test/readability/title_finder_test.exs +++ b/test/readability/title_finder_test.exs @@ -31,6 +31,46 @@ defmodule Readability.TitleFinderTest do test "extract tag title" do title = Readability.TitleFinder.tag_title(@html) assert title == "Tag title" + + html = """ + + + Tag title :: test + + + """ + title = Readability.TitleFinder.tag_title(html) + assert title == "Tag title" + + html = """ + + + Tag title | test + + + """ + title = Readability.TitleFinder.tag_title(html) + assert title == "Tag title" + + html = """ + + + Tag title-tag + + + """ + title = Readability.TitleFinder.tag_title(html) + assert title == "Tag title-tag" + + html = """ + + + Tag title-tag-title - test + + + """ + title = Readability.TitleFinder.tag_title(html) + assert title == "Tag title-tag-title" end test "extract h1 tag title" do