diff --git a/CHANGELOG.md b/CHANGELOG.md index 45f1142..425d2b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). ## Unreleased +- Fix concatenation of multiple matching tags in Title extractor +- Fix exception when no matches are found in Title extractor ## [0.6.2] - 2015.11.22 - Scope the title tag selector to the head element diff --git a/lib/readability/title_finder.ex b/lib/readability/title_finder.ex index bf7243b..59ac9b3 100644 --- a/lib/readability/title_finder.ex +++ b/lib/readability/title_finder.ex @@ -33,7 +33,7 @@ defmodule Readability.TitleFinder do @spec tag_title(html_tree) :: binary def tag_title(html_tree) do html_tree - |> Floki.find("head title") + |> find_tag("head title") |> clean_title() |> String.split(@title_suffix) |> hd() @@ -45,7 +45,7 @@ defmodule Readability.TitleFinder do @spec og_title(html_tree) :: binary def og_title(html_tree) do html_tree - |> Floki.find("meta[property=og:title]") + |> find_tag("meta[property=og:title]") |> Floki.attribute("content") |> clean_title() end @@ -56,11 +56,22 @@ defmodule Readability.TitleFinder do @spec h_tag_title(html_tree, String.t) :: binary def h_tag_title(html_tree, selector \\ @h_tag_selector) do html_tree - |> Floki.find(selector) - |> hd() + |> find_tag(selector) |> clean_title() end + defp find_tag(html_tree, selector) do + case Floki.find(html_tree, selector) do + [] -> + [] + matches when is_list(matches) -> + hd(matches) + end + end + + defp clean_title([]) do + "" + end defp clean_title(html_tree) do html_tree |> Floki.text() diff --git a/test/readability/title_finder_test.exs b/test/readability/title_finder_test.exs index 301ce1f..dc08ea4 100644 --- a/test/readability/title_finder_test.exs +++ b/test/readability/title_finder_test.exs @@ -28,6 +28,19 @@ defmodule Readability.TitleFinderTest do assert title == "og title" end + test "does not 
merge multiple matching og:title tags" do + html = """ + + + + + + + """ + title = Readability.TitleFinder.og_title(html) + assert title == "og title 1" + end + test "extract tag title" do title = Readability.TitleFinder.tag_title(@html) assert title == "Tag title" @@ -86,13 +99,51 @@ defmodule Readability.TitleFinderTest do assert title == "Tag title" end + test "does not merge multiple title tags" do + html = """ + + + tag title 1 + tag title 2 + + + """ + title = Readability.TitleFinder.tag_title(html) + assert title == "tag title 1" + end + test "extract h1 tag title" do title = Readability.TitleFinder.h_tag_title(@html) assert title == "h1 title" end - test "extrat h2 tag title" do + test "extract h2 tag title" do title = Readability.TitleFinder.h_tag_title(@html, "h2") assert title == "h2 title" end + + test "does not merge multiple header tags" do + html = """ + + +

header 1

+

header 2

+ + + """ + title = Readability.TitleFinder.h_tag_title(html) + assert title == "header 1" + end + + test "returns an empty string when no title tag can be found" do + assert Readability.TitleFinder.tag_title("") == "" + end + + test "returns an empty string when no og:title tag can be found" do + assert Readability.TitleFinder.og_title("") == "" + end + + test "returns an empty string when no header tag can be found" do + assert Readability.TitleFinder.h_tag_title("") == "" + end end