Merge branch 'master' of https://github.com/keepcosmos/readability
This commit is contained in:
commit
47af5e48de
|
@ -4,6 +4,8 @@ All notable changes to this project will be documented in this file.
|
||||||
This project adheres to [Semantic Versioning](http://semver.org/).
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
||||||
|
|
||||||
## Unreleased
|
## Unreleased
|
||||||
|
- Fix concatenation of multiple matching tags in Title extractor
|
||||||
|
- Fix exception when no matches are found in Title extractor
|
||||||
|
|
||||||
## [0.6.2] - 2015.11.22
|
## [0.6.2] - 2015.11.22
|
||||||
- Scope the title tag selector to the head element
|
- Scope the title tag selector to the head element
|
||||||
|
|
|
@ -33,7 +33,7 @@ defmodule Readability.TitleFinder do
|
||||||
@spec tag_title(html_tree) :: binary
|
@spec tag_title(html_tree) :: binary
|
||||||
def tag_title(html_tree) do
|
def tag_title(html_tree) do
|
||||||
html_tree
|
html_tree
|
||||||
|> Floki.find("head title")
|
|> find_tag("head title")
|
||||||
|> clean_title()
|
|> clean_title()
|
||||||
|> String.split(@title_suffix)
|
|> String.split(@title_suffix)
|
||||||
|> hd()
|
|> hd()
|
||||||
|
@ -45,7 +45,7 @@ defmodule Readability.TitleFinder do
|
||||||
@spec og_title(html_tree) :: binary
|
@spec og_title(html_tree) :: binary
|
||||||
def og_title(html_tree) do
|
def og_title(html_tree) do
|
||||||
html_tree
|
html_tree
|
||||||
|> Floki.find("meta[property=og:title]")
|
|> find_tag("meta[property=og:title]")
|
||||||
|> Floki.attribute("content")
|
|> Floki.attribute("content")
|
||||||
|> clean_title()
|
|> clean_title()
|
||||||
end
|
end
|
||||||
|
@ -56,11 +56,22 @@ defmodule Readability.TitleFinder do
|
||||||
@spec h_tag_title(html_tree, String.t) :: binary
|
@spec h_tag_title(html_tree, String.t) :: binary
|
||||||
def h_tag_title(html_tree, selector \\ @h_tag_selector) do
|
def h_tag_title(html_tree, selector \\ @h_tag_selector) do
|
||||||
html_tree
|
html_tree
|
||||||
|> Floki.find(selector)
|
|> find_tag(selector)
|
||||||
|> hd()
|
|
||||||
|> clean_title()
|
|> clean_title()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
defp find_tag(html_tree, selector) do
|
||||||
|
case Floki.find(html_tree, selector) do
|
||||||
|
[] ->
|
||||||
|
[]
|
||||||
|
matches when is_list(matches) ->
|
||||||
|
hd(matches)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
defp clean_title([]) do
|
||||||
|
""
|
||||||
|
end
|
||||||
defp clean_title(html_tree) do
|
defp clean_title(html_tree) do
|
||||||
html_tree
|
html_tree
|
||||||
|> Floki.text()
|
|> Floki.text()
|
||||||
|
|
|
@ -28,6 +28,19 @@ defmodule Readability.TitleFinderTest do
|
||||||
assert title == "og title"
|
assert title == "og title"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "does not merge multiple matching og:title tags" do
|
||||||
|
html = """
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta property='og:title' content='og title 1'>
|
||||||
|
<meta property='og:title' content='og title 2'>
|
||||||
|
</head>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
title = Readability.TitleFinder.og_title(html)
|
||||||
|
assert title == "og title 1"
|
||||||
|
end
|
||||||
|
|
||||||
test "extract tag title" do
|
test "extract tag title" do
|
||||||
title = Readability.TitleFinder.tag_title(@html)
|
title = Readability.TitleFinder.tag_title(@html)
|
||||||
assert title == "Tag title"
|
assert title == "Tag title"
|
||||||
|
@ -86,13 +99,51 @@ defmodule Readability.TitleFinderTest do
|
||||||
assert title == "Tag title"
|
assert title == "Tag title"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "does not merge multiple title tags" do
|
||||||
|
html = """
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>tag title 1</title>
|
||||||
|
<title>tag title 2</title>
|
||||||
|
</head>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
title = Readability.TitleFinder.tag_title(html)
|
||||||
|
assert title == "tag title 1"
|
||||||
|
end
|
||||||
|
|
||||||
test "extract h1 tag title" do
|
test "extract h1 tag title" do
|
||||||
title = Readability.TitleFinder.h_tag_title(@html)
|
title = Readability.TitleFinder.h_tag_title(@html)
|
||||||
assert title == "h1 title"
|
assert title == "h1 title"
|
||||||
end
|
end
|
||||||
|
|
||||||
test "extrat h2 tag title" do
|
test "extract h2 tag title" do
|
||||||
title = Readability.TitleFinder.h_tag_title(@html, "h2")
|
title = Readability.TitleFinder.h_tag_title(@html, "h2")
|
||||||
assert title == "h2 title"
|
assert title == "h2 title"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "does not merge multiple header tags" do
|
||||||
|
html = """
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<h1>header 1</h1>
|
||||||
|
<h1>header 2</h1>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
title = Readability.TitleFinder.h_tag_title(html)
|
||||||
|
assert title == "header 1"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "returns an empty string when no title tag can be found" do
|
||||||
|
assert Readability.TitleFinder.tag_title("") == ""
|
||||||
|
end
|
||||||
|
|
||||||
|
test "returns an empty string when no og:title tag can be found" do
|
||||||
|
assert Readability.TitleFinder.og_title("") == ""
|
||||||
|
end
|
||||||
|
|
||||||
|
test "returns an empty string when no header tag can be found" do
|
||||||
|
assert Readability.TitleFinder.h_tag_title("") == ""
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue