Add support for parsing multiple links
This commit is contained in:
parent
79fafb99c6
commit
4308939726
|
@ -3,11 +3,12 @@ defmodule FeedParser.Item do
|
|||
An item in a feed. Has metadata and content from the item.
|
||||
"""
|
||||
|
||||
defstruct [:guid, :url, :title, :content, :date]
|
||||
defstruct [:guid, :url, :links, :title, :content, :date]
|
||||
|
||||
@type t() :: %__MODULE__{
|
||||
guid: String.t(),
|
||||
url: String.t() | nil,
|
||||
links: [{href :: String.t(), rel :: String.t()} | href :: String.t()],
|
||||
title: String.t() | nil,
|
||||
content: String.t(),
|
||||
date: DateTime.t()
|
||||
|
|
|
@ -47,7 +47,27 @@ defmodule FeedParser.Parser.Atom do
|
|||
|> Enum.map(fn entry ->
|
||||
id = text('/entry/id/text()', entry)
|
||||
title = text('/entry/title/text()', entry)
|
||||
link = attr('/entry/link/@href', entry)
|
||||
|
||||
links =
|
||||
:xmerl_xpath.string('/entry/link', entry)
|
||||
|> Enum.map(fn link ->
|
||||
value = attr('/link/@href', link)
|
||||
|
||||
case attr('/link/@rel', link) do
|
||||
nil -> value
|
||||
rel -> {value, rel}
|
||||
end
|
||||
end)
|
||||
|
||||
url =
|
||||
(Enum.find(links, fn
|
||||
{value, rel} -> rel == "alternate"
|
||||
_value -> false
|
||||
end) || List.first(links))
|
||||
|> case do
|
||||
url when is_binary(url) -> url
|
||||
{url, _rel} -> url
|
||||
end
|
||||
|
||||
updated =
|
||||
text('/entry/updated/text()', entry)
|
||||
|
@ -62,7 +82,8 @@ defmodule FeedParser.Parser.Atom do
|
|||
%FeedParser.Item{
|
||||
guid: id,
|
||||
title: title,
|
||||
url: link,
|
||||
url: url,
|
||||
links: links,
|
||||
content: content,
|
||||
date: updated
|
||||
}
|
||||
|
|
|
@ -48,6 +48,7 @@ defmodule FeedParser.Parser.JSONFeed do
|
|||
%FeedParser.Item{
|
||||
guid: id,
|
||||
url: url,
|
||||
links: [url],
|
||||
title: title,
|
||||
content: content,
|
||||
date: date
|
||||
|
|
|
@ -63,6 +63,7 @@ defmodule FeedParser.Parser.RSS2 do
|
|||
guid: guid,
|
||||
title: title,
|
||||
url: link,
|
||||
links: [link],
|
||||
content: description,
|
||||
date: pubDate
|
||||
}
|
||||
|
|
|
@ -56,6 +56,7 @@ defmodule FeedParser.Parser.RSSInJSON do
|
|||
%FeedParser.Item{
|
||||
guid: guid,
|
||||
url: link,
|
||||
links: [link],
|
||||
title: title,
|
||||
content: content,
|
||||
date: pubDate
|
||||
|
|
|
@ -23,4 +23,21 @@ defmodule FeedParser.Parser.AtomTest do
|
|||
assert item.date == ~U[2003-12-13 18:30:02Z]
|
||||
assert item.content == "Some text."
|
||||
end
|
||||
|
||||
test "parses atom entry with multiple links" do
|
||||
data = File.read!("test/fixtures/atom/multi_url.xml")
|
||||
{true, parsed_data} = Atom.accepts(data, "application/atom+xml")
|
||||
{:ok, %FeedParser.Feed{} = feed} = Atom.parse_feed(parsed_data)
|
||||
[item] = feed.items
|
||||
|
||||
assert item.url ==
|
||||
"https://techcrunch.com/2019/08/30/someone-hacked-jack-dorseys-own-twitter-account/"
|
||||
|
||||
assert item.links == [
|
||||
{"https://techcrunch.com/2019/08/30/someone-hacked-jack-dorseys-own-twitter-account/",
|
||||
"alternate"},
|
||||
{"http://df4.us/rrx", "shorturl"},
|
||||
{"https://daringfireball.net/linked/2019/08/30/dorsey-twitter-account", "related"}
|
||||
]
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue