Add support for parsing multiple links
This commit is contained in:
parent
79fafb99c6
commit
4308939726
|
@ -3,11 +3,12 @@ defmodule FeedParser.Item do
|
||||||
An item in a feed. Has metadata and content from the item.
|
An item in a feed. Has metadata and content from the item.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
defstruct [:guid, :url, :title, :content, :date]
|
defstruct [:guid, :url, :links, :title, :content, :date]
|
||||||
|
|
||||||
@type t() :: %__MODULE__{
|
@type t() :: %__MODULE__{
|
||||||
guid: String.t(),
|
guid: String.t(),
|
||||||
url: String.t() | nil,
|
url: String.t() | nil,
|
||||||
|
links: [{href :: String.t(), rel :: String.t()} | href :: String.t()],
|
||||||
title: String.t() | nil,
|
title: String.t() | nil,
|
||||||
content: String.t(),
|
content: String.t(),
|
||||||
date: DateTime.t()
|
date: DateTime.t()
|
||||||
|
|
|
@ -47,7 +47,27 @@ defmodule FeedParser.Parser.Atom do
|
||||||
|> Enum.map(fn entry ->
|
|> Enum.map(fn entry ->
|
||||||
id = text('/entry/id/text()', entry)
|
id = text('/entry/id/text()', entry)
|
||||||
title = text('/entry/title/text()', entry)
|
title = text('/entry/title/text()', entry)
|
||||||
link = attr('/entry/link/@href', entry)
|
|
||||||
|
links =
|
||||||
|
:xmerl_xpath.string('/entry/link', entry)
|
||||||
|
|> Enum.map(fn link ->
|
||||||
|
value = attr('/link/@href', link)
|
||||||
|
|
||||||
|
case attr('/link/@rel', link) do
|
||||||
|
nil -> value
|
||||||
|
rel -> {value, rel}
|
||||||
|
end
|
||||||
|
end)
|
||||||
|
|
||||||
|
url =
|
||||||
|
(Enum.find(links, fn
|
||||||
|
{value, rel} -> rel == "alternate"
|
||||||
|
_value -> false
|
||||||
|
end) || List.first(links))
|
||||||
|
|> case do
|
||||||
|
url when is_binary(url) -> url
|
||||||
|
{url, _rel} -> url
|
||||||
|
end
|
||||||
|
|
||||||
updated =
|
updated =
|
||||||
text('/entry/updated/text()', entry)
|
text('/entry/updated/text()', entry)
|
||||||
|
@ -62,7 +82,8 @@ defmodule FeedParser.Parser.Atom do
|
||||||
%FeedParser.Item{
|
%FeedParser.Item{
|
||||||
guid: id,
|
guid: id,
|
||||||
title: title,
|
title: title,
|
||||||
url: link,
|
url: url,
|
||||||
|
links: links,
|
||||||
content: content,
|
content: content,
|
||||||
date: updated
|
date: updated
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,7 @@ defmodule FeedParser.Parser.JSONFeed do
|
||||||
%FeedParser.Item{
|
%FeedParser.Item{
|
||||||
guid: id,
|
guid: id,
|
||||||
url: url,
|
url: url,
|
||||||
|
links: [url],
|
||||||
title: title,
|
title: title,
|
||||||
content: content,
|
content: content,
|
||||||
date: date
|
date: date
|
||||||
|
|
|
@ -63,6 +63,7 @@ defmodule FeedParser.Parser.RSS2 do
|
||||||
guid: guid,
|
guid: guid,
|
||||||
title: title,
|
title: title,
|
||||||
url: link,
|
url: link,
|
||||||
|
links: [link],
|
||||||
content: description,
|
content: description,
|
||||||
date: pubDate
|
date: pubDate
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,6 +56,7 @@ defmodule FeedParser.Parser.RSSInJSON do
|
||||||
%FeedParser.Item{
|
%FeedParser.Item{
|
||||||
guid: guid,
|
guid: guid,
|
||||||
url: link,
|
url: link,
|
||||||
|
links: [link],
|
||||||
title: title,
|
title: title,
|
||||||
content: content,
|
content: content,
|
||||||
date: pubDate
|
date: pubDate
|
||||||
|
|
|
@ -23,4 +23,21 @@ defmodule FeedParser.Parser.AtomTest do
|
||||||
assert item.date == ~U[2003-12-13 18:30:02Z]
|
assert item.date == ~U[2003-12-13 18:30:02Z]
|
||||||
assert item.content == "Some text."
|
assert item.content == "Some text."
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "parses atom entry with multiple links" do
|
||||||
|
data = File.read!("test/fixtures/atom/multi_url.xml")
|
||||||
|
{true, parsed_data} = Atom.accepts(data, "application/atom+xml")
|
||||||
|
{:ok, %FeedParser.Feed{} = feed} = Atom.parse_feed(parsed_data)
|
||||||
|
[item] = feed.items
|
||||||
|
|
||||||
|
assert item.url ==
|
||||||
|
"https://techcrunch.com/2019/08/30/someone-hacked-jack-dorseys-own-twitter-account/"
|
||||||
|
|
||||||
|
assert item.links == [
|
||||||
|
{"https://techcrunch.com/2019/08/30/someone-hacked-jack-dorseys-own-twitter-account/",
|
||||||
|
"alternate"},
|
||||||
|
{"http://df4.us/rrx", "shorturl"},
|
||||||
|
{"https://daringfireball.net/linked/2019/08/30/dorsey-twitter-account", "related"}
|
||||||
|
]
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue