Add support for parsing multiple links

This commit is contained in:
Shadowfacts 2019-09-01 16:32:36 -04:00
parent 79fafb99c6
commit 4308939726
Signed by: shadowfacts
GPG Key ID: 94A5AB95422746E5
6 changed files with 45 additions and 3 deletions

View File

@ -3,11 +3,12 @@ defmodule FeedParser.Item do
An item in a feed. Has metadata and content from the item. An item in a feed. Has metadata and content from the item.
""" """
defstruct [:guid, :url, :title, :content, :date] defstruct [:guid, :url, :links, :title, :content, :date]
@type t() :: %__MODULE__{ @type t() :: %__MODULE__{
guid: String.t(), guid: String.t(),
url: String.t() | nil, url: String.t() | nil,
links: [{href :: String.t(), rel :: String.t()} | href :: String.t()],
title: String.t() | nil, title: String.t() | nil,
content: String.t(), content: String.t(),
date: DateTime.t() date: DateTime.t()

View File

@ -47,7 +47,27 @@ defmodule FeedParser.Parser.Atom do
|> Enum.map(fn entry -> |> Enum.map(fn entry ->
id = text('/entry/id/text()', entry) id = text('/entry/id/text()', entry)
title = text('/entry/title/text()', entry) title = text('/entry/title/text()', entry)
link = attr('/entry/link/@href', entry)
links =
:xmerl_xpath.string('/entry/link', entry)
|> Enum.map(fn link ->
value = attr('/link/@href', link)
case attr('/link/@rel', link) do
nil -> value
rel -> {value, rel}
end
end)
# Pick the entry's primary URL: prefer the link tagged rel="alternate",
# otherwise fall back to the first link found. Elements of `links` are either
# a bare href or an {href, rel} tuple, so both shapes are unwrapped here.
# An entry with no <link> elements yields nil (the struct's `url` field is
# nullable) instead of raising a CaseClauseError.
url =
  case Enum.find(links, &match?({_href, "alternate"}, &1)) || List.first(links) do
    {href, _rel} -> href
    href when is_binary(href) -> href
    nil -> nil
  end
updated = updated =
text('/entry/updated/text()', entry) text('/entry/updated/text()', entry)
@ -62,7 +82,8 @@ defmodule FeedParser.Parser.Atom do
%FeedParser.Item{ %FeedParser.Item{
guid: id, guid: id,
title: title, title: title,
url: link, url: url,
links: links,
content: content, content: content,
date: updated date: updated
} }

View File

@ -48,6 +48,7 @@ defmodule FeedParser.Parser.JSONFeed do
%FeedParser.Item{ %FeedParser.Item{
guid: id, guid: id,
url: url, url: url,
links: [url],
title: title, title: title,
content: content, content: content,
date: date date: date

View File

@ -63,6 +63,7 @@ defmodule FeedParser.Parser.RSS2 do
guid: guid, guid: guid,
title: title, title: title,
url: link, url: link,
links: [link],
content: description, content: description,
date: pubDate date: pubDate
} }

View File

@ -56,6 +56,7 @@ defmodule FeedParser.Parser.RSSInJSON do
%FeedParser.Item{ %FeedParser.Item{
guid: guid, guid: guid,
url: link, url: link,
links: [link],
title: title, title: title,
content: content, content: content,
date: pubDate date: pubDate

View File

@ -23,4 +23,21 @@ defmodule FeedParser.Parser.AtomTest do
assert item.date == ~U[2003-12-13 18:30:02Z] assert item.date == ~U[2003-12-13 18:30:02Z]
assert item.content == "Some text." assert item.content == "Some text."
end end
test "parses atom entry with multiple links" do
  # Load the multi-link fixture and parse it; exactly one item is expected.
  xml = File.read!("test/fixtures/atom/multi_url.xml")
  {true, document} = Atom.accepts(xml, "application/atom+xml")
  {:ok, %FeedParser.Feed{items: [entry]}} = Atom.parse_feed(document)

  article_url =
    "https://techcrunch.com/2019/08/30/someone-hacked-jack-dorseys-own-twitter-account/"

  # The rel="alternate" link is expected to become the item's primary url.
  assert entry.url == article_url

  # Every link is expected to be kept as an {href, rel} tuple.
  assert entry.links == [
           {article_url, "alternate"},
           {"http://df4.us/rrx", "shorturl"},
           {"https://daringfireball.net/linked/2019/08/30/dorsey-twitter-account", "related"}
         ]
end
end end