From 4308939726cdbd202f364500dea62bcf6e9bbdb1 Mon Sep 17 00:00:00 2001 From: Shadowfacts Date: Sun, 1 Sep 2019 16:32:36 -0400 Subject: [PATCH] Add support for parsing multiple links --- lib/item.ex | 3 ++- lib/parser/atom.ex | 25 +++++++++++++++++++++++-- lib/parser/jsonfeed.ex | 1 + lib/parser/rss2.ex | 1 + lib/parser/rssinjson.ex | 1 + test/parser/atom_test.exs | 17 +++++++++++++++++ 6 files changed, 45 insertions(+), 3 deletions(-) diff --git a/lib/item.ex b/lib/item.ex index bc42038..2e77ac7 100644 --- a/lib/item.ex +++ b/lib/item.ex @@ -3,11 +3,12 @@ defmodule FeedParser.Item do A item in a feed. Has metadata and content from the item. """ - defstruct [:guid, :url, :title, :content, :date] + defstruct [:guid, :url, :links, :title, :content, :date] @type t() :: %__MODULE__{ guid: String.t(), url: String.t() | nil, + links: [{href :: String.t(), rel :: String.t()} | href :: String.t()], title: String.t() | nil, content: String.t(), date: DateTime.t() diff --git a/lib/parser/atom.ex b/lib/parser/atom.ex index c4ff0b1..687b6b8 100644 --- a/lib/parser/atom.ex +++ b/lib/parser/atom.ex @@ -47,7 +47,27 @@ defmodule FeedParser.Parser.Atom do |> Enum.map(fn entry -> id = text('/entry/id/text()', entry) title = text('/entry/title/text()', entry) - link = attr('/entry/link/@href', entry) + + links = + :xmerl_xpath.string('/entry/link', entry) + |> Enum.map(fn link -> + value = attr('/link/@href', link) + + case attr('/link/@rel', link) do + nil -> value + rel -> {value, rel} + end + end) + + url = + (Enum.find(links, fn + {value, rel} -> rel == "alternate" + _value -> false + end) || List.first(links)) + |> case do + url when is_binary(url) -> url + {url, _rel} -> url + end updated = text('/entry/updated/text()', entry) @@ -62,7 +82,8 @@ defmodule FeedParser.Parser.Atom do %FeedParser.Item{ guid: id, title: title, - url: link, + url: url, + links: links, content: content, date: updated } diff --git a/lib/parser/jsonfeed.ex b/lib/parser/jsonfeed.ex index ac6375a..55d6348 100644 --- a/lib/parser/jsonfeed.ex +++ b/lib/parser/jsonfeed.ex @@ -48,6 +48,7 @@ defmodule FeedParser.Parser.JSONFeed do %FeedParser.Item{ guid: id, url: url, + links: [url], title: title, content: content, date: date diff --git a/lib/parser/rss2.ex b/lib/parser/rss2.ex index b8c9f4f..f965694 100644 --- a/lib/parser/rss2.ex +++ b/lib/parser/rss2.ex @@ -63,6 +63,7 @@ defmodule FeedParser.Parser.RSS2 do guid: guid, title: title, url: link, + links: [link], content: description, date: pubDate } diff --git a/lib/parser/rssinjson.ex b/lib/parser/rssinjson.ex index f4966f7..40603da 100644 --- a/lib/parser/rssinjson.ex +++ b/lib/parser/rssinjson.ex @@ -56,6 +56,7 @@ defmodule FeedParser.Parser.RSSInJSON do %FeedParser.Item{ guid: guid, url: link, + links: [link], title: title, content: content, date: pubDate diff --git a/test/parser/atom_test.exs b/test/parser/atom_test.exs index 75f7428..b98f5e0 100644 --- a/test/parser/atom_test.exs +++ b/test/parser/atom_test.exs @@ -23,4 +23,21 @@ defmodule FeedParser.Parser.AtomTest do assert item.date == ~U[2003-12-13 18:30:02Z] assert item.content == "Some text." end + + test "parses atom entry with multiple links" do + data = File.read!("test/fixtures/atom/multi_url.xml") + {true, parsed_data} = Atom.accepts(data, "application/atom+xml") + {:ok, %FeedParser.Feed{} = feed} = Atom.parse_feed(parsed_data) + [item] = feed.items + + assert item.url == + "https://techcrunch.com/2019/08/30/someone-hacked-jack-dorseys-own-twitter-account/" + + assert item.links == [ + {"https://techcrunch.com/2019/08/30/someone-hacked-jack-dorseys-own-twitter-account/", + "alternate"}, + {"http://df4.us/rrx", "shorturl"}, + {"https://daringfireball.net/linked/2019/08/30/dorsey-twitter-account", "related"} + ] + end end