diff --git a/lib/item.ex b/lib/item.ex
index 9506873..ce3e9da 100644
--- a/lib/item.ex
+++ b/lib/item.ex
@@ -3,7 +3,7 @@ defmodule FeedParser.Item do
A item in a feed. Has metadata and content from the item.
"""
- defstruct [:guid, :url, :links, :title, :content, :date]
+ defstruct [:guid, :url, :links, :title, :content, :date, :creator]
@type t() :: %__MODULE__{
guid: String.t(),
@@ -11,6 +11,7 @@ defmodule FeedParser.Item do
links: [{href :: String.t(), rel :: String.t() | nil}],
title: String.t() | nil,
content: String.t(),
- date: DateTime.t()
+ date: DateTime.t(),
+ creator: String.t() | nil
}
end
diff --git a/lib/parser/atom.ex b/lib/parser/atom.ex
index 457c2a4..a734420 100644
--- a/lib/parser/atom.ex
+++ b/lib/parser/atom.ex
@@ -33,6 +33,7 @@ defmodule FeedParser.Parser.Atom do
title = text('/feed/title/text()', feed)
link = attr('/feed/link/@href', feed)
icon = text('/feed/icon/text()', feed)
+ feed_author = texts('/feed/author/name/text()', feed)
updated =
text('/feed/updated/text()', feed)
@@ -71,6 +72,8 @@ defmodule FeedParser.Parser.Atom do
_ -> nil
end
+ author = texts('/entry/author/name/text()', entry) || feed_author
+
content = text('/entry/content/text()', entry) || text('/entry/summary/text()', entry)
%FeedParser.Item{
@@ -79,7 +82,8 @@ defmodule FeedParser.Parser.Atom do
url: url,
links: links,
content: content,
- date: updated
+ date: updated,
+ creator: author && Enum.join(author, ", ") # author is nil when neither the entry nor the feed names one; Enum.join(nil, ...) would raise
}
end)
@@ -94,12 +98,21 @@ defmodule FeedParser.Parser.Atom do
end
defp text(xpath, element) do
- case :xmerl_xpath.string(xpath, element) do
- [el] ->
- XML.xmlText(el, :value) |> List.to_string() |> String.trim()
+ case texts(xpath, element) do
+ [text] -> text
+ _ -> nil
+ end
+ end
- _ ->
+ defp texts(xpath, element) do
+ case :xmerl_xpath.string(xpath, element) do
+ [] ->
nil
+
+ els ->
+ Enum.map(els, fn el ->
+ XML.xmlText(el, :value) |> List.to_string() |> String.trim()
+ end)
end
end
diff --git a/lib/parser/jsonfeed.ex b/lib/parser/jsonfeed.ex
index 8f031f7..7a18256 100644
--- a/lib/parser/jsonfeed.ex
+++ b/lib/parser/jsonfeed.ex
@@ -5,11 +5,21 @@ defmodule FeedParser.Parser.JSONFeed do
@behaviour FeedParser.Parser
+ @mime_types [
+ "application/json",
+ "application/feed+json"
+ ]
+
+ @versions [
+ "https://jsonfeed.org/version/1",
+ "https://jsonfeed.org/version/1.1"
+ ]
+
@impl FeedParser.Parser
def accepts(data, content_type) do
- with "application/json" <- content_type,
+ with true <- content_type in @mime_types,
{:ok, json} <- Poison.decode(data),
- %{"version" => "https://jsonfeed.org/version/1"} <- json do
+ %{"version" => v} when v in @versions <- json do
{true, json}
else
_ ->
@@ -23,13 +33,16 @@ defmodule FeedParser.Parser.JSONFeed do
home_page_url = Map.get(json, "home_page_url")
icon = Map.get(json, "icon") || Map.get(json, "favicon")
+ feed_author = authors_string(json)
+
items =
Map.get(json, "items", [])
|> Enum.map(fn item ->
id = item["id"]
url =
- Map.get(item, "url") || if String.starts_with?(id, ~r/https?:\/\//), do: id, else: nil
+ Map.get(item, "url") ||
+ if String.starts_with?(id, ["http://", "https://"]), do: id, else: nil
title = Map.get(item, "title")
@@ -45,13 +58,16 @@ defmodule FeedParser.Parser.JSONFeed do
_ -> nil
end
+ author = authors_string(item) || feed_author
+
%FeedParser.Item{
guid: id,
url: url,
links: [{url, nil}],
title: title,
content: content,
- date: date
+ date: date,
+ creator: author
}
end)
@@ -64,4 +80,30 @@ defmodule FeedParser.Parser.JSONFeed do
items: items
}}
end
+
+ # Builds the comma-separated author string stored in an item's `creator` field from a
+ # JSON Feed object (the feed itself or a single item). The `author` object is tried
+ # first, then the `authors` list.
+ #
+ # Returns nil (never "") when no usable name exists: "" is truthy in Elixir, so it would
+ # defeat the caller's `authors_string(item) || feed_author` fallback.
+ defp authors_string(json) when is_map(json) do
+ author_name(Map.get(json, "author")) || joined_author_names(Map.get(json, "authors"))
+ end
+
+ defp authors_string(_), do: nil
+
+ # Joins the names found in an `authors` list; nil for a non-list or when no entry has a name.
+ defp joined_author_names(authors) when is_list(authors) do
+ case authors |> Enum.map(&author_name/1) |> Enum.reject(&is_nil/1) do
+ [] -> nil
+ names -> Enum.join(names, ", ")
+ end
+ end
+
+ defp joined_author_names(_), do: nil
+
+ # Only a non-empty string counts as a name; any other shape is treated as missing.
+ defp author_name(%{"name" => name}) when is_binary(name) and name != "", do: name
+ defp author_name(_), do: nil
end
diff --git a/lib/parser/rss2.ex b/lib/parser/rss2.ex
index 41c3358..8f3cbc6 100644
--- a/lib/parser/rss2.ex
+++ b/lib/parser/rss2.ex
@@ -59,13 +59,18 @@ defmodule FeedParser.Parser.RSS2 do
_ -> nil
end
+ # from Dublin Core extension: https://www.rssboard.org/rss-profile#namespace-elements-dublin
+ # todo: should this only be attempted if the xmlns:dc is defined?
+ creator = text('/item/dc:creator/text()', item)
+
%FeedParser.Item{
guid: guid,
title: title,
url: link,
links: [{link, nil}],
content: description,
- date: pubDate
+ date: pubDate,
+ creator: creator
}
end)
diff --git a/test/fixtures/atom/multi_author.xml b/test/fixtures/atom/multi_author.xml
new file mode 100644
index 0000000..189448f
--- /dev/null
+++ b/test/fixtures/atom/multi_author.xml
@@ -0,0 +1,23 @@
+
+
Hello, world!
", + "url": "https://example.org/initial-post" + } + ] +} diff --git a/test/fixtures/rss2/dc_creator.xml b/test/fixtures/rss2/dc_creator.xml new file mode 100644 index 0000000..2b87d3b --- /dev/null +++ b/test/fixtures/rss2/dc_creator.xml @@ -0,0 +1,25 @@ + +Hello, world!
" assert item1.url == "https://example.org/initial-post" end + + test "parses item author" do + assert {:ok, %FeedParser.Feed{items: items}} = + JSONFeed.parse_feed(%{ + "title" => "test", + "items" => [ + %{ + "id" => "1", + "author" => %{ + "name" => "foo" + } + } + ] + }) + + assert [%FeedParser.Item{creator: "foo"}] = items + end + + test "falls back to feed author" do + assert {:ok, %FeedParser.Feed{items: items}} = + JSONFeed.parse_feed(%{ + "title" => "test", + "author" => %{ + "name" => "foo" + }, + "items" => [ + %{ + "id" => "1" + } + ] + }) + + assert [%FeedParser.Item{creator: "foo"}] = items + end + + test "handles multiple authors" do + assert {:ok, %FeedParser.Feed{items: items}} = + JSONFeed.parse_feed(%{ + "title" => "test", + "items" => [ + %{ + "id" => "1", + "authors" => [%{"name" => "foo"}, %{"name" => "bar"}] + } + ] + }) + + assert [%FeedParser.Item{creator: "foo, bar"}] = items + end end diff --git a/test/parser/rss2_test.exs b/test/parser/rss2_test.exs index 68b0409..6390818 100644 --- a/test/parser/rss2_test.exs +++ b/test/parser/rss2_test.exs @@ -28,4 +28,12 @@ defmodule FeedParser.Parser.RSS2Test do assert item.date == ~U[2003-06-03 09:39:21Z] assert item.guid == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573" end + + test "parses rss2 item with dc:creator" do + data = File.read!("test/fixtures/rss2/dc_creator.xml") + {true, parsed_data} = RSS2.accepts(data, "application/rss+xml") + assert {:ok, %FeedParser.Feed{} = feed} = RSS2.parse_feed(parsed_data) + assert [%FeedParser.Item{} = item] = feed.items + assert item.creator == "Jim Newell" + end end