Add creator to Item
This commit is contained in:
parent
b8de34c436
commit
13394e38f6
|
@ -3,7 +3,7 @@ defmodule FeedParser.Item do
|
|||
An item in a feed. Has metadata and content from the item.
|
||||
"""
|
||||
|
||||
defstruct [:guid, :url, :links, :title, :content, :date]
|
||||
defstruct [:guid, :url, :links, :title, :content, :date, :creator]
|
||||
|
||||
@type t() :: %__MODULE__{
|
||||
guid: String.t(),
|
||||
|
@ -11,6 +11,7 @@ defmodule FeedParser.Item do
|
|||
links: [{href :: String.t(), rel :: String.t() | nil}],
|
||||
title: String.t() | nil,
|
||||
content: String.t(),
|
||||
date: DateTime.t()
|
||||
date: DateTime.t(),
|
||||
creator: String.t() | nil
|
||||
}
|
||||
end
|
||||
|
|
|
@ -33,6 +33,7 @@ defmodule FeedParser.Parser.Atom do
|
|||
title = text('/feed/title/text()', feed)
|
||||
link = attr('/feed/link/@href', feed)
|
||||
icon = text('/feed/icon/text()', feed)
|
||||
feed_author = texts('/feed/author/name/text()', feed)
|
||||
|
||||
updated =
|
||||
text('/feed/updated/text()', feed)
|
||||
|
@ -71,6 +72,8 @@ defmodule FeedParser.Parser.Atom do
|
|||
_ -> nil
|
||||
end
|
||||
|
||||
author = texts('/entry/author/name/text()', entry) || feed_author
|
||||
|
||||
content = text('/entry/content/text()', entry) || text('/entry/summary/text()', entry)
|
||||
|
||||
%FeedParser.Item{
|
||||
|
@ -79,7 +82,8 @@ defmodule FeedParser.Parser.Atom do
|
|||
url: url,
|
||||
links: links,
|
||||
content: content,
|
||||
date: updated
|
||||
date: updated,
|
||||
creator: author |> Enum.join(", ")
|
||||
}
|
||||
end)
|
||||
|
||||
|
@ -94,12 +98,21 @@ defmodule FeedParser.Parser.Atom do
|
|||
end
|
||||
|
||||
defp text(xpath, element) do
|
||||
case :xmerl_xpath.string(xpath, element) do
|
||||
[el] ->
|
||||
XML.xmlText(el, :value) |> List.to_string() |> String.trim()
|
||||
case texts(xpath, element) do
|
||||
[text] -> text
|
||||
_ -> nil
|
||||
end
|
||||
end
|
||||
|
||||
_ ->
|
||||
defp texts(xpath, element) do
|
||||
case :xmerl_xpath.string(xpath, element) do
|
||||
[] ->
|
||||
nil
|
||||
|
||||
els ->
|
||||
Enum.map(els, fn el ->
|
||||
XML.xmlText(el, :value) |> List.to_string() |> String.trim()
|
||||
end)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -5,11 +5,21 @@ defmodule FeedParser.Parser.JSONFeed do
|
|||
|
||||
@behaviour FeedParser.Parser
|
||||
|
||||
@mime_types [
|
||||
"application/json",
|
||||
"application/feed+json"
|
||||
]
|
||||
|
||||
@versions [
|
||||
"https://jsonfeed.org/version/1",
|
||||
"https://jsonfeed.org/version/1.1"
|
||||
]
|
||||
|
||||
@impl FeedParser.Parser
|
||||
def accepts(data, content_type) do
|
||||
with "application/json" <- content_type,
|
||||
with true <- content_type in @mime_types,
|
||||
{:ok, json} <- Poison.decode(data),
|
||||
%{"version" => "https://jsonfeed.org/version/1"} <- json do
|
||||
%{"version" => v} when v in @versions <- json do
|
||||
{true, json}
|
||||
else
|
||||
_ ->
|
||||
|
@ -23,13 +33,16 @@ defmodule FeedParser.Parser.JSONFeed do
|
|||
home_page_url = Map.get(json, "home_page_url")
|
||||
icon = Map.get(json, "icon") || Map.get(json, "favicon")
|
||||
|
||||
feed_author = authors_string(json)
|
||||
|
||||
items =
|
||||
Map.get(json, "items", [])
|
||||
|> Enum.map(fn item ->
|
||||
id = item["id"]
|
||||
|
||||
url =
|
||||
Map.get(item, "url") || if String.starts_with?(id, ~r/https?:\/\//), do: id, else: nil
|
||||
Map.get(item, "url") ||
|
||||
if String.starts_with?(id, ["http://", "https://"]), do: id, else: nil
|
||||
|
||||
title = Map.get(item, "title")
|
||||
|
||||
|
@ -45,13 +58,16 @@ defmodule FeedParser.Parser.JSONFeed do
|
|||
_ -> nil
|
||||
end
|
||||
|
||||
author = authors_string(item) || feed_author
|
||||
|
||||
%FeedParser.Item{
|
||||
guid: id,
|
||||
url: url,
|
||||
links: [{url, nil}],
|
||||
title: title,
|
||||
content: content,
|
||||
date: date
|
||||
date: date,
|
||||
creator: author
|
||||
}
|
||||
end)
|
||||
|
||||
|
@ -64,4 +80,20 @@ defmodule FeedParser.Parser.JSONFeed do
|
|||
items: items
|
||||
}}
|
||||
end
|
||||
|
||||
defp authors_string(%{"author" => author}) do
|
||||
author_name(author)
|
||||
end
|
||||
|
||||
defp authors_string(%{"authors" => authors}) do
|
||||
authors
|
||||
|> Enum.map(&author_name/1)
|
||||
|> Enum.reject(&is_nil/1)
|
||||
|> Enum.join(", ")
|
||||
end
|
||||
|
||||
defp authors_string(_), do: nil
|
||||
|
||||
defp author_name(%{"name" => name}), do: name
|
||||
defp author_name(_), do: nil
|
||||
end
|
||||
|
|
|
@ -59,13 +59,18 @@ defmodule FeedParser.Parser.RSS2 do
|
|||
_ -> nil
|
||||
end
|
||||
|
||||
# from Dublin Core extension: https://www.rssboard.org/rss-profile#namespace-elements-dublin
|
||||
# todo: should this only be attempted if the xmlns:dc is defined?
|
||||
creator = text('/item/dc:creator/text()', item)
|
||||
|
||||
%FeedParser.Item{
|
||||
guid: guid,
|
||||
title: title,
|
||||
url: link,
|
||||
links: [{link, nil}],
|
||||
content: description,
|
||||
date: pubDate
|
||||
date: pubDate,
|
||||
creator: creator
|
||||
}
|
||||
end)
|
||||
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<author>
|
||||
<name>Jane Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<title>Atom-Powered Robots Run Amok</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"version": "https://jsonfeed.org/version/1.1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2",
|
||||
"content_text": "This is a second item.",
|
||||
"url": "https://example.org/second-item"
|
||||
},
|
||||
{
|
||||
"id": "1",
|
||||
"content_html": "<p>Hello, world!</p>",
|
||||
"url": "https://example.org/initial-post"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:mi="http://schemas.ingestion.microsoft.com/common/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:slate="https://slate.com">
|
||||
<channel>
|
||||
<title>News and Politics - Slate Magazine</title>
|
||||
<description>Slate RSS - News and Politics section</description>
|
||||
<link>https://slate.com/news-and-politics</link>
|
||||
<lastBuildDate>Fri, 03 Sep 2021 20:29:29 +0000</lastBuildDate>
|
||||
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
|
||||
<copyright>2021</copyright>
|
||||
<generator>Feed delivered by Clay</generator>
|
||||
<item>
|
||||
<slate:id>ckt4s2oap0036b8kzx6zfh6r6</slate:id>
|
||||
<title><![CDATA[What Joe Manchin’s Op-Ed Lamenting the National Debt Is Really About]]></title>
|
||||
<link>https://slate.com/news-and-politics/2021/09/manchin-debt-oped-what-it-means.html?via=rss</link>
|
||||
<pubDate>Fri, 03 Sep 2021 20:08:14 GMT</pubDate>
|
||||
<guid isPermaLink="true">https://slate.com/news-and-politics/2021/09/manchin-debt-oped-what-it-means.html</guid>
|
||||
<description><![CDATA[The Democrats are still stuck negotiating with themselves.]]></description>
|
||||
<dc:creator>Jim Newell</dc:creator>
|
||||
<media:content url="https://compote.slate.com/images/139b39c5-ef93-4c41-9404-288d067dbf73.jpeg?width=780&height=520&rect=7018x4679&offset=0x0" medium="image">
|
||||
<media:credit><![CDATA[Kevin Dietsch/Getty Images]]></media:credit>
|
||||
<media:title type="html"><![CDATA[Sen. Joe Manchin leaves the U.S. Capitol following a vote on August 3, 2021 in Washington, DC.]]></media:title>
|
||||
</media:content>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -21,6 +21,7 @@ defmodule FeedParser.Parser.AtomTest do
|
|||
assert item.title == "Atom-Powered Robots Run Amok"
|
||||
assert item.url == "http://example.org/2003/12/13/atom03"
|
||||
assert item.date == ~U[2003-12-13 18:30:02Z]
|
||||
assert item.creator == "John Doe"
|
||||
assert item.content == "Some text."
|
||||
end
|
||||
|
||||
|
@ -40,4 +41,11 @@ defmodule FeedParser.Parser.AtomTest do
|
|||
{"https://daringfireball.net/linked/2019/08/30/dorsey-twitter-account", "related"}
|
||||
]
|
||||
end
|
||||
|
||||
test "parses atom entry with multiple authors" do
|
||||
data = File.read!("test/fixtures/atom/multi_author.xml")
|
||||
{true, parsed_data} = Atom.accepts(data, "application/atom+xml")
|
||||
assert {:ok, %FeedParser.Feed{items: items}} = Atom.parse_feed(parsed_data)
|
||||
assert [%FeedParser.Item{creator: "John Doe, Jane Doe"}] = items
|
||||
end
|
||||
end
|
||||
|
|
|
@ -8,6 +8,11 @@ defmodule FeedParser.Parser.JSONFeedTest do
|
|||
assert {true, _} = JSONFeed.accepts(data, "application/json")
|
||||
end
|
||||
|
||||
test "matches v1.1 feed" do
|
||||
data = File.read!("test/fixtures/jsonfeed/v1_1.json")
|
||||
assert {true, _} = JSONFeed.accepts(data, "application/feed+json")
|
||||
end
|
||||
|
||||
test "parses json feed" do
|
||||
data = File.read!("test/fixtures/jsonfeed/feed.json")
|
||||
{true, parsed_data} = JSONFeed.accepts(data, "application/json")
|
||||
|
@ -23,4 +28,53 @@ defmodule FeedParser.Parser.JSONFeedTest do
|
|||
assert item1.content == "<p>Hello, world!</p>"
|
||||
assert item1.url == "https://example.org/initial-post"
|
||||
end
|
||||
|
||||
test "parses item author" do
|
||||
assert {:ok, %FeedParser.Feed{items: items}} =
|
||||
JSONFeed.parse_feed(%{
|
||||
"title" => "test",
|
||||
"items" => [
|
||||
%{
|
||||
"id" => "1",
|
||||
"author" => %{
|
||||
"name" => "foo"
|
||||
}
|
||||
}
|
||||
]
|
||||
})
|
||||
|
||||
assert [%FeedParser.Item{creator: "foo"}] = items
|
||||
end
|
||||
|
||||
test "falls back to feed author" do
|
||||
assert {:ok, %FeedParser.Feed{items: items}} =
|
||||
JSONFeed.parse_feed(%{
|
||||
"title" => "test",
|
||||
"author" => %{
|
||||
"name" => "foo"
|
||||
},
|
||||
"items" => [
|
||||
%{
|
||||
"id" => "1"
|
||||
}
|
||||
]
|
||||
})
|
||||
|
||||
assert [%FeedParser.Item{creator: "foo"}] = items
|
||||
end
|
||||
|
||||
test "handles multiple authors" do
|
||||
assert {:ok, %FeedParser.Feed{items: items}} =
|
||||
JSONFeed.parse_feed(%{
|
||||
"title" => "test",
|
||||
"items" => [
|
||||
%{
|
||||
"id" => "1",
|
||||
"authors" => [%{"name" => "foo"}, %{"name" => "bar"}]
|
||||
}
|
||||
]
|
||||
})
|
||||
|
||||
assert [%FeedParser.Item{creator: "foo, bar"}] = items
|
||||
end
|
||||
end
|
||||
|
|
|
@ -28,4 +28,12 @@ defmodule FeedParser.Parser.RSS2Test do
|
|||
assert item.date == ~U[2003-06-03 09:39:21Z]
|
||||
assert item.guid == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
||||
end
|
||||
|
||||
test "parses rss2 item with dc:creator" do
|
||||
data = File.read!("test/fixtures/rss2/dc_creator.xml")
|
||||
{true, parsed_data} = RSS2.accepts(data, "application/rss+xml")
|
||||
assert {:ok, %FeedParser.Feed{} = feed} = RSS2.parse_feed(parsed_data)
|
||||
assert [%FeedParser.Item{} = item] = feed.items
|
||||
assert item.creator == "Jim Newell"
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue