Add creator to Item
This commit is contained in:
parent
b8de34c436
commit
13394e38f6
|
@ -3,7 +3,7 @@ defmodule FeedParser.Item do
|
||||||
An item in a feed. Has metadata and content from the item.
|
An item in a feed. Has metadata and content from the item.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
defstruct [:guid, :url, :links, :title, :content, :date]
|
defstruct [:guid, :url, :links, :title, :content, :date, :creator]
|
||||||
|
|
||||||
@type t() :: %__MODULE__{
|
@type t() :: %__MODULE__{
|
||||||
guid: String.t(),
|
guid: String.t(),
|
||||||
|
@ -11,6 +11,7 @@ defmodule FeedParser.Item do
|
||||||
links: [{href :: String.t(), rel :: String.t() | nil}],
|
links: [{href :: String.t(), rel :: String.t() | nil}],
|
||||||
title: String.t() | nil,
|
title: String.t() | nil,
|
||||||
content: String.t(),
|
content: String.t(),
|
||||||
date: DateTime.t()
|
date: DateTime.t(),
|
||||||
|
creator: String.t() | nil
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
|
@ -33,6 +33,7 @@ defmodule FeedParser.Parser.Atom do
|
||||||
title = text('/feed/title/text()', feed)
|
title = text('/feed/title/text()', feed)
|
||||||
link = attr('/feed/link/@href', feed)
|
link = attr('/feed/link/@href', feed)
|
||||||
icon = text('/feed/icon/text()', feed)
|
icon = text('/feed/icon/text()', feed)
|
||||||
|
feed_author = texts('/feed/author/name/text()', feed)
|
||||||
|
|
||||||
updated =
|
updated =
|
||||||
text('/feed/updated/text()', feed)
|
text('/feed/updated/text()', feed)
|
||||||
|
@ -71,6 +72,8 @@ defmodule FeedParser.Parser.Atom do
|
||||||
_ -> nil
|
_ -> nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
author = texts('/entry/author/name/text()', entry) || feed_author
|
||||||
|
|
||||||
content = text('/entry/content/text()', entry) || text('/entry/summary/text()', entry)
|
content = text('/entry/content/text()', entry) || text('/entry/summary/text()', entry)
|
||||||
|
|
||||||
%FeedParser.Item{
|
%FeedParser.Item{
|
||||||
|
@ -79,7 +82,8 @@ defmodule FeedParser.Parser.Atom do
|
||||||
url: url,
|
url: url,
|
||||||
links: links,
|
links: links,
|
||||||
content: content,
|
content: content,
|
||||||
date: updated
|
date: updated,
|
||||||
|
creator: author |> Enum.join(", ")
|
||||||
}
|
}
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
@ -94,12 +98,21 @@ defmodule FeedParser.Parser.Atom do
|
||||||
end
|
end
|
||||||
|
|
||||||
defp text(xpath, element) do
|
defp text(xpath, element) do
|
||||||
case :xmerl_xpath.string(xpath, element) do
|
case texts(xpath, element) do
|
||||||
[el] ->
|
[text] -> text
|
||||||
XML.xmlText(el, :value) |> List.to_string() |> String.trim()
|
_ -> nil
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
_ ->
|
defp texts(xpath, element) do
|
||||||
|
case :xmerl_xpath.string(xpath, element) do
|
||||||
|
[] ->
|
||||||
nil
|
nil
|
||||||
|
|
||||||
|
els ->
|
||||||
|
Enum.map(els, fn el ->
|
||||||
|
XML.xmlText(el, :value) |> List.to_string() |> String.trim()
|
||||||
|
end)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -5,11 +5,21 @@ defmodule FeedParser.Parser.JSONFeed do
|
||||||
|
|
||||||
@behaviour FeedParser.Parser
|
@behaviour FeedParser.Parser
|
||||||
|
|
||||||
|
@mime_types [
|
||||||
|
"application/json",
|
||||||
|
"application/feed+json"
|
||||||
|
]
|
||||||
|
|
||||||
|
@versions [
|
||||||
|
"https://jsonfeed.org/version/1",
|
||||||
|
"https://jsonfeed.org/version/1.1"
|
||||||
|
]
|
||||||
|
|
||||||
@impl FeedParser.Parser
|
@impl FeedParser.Parser
|
||||||
def accepts(data, content_type) do
|
def accepts(data, content_type) do
|
||||||
with "application/json" <- content_type,
|
with true <- content_type in @mime_types,
|
||||||
{:ok, json} <- Poison.decode(data),
|
{:ok, json} <- Poison.decode(data),
|
||||||
%{"version" => "https://jsonfeed.org/version/1"} <- json do
|
%{"version" => v} when v in @versions <- json do
|
||||||
{true, json}
|
{true, json}
|
||||||
else
|
else
|
||||||
_ ->
|
_ ->
|
||||||
|
@ -23,13 +33,16 @@ defmodule FeedParser.Parser.JSONFeed do
|
||||||
home_page_url = Map.get(json, "home_page_url")
|
home_page_url = Map.get(json, "home_page_url")
|
||||||
icon = Map.get(json, "icon") || Map.get(json, "favicon")
|
icon = Map.get(json, "icon") || Map.get(json, "favicon")
|
||||||
|
|
||||||
|
feed_author = authors_string(json)
|
||||||
|
|
||||||
items =
|
items =
|
||||||
Map.get(json, "items", [])
|
Map.get(json, "items", [])
|
||||||
|> Enum.map(fn item ->
|
|> Enum.map(fn item ->
|
||||||
id = item["id"]
|
id = item["id"]
|
||||||
|
|
||||||
url =
|
url =
|
||||||
Map.get(item, "url") || if String.starts_with?(id, ~r/https?:\/\//), do: id, else: nil
|
Map.get(item, "url") ||
|
||||||
|
if String.starts_with?(id, ["http://", "https://"]), do: id, else: nil
|
||||||
|
|
||||||
title = Map.get(item, "title")
|
title = Map.get(item, "title")
|
||||||
|
|
||||||
|
@ -45,13 +58,16 @@ defmodule FeedParser.Parser.JSONFeed do
|
||||||
_ -> nil
|
_ -> nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
author = authors_string(item) || feed_author
|
||||||
|
|
||||||
%FeedParser.Item{
|
%FeedParser.Item{
|
||||||
guid: id,
|
guid: id,
|
||||||
url: url,
|
url: url,
|
||||||
links: [{url, nil}],
|
links: [{url, nil}],
|
||||||
title: title,
|
title: title,
|
||||||
content: content,
|
content: content,
|
||||||
date: date
|
date: date,
|
||||||
|
creator: author
|
||||||
}
|
}
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
@ -64,4 +80,20 @@ defmodule FeedParser.Parser.JSONFeed do
|
||||||
items: items
|
items: items
|
||||||
}}
|
}}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
defp authors_string(%{"author" => author}) do
|
||||||
|
author_name(author)
|
||||||
|
end
|
||||||
|
|
||||||
|
defp authors_string(%{"authors" => authors}) do
|
||||||
|
authors
|
||||||
|
|> Enum.map(&author_name/1)
|
||||||
|
|> Enum.reject(&is_nil/1)
|
||||||
|
|> Enum.join(", ")
|
||||||
|
end
|
||||||
|
|
||||||
|
defp authors_string(_), do: nil
|
||||||
|
|
||||||
|
defp author_name(%{"name" => name}), do: name
|
||||||
|
defp author_name(_), do: nil
|
||||||
end
|
end
|
||||||
|
|
|
@ -59,13 +59,18 @@ defmodule FeedParser.Parser.RSS2 do
|
||||||
_ -> nil
|
_ -> nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# from Dublin Core extension: https://www.rssboard.org/rss-profile#namespace-elements-dublin
|
||||||
|
# todo: should this only be attempted if the xmlns:dc is defined?
|
||||||
|
creator = text('/item/dc:creator/text()', item)
|
||||||
|
|
||||||
%FeedParser.Item{
|
%FeedParser.Item{
|
||||||
guid: guid,
|
guid: guid,
|
||||||
title: title,
|
title: title,
|
||||||
url: link,
|
url: link,
|
||||||
links: [{link, nil}],
|
links: [{link, nil}],
|
||||||
content: description,
|
content: description,
|
||||||
date: pubDate
|
date: pubDate,
|
||||||
|
creator: creator
|
||||||
}
|
}
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/"/>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<author>
|
||||||
|
<name>John Doe</name>
|
||||||
|
</author>
|
||||||
|
<author>
|
||||||
|
<name>Jane Doe</name>
|
||||||
|
</author>
|
||||||
|
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<title>Atom-Powered Robots Run Amok</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<summary>Some text.</summary>
|
||||||
|
</entry>
|
||||||
|
|
||||||
|
</feed>
|
|
@ -0,0 +1,18 @@
|
||||||
|
{
|
||||||
|
"version": "https://jsonfeed.org/version/1.1",
|
||||||
|
"title": "My Example Feed",
|
||||||
|
"home_page_url": "https://example.org/",
|
||||||
|
"feed_url": "https://example.org/feed.json",
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"id": "2",
|
||||||
|
"content_text": "This is a second item.",
|
||||||
|
"url": "https://example.org/second-item"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "1",
|
||||||
|
"content_html": "<p>Hello, world!</p>",
|
||||||
|
"url": "https://example.org/initial-post"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:mi="http://schemas.ingestion.microsoft.com/common/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:slate="https://slate.com">
|
||||||
|
<channel>
|
||||||
|
<title>News and Politics - Slate Magazine</title>
|
||||||
|
<description>Slate RSS - News and Politics section</description>
|
||||||
|
<link>https://slate.com/news-and-politics</link>
|
||||||
|
<lastBuildDate>Fri, 03 Sep 2021 20:29:29 +0000</lastBuildDate>
|
||||||
|
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
|
||||||
|
<copyright>2021</copyright>
|
||||||
|
<generator>Feed delivered by Clay</generator>
|
||||||
|
<item>
|
||||||
|
<slate:id>ckt4s2oap0036b8kzx6zfh6r6</slate:id>
|
||||||
|
<title><![CDATA[What Joe Manchin’s Op-Ed Lamenting the National Debt Is Really About]]></title>
|
||||||
|
<link>https://slate.com/news-and-politics/2021/09/manchin-debt-oped-what-it-means.html?via=rss</link>
|
||||||
|
<pubDate>Fri, 03 Sep 2021 20:08:14 GMT</pubDate>
|
||||||
|
<guid isPermaLink="true">https://slate.com/news-and-politics/2021/09/manchin-debt-oped-what-it-means.html</guid>
|
||||||
|
<description><![CDATA[The Democrats are still stuck negotiating with themselves.]]></description>
|
||||||
|
<dc:creator>Jim Newell</dc:creator>
|
||||||
|
<media:content url="https://compote.slate.com/images/139b39c5-ef93-4c41-9404-288d067dbf73.jpeg?width=780&height=520&rect=7018x4679&offset=0x0" medium="image">
|
||||||
|
<media:credit><![CDATA[Kevin Dietsch/Getty Images]]></media:credit>
|
||||||
|
<media:title type="html"><![CDATA[Sen. Joe Manchin leaves the U.S. Capitol following a vote on August 3, 2021 in Washington, DC.]]></media:title>
|
||||||
|
</media:content>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
|
@ -21,6 +21,7 @@ defmodule FeedParser.Parser.AtomTest do
|
||||||
assert item.title == "Atom-Powered Robots Run Amok"
|
assert item.title == "Atom-Powered Robots Run Amok"
|
||||||
assert item.url == "http://example.org/2003/12/13/atom03"
|
assert item.url == "http://example.org/2003/12/13/atom03"
|
||||||
assert item.date == ~U[2003-12-13 18:30:02Z]
|
assert item.date == ~U[2003-12-13 18:30:02Z]
|
||||||
|
assert item.creator == "John Doe"
|
||||||
assert item.content == "Some text."
|
assert item.content == "Some text."
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -40,4 +41,11 @@ defmodule FeedParser.Parser.AtomTest do
|
||||||
{"https://daringfireball.net/linked/2019/08/30/dorsey-twitter-account", "related"}
|
{"https://daringfireball.net/linked/2019/08/30/dorsey-twitter-account", "related"}
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "parses atom entry with multiple authors" do
|
||||||
|
data = File.read!("test/fixtures/atom/multi_author.xml")
|
||||||
|
{true, parsed_data} = Atom.accepts(data, "application/atom+xml")
|
||||||
|
assert {:ok, %FeedParser.Feed{items: items}} = Atom.parse_feed(parsed_data)
|
||||||
|
assert [%FeedParser.Item{creator: "John Doe, Jane Doe"}] = items
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -8,6 +8,11 @@ defmodule FeedParser.Parser.JSONFeedTest do
|
||||||
assert {true, _} = JSONFeed.accepts(data, "application/json")
|
assert {true, _} = JSONFeed.accepts(data, "application/json")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "matches v1.1 feed" do
|
||||||
|
data = File.read!("test/fixtures/jsonfeed/v1_1.json")
|
||||||
|
assert {true, _} = JSONFeed.accepts(data, "application/feed+json")
|
||||||
|
end
|
||||||
|
|
||||||
test "parses json feed" do
|
test "parses json feed" do
|
||||||
data = File.read!("test/fixtures/jsonfeed/feed.json")
|
data = File.read!("test/fixtures/jsonfeed/feed.json")
|
||||||
{true, parsed_data} = JSONFeed.accepts(data, "application/json")
|
{true, parsed_data} = JSONFeed.accepts(data, "application/json")
|
||||||
|
@ -23,4 +28,53 @@ defmodule FeedParser.Parser.JSONFeedTest do
|
||||||
assert item1.content == "<p>Hello, world!</p>"
|
assert item1.content == "<p>Hello, world!</p>"
|
||||||
assert item1.url == "https://example.org/initial-post"
|
assert item1.url == "https://example.org/initial-post"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "parses item author" do
|
||||||
|
assert {:ok, %FeedParser.Feed{items: items}} =
|
||||||
|
JSONFeed.parse_feed(%{
|
||||||
|
"title" => "test",
|
||||||
|
"items" => [
|
||||||
|
%{
|
||||||
|
"id" => "1",
|
||||||
|
"author" => %{
|
||||||
|
"name" => "foo"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
})
|
||||||
|
|
||||||
|
assert [%FeedParser.Item{creator: "foo"}] = items
|
||||||
|
end
|
||||||
|
|
||||||
|
test "falls back to feed author" do
|
||||||
|
assert {:ok, %FeedParser.Feed{items: items}} =
|
||||||
|
JSONFeed.parse_feed(%{
|
||||||
|
"title" => "test",
|
||||||
|
"author" => %{
|
||||||
|
"name" => "foo"
|
||||||
|
},
|
||||||
|
"items" => [
|
||||||
|
%{
|
||||||
|
"id" => "1"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
})
|
||||||
|
|
||||||
|
assert [%FeedParser.Item{creator: "foo"}] = items
|
||||||
|
end
|
||||||
|
|
||||||
|
test "handles multiple authors" do
|
||||||
|
assert {:ok, %FeedParser.Feed{items: items}} =
|
||||||
|
JSONFeed.parse_feed(%{
|
||||||
|
"title" => "test",
|
||||||
|
"items" => [
|
||||||
|
%{
|
||||||
|
"id" => "1",
|
||||||
|
"authors" => [%{"name" => "foo"}, %{"name" => "bar"}]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
})
|
||||||
|
|
||||||
|
assert [%FeedParser.Item{creator: "foo, bar"}] = items
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -28,4 +28,12 @@ defmodule FeedParser.Parser.RSS2Test do
|
||||||
assert item.date == ~U[2003-06-03 09:39:21Z]
|
assert item.date == ~U[2003-06-03 09:39:21Z]
|
||||||
assert item.guid == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
assert item.guid == "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "parses rss2 item with dc:creator" do
|
||||||
|
data = File.read!("test/fixtures/rss2/dc_creator.xml")
|
||||||
|
{true, parsed_data} = RSS2.accepts(data, "application/rss+xml")
|
||||||
|
assert {:ok, %FeedParser.Feed{} = feed} = RSS2.parse_feed(parsed_data)
|
||||||
|
assert [%FeedParser.Item{} = item] = feed.items
|
||||||
|
assert item.creator == "Jim Newell"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue