Handle content:encoded in RSS2 feeds

This commit is contained in:
Shadowfacts 2023-01-14 15:22:11 -05:00
parent 76cee0b055
commit 943f4fdea7
3 changed files with 32 additions and 1 deletions

View File

@ -58,6 +58,9 @@ defmodule FeedParser.Parser.RSS2 do
link = text('/item/link/text()', item)
description = text('/item/description/text()', item)
# content:encoded belongs to the RSS content module namespace: http://purl.org/rss/1.0/modules/content/
content = text('/item/content:encoded/text()', item)
pubDate =
text('/item/pubDate/text()', item)
|> Timex.parse("{RFC1123}")
@ -75,7 +78,7 @@ defmodule FeedParser.Parser.RSS2 do
title: title,
url: link,
links: [{link, nil}],
content: description,
content: content || description,
date: pubDate,
creator: creator
}

18
test/fixtures/rss2/shadowfacts.xml vendored Normal file
View File

@ -0,0 +1,18 @@
<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<title>Shadowfacts</title>
<link>https://shadowfacts.net</link>
<description></description>
<lastBuildDate>Fri, 06 Jan 2023 05:27:48 +0000</lastBuildDate>
<item>
<title>Rewritten in Rust</title>
<link>https://shadowfacts.net/2023/rewritten-in-rust/</link>
<category>meta</category>
<guid>https://shadowfacts.net/2023/rewritten-in-rust/</guid>
<pubDate>Thu, 05 Jan 2023 19:30:42 +0000</pubDate>
<content:encoded><![CDATA[<p>So, about six months ago I decided I wanted to rewrite my perfectly-working blog backend in Rust. Why? Because I was bored and wanted an excuse to use Rust more.</p>
]]></content:encoded>
</item>
</channel>
</rss>

View File

@ -36,4 +36,14 @@ defmodule FeedParser.Parser.RSS2Test do
assert [%FeedParser.Item{} = item] = feed.items
assert item.creator == "Jim Newell"
end
test "parses rss2 item with content:encoded" do
  # The fixture's only item carries its body in <content:encoded> and has no
  # <description>, so the parsed item content must come from content:encoded.
  {true, document} =
    "test/fixtures/rss2/shadowfacts.xml"
    |> File.read!()
    |> RSS2.accepts("text/xml")

  assert {:ok, %FeedParser.Feed{items: items}} = RSS2.parse_feed(document)
  assert [%FeedParser.Item{content: body}] = items

  assert body ==
           "<p>So, about six months ago I decided I wanted to rewrite my perfectly-working blog backend in Rust. Why? Because I was bored and wanted an excuse to use Rust more.</p>"
end
end