feed_parser/lib/parser/rss2.ex

107 lines
2.7 KiB
Elixir
Raw Permalink Normal View History

2019-09-01 03:25:10 +00:00
defmodule FeedParser.Parser.RSS2 do
@moduledoc """
A `FeedParser.Parser` that handles [RSS 2.0 feeds](https://cyber.harvard.edu/rss/rss.html).
"""
2019-08-31 23:02:41 +00:00
alias FeedParser.XML
require XML
@behaviour FeedParser.Parser
@impl FeedParser.Parser
def accepts(data, content_type)
    when content_type in ["application/rss+xml", "text/rss+xml"] do
  # An RSS-specific media type: any document that parses is accepted as-is,
  # without inspecting its root element.
  case XML.parse(data) do
    {:ok, document} -> {true, document}
    {:error, _} -> false
  end
end

def accepts(data, content_type)
    when content_type in ["text/xml", "application/xml"] do
  # A generic XML media type: additionally require the root element to be <rss>.
  case XML.parse(data) do
    {:ok, document} ->
      case XML.xmlElement(document, :name) do
        :rss -> {true, document}
        _other -> false
      end

    {:error, _} ->
      false
  end
end

# Any other content type is rejected without attempting to parse the data.
def accepts(_data, _content_type), do: false
@doc """
Builds a `FeedParser.Feed` from a parsed RSS 2.0 document.

`rss` is the xmerl document to query (presumably the one returned by
`accepts/2` - confirm against the caller).

Returns `{:ok, %FeedParser.Feed{}}`. Elements that are missing from the
document, and dates that cannot be parsed as RFC 1123, are reported as `nil`.

Raises `MatchError` when the document does not contain exactly one
`/rss/channel` element.
"""
@impl FeedParser.Parser
def parse_feed(rss) do
  # Assertive match: a document without a single channel is not a usable feed.
  [channel] = :xmerl_xpath.string(~c"/rss/channel", rss)

  last_updated =
    ~c"/channel/lastBuildDate/text()"
    |> text(channel)
    |> parse_date()

  items =
    ~c"/channel/item"
    |> :xmerl_xpath.string(channel)
    |> Enum.map(&parse_item/1)

  {:ok,
   %FeedParser.Feed{
     site_url: text(~c"/channel/link/text()", channel),
     title: text(~c"/channel/title/text()", channel),
     image_url: text(~c"/channel/image/url/text()", channel),
     last_updated: last_updated,
     items: items
   }}
end

# Converts a single <item> element into a `FeedParser.Item`.
defp parse_item(item) do
  link = text(~c"/item/link/text()", item)
  description = text(~c"/item/description/text()", item)

  # http://purl.org/rss/1.0/modules/content/
  content = text(~c"/item/content:encoded/text()", item)

  # from Dublin Core extension: https://www.rssboard.org/rss-profile#namespace-elements-dublin
  # todo: should this only be attempted if the xmlns:dc is defined?
  creator = text(~c"/item/dc:creator/text()", item)

  %FeedParser.Item{
    guid: text(~c"/item/guid/text()", item),
    title: text(~c"/item/title/text()", item),
    url: link,
    links: [{link, nil}],
    # Prefer the full content:encoded body; fall back to the description.
    content: content || description,
    date: ~c"/item/pubDate/text()" |> text(item) |> parse_date(),
    creator: creator
  }
end

# Parses an RFC 1123 date string. Returns `nil` when the element was absent
# (`nil` input) or when the value does not match the format.
defp parse_date(nil), do: nil

defp parse_date(raw) do
  case Timex.parse(raw, "{RFC1123}") do
    {:ok, date} -> date
    _ -> nil
  end
end
# Returns the trimmed text selected by `xpath` below `element`, or `nil` when
# nothing usable matches.
#
# xmerl may return several text nodes for a single element (for example when
# the text is split around entity references or CDATA sections), so all text
# nodes that belong to the same parent element are concatenated before
# trimming. When the matches come from different elements the result is
# ambiguous and `nil` is returned, which is also what happens when there is
# no match at all.
defp text(xpath, element) do
  case :xmerl_xpath.string(xpath, element) do
    [] ->
      nil

    [first | rest] = nodes ->
      parents = XML.xmlText(first, :parents)

      same_parent? =
        Enum.all?(rest, fn node -> XML.xmlText(node, :parents) == parents end)

      if same_parent? do
        nodes
        |> Enum.map_join(fn node -> List.to_string(XML.xmlText(node, :value)) end)
        |> String.trim()
      else
        nil
      end
  end
end
end