defmodule FeedParser.Parser.RSS2 do
  @moduledoc """
  A `FeedParser.Parser` that handles [RSS 2.0 feeds](https://cyber.harvard.edu/rss/rss.html).
  """

  alias FeedParser.XML
  require XML

  @behaviour FeedParser.Parser

  # Content types that are taken to be RSS as long as the body parses as XML.
  @rss_content_types ["application/rss+xml", "text/rss+xml"]

  # Generic XML content types; these additionally require an `<rss>` root element.
  @xml_content_types ["text/xml", "application/xml"]

  # Timex format directive used for both `lastBuildDate` and `pubDate`.
  @date_format "{RFC1123}"

  @doc """
  Decides whether this parser can handle `data` served with `content_type`.

  Returns `{true, doc}`, where `doc` is the document produced by
  `FeedParser.XML.parse/1`, when the feed is accepted, and `false` otherwise:

    * for `application/rss+xml` and `text/rss+xml` the data only has to parse as XML;
    * for `text/xml` and `application/xml` the root element must also be named `rss`;
    * any other content type is rejected without parsing.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) when content_type in @rss_content_types do
    case XML.parse(data) do
      {:error, _} -> false
      {:ok, doc} -> {true, doc}
    end
  end

  def accepts(data, content_type) when content_type in @xml_content_types do
    case XML.parse(data) do
      {:error, _} ->
        false

      {:ok, doc} ->
        if XML.xmlElement(doc, :name) == :rss do
          {true, doc}
        else
          false
        end
    end
  end

  def accepts(_data, _content_type), do: false

  @doc """
  Builds a `FeedParser.Feed` from an already-parsed RSS document.

  `rss` is the document previously returned by `accepts/2`. Returns
  `{:ok, %FeedParser.Feed{}}`. Channel fields that are absent from the document
  (and dates that do not parse as RFC 1123) are set to `nil`.

  Raises a `MatchError` if the document does not contain exactly one
  `/rss/channel` element.
  """
  @impl FeedParser.Parser
  def parse_feed(rss) do
    [channel] = :xmerl_xpath.string(~c"/rss/channel", rss)

    title = text(~c"/channel/title/text()", channel)
    link = text(~c"/channel/link/text()", channel)
    image = text(~c"/channel/image/url/text()", channel)
    last_updated = ~c"/channel/lastBuildDate/text()" |> text(channel) |> parse_date()

    items =
      ~c"/channel/item"
      |> :xmerl_xpath.string(channel)
      |> Enum.map(&parse_item/1)

    {:ok,
     %FeedParser.Feed{
       site_url: link,
       title: title,
       image_url: image,
       last_updated: last_updated,
       items: items
     }}
  end

  # Converts a single `<item>` element into a `FeedParser.Item`.
  defp parse_item(item) do
    guid = text(~c"/item/guid/text()", item)
    title = text(~c"/item/title/text()", item)
    link = text(~c"/item/link/text()", item)
    description = text(~c"/item/description/text()", item)
    pub_date = ~c"/item/pubDate/text()" |> text(item) |> parse_date()

    # From the Dublin Core extension:
    # https://www.rssboard.org/rss-profile#namespace-elements-dublin
    # TODO: should this only be attempted if the xmlns:dc namespace is declared?
    creator = text(~c"/item/dc:creator/text()", item)

    %FeedParser.Item{
      guid: guid,
      title: title,
      url: link,
      links: [{link, nil}],
      content: description,
      date: pub_date,
      creator: creator
    }
  end

  # Parses an RFC 1123 date string, returning `nil` when the value is missing
  # or cannot be parsed. The explicit `nil` clause keeps absent elements from
  # ever being handed to Timex.
  defp parse_date(nil), do: nil

  defp parse_date(raw) do
    case Timex.parse(raw, @date_format) do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Evaluates `xpath` (a charlist) against `element` and returns the matched
  # text as a trimmed string, or `nil` when nothing matched.
  #
  # xmerl can report an element's character data as several text nodes (for
  # example around entity references such as `&amp;`, or CDATA sections), so
  # every matched node is concatenated instead of requiring exactly one.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [] ->
        nil

      nodes ->
        nodes
        |> Enum.map_join(fn node -> node |> XML.xmlText(:value) |> List.to_string() end)
        |> String.trim()
    end
  end
end