feed_parser/lib/parser/atom.ex

134 lines
3.0 KiB
Elixir
Raw Normal View History

2019-09-01 03:25:10 +00:00
defmodule FeedParser.Parser.Atom do
  @moduledoc """
  A `FeedParser.Parser` that handles [Atom feeds](https://validator.w3.org/feed/docs/atom.html).
  """

  @behaviour FeedParser.Parser

  alias FeedParser.XML
  require XML

  @doc """
  Decides whether this parser can handle `data`, based on `content_type`.

    * `"application/atom+xml"` is always accepted.
    * `"text/xml"` and `"application/xml"` are accepted only when the name of
      the element returned by `XML.parse/1` is `:feed`.
    * Any other content type is rejected without parsing `data`.

  Returns `{true, doc}` (where `doc` is the result of `XML.parse/1`) when the
  data is accepted, and `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, "application/atom+xml"), do: {true, XML.parse(data)}

  def accepts(data, content_type) when content_type in ["text/xml", "application/xml"] do
    doc = XML.parse(data)

    # Generic XML content types are shared by many formats, so the parsed
    # element's name is what decides whether this is an Atom document.
    if XML.xmlElement(doc, :name) == :feed do
      {true, doc}
    else
      false
    end
  end

  def accepts(_data, _content_type), do: false

  @doc """
  Builds a `FeedParser.Feed` from an already parsed Atom document.

  `feed` is the XML element that `accepts/2` returned alongside `true`. Fields
  that are missing from the document come back as `nil`; a feed or entry date
  that is missing or cannot be parsed as `{ISO:Extended}` is also `nil`.

  Always returns `{:ok, %FeedParser.Feed{}}`.
  """
  @impl FeedParser.Parser
  def parse_feed(feed) do
    # Feed-level authors are the fallback for entries without their own author.
    feed_authors = texts('/feed/author/name/text()', feed)

    items =
      '/feed/entry'
      |> :xmerl_xpath.string(feed)
      |> Enum.map(&parse_entry(&1, feed_authors))

    {:ok,
     %FeedParser.Feed{
       # NOTE: attr/2 only yields a value when exactly one node matches, so a
       # feed carrying several <link> elements ends up with a nil site_url.
       site_url: attr('/feed/link/@href', feed),
       title: text('/feed/title/text()', feed),
       image_url: text('/feed/icon/text()', feed),
       last_updated: parse_date(text('/feed/updated/text()', feed)),
       items: items
     }}
  end

  # Converts a single <entry> element into a `FeedParser.Item`.
  # `feed_authors` is the list of feed-level author names (or nil) used when
  # the entry itself does not declare any author.
  defp parse_entry(entry, feed_authors) do
    links = entry_links(entry)

    %FeedParser.Item{
      guid: text('/entry/id/text()', entry),
      title: text('/entry/title/text()', entry),
      url: entry_url(links),
      links: links,
      # Prefer the full content; fall back to the summary when it is absent.
      content: text('/entry/content/text()', entry) || text('/entry/summary/text()', entry),
      date: parse_date(text('/entry/updated/text()', entry)),
      creator: join_authors(texts('/entry/author/name/text()', entry) || feed_authors)
    }
  end

  # Collects every <link> of an entry as a `{href, rel}` tuple; either element
  # is nil when the corresponding attribute is missing.
  defp entry_links(entry) do
    '/entry/link'
    |> :xmerl_xpath.string(entry)
    |> Enum.map(fn link -> {attr('/link/@href', link), attr('/link/@rel', link)} end)
  end

  # Picks the entry URL: the first link whose rel is "alternate", otherwise the
  # first link of any kind. Returns nil for an entry without links (this case
  # previously raised a CaseClauseError and aborted parsing of the whole feed).
  defp entry_url(links) do
    case Enum.find(links, fn {_href, rel} -> rel == "alternate" end) || List.first(links) do
      {href, _rel} -> href
      nil -> nil
    end
  end

  # Parses an ISO 8601 extended timestamp. Returns nil when the text is missing
  # or Timex cannot parse it.
  defp parse_date(nil), do: nil

  defp parse_date(raw) do
    case Timex.parse(raw, "{ISO:Extended}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Joins author names into one comma-separated string; nil stays nil.
  defp join_authors(nil), do: nil
  defp join_authors(names), do: Enum.join(names, ", ")

  # Returns the trimmed text of the node matched by `xpath`, or nil unless
  # exactly one node matches.
  defp text(xpath, element) do
    case texts(xpath, element) do
      [text] -> text
      _ -> nil
    end
  end

  # Returns the trimmed text of every node matched by `xpath`, or nil (not an
  # empty list) when nothing matches, so callers can chain with `||`.
  defp texts(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [] ->
        nil

      els ->
        Enum.map(els, fn el ->
          # xmerl stores text as a charlist; convert it to a binary first.
          XML.xmlText(el, :value) |> List.to_string() |> String.trim()
        end)
    end
  end

  # Returns the trimmed value of the attribute matched by `xpath`, or nil
  # unless exactly one attribute matches.
  defp attr(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [attr] ->
        XML.xmlAttribute(attr, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end
end