2019-09-01 03:25:10 +00:00
|
|
|
defmodule FeedParser.Parser.Atom do
|
|
|
|
@moduledoc """
|
|
|
|
A `FeedParser.Parser` that handles [Atom feeds](https://validator.w3.org/feed/docs/atom.html).
|
|
|
|
"""
|
|
|
|
|
2019-08-31 23:02:41 +00:00
|
|
|
alias FeedParser.XML
|
|
|
|
require XML
|
|
|
|
|
|
|
|
@behaviour FeedParser.Parser
|
|
|
|
|
|
|
|
@impl FeedParser.Parser
|
|
|
|
@doc """
Decides whether this parser can handle `data` served with `content_type`.

  * `"application/atom+xml"` - always accepted; `data` is parsed with
    `FeedParser.XML.parse/1`.
  * `"text/xml"` / `"application/xml"` - `data` is parsed, and accepted only
    when the name of the element returned by the parse is `:feed`.
  * anything else - rejected without parsing.

Returns `{true, parsed_document}` when accepted, `false` otherwise.
"""
def accepts(data, "application/atom+xml"), do: {true, XML.parse(data)}

def accepts(data, content_type) when content_type in ["text/xml", "application/xml"] do
  parsed = XML.parse(data)

  # Generic XML content types carry many formats; only a `feed` element is Atom.
  case XML.xmlElement(parsed, :name) do
    :feed -> {true, parsed}
    _other -> false
  end
end

def accepts(_data, _content_type), do: false
|
|
|
|
|
|
|
|
@impl FeedParser.Parser
|
|
|
|
@doc """
Builds a `FeedParser.Feed` from a parsed Atom document.

`feed` is the xmerl element for the `<feed>` node (the document returned by
`accepts/2`). Always returns `{:ok, %FeedParser.Feed{}}`; any piece of data
that is absent from the document is left as `nil`:

  * `site_url` - the feed's preferred link (see link selection below)
  * `title` / `image_url` - text of `<title>` / `<icon>`
  * `last_updated` - `<updated>` parsed as an extended ISO 8601 timestamp
  * `items` - one `FeedParser.Item` per `<entry>`

For every entry, `creator` is the entry's author names joined with `", "`,
falling back to the feed-level authors, or `nil` when neither is present.

Link selection (feed and entries): an explicit `rel="alternate"` link wins,
then a link without `rel`, then the first link of any kind.
"""
def parse_feed(feed) do
  # Entries without their own <author> inherit the feed-level authors.
  feed_author = texts(~c"/feed/author/name/text()", feed)

  items =
    ~c"/feed/entry"
    |> :xmerl_xpath.string(feed)
    |> Enum.map(&parse_entry(&1, feed_author))

  {:ok,
   %FeedParser.Feed{
     # Feeds commonly carry several <link> elements (e.g. "self" and
     # "alternate"), so pick one instead of requiring exactly one.
     site_url: ~c"/feed/link" |> collect_links(feed) |> primary_url(),
     title: text(~c"/feed/title/text()", feed),
     image_url: text(~c"/feed/icon/text()", feed),
     last_updated: parse_date(text(~c"/feed/updated/text()", feed)),
     items: items
   }}
end

# Converts a single <entry> element into a `FeedParser.Item`.
defp parse_entry(entry, feed_author) do
  links = collect_links(~c"/entry/link", entry)
  authors = texts(~c"/entry/author/name/text()", entry) || feed_author

  %FeedParser.Item{
    guid: text(~c"/entry/id/text()", entry),
    title: text(~c"/entry/title/text()", entry),
    url: primary_url(links),
    links: links,
    content: text(~c"/entry/content/text()", entry) || text(~c"/entry/summary/text()", entry),
    date: parse_date(text(~c"/entry/updated/text()", entry)),
    # `authors` is nil when neither the entry nor the feed names an author;
    # joining nil would raise, so leave the creator unset in that case.
    creator: authors && Enum.join(authors, ", ")
  }
end

# Returns `{href, rel}` for every <link> element matched by `xpath` under `element`.
defp collect_links(xpath, element) do
  xpath
  |> :xmerl_xpath.string(element)
  |> Enum.map(fn link -> {attr(~c"/link/@href", link), attr(~c"/link/@rel", link)} end)
end

# Picks the href to use as the canonical URL from a list of `{href, rel}` tuples;
# returns nil when the list is empty.
defp primary_url(links) do
  # RFC 4287 (4.2.7.2): a link without `rel` is to be read as rel="alternate".
  chosen =
    Enum.find(links, &match?({_href, "alternate"}, &1)) ||
      Enum.find(links, &match?({_href, nil}, &1)) ||
      List.first(links)

  case chosen do
    {href, _rel} -> href
    nil -> nil
  end
end

# Parses an extended ISO 8601 timestamp; nil for missing or unparseable input.
defp parse_date(nil), do: nil

defp parse_date(value) do
  case Timex.parse(value, "{ISO:Extended}") do
    {:ok, date} -> date
    _ -> nil
  end
end
|
|
|
|
|
|
|
|
# Returns the trimmed text selected by `xpath` under `element`, but only when
# the expression yields exactly one text node. Zero matches, or more than one
# match, both produce nil.
defp text(xpath, element) do
  with [only] <- texts(xpath, element) do
    only
  else
    _no_single_match -> nil
  end
end
|
2019-08-31 23:02:41 +00:00
|
|
|
|
2021-09-03 21:06:08 +00:00
|
|
|
# Evaluates `xpath` against `element` and returns the value of every matched
# text node as a trimmed binary, in match order. Returns nil (not an empty
# list) when nothing matches, so callers can chain fallbacks with `||`.
defp texts(xpath, element) do
  nodes = :xmerl_xpath.string(xpath, element)

  if nodes == [] do
    nil
  else
    for node <- nodes do
      # xmerl stores text as a charlist; convert it to a binary before trimming.
      chars = XML.xmlText(node, :value)
      chars |> List.to_string() |> String.trim()
    end
  end
end
|
|
|
|
|
|
|
|
# Returns the trimmed value of the attribute selected by `xpath` under
# `element`, but only when the expression yields exactly one attribute node.
# Any other result (no match, several matches) produces nil.
defp attr(xpath, element) do
  with [node] <- :xmerl_xpath.string(xpath, element) do
    # xmerl stores attribute values as charlists; convert before trimming.
    chars = XML.xmlAttribute(node, :value)
    chars |> List.to_string() |> String.trim()
  else
    _no_single_match -> nil
  end
end
|
|
|
|
end
|