Compare commits
1 Commits
f3fc8ed718
...
39fa3f80a6
Author | SHA1 | Date |
---|---|---|
Shadowfacts | 39fa3f80a6 |
|
@ -0,0 +1,91 @@
|
|||
defmodule FeedParser.Parser.Atom do
  @moduledoc """
  A `FeedParser.Parser` that handles [Atom feeds](https://validator.w3.org/feed/docs/atom.html).
  """

  alias FeedParser.XML
  require XML

  @behaviour FeedParser.Parser

  @doc """
  Decides whether this parser should handle `data`.

  Returns `{true, doc}`, where `doc` is the result of `FeedParser.XML.parse/1`, when
  `content_type` is `"application/atom+xml"`, or when it is `"text/xml"` /
  `"application/xml"` and the root element is named `feed`. Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, "application/atom+xml") do
    # Return the parsed document (not a bare `true`) so `parse_feed/1` receives
    # something it can run XPath queries against, like the generic XML branch does.
    {true, XML.parse(data)}
  end

  def accepts(data, content_type) when content_type in ["text/xml", "application/xml"] do
    doc = XML.parse(data)

    # Generic XML content types are shared with other formats, so check the root element.
    if XML.xmlElement(doc, :name) == :feed, do: {true, doc}, else: false
  end

  def accepts(_data, _content_type), do: false

  @doc """
  Builds a `FeedParser.Feed` from the parsed XML document returned by `accepts/2`.

  Always returns `{:ok, feed}`; fields whose XPath query does not match are `nil`.
  """
  @impl FeedParser.Parser
  def parse_feed(feed) do
    items =
      ~c"/feed/entry"
      |> :xmerl_xpath.string(feed)
      |> Enum.map(&parse_entry/1)

    {:ok,
     %FeedParser.Feed{
       site_url: link_href(~c"/feed/link", feed),
       title: text(~c"/feed/title/text()", feed),
       image_url: text(~c"/feed/icon/text()", feed),
       items: items
     }}
  end

  # Converts a single `entry` element into a `FeedParser.Item`.
  defp parse_entry(entry) do
    %FeedParser.Item{
      guid: text(~c"/entry/id/text()", entry),
      title: text(~c"/entry/title/text()", entry),
      url: link_href(~c"/entry/link", entry),
      # Prefer the full content and fall back to the summary.
      content: text(~c"/entry/content/text()", entry) || text(~c"/entry/summary/text()", entry),
      date: parse_date(text(~c"/entry/updated/text()", entry))
    }
  end

  # Resolves the href of the link element(s) selected by `link_xpath`.
  #
  # A sole link is used as-is. When several links are present `attr/2` yields nil,
  # so fall back to the one marked rel="alternate", then to the one without a
  # `rel` attribute (RFC 4287 treats a missing `rel` as "alternate").
  defp link_href(link_xpath, element) do
    attr(link_xpath ++ ~c"/@href", element) ||
      attr(link_xpath ++ ~c"[@rel='alternate']/@href", element) ||
      attr(link_xpath ++ ~c"[not(@rel)]/@href", element)
  end

  # Parses an ISO 8601 extended timestamp; a missing or unparseable value becomes nil.
  defp parse_date(nil), do: nil

  defp parse_date(timestamp) do
    case Timex.parse(timestamp, "{ISO:Extended}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Returns the trimmed value of the text node matched by `xpath`,
  # or nil unless exactly one node matches.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [el] ->
        XML.xmlText(el, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end

  # Returns the trimmed value of the attribute matched by `xpath`,
  # or nil unless exactly one attribute matches.
  defp attr(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [attr] ->
        XML.xmlAttribute(attr, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end
end
|
|
@ -0,0 +1,65 @@
|
|||
defmodule FeedParser.Parser.JSONFeed do
  @moduledoc """
  A `FeedParser.Parser` that handles [JSON Feeds](https://jsonfeed.org/version/1),
  including [version 1.1](https://jsonfeed.org/version/1.1) documents.
  """

  @behaviour FeedParser.Parser

  # Version 1 of the spec asks for `application/json`; version 1.1 recommends
  # `application/feed+json`.
  @content_types ["application/json", "application/feed+json"]
  @versions ["https://jsonfeed.org/version/1", "https://jsonfeed.org/version/1.1"]

  @doc """
  Decides whether this parser should handle `data`.

  Returns `{true, json}` with the decoded document when `content_type` is a JSON
  type, `data` decodes successfully and its `"version"` is a known JSON Feed
  version URL. Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    with true <- content_type in @content_types,
         {:ok, %{"version" => version} = json} when version in @versions <-
           Poison.decode(data) do
      {true, json}
    else
      _ -> false
    end
  end

  @doc """
  Builds a `FeedParser.Feed` from the decoded JSON map returned by `accepts/2`.

  Always returns `{:ok, feed}`; absent keys become `nil` and an absent or null
  `"items"` key yields an empty item list.
  """
  @impl FeedParser.Parser
  def parse_feed(json) do
    items = Enum.map(json["items"] || [], &parse_item/1)

    {:ok,
     %FeedParser.Feed{
       site_url: json["home_page_url"],
       title: json["title"],
       image_url: json["icon"] || json["favicon"],
       items: items
     }}
  end

  # Converts one entry of the `"items"` array into a `FeedParser.Item`.
  defp parse_item(item) do
    id = item["id"]

    %FeedParser.Item{
      guid: id,
      url: item["url"] || url_from_id(id),
      title: item["title"],
      content: item["content_html"] || item["content_text"] || item["summary"],
      # `date_modified` is the field name defined by the spec; `date_updated` is
      # still consulted last so documents that relied on it keep working.
      date:
        parse_date(item["date_published"] || item["date_modified"] || item["date_updated"])
    }
  end

  # Uses the item id as its URL when the id is itself an HTTP(S) URL.
  # `String.starts_with?/2` takes a string or list of strings (not a regex), and
  # the guard keeps non-string ids (e.g. numbers, nil) from raising.
  defp url_from_id(id) when is_binary(id) do
    if String.starts_with?(id, ["http://", "https://"]), do: id, else: nil
  end

  defp url_from_id(_id), do: nil

  # Parses an RFC 3339 timestamp; a missing, non-string or unparseable value becomes nil.
  defp parse_date(timestamp) when is_binary(timestamp) do
    case Timex.parse(timestamp, "{RFC3339}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  defp parse_date(_timestamp), do: nil
end
|
|
@ -0,0 +1,81 @@
|
|||
defmodule FeedParser.Parser.RSS2 do
  @moduledoc """
  A `FeedParser.Parser` that handles [RSS 2.0 feeds](https://cyber.harvard.edu/rss/rss.html).
  """

  alias FeedParser.XML
  require XML

  @behaviour FeedParser.Parser

  @doc """
  Decides whether this parser should handle `data`.

  Returns `{true, doc}`, where `doc` is the result of `FeedParser.XML.parse/1`, when
  `content_type` is `"application/rss+xml"`, or when it is `"text/xml"` /
  `"application/xml"` and the root element is named `rss`. Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, "application/rss+xml"), do: {true, XML.parse(data)}

  def accepts(data, content_type) when content_type in ["text/xml", "application/xml"] do
    doc = XML.parse(data)

    # Generic XML content types are shared with other formats, so check the root element.
    if XML.xmlElement(doc, :name) == :rss, do: {true, doc}, else: false
  end

  def accepts(_data, _content_type), do: false

  @doc """
  Builds a `FeedParser.Feed` from the parsed XML document returned by `accepts/2`.

  Returns `{:ok, feed}` when the document has a `/rss/channel` element (the first
  one is used), and an `{:error, reason}` tuple when it has none.
  """
  @impl FeedParser.Parser
  def parse_feed(rss) do
    case :xmerl_xpath.string(~c"/rss/channel", rss) do
      [channel | _] ->
        {:ok, parse_channel(channel)}

      _ ->
        # Reachable for `application/rss+xml` input, whose root element is never
        # checked by `accepts/2`; report it instead of raising a MatchError.
        # NOTE(review): assumes the behaviour allows `{:error, String.t()}` — confirm.
        {:error, "RSS document does not contain a channel element"}
    end
  end

  # Converts the `channel` element into a `FeedParser.Feed`.
  defp parse_channel(channel) do
    items =
      ~c"/channel/item"
      |> :xmerl_xpath.string(channel)
      |> Enum.map(&parse_item/1)

    %FeedParser.Feed{
      site_url: text(~c"/channel/link/text()", channel),
      title: text(~c"/channel/title/text()", channel),
      image_url: text(~c"/channel/image/url/text()", channel),
      items: items
    }
  end

  # Converts a single `item` element into a `FeedParser.Item`.
  defp parse_item(item) do
    %FeedParser.Item{
      guid: text(~c"/item/guid/text()", item),
      title: text(~c"/item/title/text()", item),
      url: text(~c"/item/link/text()", item),
      content: text(~c"/item/description/text()", item),
      date: parse_date(text(~c"/item/pubDate/text()", item))
    }
  end

  # Parses an RFC 1123 timestamp; a missing or unparseable value becomes nil.
  defp parse_date(nil), do: nil

  defp parse_date(timestamp) do
    case Timex.parse(timestamp, "{RFC1123}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Returns the trimmed value of the text node matched by `xpath`,
  # or nil unless exactly one node matches.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [el] ->
        XML.xmlText(el, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end
end
|
|
@ -0,0 +1,65 @@
|
|||
defmodule FeedParser.Parser.RSSInJSON do
  @moduledoc """
  A `FeedParser.Parser` that handles [RSS-in-JSON feeds](https://github.com/scripting/Scripting-News/blob/2347bb8751d94dc76d25074997db4fde682585f9/rss-in-json/README.md).
  """

  @behaviour FeedParser.Parser

  @doc """
  Decides whether this parser should handle `data`.

  Returns `{true, rss}` with the decoded `"rss"` object when `content_type` is
  `"application/json"`, `data` decodes successfully and the object's `"version"`
  is `"2.0"`. Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    with "application/json" <- content_type,
         {:ok, json} <- Poison.decode(data),
         %{"rss" => %{"version" => "2.0"} = rss} <- json do
      {true, rss}
    else
      _ -> false
    end
  end

  @doc """
  Builds a `FeedParser.Feed` from the `"rss"` object returned by `accepts/2`.

  Returns `{:ok, feed}` when the object has a `"channel"` map, and an
  `{:error, reason}` tuple otherwise.
  """
  @impl FeedParser.Parser
  def parse_feed(%{"channel" => %{} = channel}) do
    # `"item"` may be absent, a list, or a lone object when the feed has a single
    # entry; `List.wrap/1` normalises all three to a list.
    items =
      channel["item"]
      |> List.wrap()
      |> Enum.map(&parse_item/1)

    {:ok,
     %FeedParser.Feed{
       site_url: channel["link"],
       title: channel["title"],
       image_url: image_url(channel),
       items: items
     }}
  end

  # NOTE(review): assumes the behaviour allows `{:error, String.t()}` — confirm.
  def parse_feed(_rss), do: {:error, "RSS-in-JSON document does not contain a channel"}

  # Extracts the channel image URL, or nil when the nested keys are not present.
  defp image_url(%{"image" => %{"url" => url}}), do: url
  defp image_url(_channel), do: nil

  # Converts one entry of the channel's `"item"` collection into a `FeedParser.Item`.
  defp parse_item(item) do
    %FeedParser.Item{
      guid: item["guid"],
      url: item["link"],
      title: item["title"],
      content: item["description"],
      date: parse_date(item["pubDate"])
    }
  end

  # Parses an RFC 1123 timestamp; a missing, non-string or unparseable value becomes nil.
  defp parse_date(timestamp) when is_binary(timestamp) do
    case Timex.parse(timestamp, "{RFC1123}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  defp parse_date(_timestamp), do: nil
end
|
Loading…
Reference in New Issue