Compare commits

..

1 Commits

Author SHA1 Message Date
Shadowfacts 39fa3f80a6
Restructure code and add documentation 2019-08-31 23:25:24 -04:00
4 changed files with 302 additions and 0 deletions

91
lib/parser/atom.ex Normal file
View File

@ -0,0 +1,91 @@
defmodule FeedParser.Parser.Atom do
  @moduledoc """
  A `FeedParser.Parser` that handles [Atom feeds](https://validator.w3.org/feed/docs/atom.html).
  """

  @behaviour FeedParser.Parser

  alias FeedParser.XML
  require XML

  @doc """
  Decides whether `data` should be handled by this parser.

  Returns `{true, doc}`, where `doc` is the parsed XML document, when `content_type` is
  `"application/atom+xml"`, or when it is a generic XML type (`"text/xml"` or
  `"application/xml"`) and the root element is `<feed>`. Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, "application/atom+xml") do
    # Hand back the parsed document rather than a bare `true`: `parse_feed/1` runs XPath
    # queries on its argument, and the generic-XML clause below returns this same shape.
    {true, XML.parse(data)}
  end

  def accepts(data, content_type) when content_type in ["text/xml", "application/xml"] do
    doc = XML.parse(data)

    if XML.xmlElement(doc, :name) == :feed do
      {true, doc}
    else
      false
    end
  end

  def accepts(_data, _content_type), do: false

  @doc """
  Builds a `FeedParser.Feed` from a parsed Atom document.

  `feed` is the document returned in the second element of `accepts/2`. Always returns
  `{:ok, feed}`; fields whose elements are missing are set to `nil`.
  """
  @impl FeedParser.Parser
  def parse_feed(feed) do
    items =
      :xmerl_xpath.string('/feed/entry', feed)
      |> Enum.map(&parse_entry/1)

    {:ok,
     %FeedParser.Feed{
       site_url: link('/feed/link', feed),
       title: text('/feed/title/text()', feed),
       image_url: text('/feed/icon/text()', feed),
       items: items
     }}
  end

  # Converts a single `<entry>` element into a `FeedParser.Item`.
  defp parse_entry(entry) do
    %FeedParser.Item{
      guid: text('/entry/id/text()', entry),
      title: text('/entry/title/text()', entry),
      url: link('/entry/link', entry),
      # Fall back to the summary when the entry carries no textual content.
      content: text('/entry/content/text()', entry) || text('/entry/summary/text()', entry),
      date: parse_date(text('/entry/updated/text()', entry))
    }
  end

  # Parses an ISO 8601 extended timestamp, returning `nil` when it is absent or invalid.
  defp parse_date(nil), do: nil

  defp parse_date(value) do
    case Timex.parse(value, "{ISO:Extended}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Picks the `href` of the most relevant `<link>` element selected by `xpath`.
  #
  # Atom documents commonly carry several links (e.g. `rel="self"` next to
  # `rel="alternate"`). The first link that is an alternate, or that has no `rel` at all
  # (which RFC 4287 defines as "alternate"), wins. If none qualifies, a lone link is still
  # returned as before; anything else yields `nil`.
  defp link(xpath, element) do
    links =
      for el <- :xmerl_xpath.string(xpath, element),
          href = attr('/link/@href', el) do
        {attr('/link/@rel', el), href}
      end

    case Enum.find(links, fn {rel, _href} -> rel in [nil, "alternate"] end) do
      {_rel, href} ->
        href

      nil ->
        case links do
          [{_rel, href}] -> href
          _ -> nil
        end
    end
  end

  # Returns the trimmed value of the single text node selected by `xpath`, or `nil` when
  # the query matches zero or several nodes.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [el] ->
        XML.xmlText(el, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end

  # Returns the trimmed value of the single attribute selected by `xpath`, or `nil` when
  # the query matches zero or several attributes.
  defp attr(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [attr] ->
        XML.xmlAttribute(attr, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end
end

65
lib/parser/jsonfeed.ex Normal file
View File

@ -0,0 +1,65 @@
defmodule FeedParser.Parser.JSONFeed do
  @moduledoc """
  A `FeedParser.Parser` that handles [JSON Feeds](https://jsonfeed.org/version/1).
  """

  @behaviour FeedParser.Parser

  # Version identifiers this parser understands. Version 1.1 only adds optional fields on
  # top of version 1, so both are read the same way.
  @versions ["https://jsonfeed.org/version/1", "https://jsonfeed.org/version/1.1"]

  @doc """
  Decides whether `data` should be handled by this parser.

  Returns `{true, json}` with the decoded document when `content_type` is
  `"application/json"`, `data` decodes successfully and its `"version"` is a known JSON
  Feed version. Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    with "application/json" <- content_type,
         {:ok, json} <- Poison.decode(data),
         %{"version" => version} when version in @versions <- json do
      {true, json}
    else
      _ ->
        false
    end
  end

  @doc """
  Builds a `FeedParser.Feed` from a decoded JSON Feed document.

  `json` is the map returned in the second element of `accepts/2`. Always returns
  `{:ok, feed}`; keys that are missing from the document result in `nil` fields.
  """
  @impl FeedParser.Parser
  def parse_feed(json) do
    items =
      json
      |> Map.get("items", [])
      |> Enum.map(&parse_item/1)

    {:ok,
     %FeedParser.Feed{
       site_url: json["home_page_url"],
       title: json["title"],
       image_url: json["icon"] || json["favicon"],
       items: items
     }}
  end

  # Converts a single entry of the `"items"` array into a `FeedParser.Item`.
  defp parse_item(item) do
    id = item["id"]

    %FeedParser.Item{
      guid: id,
      # When the item has no explicit URL, an id that is itself an HTTP(S) URL is used.
      url: item["url"] || url_from_id(id),
      title: item["title"],
      content: item["content_html"] || item["content_text"] || item["summary"],
      # "date_modified" is the key defined by the spec; "date_updated" is kept as a last
      # resort because earlier versions of this parser looked for it.
      date:
        parse_date(item["date_published"] || item["date_modified"] || item["date_updated"])
    }
  end

  # Returns `id` when it is an HTTP(S) URL, `nil` otherwise. Binary prefix matching is
  # used because `String.starts_with?/2` does not accept a regex, and because ids that
  # are not strings (or are missing) must not crash the parser.
  defp url_from_id("http://" <> _rest = id), do: id
  defp url_from_id("https://" <> _rest = id), do: id
  defp url_from_id(_id), do: nil

  # Parses an RFC 3339 timestamp, returning `nil` when it is absent or invalid.
  defp parse_date(value) when is_binary(value) do
    case Timex.parse(value, "{RFC3339}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  defp parse_date(_value), do: nil
end

81
lib/parser/rss2.ex Normal file
View File

@ -0,0 +1,81 @@
defmodule FeedParser.Parser.RSS2 do
  @moduledoc """
  A `FeedParser.Parser` that handles [RSS 2.0 feeds](https://cyber.harvard.edu/rss/rss.html).
  """

  @behaviour FeedParser.Parser

  alias FeedParser.XML
  require XML

  @doc """
  Decides whether `data` should be handled by this parser.

  Returns `{true, doc}`, where `doc` is the parsed XML document, when `content_type` is
  `"application/rss+xml"`, or when it is a generic XML type (`"text/xml"` or
  `"application/xml"`) and the root element is `<rss>`. Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    case content_type do
      "application/rss+xml" ->
        {true, XML.parse(data)}

      _ when content_type in ["text/xml", "application/xml"] ->
        doc = XML.parse(data)

        if XML.xmlElement(doc, :name) == :rss do
          {true, doc}
        else
          false
        end

      _ ->
        false
    end
  end

  @doc """
  Builds a `FeedParser.Feed` from a parsed RSS 2.0 document.

  `rss` is the document returned in the second element of `accepts/2`. Returns
  `{:ok, feed}` when the document has exactly one `/rss/channel` element, and
  `{:error, reason}` otherwise.
  """
  @impl FeedParser.Parser
  def parse_feed(rss) do
    # `accepts/2` does not inspect the root element for "application/rss+xml", so a
    # document without a channel can reach this point. Report it as an error tuple
    # instead of raising a `MatchError`.
    case :xmerl_xpath.string('/rss/channel', rss) do
      [channel] ->
        {:ok, parse_channel(channel)}

      _ ->
        {:error, "expected exactly one /rss/channel element"}
    end
  end

  # Converts the `<channel>` element into a `FeedParser.Feed`.
  defp parse_channel(channel) do
    items =
      :xmerl_xpath.string('/channel/item', channel)
      |> Enum.map(&parse_item/1)

    %FeedParser.Feed{
      site_url: text('/channel/link/text()', channel),
      title: text('/channel/title/text()', channel),
      image_url: text('/channel/image/url/text()', channel),
      items: items
    }
  end

  # Converts a single `<item>` element into a `FeedParser.Item`.
  defp parse_item(item) do
    %FeedParser.Item{
      guid: text('/item/guid/text()', item),
      title: text('/item/title/text()', item),
      url: text('/item/link/text()', item),
      content: text('/item/description/text()', item),
      date: parse_date(text('/item/pubDate/text()', item))
    }
  end

  # Parses a `pubDate` value with Timex's "{RFC1123}" format, returning `nil` when the
  # value is absent or does not parse.
  defp parse_date(nil), do: nil

  defp parse_date(value) do
    case Timex.parse(value, "{RFC1123}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Returns the trimmed value of the single text node selected by `xpath`, or `nil` when
  # the query matches zero or several nodes.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [el] ->
        XML.xmlText(el, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end
end

65
lib/parser/rssinjson.ex Normal file
View File

@ -0,0 +1,65 @@
defmodule FeedParser.Parser.RSSInJSON do
  @moduledoc """
  A `FeedParser.Parser` that handles [RSS-in-JSON feeds](https://github.com/scripting/Scripting-News/blob/2347bb8751d94dc76d25074997db4fde682585f9/rss-in-json/README.md).
  """

  @behaviour FeedParser.Parser

  @doc """
  Decides whether `data` should be handled by this parser.

  Returns `{true, rss}` with the decoded `"rss"` object when `content_type` is
  `"application/json"`, `data` decodes successfully and the object's `"version"` is
  `"2.0"`. Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    with "application/json" <- content_type,
         {:ok, json} <- Poison.decode(data),
         %{"rss" => %{"version" => "2.0"} = rss} <- json do
      {true, rss}
    else
      _ ->
        false
    end
  end

  @doc """
  Builds a `FeedParser.Feed` from a decoded RSS-in-JSON document.

  `rss` is the map returned in the second element of `accepts/2`. Always returns
  `{:ok, feed}`; keys that are missing from the document result in `nil` fields, and a
  channel without `"item"` produces an empty item list.
  """
  @impl FeedParser.Parser
  def parse_feed(rss) do
    channel = rss["channel"]

    image =
      case channel do
        %{"image" => %{"url" => url}} -> url
        _ -> nil
      end

    # `List.wrap/1` turns a missing "item" key into an empty list and a lone item object
    # into a one-element list, so `Enum.map/2` always receives a list of items.
    items =
      channel["item"]
      |> List.wrap()
      |> Enum.map(&parse_item/1)

    {:ok,
     %FeedParser.Feed{
       site_url: channel["link"],
       title: channel["title"],
       image_url: image,
       items: items
     }}
  end

  # Converts a single entry of the channel's "item" list into a `FeedParser.Item`.
  defp parse_item(item) do
    %FeedParser.Item{
      guid: item["guid"],
      url: item["link"],
      title: item["title"],
      content: item["description"],
      date: parse_date(item["pubDate"])
    }
  end

  # Parses a `pubDate` value with Timex's "{RFC1123}" format, returning `nil` when the
  # value is absent, not a string, or does not parse.
  defp parse_date(value) when is_binary(value) do
    case Timex.parse(value, "{RFC1123}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  defp parse_date(_value), do: nil
end