Restructure code and add documentation

This commit is contained in:
Shadowfacts 2019-08-31 23:25:10 -04:00
parent 42dc662c79
commit f3fc8ed718
Signed by: shadowfacts
GPG Key ID: 94A5AB95422746E5
12 changed files with 66 additions and 295 deletions

BIN
.mix.exs.swp Normal file

Binary file not shown.

View File

@ -1,87 +0,0 @@
defmodule FeedParser.Atom.Parser do
  @moduledoc """
  Feed parser for Atom documents.

  Implements the `FeedParser.Parser` behaviour using `:xmerl_xpath` queries and
  the record accessors from `FeedParser.XML`.
  """

  @behaviour FeedParser.Parser

  alias FeedParser.XML
  require XML

  @doc """
  Decides whether `data` should be handled by this parser.

  * `"application/atom+xml"` is always accepted; the data is parsed with
    `FeedParser.XML.parse/1` and returned as `{true, doc}`.
  * `"text/xml"` and `"application/xml"` are accepted only when the root element
    of the parsed document is named `feed`.
  * Every other content type yields `false`.
  """
  @impl FeedParser.Parser
  # The parsed document has to be returned alongside `true`: the behaviour's
  # contract is `{true, parsed_data} | false`, and `parse_feed/1` needs the document.
  def accepts(data, "application/atom+xml"), do: {true, XML.parse(data)}

  def accepts(data, content_type) when content_type in ["text/xml", "application/xml"] do
    doc = XML.parse(data)

    if XML.xmlElement(doc, :name) == :feed do
      {true, doc}
    else
      false
    end
  end

  def accepts(_data, _content_type), do: false

  @doc """
  Builds a `FeedParser.Feed` from the parsed document returned by `accepts/2`.

  Always returns `{:ok, feed}`. Fields whose XPath query does not match exactly
  one node are set to `nil`.
  """
  @impl FeedParser.Parser
  def parse_feed(feed) do
    items =
      ~c"/feed/entry"
      |> :xmerl_xpath.string(feed)
      |> Enum.map(&parse_entry/1)

    {:ok,
     %FeedParser.Feed{
       site_url: attr(~c"/feed/link/@href", feed),
       title: text(~c"/feed/title/text()", feed),
       image_url: text(~c"/feed/icon/text()", feed),
       items: items
     }}
  end

  # Converts a single `entry` element into a `FeedParser.Item`.
  defp parse_entry(entry) do
    %FeedParser.Item{
      guid: text(~c"/entry/id/text()", entry),
      title: text(~c"/entry/title/text()", entry),
      url: attr(~c"/entry/link/@href", entry),
      # The summary is only consulted when no single content text node was found.
      content: text(~c"/entry/content/text()", entry) || text(~c"/entry/summary/text()", entry),
      date: parse_date(text(~c"/entry/updated/text()", entry))
    }
  end

  # Parses an `updated` timestamp; a missing or unparseable value becomes `nil`.
  defp parse_date(nil), do: nil

  defp parse_date(string) do
    case Timex.parse(string, "{ISO:Extended}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Returns the trimmed value of the single text node matched by `xpath`,
  # or `nil` when zero or several nodes match.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [el] -> el |> XML.xmlText(:value) |> List.to_string() |> String.trim()
      _ -> nil
    end
  end

  # Returns the trimmed value of the single attribute matched by `xpath`,
  # or `nil` when zero or several attributes match.
  defp attr(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [attr] -> attr |> XML.xmlAttribute(:value) |> List.to_string() |> String.trim()
      _ -> nil
    end
  end
end

View File

@ -1,4 +1,8 @@
defmodule FeedParser.Feed do
@moduledoc """
A feed. Contains some information about the site it originates from and a list of items it contains.
"""
defstruct [:site_url, :title, :image_url, :items]
@type t() :: %__MODULE__{

View File

@ -1,11 +1,26 @@
defmodule FeedParser do
@moduledoc """
FeedParser is a library for parsing feeds of various formats into a unified representation.
The currently supported formats are:
- RSS 2.0
- Atom
- JSON Feed
- RSS-in-JSON
"""
@default_parsers [
FeedParser.RSS2.Parser,
FeedParser.Atom.Parser,
FeedParser.JSONFeed.Parser,
FeedParser.RSSInJSON.Parser
FeedParser.Parser.RSS2,
FeedParser.Parser.Atom,
FeedParser.Parser.JSONFeed,
FeedParser.Parser.RSSInJSON
]
@doc """
Parses a feed from the given data string and MIME type.
An extended set of parsers may also be provided, otherwise the default set (supporting RSS 2.0, Atom, JSON Feed, and RSS-in-JSON) will be used. Parsers are modules that implement the `FeedParser.Parser` behaviour.
"""
@spec parse(data :: String.t(), content_type :: String.t(), parsers :: [module()]) ::
{:ok, feed :: FeedParser.Feed.t()} | {:error, reason :: String.t()}
def parse(data, content_type, parsers \\ @default_parsers) when is_binary(data) do

View File

@ -1,4 +1,8 @@
defmodule FeedParser.Item do
@moduledoc """
An item in a feed. Has metadata and content from the item.
"""
defstruct [:guid, :url, :title, :content, :date]
@type t() :: %__MODULE__{

View File

@ -1,61 +0,0 @@
defmodule FeedParser.JSONFeed.Parser do
  @moduledoc """
  Feed parser for JSON Feed (version 1) documents.

  Implements the `FeedParser.Parser` behaviour; JSON decoding is done with `Poison`.
  """

  @behaviour FeedParser.Parser

  @doc """
  Accepts `data` when the content type is `"application/json"`, the data decodes
  as JSON, and the decoded map's `"version"` is `"https://jsonfeed.org/version/1"`.

  Returns `{true, json}` with the decoded map, or `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    with "application/json" <- content_type,
         {:ok, json} <- Poison.decode(data),
         %{"version" => "https://jsonfeed.org/version/1"} <- json do
      {true, json}
    else
      _ -> false
    end
  end

  @doc """
  Builds a `FeedParser.Feed` from the decoded JSON map returned by `accepts/2`.

  Always returns `{:ok, feed}`. A missing or `null` `"items"` value produces an
  empty item list.
  """
  @impl FeedParser.Parser
  def parse_feed(json) do
    items = Enum.map(json["items"] || [], &parse_item/1)

    {:ok,
     %FeedParser.Feed{
       site_url: json["home_page_url"],
       title: json["title"],
       # Fall back to the favicon when no icon is given.
       image_url: json["icon"] || json["favicon"],
       items: items
     }}
  end

  # Converts one decoded item map into a `FeedParser.Item`.
  defp parse_item(item) do
    id = item["id"]

    %FeedParser.Item{
      guid: id,
      url: item["url"] || url_from_id(id),
      title: item["title"],
      content: item["content_html"] || item["content_text"] || item["summary"],
      # "date_modified" is the field name used by JSON Feed; "date_updated" is
      # still consulted last so input the previous code accepted keeps working.
      date: parse_date(item["date_published"] || item["date_modified"] || item["date_updated"])
    }
  end

  # Uses the item id as its URL when the id is an http(s) URL string.
  # Binary prefix matching replaces `String.starts_with?/2` with a regex, which
  # that function does not accept; non-string ids simply yield `nil`.
  defp url_from_id("http://" <> _rest = id), do: id
  defp url_from_id("https://" <> _rest = id), do: id
  defp url_from_id(_id), do: nil

  # Parses an RFC 3339 timestamp; a missing, non-string, or unparseable value
  # becomes `nil`.
  defp parse_date(string) when is_binary(string) do
    case Timex.parse(string, "{RFC3339}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  defp parse_date(_other), do: nil
end

View File

@ -1,5 +1,22 @@
defmodule FeedParser.Parser do
@callback accepts(data :: String.t(), content_type :: String.t()) :: {true, any()} | false
@callback parse_feed(data :: any()) ::
@moduledoc """
This behaviour defines the functions required to implement a feed parser.
"""
@doc """
Determines whether this Parser supports parsing a feed from the given data and MIME type.
If this parser can handle the data, it should return a tuple of `true` and any term (usually the parsed form of the data). The returned term will then be passed to the `parse_feed` function.
Otherwise, it should return `false`.
"""
@callback accepts(data :: String.t(), content_type :: String.t()) ::
{true, parsed_data :: any()} | false
@doc """
Creates a `FeedParser.Feed` from the parsed data returned by the accepts function.
Returns either a tuple of `:ok` and the parsed Feed or `:error` and the reason for the error.
"""
@callback parse_feed(parsed_data :: any()) ::
{:ok, feed :: FeedParser.Feed.t()} | {:error, reason :: String.t()}
end

View File

@ -1,77 +0,0 @@
defmodule FeedParser.RSS2.Parser do
  @moduledoc """
  Feed parser for RSS 2.0 documents.

  Implements the `FeedParser.Parser` behaviour using `:xmerl_xpath` queries and
  the record accessors from `FeedParser.XML`.
  """

  @behaviour FeedParser.Parser

  alias FeedParser.XML
  require XML

  @doc """
  Decides whether `data` should be handled by this parser.

  * `"application/rss+xml"` is always accepted; the data is parsed with
    `FeedParser.XML.parse/1` and returned as `{true, doc}`.
  * `"text/xml"` and `"application/xml"` are accepted only when the root element
    of the parsed document is named `rss`.
  * Every other content type yields `false`.
  """
  @impl FeedParser.Parser
  def accepts(data, "application/rss+xml"), do: {true, XML.parse(data)}

  def accepts(data, content_type) when content_type in ["text/xml", "application/xml"] do
    doc = XML.parse(data)

    if XML.xmlElement(doc, :name) == :rss do
      {true, doc}
    else
      false
    end
  end

  def accepts(_data, _content_type), do: false

  @doc """
  Builds a `FeedParser.Feed` from the parsed document returned by `accepts/2`.

  Returns `{:ok, feed}` when the document contains exactly one `/rss/channel`
  element, and `{:error, reason}` otherwise. Fields whose XPath query does not
  match exactly one text node are set to `nil`.
  """
  @impl FeedParser.Parser
  def parse_feed(rss) do
    # Report a malformed document through the behaviour's error tuple instead
    # of raising a MatchError.
    case :xmerl_xpath.string(~c"/rss/channel", rss) do
      [channel] -> {:ok, build_feed(channel)}
      _ -> {:error, "expected exactly one channel element in the RSS document"}
    end
  end

  # Assembles the feed struct from the `channel` element.
  defp build_feed(channel) do
    items =
      ~c"/channel/item"
      |> :xmerl_xpath.string(channel)
      |> Enum.map(&parse_item/1)

    %FeedParser.Feed{
      site_url: text(~c"/channel/link/text()", channel),
      title: text(~c"/channel/title/text()", channel),
      image_url: text(~c"/channel/image/url/text()", channel),
      items: items
    }
  end

  # Converts a single `item` element into a `FeedParser.Item`.
  defp parse_item(item) do
    %FeedParser.Item{
      guid: text(~c"/item/guid/text()", item),
      title: text(~c"/item/title/text()", item),
      url: text(~c"/item/link/text()", item),
      content: text(~c"/item/description/text()", item),
      date: parse_date(text(~c"/item/pubDate/text()", item))
    }
  end

  # Parses a `pubDate` value; a missing or unparseable value becomes `nil`.
  defp parse_date(nil), do: nil

  defp parse_date(string) do
    case Timex.parse(string, "{RFC1123}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Returns the trimmed value of the single text node matched by `xpath`,
  # or `nil` when zero or several nodes match.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [el] -> el |> XML.xmlText(:value) |> List.to_string() |> String.trim()
      _ -> nil
    end
  end
end

View File

@ -1,61 +0,0 @@
defmodule FeedParser.RSSInJSON.Parser do
  @moduledoc """
  Feed parser for RSS-in-JSON documents.

  Implements the `FeedParser.Parser` behaviour; JSON decoding is done with `Poison`.
  """

  @behaviour FeedParser.Parser

  @doc """
  Accepts `data` when the content type is `"application/json"`, the data decodes
  as JSON, and the decoded map has an `"rss"` object whose `"version"` is `"2.0"`.

  Returns `{true, rss}` with the value of the `"rss"` key, or `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    with "application/json" <- content_type,
         {:ok, json} <- Poison.decode(data),
         %{"rss" => %{"version" => "2.0"} = rss} <- json do
      {true, rss}
    else
      _ -> false
    end
  end

  @doc """
  Builds a `FeedParser.Feed` from the `"rss"` map returned by `accepts/2`.

  Returns `{:ok, feed}` when the map has a `"channel"` object and
  `{:error, reason}` otherwise. A channel without `"item"` produces an empty
  item list; a single item given as an object is treated as a one-element list.
  """
  @impl FeedParser.Parser
  def parse_feed(%{"channel" => channel}) when is_map(channel) do
    # List.wrap/1 maps nil to [] and wraps a lone item object in a list, so
    # Enum.map/2 never receives a non-enumerable value.
    items =
      channel["item"]
      |> List.wrap()
      |> Enum.map(&parse_item/1)

    {:ok,
     %FeedParser.Feed{
       site_url: channel["link"],
       title: channel["title"],
       image_url: image_url(channel),
       items: items
     }}
  end

  def parse_feed(_rss), do: {:error, "RSS-in-JSON document has no channel object"}

  # Extracts the channel image URL, or `nil` when the channel has none.
  defp image_url(%{"image" => %{"url" => url}}), do: url
  defp image_url(_channel), do: nil

  # Converts one decoded item map into a `FeedParser.Item`.
  defp parse_item(item) do
    %FeedParser.Item{
      guid: item["guid"],
      url: item["link"],
      title: item["title"],
      content: item["description"],
      date: parse_date(item["pubDate"])
    }
  end

  # Parses a `pubDate` value; a missing, non-string, or unparseable value
  # becomes `nil`.
  defp parse_date(string) when is_binary(string) do
    case Timex.parse(string, "{RFC1123}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  defp parse_date(_other), do: nil
end

View File

@ -1,4 +1,8 @@
defmodule FeedParser.XML do
@moduledoc """
A set of helpers for working with XML. To use this module, you must `require` it.
"""
import Record
defrecord :xmlElement, extract(:xmlElement, from_lib: "xmerl/include/xmerl.hrl")

14
mix.exs
View File

@ -7,7 +7,16 @@ defmodule FeedParser.MixProject do
version: "0.1.0",
elixir: "~> 1.9",
start_permanent: Mix.env() == :prod,
deps: deps()
deps: deps(),
# Docs
name: "FeedParser",
source_url: "https://git.shadowfacts.net/shadowfacts/feed_parser",
docs: [
main: "FeedParser",
source_url_pattern:
"https://git.shadowfacts.net/shadowfacts/feed_parser/src/branch/master/%{path}#L%{line}"
]
]
end
@ -21,10 +30,9 @@ defmodule FeedParser.MixProject do
# Run "mix help deps" to learn about dependencies.
# Returns the dependency list for this Mix project.
defp deps do
[
# Declared for the :dev environment only and with `runtime: false`.
{:ex_doc, "~> 0.21", only: :dev, runtime: false},
# Provides the `Timex.parse/2` calls the feed parsers use for dates.
{:timex, "~> 3.6.1"},
# Provides the `Poison.decode/1` calls the JSON-based parsers use.
{:poison, "~> 4.0.1"}
# {:dep_from_hexpm, "~> 0.3.0"},
# {:dep_from_git, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"}
]
end
end

View File

@ -1,11 +1,16 @@
%{
"certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"},
"combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm"},
"earmark": {:hex, :earmark, "1.3.6", "ce1d0675e10a5bb46b007549362bd3f5f08908843957687d8484fe7f37466b19", [:mix], [], "hexpm"},
"ex_doc": {:hex, :ex_doc, "0.21.2", "caca5bc28ed7b3bdc0b662f8afe2bee1eedb5c3cf7b322feeeb7c6ebbde089d6", [:mix], [{:earmark, "~> 1.3.3 or ~> 1.4", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},
"gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm"},
"hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
"idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},
"makeup": {:hex, :makeup, "1.0.0", "671df94cf5a594b739ce03b0d0316aa64312cee2574b6a44becb83cd90fb05dc", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
"makeup_elixir": {:hex, :makeup_elixir, "0.14.0", "cf8b7c66ad1cff4c14679698d532f0b5d45a3968ffbcbfd590339cb57742f1ae", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"},
"nimble_parsec": {:hex, :nimble_parsec, "0.5.1", "c90796ecee0289dbb5ad16d3ad06f957b0cd1199769641c961cfe0b97db190e0", [:mix], [], "hexpm"},
"parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"},
"poison": {:hex, :poison, "4.0.1", "bcb755a16fac91cad79bfe9fc3585bb07b9331e50cfe3420a24bcc2d735709ae", [:mix], [], "hexpm"},
"saxy": {:hex, :saxy, "0.10.0", "38879f46a595862c22114792c71379355ecfcfa0f713b1cfcc59e1d4127f1f55", [:mix], [], "hexpm"},