diff --git a/.mix.exs.swp b/.mix.exs.swp new file mode 100644 index 0000000..df7cbe0 Binary files /dev/null and b/.mix.exs.swp differ diff --git a/lib/atom/parser.ex b/lib/atom/parser.ex deleted file mode 100644 index fd7bb2e..0000000 --- a/lib/atom/parser.ex +++ /dev/null @@ -1,87 +0,0 @@ -defmodule FeedParser.Atom.Parser do - alias FeedParser.XML - require XML - - @behaviour FeedParser.Parser - - @impl FeedParser.Parser - def accepts(data, content_type) do - case content_type do - "application/atom+xml" -> - true - - _ when content_type in ["text/xml", "application/xml"] -> - doc = XML.parse(data) - - if XML.xmlElement(doc, :name) == :feed do - {true, doc} - else - false - end - - _ -> - false - end - end - - @impl FeedParser.Parser - def parse_feed(feed) do - title = text('/feed/title/text()', feed) - link = attr('/feed/link/@href', feed) - icon = text('/feed/icon/text()', feed) - - items = - :xmerl_xpath.string('/feed/entry', feed) - |> Enum.map(fn entry -> - id = text('/entry/id/text()', entry) - title = text('/entry/title/text()', entry) - link = attr('/entry/link/@href', entry) - - updated = - text('/entry/updated/text()', entry) - |> Timex.parse("{ISO:Extended}") - |> case do - {:ok, date} -> date - _ -> nil - end - - content = text('/entry/content/text()', entry) || text('/entry/summary/text()', entry) - - %FeedParser.Item{ - guid: id, - title: title, - url: link, - content: content, - date: updated - } - end) - - {:ok, - %FeedParser.Feed{ - site_url: link, - title: title, - image_url: icon, - items: items - }} - end - - defp text(xpath, element) do - case :xmerl_xpath.string(xpath, element) do - [el] -> - XML.xmlText(el, :value) |> List.to_string() |> String.trim() - - _ -> - nil - end - end - - defp attr(xpath, element) do - case :xmerl_xpath.string(xpath, element) do - [attr] -> - XML.xmlAttribute(attr, :value) |> List.to_string() |> String.trim() - - _ -> - nil - end - end -end diff --git a/lib/feed.ex b/lib/feed.ex index c9db358..b0fe8db 100644 --- 
a/lib/feed.ex +++ b/lib/feed.ex @@ -1,4 +1,8 @@ defmodule FeedParser.Feed do + @moduledoc """ + A feed. Contains some information about the site it originates from and a list of items it contains. + """ + defstruct [:site_url, :title, :image_url, :items] @type t() :: %__MODULE__{ diff --git a/lib/feed_parser.ex b/lib/feed_parser.ex index b241700..dd5d237 100644 --- a/lib/feed_parser.ex +++ b/lib/feed_parser.ex @@ -1,11 +1,26 @@ defmodule FeedParser do + @moduledoc """ + FeedParser is a library for parsing feeds of various formats into a unified representation. + + The currently supported formats are: + - RSS 2.0 + - Atom + - JSON Feed + - RSS-in-JSON + """ + @default_parsers [ - FeedParser.RSS2.Parser, - FeedParser.Atom.Parser, - FeedParser.JSONFeed.Parser, - FeedParser.RSSInJSON.Parser + FeedParser.Parser.RSS2, + FeedParser.Parser.Atom, + FeedParser.Parser.JSONFeed, + FeedParser.Parser.RSSInJSON ] + @doc """ + Parses a feed from the given data string and MIME type. + + An extended set of parsers may also be provided; otherwise the default set (supporting RSS 2.0, Atom, JSON Feed, and RSS-in-JSON) will be used. Parsers are modules that implement the `FeedParser.Parser` behaviour. + """ @spec parse(data :: String.t(), content_type :: String.t(), parsers :: [module()]) :: {:ok, feed :: FeedParser.Feed.t()} | {:error, reason :: String.t()} def parse(data, content_type, parsers \\ @default_parsers) when is_binary(data) do diff --git a/lib/item.ex b/lib/item.ex index 0a14fb1..bc42038 100644 --- a/lib/item.ex +++ b/lib/item.ex @@ -1,4 +1,8 @@ defmodule FeedParser.Item do + @moduledoc """ + An item in a feed. Has metadata and content from the item. 
+ """ + defstruct [:guid, :url, :title, :content, :date] @type t() :: %__MODULE__{ diff --git a/lib/jsonfeed/parser.ex b/lib/jsonfeed/parser.ex deleted file mode 100644 index d96488a..0000000 --- a/lib/jsonfeed/parser.ex +++ /dev/null @@ -1,61 +0,0 @@ -defmodule FeedParser.JSONFeed.Parser do - @behaviour FeedParser.Parser - - @impl FeedParser.Parser - def accepts(data, content_type) do - with "application/json" <- content_type, - {:ok, json} <- Poison.decode(data), - %{"version" => "https://jsonfeed.org/version/1"} <- json do - {true, json} - else - _ -> - false - end - end - - @impl FeedParser.Parser - def parse_feed(json) do - title = json["title"] - home_page_url = Map.get(json, "home_page_url") - icon = Map.get(json, "icon") || Map.get(json, "favicon") - - items = - Map.get(json, "items", []) - |> Enum.map(fn item -> - id = item["id"] - - url = - Map.get(item, "url") || if String.starts_with?(id, ~r/https?:\/\//), do: id, else: nil - - title = Map.get(item, "title") - - content = - Map.get(item, "content_html") || Map.get(item, "content_text") || - Map.get(item, "summary") - - date = - (Map.get(item, "date_published") || Map.get(item, "date_updated")) - |> Timex.parse("{RFC3339}") - |> case do - {:ok, date} -> date - _ -> nil - end - - %FeedParser.Item{ - guid: id, - url: url, - title: title, - content: content, - date: date - } - end) - - {:ok, - %FeedParser.Feed{ - site_url: home_page_url, - title: title, - image_url: icon, - items: items - }} - end -end diff --git a/lib/parser.ex b/lib/parser.ex index c718910..9c7c734 100644 --- a/lib/parser.ex +++ b/lib/parser.ex @@ -1,5 +1,22 @@ defmodule FeedParser.Parser do - @callback accepts(data :: String.t(), content_type :: String.t()) :: {true, any()} | false - @callback parse_feed(data :: any()) :: + @moduledoc """ + This behaviour defines the functions required to implement a feed parser. + """ + + @doc """ + Determines whether this Parser supports parsing a feed from the given data and MIME type. 
+ + If this parser can handle the data, it should return a tuple of `true` and any object (usually the parsed form of the data). The returned object will then be passed to the `parse_feed` function. + Otherwise, it should return `false`. + """ + @callback accepts(data :: String.t(), content_type :: String.t()) :: + {true, parsed_data :: any()} | false + + @doc """ + Creates a `FeedParser.Feed` from the parsed data returned by the `accepts` function. + + Returns either a tuple of `:ok` and the parsed Feed or `:error` and the reason for the error. + """ + @callback parse_feed(parsed_data :: any()) :: {:ok, feed :: FeedParser.Feed.t()} | {:error, reason :: String.t()} end diff --git a/lib/rss2/parser.ex b/lib/rss2/parser.ex deleted file mode 100644 index 0047754..0000000 --- a/lib/rss2/parser.ex +++ /dev/null @@ -1,77 +0,0 @@ -defmodule FeedParser.RSS2.Parser do - alias FeedParser.XML - require XML - - @behaviour FeedParser.Parser - - @impl FeedParser.Parser - def accepts(data, content_type) do - case content_type do - "application/rss+xml" -> - {true, XML.parse(data)} - - _ when content_type in ["text/xml", "application/xml"] -> - doc = XML.parse(data) - - if XML.xmlElement(doc, :name) == :rss do - {true, doc} - else - false - end - - _ -> - false - end - end - - @impl FeedParser.Parser - def parse_feed(rss) do - [channel] = :xmerl_xpath.string('/rss/channel', rss) - title = text('/channel/title/text()', channel) - link = text('/channel/link/text()', channel) - image = text('/channel/image/url/text()', channel) - - items = - :xmerl_xpath.string('/channel/item', channel) - |> Enum.map(fn item -> - guid = text('/item/guid/text()', item) - title = text('/item/title/text()', item) - link = text('/item/link/text()', item) - description = text('/item/description/text()', item) - - pubDate = - text('/item/pubDate/text()', item) - |> Timex.parse("{RFC1123}") - |> case do - {:ok, date} -> date - _ -> nil - end - - %FeedParser.Item{ - guid: guid, - title: title, - url: link, 
- content: description, - date: pubDate - } - end) - - {:ok, - %FeedParser.Feed{ - site_url: link, - title: title, - image_url: image, - items: items - }} - end - - defp text(xpath, element) do - case :xmerl_xpath.string(xpath, element) do - [el] -> - XML.xmlText(el, :value) |> List.to_string() |> String.trim() - - _ -> - nil - end - end -end diff --git a/lib/rssinjson/parser.ex b/lib/rssinjson/parser.ex deleted file mode 100644 index 54687c7..0000000 --- a/lib/rssinjson/parser.ex +++ /dev/null @@ -1,61 +0,0 @@ -defmodule FeedParser.RSSInJSON.Parser do - @behaviour FeedParser.Parser - - @impl FeedParser.Parser - def accepts(data, content_type) do - with "application/json" <- content_type, - {:ok, json} <- Poison.decode(data), - %{"rss" => %{"version" => "2.0"} = rss} <- json do - {true, rss} - else - _ -> - false - end - end - - @impl FeedParser.Parser - def parse_feed(rss) do - channel = rss["channel"] - title = channel["title"] - link = channel["link"] - - image = - case channel do - %{"image" => %{"url" => url}} -> url - _ -> nil - end - - items = - channel["item"] - |> Enum.map(fn item -> - guid = item["guid"] - link = item["link"] - title = Map.get(item, "title") - content = item["description"] - - pubDate = - item["pubDate"] - |> Timex.parse("{RFC1123}") - |> case do - {:ok, date} -> date - _ -> nil - end - - %FeedParser.Item{ - guid: guid, - url: link, - title: title, - content: content, - date: pubDate - } - end) - - {:ok, - %FeedParser.Feed{ - site_url: link, - title: title, - image_url: image, - items: items - }} - end -end diff --git a/lib/xml.ex b/lib/xml.ex index aacc3fd..de8d397 100644 --- a/lib/xml.ex +++ b/lib/xml.ex @@ -1,4 +1,8 @@ defmodule FeedParser.XML do + @moduledoc """ + A set of helpers for working with XML. To use this module, you must `require` it. 
+ """ + import Record defrecord :xmlElement, extract(:xmlElement, from_lib: "xmerl/include/xmerl.hrl") diff --git a/mix.exs b/mix.exs index 9c9dd43..eddcf5c 100644 --- a/mix.exs +++ b/mix.exs @@ -7,7 +7,16 @@ defmodule FeedParser.MixProject do version: "0.1.0", elixir: "~> 1.9", start_permanent: Mix.env() == :prod, - deps: deps() + deps: deps(), + + # Docs + name: "FeedParser", + source_url: "https://git.shadowfacts.net/shadowfacts/feed_parser", + docs: [ + main: "FeedParser", + source_url_pattern: + "https://git.shadowfacts.net/shadowfacts/feed_parser/src/branch/master/%{path}#L%{line}" + ] ] end @@ -21,10 +30,9 @@ defmodule FeedParser.MixProject do # Run "mix help deps" to learn about dependencies. defp deps do [ + {:ex_doc, "~> 0.21", only: :dev, runtime: false}, {:timex, "~> 3.6.1"}, {:poison, "~> 4.0.1"} - # {:dep_from_hexpm, "~> 0.3.0"}, - # {:dep_from_git, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"} ] end end diff --git a/mix.lock b/mix.lock index 71ca143..e560663 100644 --- a/mix.lock +++ b/mix.lock @@ -1,11 +1,16 @@ %{ "certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"}, "combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm"}, + "earmark": {:hex, :earmark, "1.3.6", "ce1d0675e10a5bb46b007549362bd3f5f08908843957687d8484fe7f37466b19", [:mix], [], "hexpm"}, + "ex_doc": {:hex, :ex_doc, "0.21.2", "caca5bc28ed7b3bdc0b662f8afe2bee1eedb5c3cf7b322feeeb7c6ebbde089d6", [:mix], [{:earmark, "~> 1.3.3 or ~> 1.4", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"}, "gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm"}, "hackney": {:hex, :hackney, "1.15.1", 
"9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"}, "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"}, + "makeup": {:hex, :makeup, "1.0.0", "671df94cf5a594b739ce03b0d0316aa64312cee2574b6a44becb83cd90fb05dc", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"}, + "makeup_elixir": {:hex, :makeup_elixir, "0.14.0", "cf8b7c66ad1cff4c14679698d532f0b5d45a3968ffbcbfd590339cb57742f1ae", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"}, "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"}, + "nimble_parsec": {:hex, :nimble_parsec, "0.5.1", "c90796ecee0289dbb5ad16d3ad06f957b0cd1199769641c961cfe0b97db190e0", [:mix], [], "hexpm"}, "parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"}, "poison": {:hex, :poison, "4.0.1", "bcb755a16fac91cad79bfe9fc3585bb07b9331e50cfe3420a24bcc2d735709ae", [:mix], [], "hexpm"}, "saxy": {:hex, :saxy, "0.10.0", "38879f46a595862c22114792c71379355ecfcfa0f713b1cfcc59e1d4127f1f55", [:mix], [], "hexpm"},