Browse Source

Initial commit

master
Shadowfacts 2 years ago
commit
42a3b5344c
Signed by: shadowfacts GPG Key ID: 94A5AB95422746E5
  1. 4
      .formatter.exs
  2. 31
      .gitignore
  3. 21
      README.md
  4. 87
      lib/atom/parser.ex
  5. 10
      lib/feed.ex
  6. 25
      lib/feed_parser.ex
  7. 11
      lib/item.ex
  8. 61
      lib/jsonfeed/parser.ex
  9. 5
      lib/parser.ex
  10. 77
      lib/rss2/parser.ex
  11. 17
      lib/xml.ex
  12. 30
      mix.exs
  13. 16
      mix.lock
  14. 8
      test/feed_parser_test.exs
  15. 1
      test/test_helper.exs

4
.formatter.exs

@ -0,0 +1,4 @@
# Used by "mix format".
# `inputs` lists the files and glob patterns that the formatter is run on:
# the project's own mix/formatter scripts plus every .ex/.exs file under
# config/, lib/ and test/.
[
inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
]

31
.gitignore

@ -0,0 +1,31 @@
# The directory Mix will write compiled artifacts to.
/_build/
# If you run "mix test --cover", coverage assets end up here.
/cover/
# The directory Mix downloads your dependencies sources to.
/deps/
# Where 3rd-party dependencies like ExDoc output generated docs.
/doc/
# Ignore .fetch files in case you like to edit your project deps locally.
/.fetch
# If the VM crashes, it generates a dump, let's ignore it too.
erl_crash.dump
# Also ignore archive artifacts (built via "mix archive.build").
*.ez
# Ignore package tarball (built via "mix hex.build").
# The tarball is named after the application (:feed_parser in mix.exs).
feed_parser-*.tar
# Files matching config/*.secret.exs pattern contain sensitive
# data and you should not commit them into version control.
#
# Alternatively, you may comment the line below and commit the
# secrets files as long as you replace their contents by environment
# variables.
/config/*.secret.exs

21
README.md

@ -0,0 +1,21 @@
# FeedParser
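FeedParser is an Elixir library that parses RSS 2.0, Atom, and JSON Feed documents into a common `FeedParser.Feed` struct containing a list of `FeedParser.Item` structs.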
## Installation
If [available in Hex](https://hex.pm/docs/publish), the package can be installed
by adding `feed_parser` to your list of dependencies in `mix.exs`:
```elixir
def deps do
[
{:feed_parser, "~> 0.1.0"}
]
end
```
Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
be found at [https://hexdocs.pm/feed_parser](https://hexdocs.pm/feed_parser).

87
lib/atom/parser.ex

@ -0,0 +1,87 @@
defmodule FeedParser.Atom.Parser do
  @moduledoc """
  `FeedParser.Parser` implementation for Atom feeds.

  `accepts/2` decides whether the given data is an Atom document and, if so,
  returns the parsed XML document. `parse_feed/1` turns that document into a
  `FeedParser.Feed` struct.
  """

  alias FeedParser.XML
  require XML

  @behaviour FeedParser.Parser

  @doc """
  Checks whether `data` with the given `content_type` is an Atom feed.

  Returns `{true, doc}` (where `doc` is the parsed XML document) when:

    * the content type is `"application/atom+xml"`, or
    * the content type is `"text/xml"` or `"application/xml"` and the root
      element of the document is `feed`.

  Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    case content_type do
      "application/atom+xml" ->
        # The behaviour's contract is `{true, any()} | false`. Returning a bare
        # `true` here is not a valid "accepted" result and carries no parsed
        # document for `parse_feed/1` to work with.
        {true, XML.parse(data)}

      _ when content_type in ["text/xml", "application/xml"] ->
        doc = XML.parse(data)

        # Generic XML content type: only accept when the root element is <feed>.
        if XML.xmlElement(doc, :name) == :feed do
          {true, doc}
        else
          false
        end

      _ ->
        false
    end
  end

  @doc """
  Converts a parsed Atom document into a `FeedParser.Feed`.

  `feed` is the XML document returned as the second element of the
  `{true, doc}` tuple from `accepts/2`. Always returns `{:ok, feed}`; fields
  whose source element is missing are `nil`.
  """
  @impl FeedParser.Parser
  def parse_feed(feed) do
    title = text(~c"/feed/title/text()", feed)
    link = attr(~c"/feed/link/@href", feed)
    icon = text(~c"/feed/icon/text()", feed)

    items =
      ~c"/feed/entry"
      |> :xmerl_xpath.string(feed)
      |> Enum.map(&parse_entry/1)

    {:ok,
     %FeedParser.Feed{
       site_url: link,
       title: title,
       image_url: icon,
       items: items
     }}
  end

  # Builds a `FeedParser.Item` from a single <entry> element.
  defp parse_entry(entry) do
    # Prefer the full content and fall back to the summary when it is absent.
    content =
      text(~c"/entry/content/text()", entry) || text(~c"/entry/summary/text()", entry)

    %FeedParser.Item{
      guid: text(~c"/entry/id/text()", entry),
      title: text(~c"/entry/title/text()", entry),
      url: attr(~c"/entry/link/@href", entry),
      content: content,
      date: parse_date(text(~c"/entry/updated/text()", entry))
    }
  end

  # Parses an ISO 8601 extended timestamp; returns nil when the value is
  # missing or cannot be parsed.
  defp parse_date(nil), do: nil

  defp parse_date(value) do
    case Timex.parse(value, "{ISO:Extended}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Returns the trimmed text of the single text node matched by `xpath`, or nil
  # when the XPath matches zero or more than one node.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [el] ->
        XML.xmlText(el, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end

  # Returns the trimmed value of the single attribute matched by `xpath`, or
  # nil when the XPath matches zero or more than one attribute.
  defp attr(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [attr] ->
        XML.xmlAttribute(attr, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end
end

10
lib/feed.ex

@ -0,0 +1,10 @@
defmodule FeedParser.Feed do
  @moduledoc """
  A parsed feed, independent of the source format.

  Fields:

    * `:site_url` - URL of the site the feed belongs to, or `nil` if the feed
      does not provide one
    * `:title` - title of the feed, or `nil` if the feed does not provide one
    * `:image_url` - URL of the feed's image/icon, or `nil`
    * `:items` - the feed's entries as `FeedParser.Item` structs
  """

  defstruct [:site_url, :title, :image_url, :items]

  # `site_url` and `title` are nilable: the parsers fill them with `nil` when
  # the corresponding element or key is missing from the source document.
  @type t() :: %__MODULE__{
          site_url: String.t() | nil,
          title: String.t() | nil,
          image_url: String.t() | nil,
          items: [FeedParser.Item.t()]
        }
end

25
lib/feed_parser.ex

@ -0,0 +1,25 @@
defmodule FeedParser do
  @moduledoc """
  Entry point for parsing feed documents.

  A list of parser modules is tried in order; the first one whose `accepts/2`
  returns `{true, result}` is used to parse the feed.
  """

  @default_parsers [FeedParser.RSS2.Parser, FeedParser.Atom.Parser, FeedParser.JSONFeed.Parser]

  @doc """
  Parses `data` using the first parser in `parsers` that accepts it.

  Each parser's `accepts/2` is called with `data` and `content_type`. The
  first parser answering `{true, result}` has its `parse_feed/1` called with
  `result`, and that call's return value is returned. Any other answer from
  `accepts/2` means the parser is skipped.

  Returns `{:error, reason}` when no parser accepts the input.
  """
  @spec parse_feed(data :: String.t(), content_type :: String.t(), parsers :: [module()]) ::
          {:ok, feed :: FeedParser.Feed.t()} | {:error, reason :: String.t()}
  def parse_feed(data, content_type, parsers \\ @default_parsers) when is_binary(data) do
    # find_value/2 stops at the first parser producing a truthy value and
    # yields nil when every parser declines.
    selection =
      Enum.find_value(parsers, fn candidate ->
        case candidate.accepts(data, content_type) do
          {true, parsed} -> {candidate, parsed}
          _ -> nil
        end
      end)

    case selection do
      nil ->
        {:error, "no parser matched the given content type and data"}

      {chosen, parsed} ->
        chosen.parse_feed(parsed)
    end
  end
end

11
lib/item.ex

@ -0,0 +1,11 @@
defmodule FeedParser.Item do
  @moduledoc """
  A single entry of a parsed feed, independent of the source format.

  Fields:

    * `:guid` - the entry's identifier as given by the feed, or `nil`
    * `:url` - URL of the entry, or `nil`
    * `:title` - title of the entry, or `nil`
    * `:content` - body of the entry, or `nil` if the feed provides none
    * `:date` - date of the entry, or `nil` if it is missing or could not be
      parsed
  """

  defstruct [:guid, :url, :title, :content, :date]

  # Every field is nilable: the parsers store `nil` for any value that is
  # missing from the source document, and for dates that fail to parse.
  @type t() :: %__MODULE__{
          guid: String.t() | nil,
          url: String.t() | nil,
          title: String.t() | nil,
          content: String.t() | nil,
          date: DateTime.t() | nil
        }
end

61
lib/jsonfeed/parser.ex

@ -0,0 +1,61 @@
defmodule FeedParser.JSONFeed.Parser do
  @moduledoc """
  `FeedParser.Parser` implementation for JSON Feed documents.

  `accepts/2` decodes the JSON and checks its `"version"` key; `parse_feed/1`
  turns the decoded map into a `FeedParser.Feed` struct.
  """

  @behaviour FeedParser.Parser

  # "application/feed+json" is the MIME type recommended by JSON Feed 1.1;
  # "application/json" is what version 1 feeds are served as.
  @content_types ["application/json", "application/feed+json"]

  # Version 1.1 is a backward-compatible revision of version 1, so both are
  # handled by the same parsing code.
  @versions ["https://jsonfeed.org/version/1", "https://jsonfeed.org/version/1.1"]

  @doc """
  Checks whether `data` with the given `content_type` is a JSON Feed.

  Returns `{true, json}` (where `json` is the decoded document) when the
  content type is a JSON content type, `data` decodes successfully and its
  `"version"` is a supported JSON Feed version URL. Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    with true <- content_type in @content_types,
         {:ok, %{"version" => version} = json} <- Poison.decode(data),
         true <- version in @versions do
      {true, json}
    else
      _ ->
        false
    end
  end

  @doc """
  Converts a decoded JSON Feed document into a `FeedParser.Feed`.

  `json` is the map returned as the second element of the `{true, json}` tuple
  from `accepts/2`. Always returns `{:ok, feed}`; fields whose key is missing
  are `nil`.
  """
  @impl FeedParser.Parser
  def parse_feed(json) do
    # `|| []` also covers an explicit `"items": null`, which a default passed
    # to Map.get/3 would not.
    items = Enum.map(json["items"] || [], &parse_item/1)

    {:ok,
     %FeedParser.Feed{
       site_url: json["home_page_url"],
       title: json["title"],
       image_url: json["icon"] || json["favicon"],
       items: items
     }}
  end

  # Builds a `FeedParser.Item` from one element of the "items" array.
  defp parse_item(item) do
    id = item["id"]

    %FeedParser.Item{
      guid: id,
      # Fall back to the id when it is itself an HTTP(S) URL.
      url: item["url"] || url_from_id(id),
      title: item["title"],
      content: item["content_html"] || item["content_text"] || item["summary"],
      # "date_modified" is the key defined by the JSON Feed spec;
      # "date_updated" is kept as a last resort for backward compatibility.
      date:
        parse_date(item["date_published"] || item["date_modified"] || item["date_updated"])
    }
  end

  # Returns `id` when it is a string starting with an HTTP(S) scheme, otherwise
  # nil. String.starts_with?/2 takes a string or a list of strings as the
  # prefix; it raises when given a regex.
  defp url_from_id(id) when is_binary(id) do
    if String.starts_with?(id, ["http://", "https://"]), do: id, else: nil
  end

  defp url_from_id(_id), do: nil

  # Parses an RFC 3339 timestamp; returns nil when the value is missing, is
  # not a string, or cannot be parsed.
  defp parse_date(value) when is_binary(value) do
    case Timex.parse(value, "{RFC3339}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  defp parse_date(_value), do: nil
end

5
lib/parser.ex

@ -0,0 +1,5 @@
defmodule FeedParser.Parser do
  @moduledoc """
  Behaviour implemented by the individual feed format parsers.
  """

  @doc """
  Decides whether this parser can handle `data` served with `content_type`.

  Returns `{true, result}` to accept, where `result` is whatever value should
  later be passed to `c:parse_feed/1`, or `false` to decline.
  """
  @callback accepts(data :: String.t(), content_type :: String.t()) ::
              false | {true, any()}

  @doc """
  Parses the value previously returned by `c:accepts/2` into a feed.
  """
  @callback parse_feed(data :: any()) ::
              {:error, reason :: String.t()} | {:ok, feed :: FeedParser.Feed.t()}
end

77
lib/rss2/parser.ex

@ -0,0 +1,77 @@
defmodule FeedParser.RSS2.Parser do
  @moduledoc """
  `FeedParser.Parser` implementation for RSS 2.0 feeds.

  `accepts/2` decides whether the given data is an RSS document and, if so,
  returns the parsed XML document. `parse_feed/1` turns that document into a
  `FeedParser.Feed` struct.
  """

  alias FeedParser.XML
  require XML

  @behaviour FeedParser.Parser

  @doc """
  Checks whether `data` with the given `content_type` is an RSS feed.

  Returns `{true, doc}` (where `doc` is the parsed XML document) when:

    * the content type is `"application/rss+xml"`, or
    * the content type is `"text/xml"` or `"application/xml"` and the root
      element of the document is `rss`.

  Returns `false` otherwise.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    case content_type do
      "application/rss+xml" ->
        {true, XML.parse(data)}

      _ when content_type in ["text/xml", "application/xml"] ->
        doc = XML.parse(data)

        # Generic XML content type: only accept when the root element is <rss>.
        if XML.xmlElement(doc, :name) == :rss do
          {true, doc}
        else
          false
        end

      _ ->
        false
    end
  end

  @doc """
  Converts a parsed RSS document into a `FeedParser.Feed`.

  `rss` is the XML document returned as the second element of the
  `{true, doc}` tuple from `accepts/2`.

  Returns `{:ok, feed}`, or `{:error, reason}` when the document has no
  `<channel>` element under the `<rss>` root. Fields whose source element is
  missing are `nil`.
  """
  @impl FeedParser.Parser
  def parse_feed(rss) do
    # A document accepted purely on its content type is not guaranteed to
    # contain a channel, so report that as an error instead of raising a
    # MatchError.
    case :xmerl_xpath.string(~c"/rss/channel", rss) do
      [channel | _] ->
        {:ok, parse_channel(channel)}

      _ ->
        {:error, "RSS document does not contain a channel element"}
    end
  end

  # Builds the `FeedParser.Feed` from the <channel> element.
  defp parse_channel(channel) do
    items =
      ~c"/channel/item"
      |> :xmerl_xpath.string(channel)
      |> Enum.map(&parse_item/1)

    %FeedParser.Feed{
      site_url: text(~c"/channel/link/text()", channel),
      title: text(~c"/channel/title/text()", channel),
      image_url: text(~c"/channel/image/url/text()", channel),
      items: items
    }
  end

  # Builds a `FeedParser.Item` from a single <item> element.
  defp parse_item(item) do
    %FeedParser.Item{
      guid: text(~c"/item/guid/text()", item),
      title: text(~c"/item/title/text()", item),
      url: text(~c"/item/link/text()", item),
      content: text(~c"/item/description/text()", item),
      date: parse_date(text(~c"/item/pubDate/text()", item))
    }
  end

  # Parses an RFC 1123 timestamp; returns nil when the value is missing or
  # cannot be parsed.
  defp parse_date(nil), do: nil

  defp parse_date(value) do
    case Timex.parse(value, "{RFC1123}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Returns the trimmed text of the single text node matched by `xpath`, or nil
  # when the XPath matches zero or more than one node.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [el] ->
        XML.xmlText(el, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end
end

17
lib/xml.ex

@ -0,0 +1,17 @@
defmodule FeedParser.XML do
  @moduledoc """
  Thin wrapper around Erlang's xmerl.

  Defines Elixir record macros for the `xmlElement`, `xmlAttribute` and
  `xmlText` records from `xmerl/include/xmerl.hrl`, and a helper for parsing
  a binary into an xmerl document.
  """

  require Record

  @xmerl_hrl "xmerl/include/xmerl.hrl"

  Record.defrecord(:xmlElement, Record.extract(:xmlElement, from_lib: @xmerl_hrl))
  Record.defrecord(:xmlAttribute, Record.extract(:xmlAttribute, from_lib: @xmerl_hrl))
  Record.defrecord(:xmlText, Record.extract(:xmlText, from_lib: @xmerl_hrl))

  @doc """
  Parses `data` with `:xmerl_scan.string/1` and returns the resulting document.

  The binary is converted to a list of bytes first; whatever
  `:xmerl_scan.string/1` returns as the second tuple element is discarded.
  """
  @spec parse(data :: String.t()) :: tuple()
  def parse(data) do
    bytes = :binary.bin_to_list(data)
    {document, _rest} = :xmerl_scan.string(bytes)
    document
  end
end

30
mix.exs

@ -0,0 +1,30 @@
defmodule FeedParser.MixProject do
  use Mix.Project

  # Project definition read by Mix.
  def project do
    [
      app: :feed_parser,
      version: "0.1.0",
      elixir: "~> 1.9",
      start_permanent: Mix.env() == :prod,
      deps: deps()
    ]
  end

  # Run "mix help compile.app" to learn about applications.
  def application do
    [
      # :xmerl ships with Erlang/OTP and is called directly by the XML-based
      # parsers (:xmerl_scan, :xmerl_xpath), so it has to be declared here to
      # be treated as a runtime dependency of this application.
      extra_applications: [:logger, :xmerl]
    ]
  end

  # Run "mix help deps" to learn about dependencies.
  defp deps do
    [
      {:timex, "~> 3.6.1"},
      {:poison, "~> 4.0.1"}
    ]
  end
end

16
mix.lock

@ -0,0 +1,16 @@
%{
"certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"},
"combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm"},
"gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm"},
"hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
"idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"},
"parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"},
"poison": {:hex, :poison, "4.0.1", "bcb755a16fac91cad79bfe9fc3585bb07b9331e50cfe3420a24bcc2d735709ae", [:mix], [], "hexpm"},
"saxy": {:hex, :saxy, "0.10.0", "38879f46a595862c22114792c71379355ecfcfa0f713b1cfcc59e1d4127f1f55", [:mix], [], "hexpm"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"},
"timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm"},
"tzdata": {:hex, :tzdata, "1.0.1", "f6027a331af7d837471248e62733c6ebee86a72e57c613aa071ebb1f750fc71a", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
"unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm"},
}

8
test/feed_parser_test.exs

@ -0,0 +1,8 @@
defmodule FeedParserTest do
  use ExUnit.Case, async: true
  doctest FeedParser

  # Stub parser that never accepts anything.
  defmodule RejectingParser do
    @behaviour FeedParser.Parser

    @impl FeedParser.Parser
    def accepts(_data, _content_type), do: false

    @impl FeedParser.Parser
    def parse_feed(_data), do: {:error, "should never be called"}
  end

  # Stub parser that accepts only the "text/x-stub" content type and hands the
  # upcased data to parse_feed/1, so the tests can see that the value returned
  # from accepts/2 is what gets parsed.
  defmodule AcceptingParser do
    @behaviour FeedParser.Parser

    @impl FeedParser.Parser
    def accepts(data, "text/x-stub"), do: {true, String.upcase(data)}
    def accepts(_data, _content_type), do: false

    @impl FeedParser.Parser
    def parse_feed(data), do: {:ok, %FeedParser.Feed{title: data, items: []}}
  end

  describe "parse_feed/3" do
    test "returns an error when none of the default parsers accepts the input" do
      assert {:error, reason} = FeedParser.parse_feed("not a feed", "text/plain")
      assert is_binary(reason)
    end

    test "returns an error when the parser list is empty" do
      assert {:error, _reason} = FeedParser.parse_feed("data", "text/x-stub", [])
    end

    test "skips parsers that decline and uses the first one that accepts" do
      parsers = [RejectingParser, AcceptingParser]

      assert {:ok, %FeedParser.Feed{title: "DATA", items: []}} =
               FeedParser.parse_feed("data", "text/x-stub", parsers)
    end

    test "returns an error when every given parser declines" do
      parsers = [RejectingParser, AcceptingParser]

      assert {:error, _reason} = FeedParser.parse_feed("data", "text/plain", parsers)
    end
  end
end

1
test/test_helper.exs

@ -0,0 +1 @@
# Start ExUnit so the project's test modules can be run.
ExUnit.start()
Loading…
Cancel
Save