Initial commit
This commit is contained in:
commit
42a3b5344c
|
@ -0,0 +1,4 @@
|
|||
# Formatter configuration consumed by "mix format".
[inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]]
|
|
@ -0,0 +1,31 @@
|
|||
# The directory Mix will write compiled artifacts to.
|
||||
/_build/
|
||||
|
||||
# If you run "mix test --cover", coverage assets end up here.
|
||||
/cover/
|
||||
|
||||
# The directory Mix downloads your dependencies sources to.
|
||||
/deps/
|
||||
|
||||
# Where 3rd-party dependencies like ExDoc output generated docs.
|
||||
/doc/
|
||||
|
||||
# Ignore .fetch files in case you like to edit your project deps locally.
|
||||
/.fetch
|
||||
|
||||
# If the VM crashes, it generates a dump, let's ignore it too.
|
||||
erl_crash.dump
|
||||
|
||||
# Also ignore archive artifacts (built via "mix archive.build").
|
||||
*.ez
|
||||
|
||||
# Ignore package tarball (built via "mix hex.build").
|
||||
feed_parser-*.tar
|
||||
|
||||
# Files matching config/*.secret.exs pattern contain sensitive
|
||||
# data and you should not commit them into version control.
|
||||
#
|
||||
# Alternatively, you may comment the line below and commit the
|
||||
# secrets files as long as you replace their contents by environment
|
||||
# variables.
|
||||
/config/*.secret.exs
|
|
@ -0,0 +1,21 @@
|
|||
# FeedParser
|
||||
|
||||
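FeedParser parses RSS 2.0, Atom, and JSON Feed documents into a common `FeedParser.Feed` struct containing a list of `FeedParser.Item` entries.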
|
||||
|
||||
## Installation
|
||||
|
||||
If [available in Hex](https://hex.pm/docs/publish), the package can be installed
|
||||
by adding `feed_parser` to your list of dependencies in `mix.exs`:
|
||||
|
||||
```elixir
|
||||
def deps do
|
||||
[
|
||||
{:feed_parser, "~> 0.1.0"}
|
||||
]
|
||||
end
|
||||
```
|
||||
|
||||
Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
|
||||
and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
|
||||
be found at [https://hexdocs.pm/feed_parser](https://hexdocs.pm/feed_parser).
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
defmodule FeedParser.Atom.Parser do
  @moduledoc """
  `FeedParser.Parser` implementation for Atom feeds.

  Documents are scanned with `FeedParser.XML.parse/1` and queried with `:xmerl_xpath`.
  """

  alias FeedParser.XML
  require XML

  @behaviour FeedParser.Parser

  @generic_xml_types ["text/xml", "application/xml"]

  @doc """
  Decides whether `data`, served as `content_type`, is an Atom feed.

  `"application/atom+xml"` is accepted outright. For the generic XML content types the
  document is accepted only when its root element is named `feed`.

  Returns `{true, doc}` with the parsed document, or `false`.
  """
  @impl FeedParser.Parser
  def accepts(data, "application/atom+xml") do
    # The behaviour contract is `{true, result} | false`, and FeedParser.parse_feed/3 only
    # matches on `{true, result}`. Returning a bare `true` here meant that feeds served with
    # the dedicated Atom content type were never handed to parse_feed/1.
    {true, XML.parse(data)}
  end

  def accepts(data, content_type) when content_type in @generic_xml_types do
    doc = XML.parse(data)

    if XML.xmlElement(doc, :name) == :feed do
      {true, doc}
    else
      false
    end
  end

  def accepts(_data, _content_type), do: false

  @doc """
  Converts a parsed Atom document (as returned by `accepts/2`) into a `FeedParser.Feed`.

  Always returns `{:ok, feed}`; fields whose elements are absent are `nil`.
  """
  @impl FeedParser.Parser
  def parse_feed(feed) do
    items =
      ~c"/feed/entry"
      |> :xmerl_xpath.string(feed)
      |> Enum.map(&parse_entry/1)

    {:ok,
     %FeedParser.Feed{
       site_url: attr(~c"/feed/link/@href", feed),
       title: text(~c"/feed/title/text()", feed),
       image_url: text(~c"/feed/icon/text()", feed),
       items: items
     }}
  end

  # Builds a FeedParser.Item from a single <entry> element.
  defp parse_entry(entry) do
    %FeedParser.Item{
      guid: text(~c"/entry/id/text()", entry),
      title: text(~c"/entry/title/text()", entry),
      url: attr(~c"/entry/link/@href", entry),
      # Prefer the full content; fall back to the summary when there is none.
      content: text(~c"/entry/content/text()", entry) || text(~c"/entry/summary/text()", entry),
      date: parse_date(text(~c"/entry/updated/text()", entry))
    }
  end

  # Parses an ISO 8601 timestamp; a missing or unparseable value yields nil.
  defp parse_date(nil), do: nil

  defp parse_date(timestamp) do
    case Timex.parse(timestamp, "{ISO:Extended}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Returns the trimmed value of the single text node matched by `xpath`, or nil when the
  # query matches zero or several nodes.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [el] ->
        XML.xmlText(el, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end

  # Returns the trimmed value of the single attribute matched by `xpath`, or nil when the
  # query matches zero or several attributes.
  defp attr(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [attr] ->
        XML.xmlAttribute(attr, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end
end
|
|
@ -0,0 +1,10 @@
|
|||
defmodule FeedParser.Feed do
  @moduledoc """
  A parsed feed, independent of the source format (RSS 2.0, Atom or JSON Feed).
  """

  defstruct [:site_url, :title, :image_url, :items]

  # The parsers fill `site_url`, `title` and `image_url` from optional elements/keys and
  # assign nil when they are absent, so all three are nullable.
  @type t() :: %__MODULE__{
          site_url: String.t() | nil,
          title: String.t() | nil,
          image_url: String.t() | nil,
          items: [FeedParser.Item.t()]
        }
end
|
|
@ -0,0 +1,25 @@
|
|||
defmodule FeedParser do
  @moduledoc """
  Entry point for turning raw feed data into a `FeedParser.Feed`.
  """

  @default_parsers [FeedParser.RSS2.Parser, FeedParser.Atom.Parser, FeedParser.JSONFeed.Parser]

  @doc """
  Parses `data` using the first module in `parsers` that accepts it.

  Each parser's `accepts/2` is called in order with `data` and `content_type`. The first one
  that returns `{true, result}` has its `parse_feed/1` called with `result`, and that call's
  return value is returned. If no parser accepts the data, `{:error, reason}` is returned.
  """
  @spec parse_feed(data :: String.t(), content_type :: String.t(), parsers :: [module()]) ::
          {:ok, feed :: FeedParser.Feed.t()} | {:error, reason :: String.t()}
  def parse_feed(data, content_type, parsers \\ @default_parsers) when is_binary(data) do
    accepted =
      Enum.find_value(parsers, fn candidate ->
        case candidate.accepts(data, content_type) do
          {true, prepared} -> {candidate, prepared}
          _ -> nil
        end
      end)

    case accepted do
      {chosen, prepared} ->
        chosen.parse_feed(prepared)

      nil ->
        {:error, "no parser matched the given content type and data"}
    end
  end
end
|
|
@ -0,0 +1,11 @@
|
|||
defmodule FeedParser.Item do
  @moduledoc """
  A single entry of a `FeedParser.Feed`.
  """

  defstruct [:guid, :url, :title, :content, :date]

  # Every field is taken from an optional element/key, and the parsers set `date` to nil
  # when the timestamp is missing or cannot be parsed, so all fields are nullable.
  @type t() :: %__MODULE__{
          guid: String.t() | nil,
          url: String.t() | nil,
          title: String.t() | nil,
          content: String.t() | nil,
          date: DateTime.t() | NaiveDateTime.t() | nil
        }
end
|
|
@ -0,0 +1,61 @@
|
|||
defmodule FeedParser.JSONFeed.Parser do
  @moduledoc """
  `FeedParser.Parser` implementation for JSON Feed (version 1) documents.
  """

  @behaviour FeedParser.Parser

  @doc """
  Accepts `data` when `content_type` is `"application/json"`, the body decodes as JSON and
  its `"version"` is `"https://jsonfeed.org/version/1"`.

  Returns `{true, json}` with the decoded map, or `false`.
  """
  @impl FeedParser.Parser
  def accepts(data, content_type) do
    with "application/json" <- content_type,
         {:ok, json} <- Poison.decode(data),
         %{"version" => "https://jsonfeed.org/version/1"} <- json do
      {true, json}
    else
      _ ->
        false
    end
  end

  @doc """
  Converts a decoded JSON Feed map (as returned by `accepts/2`) into a `FeedParser.Feed`.

  Always returns `{:ok, feed}`; missing keys become `nil`, and a missing `"items"` key
  yields an empty item list.
  """
  @impl FeedParser.Parser
  def parse_feed(json) do
    items =
      json
      |> Map.get("items", [])
      |> Enum.map(&parse_item/1)

    {:ok,
     %FeedParser.Feed{
       site_url: Map.get(json, "home_page_url"),
       title: json["title"],
       image_url: Map.get(json, "icon") || Map.get(json, "favicon"),
       items: items
     }}
  end

  # Builds a FeedParser.Item from one entry of the "items" array.
  defp parse_item(item) do
    id = item["id"]

    %FeedParser.Item{
      guid: id,
      # When the item has no explicit URL, fall back to the id if it looks like one.
      url: Map.get(item, "url") || url_from_id(id),
      title: Map.get(item, "title"),
      content:
        Map.get(item, "content_html") || Map.get(item, "content_text") ||
          Map.get(item, "summary"),
      date: parse_date(Map.get(item, "date_published") || Map.get(item, "date_updated"))
    }
  end

  # Returns the id when it is an http(s) URL, nil otherwise.
  #
  # String.starts_with?/2 accepts a string or a list of strings as the prefix, not a regex,
  # so the previous regex argument made every item without a "url" key crash.
  defp url_from_id(id) when is_binary(id) do
    if String.starts_with?(id, ["http://", "https://"]), do: id, else: nil
  end

  defp url_from_id(_id), do: nil

  # Parses an RFC 3339 timestamp; a missing, non-string or unparseable value yields nil.
  defp parse_date(timestamp) when is_binary(timestamp) do
    case Timex.parse(timestamp, "{RFC3339}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  defp parse_date(_timestamp), do: nil
end
|
|
@ -0,0 +1,5 @@
|
|||
defmodule FeedParser.Parser do
  @moduledoc """
  Behaviour implemented by every feed format parser.
  """

  @doc """
  Reports whether the parser can handle `data` served with `content_type`.

  Returns `{true, result}`, where `result` is later passed to `c:parse_feed/1`, or `false`.
  """
  @callback accepts(data :: String.t(), content_type :: String.t()) :: false | {true, any()}

  @doc """
  Turns the value produced by `c:accepts/2` into a `FeedParser.Feed`.
  """
  @callback parse_feed(data :: any()) ::
              {:error, reason :: String.t()} | {:ok, feed :: FeedParser.Feed.t()}
end
|
|
@ -0,0 +1,77 @@
|
|||
defmodule FeedParser.RSS2.Parser do
  @moduledoc """
  `FeedParser.Parser` implementation for RSS 2.0 feeds.

  Documents are scanned with `FeedParser.XML.parse/1` and queried with `:xmerl_xpath`.
  """

  alias FeedParser.XML
  require XML

  @behaviour FeedParser.Parser

  @generic_xml_types ["text/xml", "application/xml"]

  @doc """
  Decides whether `data`, served as `content_type`, is an RSS feed.

  `"application/rss+xml"` is accepted outright. For the generic XML content types the
  document is accepted only when its root element is named `rss`.

  Returns `{true, doc}` with the parsed document, or `false`.
  """
  @impl FeedParser.Parser
  def accepts(data, "application/rss+xml"), do: {true, XML.parse(data)}

  def accepts(data, content_type) when content_type in @generic_xml_types do
    doc = XML.parse(data)

    if XML.xmlElement(doc, :name) == :rss do
      {true, doc}
    else
      false
    end
  end

  def accepts(_data, _content_type), do: false

  @doc """
  Converts a parsed RSS document (as returned by `accepts/2`) into a `FeedParser.Feed`.

  Returns `{:ok, feed}`, or `{:error, reason}` when the document does not contain exactly
  one `/rss/channel` element.
  """
  @impl FeedParser.Parser
  def parse_feed(rss) do
    # "application/rss+xml" documents are accepted without inspecting their structure, so a
    # missing channel is reported as an error tuple instead of raising a MatchError.
    case :xmerl_xpath.string(~c"/rss/channel", rss) do
      [channel] ->
        {:ok, build_feed(channel)}

      _ ->
        {:error, "RSS document does not contain exactly one channel element"}
    end
  end

  # Builds the FeedParser.Feed struct from the <channel> element.
  defp build_feed(channel) do
    items =
      ~c"/channel/item"
      |> :xmerl_xpath.string(channel)
      |> Enum.map(&parse_item/1)

    %FeedParser.Feed{
      site_url: text(~c"/channel/link/text()", channel),
      title: text(~c"/channel/title/text()", channel),
      image_url: text(~c"/channel/image/url/text()", channel),
      items: items
    }
  end

  # Builds a FeedParser.Item from a single <item> element.
  defp parse_item(item) do
    %FeedParser.Item{
      guid: text(~c"/item/guid/text()", item),
      title: text(~c"/item/title/text()", item),
      url: text(~c"/item/link/text()", item),
      content: text(~c"/item/description/text()", item),
      date: parse_date(text(~c"/item/pubDate/text()", item))
    }
  end

  # Parses an RFC 1123 timestamp; a missing or unparseable value yields nil.
  defp parse_date(nil), do: nil

  defp parse_date(timestamp) do
    case Timex.parse(timestamp, "{RFC1123}") do
      {:ok, date} -> date
      _ -> nil
    end
  end

  # Returns the trimmed value of the single text node matched by `xpath`, or nil when the
  # query matches zero or several nodes.
  defp text(xpath, element) do
    case :xmerl_xpath.string(xpath, element) do
      [el] ->
        XML.xmlText(el, :value) |> List.to_string() |> String.trim()

      _ ->
        nil
    end
  end
end
|
|
@ -0,0 +1,17 @@
|
|||
defmodule FeedParser.XML do
  @moduledoc """
  Record accessors for xmerl's `xmlElement`, `xmlAttribute` and `xmlText` records, plus a
  helper that scans a binary into an xmerl document.
  """

  require Record

  Record.defrecord(:xmlElement, Record.extract(:xmlElement, from_lib: "xmerl/include/xmerl.hrl"))

  Record.defrecord(
    :xmlAttribute,
    Record.extract(:xmlAttribute, from_lib: "xmerl/include/xmerl.hrl")
  )

  Record.defrecord(:xmlText, Record.extract(:xmlText, from_lib: "xmerl/include/xmerl.hrl"))

  @doc """
  Scans `data` with `:xmerl_scan.string/1` and returns the resulting root element record.

  The binary is handed to xmerl as a list of bytes; anything xmerl reports after the
  document is discarded.
  """
  @spec parse(data :: String.t()) :: tuple()
  def parse(data) do
    bytes = :binary.bin_to_list(data)
    {root, _rest} = :xmerl_scan.string(bytes)
    root
  end
end
|
|
@ -0,0 +1,30 @@
|
|||
defmodule FeedParser.MixProject do
  use Mix.Project

  # Project definition read by Mix.
  def project do
    [
      app: :feed_parser,
      version: "0.1.0",
      elixir: "~> 1.9",
      start_permanent: Mix.env() == :prod,
      deps: deps()
    ]
  end

  # Run "mix help compile.app" to learn about applications.
  def application do
    [
      # :xmerl ships with Erlang/OTP but is not a default dependency of the application.
      # The XML parsers call :xmerl_scan and :xmerl_xpath, so it must be listed here to be
      # available at runtime and included in releases.
      extra_applications: [:logger, :xmerl]
    ]
  end

  # Run "mix help deps" to learn about dependencies.
  defp deps do
    [
      {:timex, "~> 3.6.1"},
      {:poison, "~> 4.0.1"}
    ]
  end
end
|
|
@ -0,0 +1,16 @@
|
|||
%{
|
||||
"certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm"},
|
||||
"gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm"},
|
||||
"hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
|
||||
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"},
|
||||
"parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"},
|
||||
"poison": {:hex, :poison, "4.0.1", "bcb755a16fac91cad79bfe9fc3585bb07b9331e50cfe3420a24bcc2d735709ae", [:mix], [], "hexpm"},
|
||||
"saxy": {:hex, :saxy, "0.10.0", "38879f46a595862c22114792c71379355ecfcfa0f713b1cfcc59e1d4127f1f55", [:mix], [], "hexpm"},
|
||||
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"},
|
||||
"timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"tzdata": {:hex, :tzdata, "1.0.1", "f6027a331af7d837471248e62733c6ebee86a72e57c613aa071ebb1f750fc71a", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
|
||||
"unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm"},
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
defmodule FeedParserTest do
  use ExUnit.Case, async: true
  doctest FeedParser

  # The generated "greets the world" test called FeedParser.hello/0, which the library
  # does not define, so the suite could never pass. These tests exercise the real API.
  describe "parse_feed/3" do
    test "returns an error when no parser accepts the content type" do
      assert {:error, reason} = FeedParser.parse_feed("plain text", "text/plain")
      assert is_binary(reason)
    end

    test "returns an error when the parser list is empty" do
      assert {:error, _reason} = FeedParser.parse_feed("{}", "application/json", [])
    end

    test "parses a minimal JSON Feed document" do
      json = ~s({"version": "https://jsonfeed.org/version/1", "title": "Example", "items": []})

      assert {:ok, %FeedParser.Feed{title: "Example", items: []}} =
               FeedParser.parse_feed(json, "application/json")
    end
  end
end
|
|
@ -0,0 +1 @@
|
|||
# Starts ExUnit so the test files in this project can run.
ExUnit.start()
|
Loading…
Reference in New Issue