frenzy/lib/frenzy/opml/importer.ex

67 lines
1.9 KiB
Elixir

defmodule Frenzy.OPML.Importer do
  @moduledoc """
  Extracts feed URLs, organised by group, from an OPML document.

  Only two levels of `<outline>` elements below `/opml/body` are read:

    * a top-level `<outline>` carrying an `xmlUrl` attribute is a feed in the
      `:default` group;
    * any other top-level `<outline>` is treated as a group, and the `xmlUrl`
      attributes of its direct `<outline>` children are its feeds.
  """

  import Record

  @typedoc """
  The data created from an OPML import.

  A map of groups. Each key is a group name (String) or `:default` (for feeds
  that are not inside a named group) and each value is that group's list of
  feed URLs.
  """
  @type import_data() :: %{optional(String.t() | :default) => [String.t()]}

  defrecord :xmlElement, extract(:xmlElement, from_lib: "xmerl/include/xmerl.hrl")
  defrecord :xmlAttribute, extract(:xmlAttribute, from_lib: "xmerl/include/xmerl.hrl")

  @doc """
  Parses the OPML document in `text` and returns its feeds grouped by name.

  Feeds found directly under `<body>` are collected under `:default`. Feeds
  nested one level inside a group `<outline>` are collected under that
  group's `title` attribute, falling back to its `text` attribute and finally
  to `:default` when the group carries neither.

  Within a group, URLs are accumulated by prepending, so each list is in the
  reverse of the order in which the feeds were encountered.

  Malformed XML is not handled here: whatever `:xmerl_scan.string/1` does on
  bad input (it exits) propagates to the caller.

  NOTE(review): xmerl represents element and attribute names as atoms; if
  `text` can come from untrusted users, confirm that input size is bounded
  upstream.
  """
  @spec parse_opml(String.t()) :: import_data()
  def parse_opml(text) do
    # xmerl works on Erlang strings (lists), so the trimmed binary is converted
    # to a list first. Trimming removes surrounding whitespace, which is not
    # permitted in front of an XML declaration.
    {doc, _rest} =
      text
      |> String.trim()
      |> :binary.bin_to_list()
      |> :xmerl_scan.string()

    ~c"/opml/body/outline"
    |> :xmerl_xpath.string(doc)
    |> Enum.flat_map(&get_feeds/1)
    |> Enum.reduce(%{}, fn {group, feed_url}, acc ->
      Map.update(acc, group, [feed_url], fn feeds -> [feed_url | feeds] end)
    end)
  end

  # Returns the `{group, feed_url}` pairs contributed by one top-level
  # <outline>. If the <outline> has an xmlUrl attribute it is a top-level feed
  # (uses the :default group); otherwise it is a group of feeds.
  defp get_feeds(outline_el) do
    case attribute_value(outline_el, :xmlUrl) do
      nil -> get_feeds_from_group(outline_el)
      feed_url -> [{:default, feed_url}]
    end
  end

  # Returns `{group_name, feed_url}` for every direct child <outline> of
  # `group_el` that has an xmlUrl attribute.
  defp get_feeds_from_group(group_el) do
    # `title` is optional in OPML (`text` is the required attribute), so a
    # group without a title must not abort the whole import.
    group_name =
      attribute_value(group_el, :title) || attribute_value(group_el, :text) || :default

    ~c"/outline/outline/@xmlUrl"
    |> :xmerl_xpath.string(group_el)
    |> Enum.map(fn attr -> {group_name, List.to_string(xmlAttribute(attr, :value))} end)
  end

  # Returns the value of the attribute called `name` on `element` as a String,
  # or nil when the element has no such attribute. Only the element's own
  # attributes are inspected, never those of its descendants.
  defp attribute_value(element, name) do
    Enum.find_value(xmlElement(element, :attributes), fn attr ->
      if xmlAttribute(attr, :name) == name do
        List.to_string(xmlAttribute(attr, :value))
      end
    end)
  end
end