From 5c4b9894465094b55a6a8c4843a1540f74dfbb9c Mon Sep 17 00:00:00 2001 From: Shadowfacts Date: Sun, 1 Sep 2019 16:46:56 -0400 Subject: [PATCH] Use feed_parser instead of fiet --- lib/frenzy/update_feeds.ex | 69 +++++++++++++++++++------------------- mix.exs | 5 +-- mix.lock | 14 ++++---- 3 files changed, 46 insertions(+), 42 deletions(-) diff --git a/lib/frenzy/update_feeds.ex b/lib/frenzy/update_feeds.ex index abb802e..9acee56 100644 --- a/lib/frenzy/update_feeds.ex +++ b/lib/frenzy/update_feeds.ex @@ -65,8 +65,23 @@ defmodule Frenzy.UpdateFeeds do Logger.debug("Updating #{feed.feed_url}") case HTTPoison.get(feed.feed_url) do - {:ok, %HTTPoison.Response{status_code: 200, body: body}} -> - case Fiet.parse(body) do + {:ok, + %HTTPoison.Response{ + status_code: 200, + body: body, + headers: headers + }} = response -> + {_, content_type} = + headers + |> Enum.find(fn {k, v} -> k == "Content-Type" end) + + content_type = + content_type + |> String.split(";") + |> Enum.map(&String.trim/1) + |> Enum.find(fn s -> !String.contains?(s, "=") end) + + case FeedParser.parse(body, content_type) do {:ok, rss} -> update_feed_from_rss(feed, rss) end @@ -91,19 +106,12 @@ defmodule Frenzy.UpdateFeeds do end end - defp update_feed_from_rss(feed, rss) do - last_updated = - if rss.updated_at do - parse_date(rss.updated_at) - else - DateTime.utc_now() - end - + defp update_feed_from_rss(feed, %FeedParser.Feed{} = rss) do changeset = Feed.changeset(feed, %{ title: rss.title, - site_url: rss.link.href, - last_updated: last_updated + site_url: rss.site_url, + last_updated: (rss.last_updated || DateTime.utc_now()) |> Timex.Timezone.convert(:utc) }) Repo.update(changeset) @@ -112,7 +120,7 @@ defmodule Frenzy.UpdateFeeds do Enum.map(rss.items, fn entry -> # todo: use Repo.exists for this - if !Enum.any?(feed.items, fn item -> item.guid == entry.id end) do + if !Enum.any?(feed.items, fn item -> item.guid == entry.guid end) do create_item(feed, entry) end end) @@ -124,12 +132,12 @@ defmodule Frenzy.UpdateFeeds do Logger.debug("Creating item for #{url}") item_params = %{ - guid: entry.id, + guid: entry.guid, title: entry.title, url: url, - date: parse_date(entry.published_at), + date: entry.date |> Timex.Timezone.convert(:utc), creator: "", - content: entry.description + content: entry.content } feed = Repo.preload(feed, :pipeline_stages) @@ -185,29 +193,22 @@ defmodule Frenzy.UpdateFeeds do end end - defp parse_date(str) do - case Timex.parse(str, "{RFC1123}") do - {:ok, date} -> - Timex.Timezone.convert(date, :utc) - - _ -> - {:ok, date, _} = DateTime.from_iso8601(str) - Timex.Timezone.convert(date, :utc) - end - end - defp get_real_url(entry) do - links = Enum.reject(entry.links, fn l -> l.rel == "shorturl" end) + links = Enum.reject(entry.links, fn {_, rel} -> rel == "shorturl" end) - case Enum.find(links, fn l -> l.rel == "related" end) do + case Enum.find(links, fn {_, rel} -> rel == "related" end) do nil -> - case Enum.find(links, fn l -> l.rel == "alternate" end) do - nil -> Enum.fetch!(links, 0).href - link -> link.href + case Enum.find(links, fn {_, rel} -> rel == "alternate" end) do + nil -> + [{href, _} | _] = links + href + + {href, _} -> + href end - link -> - link.href + {href, _} -> + href end end end diff --git a/mix.exs b/mix.exs index 1e5ecb5..1ab5d83 100644 --- a/mix.exs +++ b/mix.exs @@ -44,8 +44,9 @@ defmodule Frenzy.MixProject do {:jason, "~> 1.0"}, {:plug_cowboy, "~> 2.0"}, {:httpoison, "~> 1.4"}, - {:fiet, git: "https://github.com/shadowfacts/fiet.git", branch: "master"}, - {:timex, "~> 3.0"}, + {:feed_parser, + git: "https://git.shadowfacts.net/shadowfacts/feed_parser.git", branch: "master"}, + {:timex, "~> 3.6"}, {:readability, git: "https://github.com/shadowfacts/readability.git", branch: "master"}, {:bcrypt_elixir, "~> 2.0"}, {:dialyxir, "~> 1.0.0-rc.6", only: :dev, runtime: false}, diff --git a/mix.lock b/mix.lock index 8568c03..7a695da 100644 --- a/mix.lock +++ b/mix.lock @@ -1,7 +1,7 @@ %{ "basic_auth": {:hex, :basic_auth, "2.2.4", "d8c748237870dd1df3bc5c0f1ab4f1fad6270c75472d7e62b19302ec59e92a79", [:mix], [{:plug, "~> 0.14 or ~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"}, "bcrypt_elixir": {:hex, :bcrypt_elixir, "2.0.1", "1061e2114aaac554c12e5c1e4608bf4aadaca839f30d1b85224272facd5e6427", [:make, :mix], [{:comeonin, "~> 5.1", [hex: :comeonin, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm"}, - "certifi": {:hex, :certifi, "2.4.2", "75424ff0f3baaccfd34b1214184b6ef616d89e420b258bb0a5ea7d7bc628f7f0", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"}, + "certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"}, "combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm"}, "comeonin": {:hex, :comeonin, "5.1.1", "0abd6bae41acc01c369bb3eafe46399f301bf4e1bacebafdb89252bbb8a1a32d", [:mix], [], "hexpm"}, "connection": {:hex, :connection, "1.0.4", "a1cae72211f0eef17705aaededacac3eb30e6625b04a6117c1b2db6ace7d5976", [:mix], [], "hexpm"}, @@ -14,18 +14,19 @@ "ecto_sql": {:hex, :ecto_sql, "3.0.2", "0e04cbc183b91ea0085c502226befcd237a4ac31c204fd4be8d4db6676b5f10d", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.0.3", [hex: :ecto, repo: "hexpm", optional: false]}, {:mariaex, "~> 0.9.1", [hex: :mariaex, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.14.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.2.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"}, "elixir_make": {:hex, :elixir_make, "0.5.2", "96a28c79f5b8d34879cd95ebc04d2a0d678cfbbd3e74c43cb63a76adf0ee8054", [:mix], [], "hexpm"}, "erlex": {:hex, :erlex, "0.2.4", "23791959df45fe8f01f388c6f7eb733cc361668cbeedd801bf491c55a029917b", [:mix], [], "hexpm"}, + "feed_parser": {:git, "https://git.shadowfacts.net/shadowfacts/feed_parser.git", "8c42d4587328698e8d29d2ad562e478abb146f75", [branch: "master"]}, "fiet": {:git, "https://github.com/shadowfacts/fiet.git", "bf117bc30a6355a189d05a562127cfaf9e0187ae", [branch: "master"]}, "file_system": {:hex, :file_system, "0.2.6", "fd4dc3af89b9ab1dc8ccbcc214a0e60c41f34be251d9307920748a14bf41f1d3", [:mix], [], "hexpm"}, "floki": {:hex, :floki, "0.20.4", "be42ac911fece24b4c72f3b5846774b6e61b83fe685c2fc9d62093277fb3bc86", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}, {:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"}, - "gettext": {:hex, :gettext, "0.16.1", "e2130b25eebcbe02bb343b119a07ae2c7e28bd4b146c4a154da2ffb2b3507af2", [:mix], [], "hexpm"}, - "hackney": {:hex, :hackney, "1.14.3", "b5f6f5dcc4f1fba340762738759209e21914516df6be440d85772542d4a5e412", [:rebar3], [{:certifi, "2.4.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "1.0.2", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"}, + "gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm"}, + "hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"}, "html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm"}, "httpoison": {:hex, :httpoison, "1.4.0", "e0b3c2ad6fa573134e42194d13e925acfa8f89d138bc621ffb7b1989e6d22e73", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"}, "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"}, "jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"}, "mime": {:hex, :mime, "1.3.0", "5e8d45a39e95c650900d03f897fbf99ae04f60ab1daa4a34c7a20a5151b7a5fe", [:mix], [], "hexpm"}, - "mimerl": {:hex, :mimerl, "1.0.2", "993f9b0e084083405ed8252b99460c4f0563e41729ab42d9074fd5e52439be88", [:rebar3], [], "hexpm"}, + "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"}, "mochiweb": {:hex, :mochiweb, "2.18.0", "eb55f1db3e6e960fac4e6db4e2db9ec3602cc9f30b86cd1481d56545c3145d2e", [:rebar3], [], "hexpm"}, "parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"}, "phoenix": {:hex, :phoenix, "1.4.0", "56fe9a809e0e735f3e3b9b31c1b749d4b436e466d8da627b8d82f90eaae714d2", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 1.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}], "hexpm"}, @@ -36,14 +37,15 @@ "plug": {:hex, :plug, "1.7.1", "8516d565fb84a6a8b2ca722e74e2cd25ca0fc9d64f364ec9dbec09d33eb78ccd", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}], "hexpm"}, "plug_cowboy": {:hex, :plug_cowboy, "2.0.0", "ab0c92728f2ba43c544cce85f0f220d8d30fc0c90eaa1e6203683ab039655062", [:mix], [{:cowboy, "~> 2.5", [hex: :cowboy, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"}, "plug_crypto": {:hex, :plug_crypto, "1.0.0", "18e49317d3fa343f24620ed22795ec29d4a5e602d52d1513ccea0b07d8ea7d4d", [:mix], [], "hexpm"}, + "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm"}, "postgrex": {:hex, :postgrex, "0.14.0", "f3d6ffea1ca8a156e0633900a5338a3d17b00435227726baed8982718232b694", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"}, "ranch": {:hex, :ranch, "1.7.0", "9583f47160ca62af7f8d5db11454068eaa32b56eeadf984d4f46e61a076df5f2", [:rebar3], [], "hexpm"}, "readability": {:git, "https://github.com/shadowfacts/readability.git", "71fa17caaf8103ef213e2c7dde4b447a48669122", [branch: "master"]}, "saxy": {:hex, :saxy, "0.6.0", "cdb2f2fcd8133d1f3f8b0cf6a131ee1ca348dca613de266e9a239db850c4a093", [:mix], [], "hexpm"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"}, "telemetry": {:hex, :telemetry, "0.2.0", "5b40caa3efe4deb30fb12d7cd8ed4f556f6d6bd15c374c2366772161311ce377", [:mix], [], "hexpm"}, - "timex": {:hex, :timex, "3.4.2", "d74649c93ad0e12ce5b17cf5e11fbd1fb1b24a3d114643e86dba194b64439547", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm"}, - "tzdata": {:hex, :tzdata, "0.5.19", "7962a3997bf06303b7d1772988ede22260f3dae1bf897408ebdac2b4435f4e6a", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"}, + "timex": {:hex, :timex, "3.6.1", "efdf56d0e67a6b956cc57774353b0329c8ab7726766a11547e529357ffdc1d56", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5 or ~> 1.0.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm"}, + "tzdata": {:hex, :tzdata, "1.0.1", "f6027a331af7d837471248e62733c6ebee86a72e57c613aa071ebb1f750fc71a", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"}, "unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm"}, "xml_builder": {:hex, :xml_builder, "2.1.1", "2d6d665f09cf1319e3e1c46035755271b414d99ad8615d0bd6f337623e0c885b", [:mix], [], "hexpm"}, }