From ab105d71aefaf7fa38ba219d8397eba8d0562d71 Mon Sep 17 00:00:00 2001
From: Shadowfacts
Date: Sat, 18 Jul 2020 23:13:42 -0400
Subject: [PATCH] Add Gemini document -> HTML converter stage

---
 lib/frenzy/pipeline/render_gemini_stage.ex | 132 +++++++++++++++++++++
 lib/frenzy_web/live/edit_pipeline_live.ex  |   3 +-
 mix.lock                                   |   2 +-
 3 files changed, 135 insertions(+), 2 deletions(-)
 create mode 100644 lib/frenzy/pipeline/render_gemini_stage.ex

diff --git a/lib/frenzy/pipeline/render_gemini_stage.ex b/lib/frenzy/pipeline/render_gemini_stage.ex
new file mode 100644
index 0000000..084f63b
--- /dev/null
+++ b/lib/frenzy/pipeline/render_gemini_stage.ex
@@ -0,0 +1,132 @@
+defmodule Frenzy.Pipeline.RenderGeminiStage do
+  @moduledoc """
+  Pipeline stage that renders an item's `text/gemini` content as HTML.
+
+  Items with any other content type are passed through unchanged.
+  """
+
+  require Logger
+  alias Frenzy.Pipeline.Stage
+  @behaviour Stage
+
+  # HTML node in the `{tag, attributes, children}` tuple form given to Floki.
+  @typep html_node :: {String.t(), [{String.t(), String.t()}], [html_node() | String.t()]}
+
+  @impl Stage
+  def apply(_opts, %{content: content, content_type: "text/gemini"} = item_params) do
+    html = render_gemini(content)
+    {:ok, %{item_params | content_type: "text/html", content: html}}
+  end
+
+  def apply(_opts, %{content_type: content_type} = item_params) do
+    Logger.debug("Not rendering Gemini text for item, incorrect content type: #{content_type}")
+    {:ok, item_params}
+  end
+
+  @impl Stage
+  def validate_opts(opts) do
+    {:ok, opts}
+  end
+
+  @impl Stage
+  def default_opts(), do: %{}
+
+  @doc """
+  Parses `gemini_source` with `Gemini.parse/1` and returns the rendered lines
+  serialized by `Floki.raw_html/1`.
+  """
+  def render_gemini(gemini_source) do
+    gemini_source
+    |> Gemini.parse()
+    |> render_lines()
+    |> Floki.raw_html()
+  end
+
+  # Converts parsed Gemini lines into HTML nodes. `acc` collects the nodes in
+  # reverse order and is reversed once the input is exhausted.
+  @spec render_lines([Gemini.line()], [html_node()]) :: [html_node()]
+  defp render_lines(lines, acc \\ [])
+
+  defp render_lines([], acc) do
+    Enum.reverse(acc)
+  end
+
+  defp render_lines([{:text, text} | rest], acc) do
+    render_lines(rest, [{"p", [], [text]} | acc])
+  end
+
+  # A link without a label is displayed as its URI.
+  defp render_lines([{:link, uri, text} | rest], acc) do
+    uri_str = URI.to_string(uri)
+    text = if is_nil(text), do: uri_str, else: text
+    a = {"a", [{"href", uri_str}], [text]}
+    p = {"p", [], [a]}
+    render_lines(rest, [p | acc])
+  end
+
+  # Collapses a preformatted block into a single <pre> element.
+  defp render_lines([{:preformatted_start, _alt} | rest], acc) do
+    {preformatted_lines, after_block} =
+      Enum.split_while(rest, fn
+        {:preformatted_text, _} -> true
+        _ -> false
+      end)
+
+    # Use a cons pattern so ALL lines after the closing marker are kept; a
+    # two-element list pattern would only match when exactly one line followed.
+    # If the marker is missing (block still open at the end of the input),
+    # continue with whatever follows.
+    remaining =
+      case after_block do
+        [:preformatted_end | tail] -> tail
+        other -> other
+      end
+
+    pre_text =
+      Enum.map_join(preformatted_lines, "\n", fn {:preformatted_text, text} -> text end)
+
+    # Children are wrapped in a list like every other node built here.
+    pre = {"pre", [], [pre_text]}
+    render_lines(remaining, [pre | acc])
+  end
+
+  defp render_lines([{:heading, text, level} | rest], acc) do
+    tag = "h#{level}"
+    heading = {tag, [], [text]}
+    render_lines(rest, [heading | acc])
+  end
+
+  # Groups a run of consecutive list items into one <ul>.
+  defp render_lines([{:list_item, _text} | _rest] = lines, acc) do
+    {list_items, rest} =
+      Enum.split_while(lines, fn
+        {:list_item, _} -> true
+        _ -> false
+      end)
+
+    lis =
+      Enum.map(list_items, fn {:list_item, text} ->
+        {"li", [], [text]}
+      end)
+
+    ul = {"ul", [], lis}
+    render_lines(rest, [ul | acc])
+  end
+
+  # Groups a run of consecutive quoted lines into one <blockquote>.
+  defp render_lines([{:quoted, _text} | _rest] = lines, acc) do
+    {quoted_lines, rest} =
+      Enum.split_while(lines, fn
+        {:quoted, _} -> true
+        _ -> false
+      end)
+
+    ps =
+      Enum.map(quoted_lines, fn {:quoted, text} ->
+        {"p", [], [text]}
+      end)
+
+    blockquote = {"blockquote", [], ps}
+    render_lines(rest, [blockquote | acc])
+  end
+end
diff --git a/lib/frenzy_web/live/edit_pipeline_live.ex b/lib/frenzy_web/live/edit_pipeline_live.ex
index 1a0a782..3dd973a 100644
--- a/lib/frenzy_web/live/edit_pipeline_live.ex
+++ b/lib/frenzy_web/live/edit_pipeline_live.ex
@@ -7,7 +7,8 @@ defmodule FrenzyWeb.EditPipelineLive do
     {"Filter Stage", "Frenzy.Pipeline.FilterStage"},
     {"Scrape Stage", "Frenzy.Pipeline.ScrapeStage"},
     {"Conditional Stage", "Frenzy.Pipeline.ConditionalStage"},
-    {"Gemini Scrape Stage", "Frenzy.Pipeline.GeminiScrapeStage"}
+    {"Gemini Scrape Stage", "Frenzy.Pipeline.GeminiScrapeStage"},
+    {"Render Gemini Stage", "Frenzy.Pipeline.RenderGeminiStage"}
   ]
 
   def stages, do: @stages
diff --git a/mix.lock b/mix.lock
index 9758e96..723cc24 100644
--- a/mix.lock
+++ b/mix.lock
@@ -18,7 +18,7 @@
   "fiet": {:git, "https://github.com/shadowfacts/fiet.git", "bf117bc30a6355a189d05a562127cfaf9e0187ae", [branch: "master"]},
   "file_system": {:hex, :file_system, "0.2.6", "fd4dc3af89b9ab1dc8ccbcc214a0e60c41f34be251d9307920748a14bf41f1d3", [:mix], [], "hexpm", "0d50da6b04c58e101a3793b1600f9a03b86e3a8057b192ac1766013d35706fa6"},
   "floki": {:hex, :floki, "0.23.0", "956ab6dba828c96e732454809fb0bd8d43ce0979b75f34de6322e73d4c917829", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "e680b5ef0b61ce02faa7137db8d1714903a5552be4c89fb57293b8770e7f49c2"},
-  "gemini": {:git, "https://git.shadowfacts.net/shadowfacts/gemini-ex.git", "37864e9f1196eb0efa71427d76a9279cee84ef19", [branch: "main"]},
+  "gemini": {:git, "https://git.shadowfacts.net/shadowfacts/gemini-ex.git", "b97479c58d278274568010387d9a18f8557d0ca7", [branch: "main"]},
   "gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm", "e0b8598e802676c81e66b061a2148c37c03886b24a3ca86a1f98ed40693b94b3"},
   "hackney": {:hex, :hackney, "1.16.0", "5096ac8e823e3a441477b2d187e30dd3fff1a82991a806b2003845ce72ce2d84", [:rebar3], [{:certifi, "2.5.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.1", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.0", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.6", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "3bf0bebbd5d3092a3543b783bf065165fa5d3ad4b899b836810e513064134e18"},
   "html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm", "3e3d7156a272950373ce5a4018b1490bea26676f8d6a7d409f6fac8568b8cb9a"},