diff --git a/lib/frenzy/pipeline/filter_engine.ex b/lib/frenzy/pipeline/filter_engine.ex
new file mode 100644
index 0000000..0de7c51
--- /dev/null
+++ b/lib/frenzy/pipeline/filter_engine.ex
@@ -0,0 +1,169 @@
+defmodule Frenzy.Pipeline.FilterEngine do
+  @moduledoc """
+  Validates filter definitions and tests items against them.
+
+  A filter is a string-keyed map with a `"mode"` (`"accept"` or `"reject"`), an
+  integer `"score"` threshold and a list of `"rules"`. Each rule that matches an
+  item adds its `"weight"` to that item's score.
+  """
+
+  @doc """
+  Validates the shape of `filter`.
+
+  Returns `{:ok, filter}` when the filter and all of its rules are well formed,
+  otherwise `{:error, reason}` with a human-readable message.
+  """
+  def validate_filter(filter) when is_map(filter) do
+    cond do
+      not (Map.has_key?(filter, "mode") and is_binary(filter["mode"]) and
+             filter["mode"] in ["accept", "reject"]) ->
+        {:error, "mode must be a string, either 'accept' or 'reject'"}
+
+      not (Map.has_key?(filter, "score") and is_integer(filter["score"])) ->
+        {:error, "score must be an integer"}
+
+      not (Map.has_key?(filter, "rules") and is_list(filter["rules"])) ->
+        {:error, "rules must be a list of rules"}
+
+      true ->
+        validate_rules(filter)
+    end
+  end
+
+  # Anything that is not a map gets an error tuple rather than a FunctionClauseError.
+  def validate_filter(_filter), do: {:error, "filter must be a map"}
+
+  @doc """
+  Validates a list of rules, or the `"rules"` list of a filter map.
+
+  Given a filter map, returns `{:ok, filter}` or `{:error, reason}`. Given a
+  list, returns `{:ok, rules}` with the rules in their original order, or
+  `{:error, reason}` naming the index of the first invalid rule.
+  """
+  def validate_rules(%{"rules" => rules} = filter) do
+    case validate_rules(rules) do
+      {:ok, rules} ->
+        {:ok, Map.put(filter, "rules", rules)}
+
+      {:error, _reason} = err ->
+        err
+    end
+  end
+
+  def validate_rules(rules) when is_list(rules) do
+    rules
+    |> Enum.with_index()
+    |> Enum.reduce_while({:ok, []}, fn {rule, index}, {:ok, new_rules} ->
+      case validate_rule(rule) do
+        {:ok, rule} ->
+          {:cont, {:ok, [rule | new_rules]}}
+
+        {:error, reason} ->
+          {:halt, {:error, "invalid rule #{index}: #{reason}"}}
+      end
+    end)
+    |> case do
+      # Rules were prepended while reducing; restore the caller's order.
+      {:ok, new_rules} -> {:ok, Enum.reverse(new_rules)}
+      {:error, _reason} = err -> err
+    end
+  end
+
+  def validate_rules(_rules), do: {:error, "rules must be a list"}
+
+  @doc """
+  Validates a single rule.
+
+  Returns `{:ok, rule}` or `{:error, reason}`. A `"matches_regex"` rule must
+  carry a `"param"` that compiles as a regular expression.
+  """
+  def validate_rule(rule) do
+    cond do
+      not is_map(rule) ->
+        {:error, "rule must be a map"}
+
+      not (Map.has_key?(rule, "mode") and is_binary(rule["mode"]) and
+             rule["mode"] in ["contains_string", "matches_regex"]) ->
+        {:error, "mode property must be a string, either 'contains_string' or 'matches_regex'"}
+
+      not (Map.has_key?(rule, "property") and is_binary(rule["property"]) and
+             rule["property"] in ["url", "title", "author"]) ->
+        {:error, "property property must be a string, either 'url', 'title', or 'author'"}
+
+      not (Map.has_key?(rule, "param") and is_binary(rule["param"])) ->
+        {:error, "param property must be a string"}
+
+      # Reject bad patterns here so that matching cannot crash on them later.
+      rule["mode"] == "matches_regex" and not valid_regex?(rule["param"]) ->
+        {:error, "param property must be a valid regular expression"}
+
+      not (Map.has_key?(rule, "weight") and is_integer(rule["weight"])) ->
+        {:error, "weight property must be an integer"}
+
+      true ->
+        {:ok, rule}
+    end
+  end
+
+  @doc """
+  Tests `item_params` against a filter.
+
+  Sums the weights of all matching rules and compares the total with the
+  filter's score. Returns `true` when an `"accept"` filter reaches the score or
+  a `"reject"` filter stays below it, `false` otherwise.
+  """
+  def test(%{"mode" => mode, "score" => score, "rules" => rules}, item_params) do
+    item_score =
+      rules
+      |> Enum.filter(&test_rule(&1, item_params))
+      |> Enum.map(& &1["weight"])
+      |> Enum.sum()
+
+    matches = item_score >= score
+
+    case {mode, matches} do
+      {"accept", true} ->
+        true
+
+      {"reject", false} ->
+        true
+
+      _ ->
+        false
+    end
+  end
+
+  @doc """
+  Returns `true` when the rule's property of `item_params` is a string that
+  matches the rule, `false` otherwise.
+  """
+  def test_rule(
+        %{"mode" => mode, "property" => property, "param" => param},
+        item_params
+      ) do
+    with prop_value <- get_property(item_params, property),
+         true <- is_binary(prop_value),
+         true <- matches(prop_value, mode, param) do
+      true
+    else
+      _ ->
+        false
+    end
+  end
+
+  defp matches(value, "contains_string", param) do
+    String.contains?(value, param)
+  end
+
+  defp matches(value, "matches_regex", param) do
+    {:ok, regex} = Regex.compile(param)
+    String.match?(value, regex)
+  end
+
+  defp valid_regex?(source), do: match?({:ok, _regex}, Regex.compile(source))
+
+  defp get_property(item_params, "url"), do: item_params.url
+  defp get_property(item_params, "title"), do: item_params.title
+  defp get_property(item_params, "author"), do: item_params.author
+  defp get_property(_item_params, _property), do: {:error, "invalid property"}
+end
diff --git a/lib/frenzy/pipeline/filter_stage.ex b/lib/frenzy/pipeline/filter_stage.ex
index 6d56933..e038170 100644
--- a/lib/frenzy/pipeline/filter_stage.ex
+++ b/lib/frenzy/pipeline/filter_stage.ex
@@ -1,128 +1,20 @@
 defmodule Frenzy.Pipeline.FilterStage do
   require Logger
-  alias Frenzy.Pipeline.Stage
+  alias Frenzy.Pipeline.{Stage, FilterEngine}
   @behaviour Stage
 
   @impl Stage
-  def apply(%{"mode" => mode, "score" => score, "rules" => rules}, item_params)
-      when is_binary(mode) and is_integer(score) and is_list(rules) do
-    item_score =
-      rules
-      |> Enum.map(fn rule -> test(rule, item_params) end)
-      |> Enum.sum()
-
-    matches = item_score >= score
-
-    case {mode, matches} do
-      {"accept", true} ->
-        {:ok, item_params}
-
-      {"reject", false} ->
-        {:ok, item_params}
-
-      _ ->
-        Logger.debug("Skipping item #{item_params.url} due to feed filter")
-        :tombstone
-    end
-  end
-
-  @impl Stage
-  def apply(opts, item_params) do
-    Logger.warn("Received invalid filter opts: #{opts}")
-    {:ok, item_params}
-  end
-
-  @impl Stage
-  def validate_opts(opts) when is_map(opts) do
-    cond do
-      not (Map.has_key?(opts, "mode") and is_binary(opts["mode"]) and
-             opts["mode"] in ["accept", "reject"]) ->
-        {:error, "mode must be a string, either 'accept' or 'reject'"}
-
-      not (Map.has_key?(opts, "score") and is_integer(opts["score"])) ->
-        {:error, "score must be an integer"}
-
-      not (Map.has_key?(opts, "rules") and is_list(opts["rules"])) ->
-        {:error, "rules must be a list of rules"}
-
-      true ->
-        validate_rules(opts)
-    end
-  end
-
-  @impl Stage
-  def validate_opts(_opts), do: {:error, "options must be a map"}
-
-  defp validate_rules(%{"rules" => rules} = opts) do
-    rules
-    |> Enum.with_index()
-    |> Enum.reduce_while(:ok, fn {rule, index}, :ok ->
-      case validate_rule(rule) do
-        :ok ->
-          {:cont, :ok}
-
-        {:error, reason} ->
-          {:halt, {:error, "invalid rule #{index}: #{reason}"}}
-      end
-    end)
-    |> case do
-      :ok ->
-        {:ok, opts}
-
-      {:error, _reason} = err ->
-        err
-    end
-  end
-
-  defp validate_rule(rule) do
-    cond do
-      not is_map(rule) ->
-        {:error, "rule must be a map"}
-
-      not (Map.has_key?(rule, "mode") and is_binary(rule["mode"]) and
-             rule["mode"] in ["contains_string", "matches_regex"]) ->
-        {:error, "mode property must be a string, either 'contains_string' or 'matches_regex'"}
-
-      not (Map.has_key?(rule, "property") and is_binary(rule["property"]) and
-             rule["property"] in ["url", "title", "author"]) ->
-        {:error, "property property must be a string, either 'url', 'title', or 'author'"}
-
-      not (Map.has_key?(rule, "param") and is_binary(rule["param"])) ->
-        {:error, "param property must be a string"}
-
-      not (Map.has_key?(rule, "weight") and is_integer(rule["weight"])) ->
-        {:error, "weight property must be an integer"}
-
-      true ->
-        :ok
-    end
-  end
-
-  defp test(
-         %{"mode" => mode, "property" => property, "param" => param, "weight" => weight},
-         item_params
-       ) do
-    with prop_value <- get_property(item_params, property),
-         true <- is_binary(prop_value),
-         true <- matches(prop_value, mode, param) do
-      weight
+  def apply(filter, item_params) do
+    if FilterEngine.test(filter, item_params) do
+      {:ok, item_params}
     else
-      _ ->
-        0
+      Logger.debug("Skipping item #{item_params.url} due to feed filter")
+      :tombstone
     end
   end
 
-  def matches(value, "contains_string", param) do
-    String.contains?(value, param)
+  @impl Stage
+  def validate_opts(opts) do
+    FilterEngine.validate_filter(opts)
   end
-
-  def matches(value, "matches_regex", param) do
-    {:ok, regex} = Regex.compile(param)
-    String.match?(value, regex)
-  end
-
-  defp get_property(item_params, "url"), do: item_params.url
-  defp get_property(item_params, "title"), do: item_params.title
-  defp get_property(item_params, "author"), do: item_params.author
-  defp get_property(_item_params, _property), do: {:error, "invalid property"}
 end