defmodule Frenzy.Pipeline.FilterEngine do
  @moduledoc """
  Validates filter definitions and evaluates them against item parameters.

  A filter is a map with string keys:

    * `"mode"`  - either `"accept"` or `"reject"`
    * `"score"` - integer threshold
    * `"rules"` - list of rule maps

  A rule is a map with string keys `"mode"`, `"property"`, `"param"` and
  `"weight"`. The weights of all rules that match an item are summed and the
  sum is compared against the filter's `"score"`.
  """

  @filter_modes ~W[accept reject]
  @rule_modes ~W[contains_string contains_string_case_sensitive matches_regex]
  @rule_properties ~W[url title author content]

  @doc """
  Validates the overall shape of a filter and each of its rules.

  Returns `{:ok, filter}` when the filter is valid, otherwise
  `{:error, reason}` where `reason` is a human-readable string. Any term is
  accepted; a non-map yields `{:error, "filter must be a map"}`.
  """
  @spec validate_filter(term()) :: {:ok, map()} | {:error, String.t()}
  def validate_filter(filter) do
    # No `is_map/1` guard on the head: a guard would raise FunctionClauseError
    # for non-map input and make the first branch below unreachable.
    cond do
      not is_map(filter) ->
        {:error, "filter must be a map"}

      Map.get(filter, "mode") not in @filter_modes ->
        {:error, "mode must be a string, either 'accept' or 'reject'"}

      not is_integer(Map.get(filter, "score")) ->
        {:error, "score must be an integer"}

      not is_list(Map.get(filter, "rules")) ->
        {:error, "rules must be a list of rules"}

      true ->
        validate_rules(filter)
    end
  end

  @doc """
  Validates a list of rules, or the `"rules"` entry of a filter map.

  Given a map with a `"rules"` key, returns `{:ok, filter}` with the validated
  rules put back under `"rules"`. Given a list, returns `{:ok, rules}` with
  the rules in their original order. Validation stops at the first invalid
  rule and returns `{:error, reason}`, where `reason` includes the zero-based
  index of that rule. Anything else yields `{:error, "rules must be a list"}`.
  """
  @spec validate_rules(term()) :: {:ok, map() | [map()]} | {:error, String.t()}
  def validate_rules(%{"rules" => rules} = filter) do
    with {:ok, validated} <- validate_rules(rules) do
      {:ok, Map.put(filter, "rules", validated)}
    end
  end

  def validate_rules(rules) when is_list(rules) do
    rules
    |> Enum.with_index()
    |> Enum.reduce_while({:ok, []}, fn {rule, index}, {:ok, acc} ->
      case validate_rule(rule) do
        {:ok, rule} -> {:cont, {:ok, [rule | acc]}}
        {:error, reason} -> {:halt, {:error, "invalid rule #{index}: #{reason}"}}
      end
    end)
    |> case do
      # Rules were accumulated by prepending; restore the caller's order.
      {:ok, reversed} -> {:ok, Enum.reverse(reversed)}
      {:error, _reason} = err -> err
    end
  end

  def validate_rules(_rules), do: {:error, "rules must be a list"}

  @doc """
  Validates a single rule.

  Returns `{:ok, rule}` when the rule is a map with a known `"mode"`, a known
  `"property"`, a string `"param"` and an integer `"weight"`. For the
  `"matches_regex"` mode the `"param"` must also compile as a regular
  expression. Otherwise returns `{:error, reason}`.
  """
  @spec validate_rule(term()) :: {:ok, map()} | {:error, String.t()}
  def validate_rule(rule) do
    cond do
      not is_map(rule) ->
        {:error, "rule must be a map"}

      Map.get(rule, "mode") not in @rule_modes ->
        rule_modes_text = Enum.map_join(@rule_modes, ", ", &"'#{&1}'")
        {:error, "mode property must be a string, one of #{rule_modes_text}"}

      Map.get(rule, "property") not in @rule_properties ->
        rule_props_text = Enum.map_join(@rule_properties, ", ", &"'#{&1}'")
        {:error, "property property must be a string, one of #{rule_props_text}"}

      not is_binary(Map.get(rule, "param")) ->
        {:error, "param property must be a string"}

      not is_integer(Map.get(rule, "weight")) ->
        {:error, "weight property must be an integer"}

      true ->
        validate_param(rule)
    end
  end

  # Rejects regex rules whose pattern does not compile, so that a broken
  # pattern is reported at validation time instead of failing during matching.
  defp validate_param(%{"mode" => "matches_regex", "param" => param} = rule) do
    case Regex.compile(param, "i") do
      {:ok, _regex} -> {:ok, rule}
      {:error, reason} -> {:error, "param property must be a valid regex: #{inspect(reason)}"}
    end
  end

  defp validate_param(rule), do: {:ok, rule}

  @doc """
  Evaluates a filter against `item_params`.

  Sums the `"weight"` of every rule that matches the item and compares the
  sum against the filter's `"score"`. Returns `true` when the mode is
  `"accept"` and the sum reaches the score, or when the mode is `"reject"`
  and it does not. Returns `false` in every other case.
  """
  @spec test(map(), map()) :: boolean()
  def test(%{"mode" => mode, "score" => score, "rules" => rules}, item_params) do
    item_score =
      rules
      |> Enum.filter(&test_rule(&1, item_params))
      |> Enum.map(& &1["weight"])
      |> Enum.sum()

    reached? = item_score >= score

    case {mode, reached?} do
      {"accept", true} -> true
      {"reject", false} -> true
      _ -> false
    end
  end

  @doc """
  Tests a single rule against `item_params`.

  Returns `true` only when the rule's property resolves to a string and that
  string matches the rule's `"param"` under the rule's `"mode"`. A property
  that does not resolve to a string, or a regex `"param"` that does not
  compile, yields `false`.
  """
  @spec test_rule(map(), map()) :: boolean()
  def test_rule(
        %{"mode" => mode, "property" => property, "param" => param},
        item_params
      ) do
    case get_property(item_params, property) do
      value when is_binary(value) -> matches?(value, mode, param)
      _other -> false
    end
  end

  # Case-insensitive substring check: both sides are downcased first.
  defp matches?(value, "contains_string", param) do
    String.contains?(String.downcase(value), String.downcase(param))
  end

  defp matches?(value, "contains_string_case_sensitive", param) do
    String.contains?(value, param)
  end

  # The pattern is compiled case-insensitively. An invalid pattern counts as
  # "no match" rather than raising MatchError in the middle of filtering.
  defp matches?(value, "matches_regex", param) do
    case Regex.compile(param, "i") do
      {:ok, regex} -> String.match?(value, regex)
      {:error, _reason} -> false
    end
  end

  # Resolves a rule property name to the corresponding value of the item.
  defp get_property(item_params, "url"), do: item_params.url
  defp get_property(item_params, "title"), do: item_params.title
  defp get_property(item_params, "author"), do: item_params.author

  defp get_property(%{content: content, content_type: type}, "content")
       when type in ["text/plain", "text/gemini"],
       do: content

  # HTML content is reduced to its text before matching.
  # NOTE(review): Floki.parse/1 is deprecated in newer Floki releases in favour
  # of Floki.parse_document/1 - confirm the Floki version before migrating.
  defp get_property(%{content: content, content_type: "text/html"}, "content") do
    content
    |> Floki.parse()
    |> Floki.text()
  end

  # Unknown property or unsupported content type; test_rule/2 treats any
  # non-string result as "no match".
  defp get_property(_item_params, _property), do: {:error, "invalid property"}
end