frenzy/lib/frenzy/pipeline/filter_engine.ex

142 lines
4.0 KiB
Elixir

defmodule Frenzy.Pipeline.FilterEngine do
  @moduledoc """
  Validates filter definitions and evaluates them against item parameters.

  A filter is a map with string keys:

    * `"mode"`  - `"accept"` or `"reject"`
    * `"score"` - integer threshold
    * `"rules"` - list of rule maps

  A rule is a map with string keys:

    * `"mode"`     - one of `"contains_string"`, `"contains_string_case_sensitive"`,
      `"matches_regex"`
    * `"property"` - one of `"url"`, `"title"`, `"author"`, `"content"`
    * `"param"`    - the string (or regex source) to look for
    * `"weight"`   - integer added to the item's score when the rule matches
  """

  @filter_modes ~W[accept reject]
  @rule_modes ~W[contains_string contains_string_case_sensitive matches_regex]
  @rule_properties ~W[url title author content]

  @doc """
  Validates a whole filter definition.

  Returns `{:ok, filter}` (with its `"rules"` replaced by the validated rules,
  in their original order) or `{:error, message}` describing the first problem
  found. Any non-map input yields `{:error, "filter must be a map"}`.
  """
  @spec validate_filter(term()) :: {:ok, map()} | {:error, String.t()}
  def validate_filter(filter) when is_map(filter) do
    cond do
      Map.get(filter, "mode") not in @filter_modes ->
        {:error, "mode must be a string, either 'accept' or 'reject'"}

      not is_integer(Map.get(filter, "score")) ->
        {:error, "score must be an integer"}

      not is_list(Map.get(filter, "rules")) ->
        {:error, "rules must be a list of rules"}

      true ->
        validate_rules(filter)
    end
  end

  # No guard on the public entry point: non-map input must produce an error
  # tuple rather than a FunctionClauseError.
  def validate_filter(_filter), do: {:error, "filter must be a map"}

  @doc """
  Validates a list of rules, or the `"rules"` entry of a filter map.

    * Given a map with a `"rules"` key, returns `{:ok, filter}` with the
      validated rules put back under `"rules"`.
    * Given a list, returns `{:ok, rules}` preserving the input order.
    * Stops at the first invalid rule and returns
      `{:error, "invalid rule <index>: <reason>"}` (index is zero-based).
    * Anything else returns `{:error, "rules must be a list"}`.
  """
  @spec validate_rules(term()) :: {:ok, map() | [map()]} | {:error, String.t()}
  def validate_rules(%{"rules" => rules} = filter) do
    case validate_rules(rules) do
      {:ok, rules} ->
        {:ok, Map.put(filter, "rules", rules)}

      {:error, _reason} = err ->
        err
    end
  end

  def validate_rules(rules) when is_list(rules) do
    result =
      rules
      |> Enum.with_index()
      |> Enum.reduce_while({:ok, []}, fn {rule, index}, {:ok, acc} ->
        case validate_rule(rule) do
          {:ok, rule} ->
            {:cont, {:ok, [rule | acc]}}

          {:error, reason} ->
            {:halt, {:error, "invalid rule #{index}: #{reason}"}}
        end
      end)

    # Rules are accumulated by prepending, so restore the caller's order.
    with {:ok, reversed} <- result do
      {:ok, Enum.reverse(reversed)}
    end
  end

  def validate_rules(_rules), do: {:error, "rules must be a list"}

  @doc """
  Validates a single rule.

  Returns `{:ok, rule}` unchanged when the rule is well formed, otherwise
  `{:error, message}` for the first failing check. Checks run in this order:
  `"mode"`, `"property"`, `"param"`, `"weight"`, and finally - for
  `"matches_regex"` rules - that `"param"` compiles as a regular expression.
  """
  @spec validate_rule(term()) :: {:ok, map()} | {:error, String.t()}
  def validate_rule(rule) when is_map(rule) do
    mode = Map.get(rule, "mode")
    param = Map.get(rule, "param")

    cond do
      mode not in @rule_modes ->
        {:error, "mode property must be a string, one of #{quoted_list(@rule_modes)}"}

      Map.get(rule, "property") not in @rule_properties ->
        {:error, "property property must be a string, one of #{quoted_list(@rule_properties)}"}

      not is_binary(param) ->
        {:error, "param property must be a string"}

      not is_integer(Map.get(rule, "weight")) ->
        {:error, "weight property must be an integer"}

      # Reject patterns that cannot compile here so they never reach test/2.
      mode == "matches_regex" and not compilable_regex?(param) ->
        {:error, "param property must be a valid regular expression"}

      true ->
        {:ok, rule}
    end
  end

  def validate_rule(_rule), do: {:error, "rule must be a map"}

  @doc """
  Evaluates a filter against `item_params`.

  The item's score is the sum of the `"weight"` of every rule that matches
  (see `test_rule/2`). The result is:

    * `true` for an `"accept"` filter when the item's score is at least `score`
    * `true` for a `"reject"` filter when the item's score is below `score`
    * `false` in every other case, including an unknown mode
  """
  @spec test(map(), map()) :: boolean()
  def test(%{"mode" => mode, "score" => score, "rules" => rules}, item_params) do
    item_score =
      rules
      |> Enum.filter(&test_rule(&1, item_params))
      |> Enum.map(& &1["weight"])
      |> Enum.sum()

    threshold_met? = item_score >= score

    case mode do
      "accept" -> threshold_met?
      "reject" -> not threshold_met?
      _ -> false
    end
  end

  @doc """
  Returns `true` when `item_params` matches the given rule.

  The rule's `"property"` selects which value of `item_params` is inspected.
  If that value is not a string (for example `nil`, or an unsupported
  property/content type) the rule does not match. A `"matches_regex"` rule
  whose pattern does not compile never matches.
  """
  @spec test_rule(map(), map()) :: boolean()
  def test_rule(
        %{"mode" => mode, "property" => property, "param" => param},
        item_params
      ) do
    case get_property(item_params, property) do
      value when is_binary(value) -> matches(value, mode, param)
      _ -> false
    end
  end

  # Case-insensitive substring test.
  defp matches(value, "contains_string", param) do
    String.contains?(String.downcase(value), String.downcase(param))
  end

  # Exact (case-sensitive) substring test.
  defp matches(value, "contains_string_case_sensitive", param) do
    String.contains?(value, param)
  end

  # Case-insensitive regex test. An uncompilable pattern is treated as
  # "no match" rather than crashing the whole filter evaluation.
  defp matches(value, "matches_regex", param) do
    case Regex.compile(param, "i") do
      {:ok, regex} -> String.match?(value, regex)
      {:error, _reason} -> false
    end
  end

  # Extracts the value a rule should be tested against. Returns a non-binary
  # term for unsupported property/content-type combinations, which
  # test_rule/2 treats as "no match".
  defp get_property(item_params, "url"), do: item_params.url
  defp get_property(item_params, "title"), do: item_params.title
  defp get_property(item_params, "author"), do: item_params.author

  defp get_property(%{content: content, content_type: type}, "content")
       when type in ["text/plain", "text/gemini"],
       do: content

  # HTML content is reduced to its text before matching.
  # NOTE(review): Floki.parse/1 appears to be deprecated in newer Floki
  # releases in favour of parse_document/parse_fragment - confirm against the
  # pinned Floki version before changing.
  defp get_property(%{content: content, content_type: "text/html"}, "content") do
    content
    |> Floki.parse()
    |> Floki.text()
  end

  defp get_property(_item_params, _property), do: {:error, "invalid property"}

  # Renders a list of strings as "'a', 'b', 'c'" for error messages.
  defp quoted_list(values), do: Enum.map_join(values, ", ", &"'#{&1}'")

  # True when `source` compiles with the same options used at match time.
  defp compilable_regex?(source), do: match?({:ok, _}, Regex.compile(source, "i"))
end