Add Elixir 1.6 formatter config file and format the codebase

2018-02-09 11:42:08 +08:00 · 2018-02-09 11:42:08 +08:00 · b2f8a3b4da
commit b2f8a3b4da
parent 307152202b
16 changed files with 324 additions and 224 deletions
--- a/.formatter.exs
+++ b/.formatter.exs
@ -0,0 +1,4 @@
+# Used by "mix format"
+[
+  inputs: ["mix.exs", "{config,lib,test}/**/*.{ex,exs}"]
+]
--- a/lib/readability.ex
+++ b/lib/readability.ex
@ -34,32 +34,36 @@ defmodule Readability do
  alias Readability.Summary
  alias Readability.Helper

-  @default_options [retry_length: 250,
-                    min_text_length: 25,
-                    remove_unlikely_candidates: true,
-                    weight_classes: true,
-                    clean_conditionally: true,
-                    remove_empty_nodes: true,
-                    min_image_width: 130,
-                    min_image_height: 80,
-                    ignore_image_format: [],
-                    blacklist: nil,
-                    whitelist: nil,
-                    page_url: nil
-                   ]
+  @default_options [
+    retry_length: 250,
+    min_text_length: 25,
+    remove_unlikely_candidates: true,
+    weight_classes: true,
+    clean_conditionally: true,
+    remove_empty_nodes: true,
+    min_image_width: 130,
+    min_image_height: 80,
+    ignore_image_format: [],
+    blacklist: nil,
+    whitelist: nil,
+    page_url: nil
+  ]

-  @regexes [unlikely_candidate: ~r/combx|comment|community|disqus|extra|foot|header|hidden|lightbox|modal|menu|meta|nav|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i,
-            ok_maybe_its_a_candidate: ~r/and|article|body|column|main|shadow/i,
-            positive: ~r/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
-            negative: ~r/hidden|^hid|combx|comment|com-|contact|foot|footer|footnote|link|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|utility|widget/i,
-            div_to_p_elements: ~r/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i,
-            replace_brs: ~r/(<br[^>]*>[ \n\r\t]*){2,}/i,
-            replace_fonts: ~r/<(\/?)font[^>]*>/i,
-            replace_xml_version: ~r/<\?xml.*\?>/i,
-            normalize: ~r/\s{2,}/,
-            video: ~r/\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i,
-            protect_attrs: ~r/^(?!id|rel|for|summary|title|href|src|alt|srcdoc)/i
-           ]
+  @regexes [
+    unlikely_candidate:
+      ~r/combx|comment|community|disqus|extra|foot|header|hidden|lightbox|modal|menu|meta|nav|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i,
+    ok_maybe_its_a_candidate: ~r/and|article|body|column|main|shadow/i,
+    positive: ~r/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
+    negative:
+      ~r/hidden|^hid|combx|comment|com-|contact|foot|footer|footnote|link|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|utility|widget/i,
+    div_to_p_elements: ~r/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i,
+    replace_brs: ~r/(<br[^>]*>[ \n\r\t]*){2,}/i,
+    replace_fonts: ~r/<(\/?)font[^>]*>/i,
+    replace_xml_version: ~r/<\?xml.*\?>/i,
+    normalize: ~r/\s{2,}/,
+    video: ~r/\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i,
+    protect_attrs: ~r/^(?!id|rel|for|summary|title|href|src|alt|srcdoc)/i
+  ]

  @markup_mimes ~r/^(application|text)\/[a-z\-_\.\+]+ml(;\s+charset=.*)?$/i

@ -72,32 +76,30 @@ defmodule Readability do
  @doc """
  summarize the primary readable content of a webpage.
  """
-  @spec summarize(url, options) :: Summary.t
+  @spec summarize(url, options) :: Summary.t()
  def summarize(url, opts \\ []) do
-    opts = Keyword.merge(opts, [page_url: url])
-    httpoison_options = Application.get_env :readability, :httpoison_options, []
+    opts = Keyword.merge(opts, page_url: url)
+    httpoison_options = Application.get_env(:readability, :httpoison_options, [])
    %{status_code: _, body: raw, headers: headers} = HTTPoison.get!(url, [], httpoison_options)

    case is_response_markup(headers) do
      true ->
        html_tree = Helper.normalize(raw)
-        article_tree = html_tree
-        |> ArticleBuilder.build(opts)

-        %Summary{title: title(html_tree),
-                 authors: authors(html_tree),
-                 article_html: readable_html(article_tree),
-                 article_text: readable_text(article_tree)
+        article_tree =
+          html_tree
+          |> ArticleBuilder.build(opts)
+
+        %Summary{
+          title: title(html_tree),
+          authors: authors(html_tree),
+          article_html: readable_html(article_tree),
+          article_text: readable_text(article_tree)
        }

      _ ->
-        %Summary{title: nil,
-                 authors: nil,
-                 article_html: nil,
-                 article_text: raw
-        }
+        %Summary{title: nil, authors: nil, article_html: nil, article_text: raw}
    end
-
  end

  @doc """
@ -112,8 +114,10 @@ defmodule Readability do
  def mime(headers \\ []) do
    headers
    |> Enum.find(
-      {"Content-Type", "text/plain"},  # default
-      fn({key, _}) -> String.downcase(key) == "content-type" end)
+      # default
+      {"Content-Type", "text/plain"},
+      fn {key, _} -> String.downcase(key) == "content-type" end
+    )
    |> elem(1)
  end

@ -141,12 +145,12 @@ defmodule Readability do
  """
  @spec title(binary | html_tree) :: binary
  def title(raw_html) when is_binary(raw_html) do
-     raw_html
-     |> Helper.normalize
-     |> title
+    raw_html
+    |> Helper.normalize()
+    |> title
  end
-  def title(html_tree), do: TitleFinder.title(html_tree)

+  def title(html_tree), do: TitleFinder.title(html_tree)

  @doc """
  Extract authors
@ -173,8 +177,9 @@ defmodule Readability do
  @spec article(binary, options) :: html_tree
  def article(raw_html, opts \\ []) do
    opts = Keyword.merge(@default_options, opts)
+
    raw_html
-    |> Helper.normalize
+    |> Helper.normalize()
    |> ArticleBuilder.build(opts)
  end

@ -196,10 +201,11 @@ defmodule Readability do
    # TODO: Remove image caption when extract only text
    tags_to_br = ~r/<\/(p|div|article|h\d)/i
    html_str = html_tree |> raw_html
-    Regex.replace(tags_to_br, html_str, &("\n#{&1}"))
-    |> Floki.parse
-    |> Floki.text
-    |> String.strip
+
+    Regex.replace(tags_to_br, html_str, &"\n#{&1}")
+    |> Floki.parse()
+    |> Floki.text()
+    |> String.strip()
  end

  @doc """
@ -207,7 +213,7 @@ defmodule Readability do
  """
  @spec raw_html(html_tree) :: binary
  def raw_html(html_tree) do
-    html_tree |> Floki.raw_html
+    html_tree |> Floki.raw_html()
  end

  def parse(raw_html) when is_binary(raw_html), do: Floki.parse(raw_html)
--- a/lib/readability/article_builder.ex
+++ b/lib/readability/article_builder.ex
@ -20,12 +20,18 @@ defmodule Readability.ArticleBuilder do
  @spec build(html_tree, options) :: html_tree
  def build(html_tree, opts) do
    origin_tree = html_tree
-    html_tree = html_tree
-                |> Helper.remove_tag(fn({tag, _, _}) ->
-                     Enum.member?(["script", "style"], tag)
-                   end)

-    html_tree = if opts[:remove_unlikely_candidates], do: Cleaner.remove_unlikely_tree(html_tree), else: html_tree
+    html_tree =
+      html_tree
+      |> Helper.remove_tag(fn {tag, _, _} ->
+        Enum.member?(["script", "style"], tag)
+      end)
+
+    html_tree =
+      if opts[:remove_unlikely_candidates],
+        do: Cleaner.remove_unlikely_tree(html_tree),
+        else: html_tree
+
    html_tree = Cleaner.transform_misused_div_to_p(html_tree)

    candidates = CandidateFinder.find(html_tree, opts)
@ -48,25 +54,34 @@ defmodule Readability.ArticleBuilder do
    cond do
      opts[:remove_unlikely_candidates] ->
        Keyword.put(opts, :remove_unlikely_candidates, false)
+
      opts[:weight_classes] ->
        Keyword.put(opts, :weight_classes, false)
+
      opts[:clean_conditionally] ->
        Keyword.put(opts, :clean_conditionally, false)
-      true -> nil
+
+      true ->
+        nil
    end
  end

  defp find_article(candidates, html_tree) do
    best_candidate = CandidateFinder.find_best_candidate(candidates)
-    article_trees = if best_candidate do
-                      find_article_trees(best_candidate, candidates)
-                    else
-                      fallback_candidate = case html_tree |> Floki.find("body") do
-                                             [tree|_] -> %Candidate{html_tree: tree}
-                                             _ -> %Candidate{html_tree: {}}
-                                           end
-                      find_article_trees(fallback_candidate, candidates)
-                    end
+
+    article_trees =
+      if best_candidate do
+        find_article_trees(best_candidate, candidates)
+      else
+        fallback_candidate =
+          case html_tree |> Floki.find("body") do
+            [tree | _] -> %Candidate{html_tree: tree}
+            _ -> %Candidate{html_tree: {}}
+          end
+
+        find_article_trees(fallback_candidate, candidates)
+      end
+
    {"div", [], article_trees}
  end

@ -75,22 +90,21 @@ defmodule Readability.ArticleBuilder do

    candidates
    |> Enum.filter(&(&1.tree_depth == best_candidate.tree_depth))
-    |> Enum.filter(fn(candidate) ->
-         candidate == best_candidate
-         || candidate.score >= score_threshold
-         || append?(candidate)
-       end)
-    |> Enum.map(&(to_article_tag(&1.html_tree)))
+    |> Enum.filter(fn candidate ->
+      candidate == best_candidate || candidate.score >= score_threshold || append?(candidate)
+    end)
+    |> Enum.map(&to_article_tag(&1.html_tree))
  end

  defp append?(%Candidate{html_tree: html_tree}) when elem(html_tree, 0) == "p" do
    link_density = Scoring.calc_link_density(html_tree)
-    inner_text = html_tree |> Floki.text
-    inner_length = inner_text |> String.length
+    inner_text = html_tree |> Floki.text()
+    inner_length = inner_text |> String.length()

-    (inner_length > 80 && link_density < 0.25)
-    || (inner_length < 80 && link_density == 0 && inner_text =~ ~r/\.( |$)/)
+    (inner_length > 80 && link_density < 0.25) ||
+      (inner_length < 80 && link_density == 0 && inner_text =~ ~r/\.( |$)/)
  end
+
  defp append?(_), do: false

  defp to_article_tag({tag, attrs, inner_tree} = html_tree) do
--- a/lib/readability/author_finder.ex
+++ b/lib/readability/author_finder.ex
@ -11,21 +11,24 @@ defmodule Readability.AuthorFinder do
  @spec find(html_tree) :: [binary]
  def find(html_tree) do
    author_names = find_by_meta_tag(html_tree)
+
    if author_names do
      split_author_names(author_names)
    end
  end

  def find_by_meta_tag(html_tree) do
-    names = html_tree
-             |> Floki.find("meta[name*=author], meta[property*=author]")
-             |> Enum.map(fn(meta) ->
-                  meta
-                  |> Floki.attribute("content")
-                  |> Enum.join(" ")
-                  |> String.strip
-                end)
-             |> Enum.reject(&(is_nil(&1) || String.length(&1) == 0))
+    names =
+      html_tree
+      |> Floki.find("meta[name*=author], meta[property*=author]")
+      |> Enum.map(fn meta ->
+        meta
+        |> Floki.attribute("content")
+        |> Enum.join(" ")
+        |> String.strip()
+      end)
+      |> Enum.reject(&(is_nil(&1) || String.length(&1) == 0))
+
    if length(names) > 0 do
      hd(names)
    else
--- a/lib/readability/candidate/cleaner.ex
+++ b/lib/readability/candidate/cleaner.ex
@ -14,9 +14,11 @@ defmodule Readability.Candidate.Cleaner do
  @spec transform_misused_div_to_p(html_tree) :: html_tree
  def transform_misused_div_to_p(content) when is_binary(content), do: content
  def transform_misused_div_to_p([]), do: []
-  def transform_misused_div_to_p([h|t]) do
-    [transform_misused_div_to_p(h)|transform_misused_div_to_p(t)]
+
+  def transform_misused_div_to_p([h | t]) do
+    [transform_misused_div_to_p(h) | transform_misused_div_to_p(t)]
  end
+
  def transform_misused_div_to_p({tag, attrs, inner_tree}) do
    tag = if misused_divs?(tag, inner_tree), do: "p", else: tag
    {tag, attrs, transform_misused_div_to_p(inner_tree)}
@ -33,16 +35,18 @@ defmodule Readability.Candidate.Cleaner do
  defp misused_divs?("div", inner_tree) do
    !(Floki.raw_html(inner_tree) =~ Readability.regexes(:div_to_p_elements))
  end
+
  defp misused_divs?(_, _), do: false

  defp unlikely_tree?({tag, attrs, _}) do
-    idclass_str = attrs
-                  |> Enum.filter_map(&(elem(&1, 0)  =~ ~r/id|class/i), &(elem(&1, 1)))
-                  |> Enum.join("")
+    idclass_str =
+      attrs
+      |> Enum.filter_map(&(elem(&1, 0) =~ ~r/id|class/i), &elem(&1, 1))
+      |> Enum.join("")
+
    str = tag <> idclass_str

-    str =~ Readability.regexes(:unlikely_candidate)
-      && !(str =~ Readability.regexes(:ok_maybe_its_a_candidate))
-      && tag != "html"
+    str =~ Readability.regexes(:unlikely_candidate) &&
+      !(str =~ Readability.regexes(:ok_maybe_its_a_candidate)) && tag != "html"
  end
 end
--- a/lib/readability/candidate/scoring.ex
+++ b/lib/readability/candidate/scoring.ex
@ -4,11 +4,7 @@ defmodule Readability.Candidate.Scoring do
  """
  alias Readability.Helper

-  @element_scores %{"div" => 5,
-                    "blockquote" => 3,
-                    "form" => -3,
-                    "th" => -5
-                  }
+  @element_scores %{"div" => 5, "blockquote" => 3, "form" => -3, "th" => -5}

  @type html_tree :: tuple | list
  @type options :: list
@ -20,15 +16,19 @@ defmodule Readability.Candidate.Scoring do
  @spec calc_score(html_tree, options) :: number
  def calc_score(html_tree, opts \\ []) do
    score = calc_node_score(html_tree, opts)
-    score = score + calc_children_content_score(html_tree) + calc_grand_children_content_score(html_tree)
+
+    score =
+      score + calc_children_content_score(html_tree) +
+        calc_grand_children_content_score(html_tree)
+
    score * (1 - calc_link_density(html_tree))
  end

  defp calc_content_score(html_tree) do
    score = 1
-    inner_text = html_tree |> Floki.text
+    inner_text = html_tree |> Floki.text()
    split_score = inner_text |> String.split(",") |> length
-    length_score = [(String.length(inner_text) / 100), 3] |> Enum.min
+    length_score = [String.length(inner_text) / 100, 3] |> Enum.min()
    score + split_score + length_score
  end

@ -37,9 +37,11 @@ defmodule Readability.Candidate.Scoring do
    score = if opts[:weight_classes], do: score + class_weight(attrs), else: score
    score + (@element_scores[tag] || 0)
  end
-  defp calc_node_score([h|t], opts) do
+
+  defp calc_node_score([h | t], opts) do
    calc_node_score(h, opts) + calc_node_score(t, opts)
  end
+
  defp calc_node_score([], _), do: 0

  def class_weight(attrs) do
@ -55,14 +57,16 @@ defmodule Readability.Candidate.Scoring do
  end

  def calc_link_density(html_tree) do
-    link_length = html_tree
-                  |> Floki.find("a")
-                  |> Floki.text
-                  |> String.length
+    link_length =
+      html_tree
+      |> Floki.find("a")
+      |> Floki.text()
+      |> String.length()

-    text_length = html_tree
-                  |> Floki.text
-                  |> String.length
+    text_length =
+      html_tree
+      |> Floki.text()
+      |> String.length()

    if text_length == 0 do
      0
@ -78,11 +82,13 @@ defmodule Readability.Candidate.Scoring do
  end

  defp calc_grand_children_content_score({_, _, children_tree}) do
-    score = children_tree
-            |> Enum.filter_map(&is_tuple(&1), &elem(&1, 2))
-            |> List.flatten
-            |> Enum.filter(&(is_tuple(&1) && Helper.candidate_tag?(&1)))
-            |> calc_content_score
+    score =
+      children_tree
+      |> Enum.filter_map(&is_tuple(&1), &elem(&1, 2))
+      |> List.flatten()
+      |> Enum.filter(&(is_tuple(&1) && Helper.candidate_tag?(&1)))
+      |> calc_content_score
+
    score / 2
  end
 end
--- a/lib/readability/candidate_finder.ex
+++ b/lib/readability/candidate_finder.ex
@ -14,20 +14,26 @@ defmodule Readability.CandidateFinder do
  @doc """
  Find candidates that shuld be meaningful article by analysing nodes
  """
-  @spec find(html_tree, options, number) :: [Candidate.t]
+  @spec find(html_tree, options, number) :: [Candidate.t()]
  def find(_, opts \\ [], tree_depth \\ 0)
  def find([], _, _), do: []
-  def find([h|t], opts, tree_depth) do
+
+  def find([h | t], opts, tree_depth) do
    [find(h, opts, tree_depth) | find(t, opts, tree_depth)]
-    |> List.flatten
+    |> List.flatten()
  end
+
  def find(text, _, _) when is_binary(text), do: []
+
  def find({tag, attrs, inner_tree}, opts, tree_depth) do
    html_tree = {tag, attrs, inner_tree}
+
    if candidate?(html_tree) do
-      candidate = %Candidate{html_tree: html_tree,
-                             score: Scoring.calc_score(html_tree, opts),
-                             tree_depth: tree_depth}
+      candidate = %Candidate{
+        html_tree: html_tree,
+        score: Scoring.calc_score(html_tree, opts),
+        tree_depth: tree_depth
+      }

      [candidate | find(inner_tree, opts, tree_depth + 1)]
    else
@ -38,18 +44,20 @@ defmodule Readability.CandidateFinder do
  @doc """
  Find the highest score candidate.
  """
-  @spec find_best_candidate([Candidate.t]) :: Candidate.t
+  @spec find_best_candidate([Candidate.t()]) :: Candidate.t()
  def find_best_candidate([]), do: nil
+
  def find_best_candidate(candidates) do
    candidates
-    |> Enum.max_by(fn(candidate) -> candidate.score end)
+    |> Enum.max_by(fn candidate -> candidate.score end)
  end

  defp candidate?(_, depth \\ 0)
  defp candidate?(_, depth) when depth > 2, do: false
-  defp candidate?([h|t], depth), do: candidate?(h, depth) || candidate?(t, depth)
+  defp candidate?([h | t], depth), do: candidate?(h, depth) || candidate?(t, depth)
  defp candidate?([], _), do: false
  defp candidate?(text, _) when is_binary(text), do: false
+
  defp candidate?({_, _, inner_tree} = html_tree, depth) do
    if Helper.candidate_tag?(html_tree) do
      true
--- a/lib/readability/helper.ex
+++ b/lib/readability/helper.ex
@ -8,15 +8,18 @@ defmodule Readability.Helper do
  @doc """
  Change existing tags by selector
  """
-  @spec change_tag(html_tree, String.t, String.t) :: html_tree
+  @spec change_tag(html_tree, String.t(), String.t()) :: html_tree
  def change_tag(content, _, _) when is_binary(content), do: content
  def change_tag([], _, _), do: []
-  def change_tag([h|t], selector, tag) do
-    [change_tag(h, selector, tag)|change_tag(t, selector, tag)]
+
+  def change_tag([h | t], selector, tag) do
+    [change_tag(h, selector, tag) | change_tag(t, selector, tag)]
  end
+
  def change_tag({tag_name, attrs, inner_tree}, tag_name, tag) do
    {tag, attrs, change_tag(inner_tree, tag_name, tag)}
  end
+
  def change_tag({tag_name, attrs, html_tree}, selector, tag) do
    {tag_name, attrs, change_tag(html_tree, selector, tag)}
  end
@ -24,41 +27,50 @@ defmodule Readability.Helper do
  @doc """
  Remove html attributes
  """
-  @spec remove_attrs(html_tree, String.t | [String.t] | Regex.t) :: html_tree
+  @spec remove_attrs(html_tree, String.t() | [String.t()] | Regex.t()) :: html_tree
  def remove_attrs(content, _) when is_binary(content), do: content
  def remove_attrs([], _), do: []
-  def remove_attrs([h|t], t_attrs) do
-    [remove_attrs(h, t_attrs)|remove_attrs(t, t_attrs)]
+
+  def remove_attrs([h | t], t_attrs) do
+    [remove_attrs(h, t_attrs) | remove_attrs(t, t_attrs)]
  end
+
  def remove_attrs({tag_name, attrs, inner_tree}, target_attr) do
    reject_fun =
      cond do
        is_binary(target_attr) ->
-          fn(attr) -> elem(attr, 0) == target_attr end
+          fn attr -> elem(attr, 0) == target_attr end
+
        Regex.regex?(target_attr) ->
-          fn(attr) -> elem(attr, 0) =~ target_attr end
+          fn attr -> elem(attr, 0) =~ target_attr end
+
        is_list(target_attr) ->
-          fn(attr) -> Enum.member?(target_attr, elem(attr, 0)) end
-        true -> fn(attr) -> attr end
+          fn attr -> Enum.member?(target_attr, elem(attr, 0)) end
+
+        true ->
+          fn attr -> attr end
      end
+
    {tag_name, Enum.reject(attrs, reject_fun), remove_attrs(inner_tree, target_attr)}
  end

-
  @doc """
  Remove tags
  """
  @spec remove_tag(html_tree, fun) :: html_tree
  def remove_tag(content, _) when is_binary(content), do: content
  def remove_tag([], _), do: []
-  def remove_tag([h|t], fun) do
+
+  def remove_tag([h | t], fun) do
    node = remove_tag(h, fun)
+
    if is_nil(node) do
      remove_tag(t, fun)
    else
-      [node|remove_tag(t, fun)]
+      [node | remove_tag(t, fun)]
    end
  end
+
  def remove_tag({tag, attrs, inner_tree} = html_tree, fun) do
    if fun.(html_tree) do
      nil
@ -72,7 +84,7 @@ defmodule Readability.Helper do
  """
  @spec text_length(html_tree) :: number
  def text_length(html_tree) do
-    html_tree |> Floki.text |> String.strip |> String.length
+    html_tree |> Floki.text() |> String.strip() |> String.length()
  end

  @doc """
@ -80,9 +92,9 @@ defmodule Readability.Helper do
  """
  @spec candidate_tag?(html_tree) :: boolean
  def candidate_tag?({tag, _, _} = html_tree) do
-    Enum.any?(["p", "td"], fn(candidate_tag) ->
-      tag == candidate_tag
-      && (text_length(html_tree)) >= Readability.default_options[:min_text_length]
+    Enum.any?(["p", "td"], fn candidate_tag ->
+      tag == candidate_tag &&
+        text_length(html_tree) >= Readability.default_options()[:min_text_length]
    end)
  end

@ -96,7 +108,7 @@ defmodule Readability.Helper do
    |> String.replace(Readability.regexes(:replace_brs), "</p><p>")
    |> String.replace(Readability.regexes(:replace_fonts), "<\1span>")
    |> String.replace(Readability.regexes(:normalize), " ")
-    |> Floki.parse
+    |> Floki.parse()
    |> Floki.filter_out(:comment)
  end
 end
--- a/lib/readability/sanitizer.ex
+++ b/lib/readability/sanitizer.ex
@ -13,12 +13,13 @@ defmodule Readability.Sanitizer do
  @doc """
  Sanitizes article html tree
  """
-  @spec sanitize(html_tree, [Candidate.t], list) :: html_tree
-  def sanitize(html_tree, candidates, opts  \\ []) do
-    html_tree = html_tree
-                |> Helper.remove_tag(&clean_headline_tag?(&1))
-                |> Helper.remove_tag(&clean_unlikely_tag?(&1))
-                |> Helper.remove_tag(&clean_empty_p?(&1))
+  @spec sanitize(html_tree, [Candidate.t()], list) :: html_tree
+  def sanitize(html_tree, candidates, opts \\ []) do
+    html_tree =
+      html_tree
+      |> Helper.remove_tag(&clean_headline_tag?(&1))
+      |> Helper.remove_tag(&clean_unlikely_tag?(&1))
+      |> Helper.remove_tag(&clean_empty_p?(&1))

    if opts[:clean_conditionally] do
      html_tree |> Helper.remove_tag(conditionally_cleaing_fn(candidates))
@ -28,15 +29,19 @@ defmodule Readability.Sanitizer do
  end

  defp conditionally_cleaing_fn(candidates) do
-    fn({tag, attrs, _} = tree) ->
+    fn {tag, attrs, _} = tree ->
      if Enum.any?(["table", "ul", "div"], &(&1 == tag)) do
        weight = Scoring.class_weight(attrs)
-        same_tree = candidates
-                    |> Enum.find(%Candidate{}, &(&1.html_tree == tree))
+
+        same_tree =
+          candidates
+          |> Enum.find(%Candidate{}, &(&1.html_tree == tree))
+
        list? = tag == "ul"
+
        cond do
-          weight + same_tree.score < 0
-            -> true
+          weight + same_tree.score < 0 ->
+            true

          length(Regex.scan(~r/\,/, Floki.text(tree))) < 10 ->
            # If there are not very many commas, and the number of
@ -46,35 +51,42 @@ defmodule Readability.Sanitizer do
            img_len = tree |> Floki.find("img") |> length
            li_len = tree |> Floki.find("li") |> length
            input_len = tree |> Floki.find("input") |> length
-            embed_len = tree
-                        |> Floki.find("embed")
-                        |> Enum.reject(&(&1 =~ Readability.regexes(:video)))
-                        |> length

-            link_density =  Scoring.calc_link_density(tree)
+            embed_len =
+              tree
+              |> Floki.find("embed")
+              |> Enum.reject(&(&1 =~ Readability.regexes(:video)))
+              |> length
+
+            link_density = Scoring.calc_link_density(tree)
            conent_len = Helper.text_length(tree)

-            img_len > p_len                 # too many image
-            || (!list? && li_len > p_len)   # more <li>s than <p>s
-            || input_len > (p_len / 3)      # less than 3x <p>s than <input>s
-            || (!list? && conent_len < Readability.regexes(:min_text_length) && img_len != 1) # too short a content length without a single image
-            || (weight < 25 && link_density > 0.2) # too many links for its weight (#{weight})
-            || (weight >= 25 && link_density > 0.5) # too many links for its weight (#{weight})
-            || ((embed_len == 1 && conent_len < 75) || embed_len > 1) # <embed>s with too short a content length, or too many <embed>s
+            # too many image
+            # more <li>s than <p>s
+            # less than 3x <p>s than <input>s
+            # too short a content length without a single image
+            # too many links for its weight (#{weight})
+            # too many links for its weight (#{weight})
+            # <embed>s with too short a content length, or too many <embed>s
+            img_len > p_len || (!list? && li_len > p_len) || input_len > p_len / 3 ||
+              (!list? && conent_len < Readability.regexes(:min_text_length) && img_len != 1) ||
+              (weight < 25 && link_density > 0.2) || (weight >= 25 && link_density > 0.5) ||
+              ((embed_len == 1 && conent_len < 75) || embed_len > 1)

-          true -> false
+          true ->
+            false
        end
      end
    end
  end

  defp clean_headline_tag?({tag, attrs, _} = html_tree) do
-    tag =~ ~r/^h\d{1}$/
-    && (Scoring.class_weight(attrs) < 0 || Scoring.calc_link_density(html_tree) > 0.33)
+    tag =~ ~r/^h\d{1}$/ &&
+      (Scoring.class_weight(attrs) < 0 || Scoring.calc_link_density(html_tree) > 0.33)
  end

  defp clean_unlikely_tag?({tag, attrs, _}) do
-    attrs_str = attrs |> Enum.map(&(elem(&1, 1))) |> Enum.join("")
+    attrs_str = attrs |> Enum.map(&elem(&1, 1)) |> Enum.join("")
    tag =~ ~r/form|object|iframe|embed/ && !(attrs_str =~ Readability.regexes(:video))
  end

--- a/lib/readability/title_finder.ex
+++ b/lib/readability/title_finder.ex
@ -23,6 +23,7 @@ defmodule Readability.TitleFinder do
        else
          h_title
        end
+
      title when is_binary(title) ->
        title
    end
@ -54,7 +55,7 @@ defmodule Readability.TitleFinder do
  @doc """
  Find title from h tag
  """
-  @spec h_tag_title(html_tree, String.t) :: binary
+  @spec h_tag_title(html_tree, String.t()) :: binary
  def h_tag_title(html_tree, selector \\ @h_tag_selector) do
    html_tree
    |> find_tag(selector)
@ -65,6 +66,7 @@ defmodule Readability.TitleFinder do
    case Floki.find(html_tree, selector) do
      [] ->
        []
+
      matches when is_list(matches) ->
        hd(matches)
    end
@ -73,9 +75,11 @@ defmodule Readability.TitleFinder do
  defp clean_title([]) do
    ""
  end
+
  defp clean_title([title]) when is_binary(title) do
    String.strip(title)
  end
+
  defp clean_title(html_tree) do
    html_tree
    |> Floki.text()
--- a/mix.exs
+++ b/mix.exs
@ -10,24 +10,23 @@ defmodule Readability.Mixfile do
  use Mix.Project

  def project do
-    [app: :readability,
-     version: @version,
-     elixir: "~> 1.3",
-     description: @description,
-     package: package(),
-     build_embedded: Mix.env == :prod,
-     start_permanent: Mix.env == :prod,
-     deps: deps()]
+    [
+      app: :readability,
+      version: @version,
+      elixir: "~> 1.3",
+      description: @description,
+      package: package(),
+      build_embedded: Mix.env() == :prod,
+      start_permanent: Mix.env() == :prod,
+      deps: deps()
+    ]
  end

  # Configuration for the OTP application
  #
  # Type "mix help compile.app" for more information
  def application do
-    [applications: [:logger,
-                    :floki,
-                    :httpoison
-                   ]]
+    [applications: [:logger, :floki, :httpoison]]
  end

  # Dependencies can be Hex packages:
@ -40,20 +39,25 @@ defmodule Readability.Mixfile do
  #
  # Type "mix help deps" for more examples and options
  defp deps do
-    [{:floki, "~> 0.18.0"},
-     {:httpoison, "~> 0.13.0"},
-     {:ex_doc, "~> 0.14", only: :dev},
-     {:credo, "~> 0.6.1", only: [:dev, :test]},
-     {:dialyxir, "~> 0.3", only: [:dev]},
-     {:mock, "~> 0.2.0", only: :test},
+    [
+      {:floki, "~> 0.18.0"},
+      {:httpoison, "~> 0.13.0"},
+      {:ex_doc, "~> 0.14", only: :dev},
+      {:credo, "~> 0.6.1", only: [:dev, :test]},
+      {:dialyxir, "~> 0.3", only: [:dev]},
+      {:mock, "~> 0.2.0", only: :test}
    ]
  end

  defp package do
-    [files: ["lib", "mix.exs", "README*", "LICENSE*", "doc"],
-     maintainers: ["Jaehyun Shin"],
-     licenses: ["Apache 2.0"],
-     links: %{"GitHub" => "https://github.com/keepcosmos/readability",
-              "Docs" => "https://hexdocs.pm/readability/Readability.html"}]
+    [
+      files: ["lib", "mix.exs", "README*", "LICENSE*", "doc"],
+      maintainers: ["Jaehyun Shin"],
+      licenses: ["Apache 2.0"],
+      links: %{
+        "GitHub" => "https://github.com/keepcosmos/readability",
+        "Docs" => "https://hexdocs.pm/readability/Readability.html"
+      }
+    ]
  end
 end
--- a/test/readability/candidate/cleaner_test.exs
+++ b/test/readability/candidate/cleaner_test.exs
@ -29,14 +29,14 @@ defmodule Readability.Candidate.CleanerTest do

  test "transform divs containing no block elements", %{html_tree: html_tree} do
    html_tree = Cleaner.transform_misused_div_to_p(html_tree)
-    [{tag, _, _}|_] = html_tree |> Floki.find("#body")
+    [{tag, _, _} | _] = html_tree |> Floki.find("#body")

    assert tag == "p"
  end

  test "not transform divs that contain block elements", %{html_tree: html_tree} do
    html_tree = Cleaner.transform_misused_div_to_p(html_tree)
-    [{tag, _, _}|_] = html_tree |> Floki.find("#contains_blockquote")
+    [{tag, _, _} | _] = html_tree |> Floki.find("#contains_blockquote")
    assert tag == "div"
  end

--- a/test/readability/helper_test.exs
+++ b/test/readability/helper_test.exs
@ -26,23 +26,25 @@ defmodule Readability.HelperTest do
  end

  test "change font tag to span", %{html_tree: html_tree} do
-    expectred = @sample |> String.replace(~r/font/, "span") |> Floki.parse
+    expectred = @sample |> String.replace(~r/font/, "span") |> Floki.parse()
    result = Helper.change_tag(html_tree, "font", "span")
    assert result == expectred
  end

  test "remove tag", %{html_tree: html_tree} do
    expected = "<html><body></body></html>" |> parse
-    result = html_tree
-             |> Helper.remove_tag(fn({tag, _, _}) ->
-               tag == "p"
-             end)
+
+    result =
+      html_tree
+      |> Helper.remove_tag(fn {tag, _, _} ->
+        tag == "p"
+      end)

    assert result == expected
  end

  test "inner text lengt", %{html_tree: html_tree} do
-    result = html_tree |> Helper.text_length
+    result = html_tree |> Helper.text_length()
    assert result == 5
  end
 end
--- a/test/readability/title_finder_test.exs
+++ b/test/readability/title_finder_test.exs
@ -37,6 +37,7 @@ defmodule Readability.TitleFinderTest do
      </head>
    </html>
    """
+
    title = Readability.TitleFinder.og_title(html)
    assert title == "og title 1"
  end
@ -52,6 +53,7 @@ defmodule Readability.TitleFinderTest do
      </head>
    </html>
    """
+
    title = Readability.TitleFinder.tag_title(html)
    assert title == "Tag title"

@ -62,6 +64,7 @@ defmodule Readability.TitleFinderTest do
      </head>
    </html>
    """
+
    title = Readability.TitleFinder.tag_title(html)
    assert title == "Tag title"

@ -72,6 +75,7 @@ defmodule Readability.TitleFinderTest do
      </head>
    </html>
    """
+
    title = Readability.TitleFinder.tag_title(html)
    assert title == "Tag title-tag"

@ -82,6 +86,7 @@ defmodule Readability.TitleFinderTest do
      </head>
    </html>
    """
+
    title = Readability.TitleFinder.tag_title(html)
    assert title == "Tag title-tag-title"

@ -95,6 +100,7 @@ defmodule Readability.TitleFinderTest do
      </body>
    </html>
    """
+
    title = Readability.TitleFinder.tag_title(html)
    assert title == "Tag title"
  end
@ -108,6 +114,7 @@ defmodule Readability.TitleFinderTest do
      </head>
    </html>
    """
+
    title = Readability.TitleFinder.tag_title(html)
    assert title == "tag title 1"
  end
@ -131,6 +138,7 @@ defmodule Readability.TitleFinderTest do
      </body>
    </html>
    """
+
    title = Readability.TitleFinder.h_tag_title(html)
    assert title == "header 1"
  end
--- a/test/readability_http_test.exs
+++ b/test/readability_http_test.exs
@ -6,12 +6,9 @@ defmodule ReadabilityHttpTest do
  test "blank response is parsed as plain text" do
    url = "https://tools.ietf.org/rfc/rfc2616.txt"
    content = TestHelper.read_fixture("rfc2616.txt")
-    response = %HTTPoison.Response{
-      status_code: 200,
-      headers: [],
-      body: content}
-    
-    with_mock HTTPoison, [get!: fn(_url, _headers, _opts) -> response end] do
+    response = %HTTPoison.Response{status_code: 200, headers: [], body: content}
+
+    with_mock HTTPoison, get!: fn _url, _headers, _opts -> response end do
      %Readability.Summary{article_text: result_text} = Readability.summarize(url)

      assert result_text =~ ~r/3 Protocol Parameters/
@ -21,12 +18,14 @@ defmodule ReadabilityHttpTest do
  test "text/plain response is parsed as plain text" do
    url = "https://tools.ietf.org/rfc/rfc2616.txt"
    content = TestHelper.read_fixture("rfc2616.txt")
+
    response = %HTTPoison.Response{
      status_code: 200,
      headers: [{"Content-Type", "text/plain"}],
-      body: content}
-    
-    with_mock HTTPoison, [get!: fn(_url, _headers, _opts) -> response end] do
+      body: content
+    }
+
+    with_mock HTTPoison, get!: fn _url, _headers, _opts -> response end do
      %Readability.Summary{article_text: result_text} = Readability.summarize(url)

      assert result_text =~ ~r/3 Protocol Parameters/
@ -38,13 +37,15 @@ defmodule ReadabilityHttpTest do
    content = TestHelper.read_fixture("bbc.html")
    mimes = ["text/html", "application/xml", "application/xhtml+xml"]

-    mimes |> Enum.each(fn(mime) ->
+    mimes
+    |> Enum.each(fn mime ->
      response = %HTTPoison.Response{
        status_code: 200,
        headers: [{"Content-Type", mime}],
-        body: content}
-      
-      with_mock HTTPoison, [get!: fn(_url, _headers, _opts) -> response end] do
+        body: content
+      }
+
+      with_mock HTTPoison, get!: fn _url, _headers, _opts -> response end do
        %Readability.Summary{article_html: result_html} = Readability.summarize(url)

        assert result_html =~ ~r/connected computing devices\".<\/p><\/div><\/div>$/
@ -55,12 +56,14 @@ defmodule ReadabilityHttpTest do
  test "response with charset is parsed correctly" do
    url = "https://news.bbc.co.uk/test.html"
    content = TestHelper.read_fixture("bbc.html")
+
    response = %HTTPoison.Response{
      status_code: 200,
      headers: [{"Content-Type", "text/html; charset=UTF-8"}],
-      body: content}
-    
-    with_mock HTTPoison, [get!: fn(_url, _headers, _opts) -> response end] do
+      body: content
+    }
+
+    with_mock HTTPoison, get!: fn _url, _headers, _opts -> response end do
      %Readability.Summary{article_html: result_html} = Readability.summarize(url)

      assert result_html =~ ~r/connected computing devices\".<\/p><\/div><\/div>$/
@ -71,12 +74,14 @@ defmodule ReadabilityHttpTest do
    # HTTP header keys are case insensitive (RFC2616 - Section 4.2)
    url = "https://news.bbc.co.uk/test.html"
    content = TestHelper.read_fixture("bbc.html")
+
    response = %HTTPoison.Response{
      status_code: 200,
      headers: [{"content-Type", "text/html; charset=UTF-8"}],
-      body: content}
-    
-    with_mock HTTPoison, [get!: fn(_url, _headers, _opts) -> response end] do
+      body: content
+    }
+
+    with_mock HTTPoison, get!: fn _url, _headers, _opts -> response end do
      %Readability.Summary{article_html: result_html} = Readability.summarize(url)

      assert result_html =~ ~r/connected computing devices\".<\/p><\/div><\/div>$/
--- a/test/readability_test.exs
+++ b/test/readability_test.exs
@ -7,7 +7,10 @@ defmodule ReadabilityTest do
    nytimes = Readability.article(html, opts)

    nytimes_html = Readability.readable_html(nytimes)
-    assert nytimes_html =~ ~r/^<div><div><figure id=\"media-100000004245260\"><div><img src=\"https/
+
+    assert nytimes_html =~
+             ~r/^<div><div><figure id=\"media-100000004245260\"><div><img src=\"https/
+
    assert nytimes_html =~ ~r/major priorities.<\/p><\/div><\/div>$/

    nytimes_text = Readability.readable_text(nytimes)
@ -66,12 +69,17 @@ defmodule ReadabilityTest do

    pubmed_html = Readability.readable_html(pubmed)

-    assert pubmed_html =~ ~r/^<div><div><h4>BACKGROUND AND OBJECTIVES: <\/h4><p><abstracttext>Although strict blood pressure/
-    assert pubmed_html =~ ~r/different mechanisms yielded potent antihypertensive efficacy with safety and decreased plasma BNP levels.<\/abstracttext><\/p><\/div><\/div>$/
+    assert pubmed_html =~
+             ~r/^<div><div><h4>BACKGROUND AND OBJECTIVES: <\/h4><p><abstracttext>Although strict blood pressure/
+
+    assert pubmed_html =~
+             ~r/different mechanisms yielded potent antihypertensive efficacy with safety and decreased plasma BNP levels.<\/abstracttext><\/p><\/div><\/div>$/

    pubmed_text = Readability.readable_text(pubmed)

    assert pubmed_text =~ ~r/^BACKGROUND AND OBJECTIVES: \nAlthough strict blood pressure/
-    assert pubmed_text =~ ~r/with different mechanisms yielded potent antihypertensive efficacy with safety and decreased plasma BNP levels.$/
+
+    assert pubmed_text =~
+             ~r/with different mechanisms yielded potent antihypertensive efficacy with safety and decreased plasma BNP levels.$/
  end
 end