From 23db20bbf03b218a895c9b1ce989da2193ef610c Mon Sep 17 00:00:00 2001
From: keepcosmos <keepcosmos@gmail.com>
Date: Sun, 24 Apr 2016 18:40:35 +0900
Subject: [PATCH] add document

---
 CHANGELOG.md                       |  5 ++
 README.md                          | 39 ++++++++-------
 lib/readability.ex                 | 78 ++++++++++++++++++------------
 lib/readability/article_builder.ex |  2 +-
 lib/readability/helper.ex          |  5 +-
 lib/readability/title_finder.ex    |  2 +-
 mix.exs                            | 15 +++---
 test/readability_test.exs          | 14 ++----
 8 files changed, 90 insertions(+), 70 deletions(-)
 create mode 100644 CHANGELOG.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..01d1c0e
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,5 @@
+# Change log
+
+## [0.3.0] - 2016.04.24
+
+- Release!!
diff --git a/README.md b/README.md
index b97b6be..087b404 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 [![Build Status](https://travis-ci.org/keepcosmos/readability.svg?branch=master)](https://travis-ci.org/keepcosmos/readability)
 [![Readability version](https://img.shields.io/hexpm/v/readability.svg)](https://hex.pm/packages/readability)
 
-Readability library for extracting and curating articles.  
+Readability is an Elixir library for extracting and curating articles.  
 Check out The [Documentation](https://hexdocs.pm/readability/Readability.html) for full and detailed guides
 
 ## Installation
@@ -29,7 +29,7 @@ If [available in Hex](https://hex.pm/docs/publish), the package can be installed
 ## Usage
 
 To parse document, you must prepare html string.
-The below example below, `html` variable is the html code of page from [Elixir Design Goals](http://elixir-lang.org/blog/2013/08/08/elixir-design-goals/)
+In the example below, the `html` variable holds the HTML source of [Elixir Design Goals](http://elixir-lang.org/blog/2013/08/08/elixir-design-goals/)
 
 ### Examples
 ```elixir
@@ -39,33 +39,35 @@ Readability.title(html)
 #=> Elixir Design Goals
 
 ### Extract the content with transformed html.
-content = Readability.content(html)
-Readability.raw_html(content)
+html
+|> Readability.article
+|> Readability.raw_html
 #=>
 # <div><div class=\"entry-content\"><p>During the last year,
 # ...
-# ...
-# or check out our sidebar for other learning resources.</p></div></div>
+# ... out our sidebar for other learning resources.</p></div></div>
 
 ### Extract the text only content.
-Readability.readable_text(content)
+html
+|> Readability.article
+|> Readability.readable_text
+
 #=>
 # During the last year, we have spoken at many conferences spreading the word about Elixir. We usually s.....
 # ...
-# ...
-# started guide, or check out our sidebar for other learning resources.
+# ... started guide, or check out our sidebar for other learning resources.
 ```
 
 ### Options
 
-You may provide options(Keyword type) to `Readability.content`, including:
+You may provide options (a keyword list) to `Readability.article`, including:
 
-* retry_length: 250(default),
-* min_text_length: 25(default),
-* remove_unlikely_candidates: true(default),
-* weight_classes: true(default),
-* clean_conditionally: true(default),
-* remove_empty_nodes: true(default),
+* retry_length \\\\ 250
+* min_text_length \\\\ 25
+* remove_unlikely_candidates \\\\ true
+* weight_classes \\\\ true
+* clean_conditionally \\\\ true
+* remove_empty_nodes \\\\ true
 
 ## Test
 
@@ -73,9 +75,10 @@ To run the test suite:
 
     $ mix test
 
-## TODO
-* [ ] Extract a author
+## Todo
+* [ ] Extract authors
 * [ ] Extract Images
+* [ ] Extract Videos
 * [ ] Convert relative paths into absolute paths of `img#src` and `a#href`
 * [ ] More configurable
 * [ ] Command line interface
diff --git a/lib/readability.ex b/lib/readability.ex
index ead36fe..ba1d2fd 100644
--- a/lib/readability.ex
+++ b/lib/readability.ex
@@ -7,17 +7,17 @@ defmodule Readability do
   ```elixir
   @type html :: binary
 
-  # extract title
+  # Extract title
   Readability.title(html)
 
-  # extract only text from content
-  content = html
-            |> Readability.content
+  # Extract only text from article
+  article = html
+            |> Readability.article
             |> Readability.readable_text
 
-  # extract content with transformed html
-  content = html
-            |> Readability.content
+  # Extract article with transformed html
+  article = html
+            |> Readability.article
             |> Readability.raw_html
   ```
   """
@@ -52,21 +52,59 @@ defmodule Readability do
   @type html_tree :: tuple | list
   @type options :: list
 
+  @doc """
+  Extract the title from an html binary or from an already parsed html tree.
+
+  ## Example
+
+      iex> title = Readability.title(html_str)
+      "Some title in html"
+  """
+  @spec title(binary | html_tree) :: binary
   def title(html) when is_binary(html), do: html |> parse |> title
   def title(html_tree), do: TitleFinder.title(html_tree)
 
   @doc """
   Using a variety of metrics (content score, classname, element types), find the content that is
   most likely to be the stuff a user wants to read
+
+  ## Example
+
+      iex> article_tree = Readability.article(html_str)
+      # returns the article as an html_tree (tuple or list)
+
   """
-  @spec content(binary, options) :: binary
-  def content(raw_html, opts \\ []) do
+  @spec article(binary, options) :: html_tree
+  def article(raw_html, opts \\ []) do
     opts = Keyword.merge(@default_options, opts)
     raw_html
     |> parse
     |> ArticleBuilder.build(opts)
   end
 
+
+  @doc """
+  Render the given html_tree back into a raw html binary.
+  """
+  @spec raw_html(html_tree) :: binary
+  def raw_html(html_tree) do
+    Floki.raw_html(html_tree)
+  end
+
+  @doc """
+  Return only the text of html_tree, putting a newline before each closing p/div/article/h* tag.
+  """
+  @spec readable_text(html_tree) :: binary
+  def readable_text(html_tree) do
+    # TODO: Remove image caption when extract only text
+    tags_to_br = ~r/<\/(p|div|article|h\d)/i
+    html_str = html_tree |> raw_html
+    Regex.replace(tags_to_br, html_str, &("\n#{&1}"))
+    |> Floki.parse
+    |> Floki.text
+    |> String.strip
+  end
+
   @doc """
   Normalize and Parse to html tree(tuple or list)) from binary html
   """
@@ -80,28 +118,6 @@ defmodule Readability do
     |> Floki.filter_out(:comment)
   end
 
-  @doc """
-  return raw html binary from html tree tuple
-  """
-  @spec raw_html(html_tree) :: binary
-  def raw_html(html_tree) do
-    html_tree |> Floki.raw_html
-  end
-
-  @doc """
-  return only text binary from html tree tuple
-  """
-  @spec raw_html(html_tree) :: binary
-  def readable_text(html_tree) do
-    # TODO: Remove image caption when extract only text
-    tags_to_br = ~r/<\/(p|div|article|h\d)/i
-    html_str = html_tree |> raw_html
-    Regex.replace(tags_to_br, html_str, &("\n#{&1}"))
-    |> Floki.parse
-    |> Floki.text
-    |> String.strip
-  end
-
   def regexes, do: @regexes
 
   def default_options, do: @default_options
diff --git a/lib/readability/article_builder.ex b/lib/readability/article_builder.ex
index e7b6254..4ccc7de 100644
--- a/lib/readability/article_builder.ex
+++ b/lib/readability/article_builder.ex
@@ -1,6 +1,6 @@
 defmodule Readability.ArticleBuilder do
   @moduledoc """
-  build article for readability
+  Build article for readability
   """
 
   alias Readability.Helper
diff --git a/lib/readability/helper.ex b/lib/readability/helper.ex
index 9551da3..fe9dd3f 100644
--- a/lib/readability/helper.ex
+++ b/lib/readability/helper.ex
@@ -21,6 +21,9 @@ defmodule Readability.Helper do
     {tag_name, attrs, change_tag(html_tree, selector, tag)}
   end
 
+  @doc """
+  Remove html attributes
+  """
   @spec remove_attrs(html_tree, String.t | [String.t] | Regex.t) :: html_tree
   def remove_attrs(content, _) when is_binary(content), do: content
   def remove_attrs([], _), do: []
@@ -65,7 +68,7 @@ defmodule Readability.Helper do
   end
 
   @doc """
-  count only text length
+  Count only text length
   """
   @spec text_length(html_tree) :: number
   def text_length(html_tree) do
diff --git a/lib/readability/title_finder.ex b/lib/readability/title_finder.ex
index f3d2fab..4cb0f64 100644
--- a/lib/readability/title_finder.ex
+++ b/lib/readability/title_finder.ex
@@ -1,6 +1,6 @@
 defmodule Readability.TitleFinder do
   @moduledoc """
-  The TitleFinder engine traverse the HTML tree searching for finding title.
+  The TitleFinder engine traverses the HTML tree to find the title.
   """
 
   @title_suffix ~r/(\-)|(\:\:)|(\|)/
diff --git a/mix.exs b/mix.exs
index 4ed76a4..c99b111 100644
--- a/mix.exs
+++ b/mix.exs
@@ -2,13 +2,18 @@ defmodule Readability.Mixfile do
   @moduledoc """
   """
 
+  @version "0.3.1"
+  @description """
+  Readability library for extracting and curating articles.
+  """
+
   use Mix.Project
 
   def project do
     [app: :readability,
-     version: "0.3.1",
+     version: @version,
      elixir: "~> 1.2",
-     description: description,
+     description: @description,
      package: package,
      build_embedded: Mix.env == :prod,
      start_permanent: Mix.env == :prod,
@@ -42,12 +47,6 @@ defmodule Readability.Mixfile do
     ]
   end
 
-  defp description do
-    """
-    Readability library for extracting and curating articles.
-    """
-  end
-
   defp package do
     [files: ["lib", "mix.exs", "README*", "LICENSE*", "doc"],
      maintainers: ["Jaehyun Shin"],
diff --git a/test/readability_test.exs b/test/readability_test.exs
index 128b628..79b0712 100644
--- a/test/readability_test.exs
+++ b/test/readability_test.exs
@@ -4,7 +4,7 @@ defmodule ReadabilityTest do
   test "readability for NY Times" do
     html = TestHelper.read_fixture("nytimes.html")
     opts = [clean_conditionally: false]
-    nytimes = Readability.content(html, opts)
+    nytimes = Readability.article(html, opts)
 
     nytimes_html = Readability.raw_html(nytimes)
     assert nytimes_html =~ ~r/^<div><div class=\"story-body\">/
@@ -17,7 +17,7 @@ defmodule ReadabilityTest do
 
   test "readability for BBC" do
     html = TestHelper.read_fixture("bbc.html")
-    bbc = Readability.content(html)
+    bbc = Readability.article(html)
 
     bbc_html = Readability.raw_html(bbc)
 
@@ -32,7 +32,7 @@ defmodule ReadabilityTest do
 
   test "readability for medium" do
     html = TestHelper.read_fixture("medium.html")
-    medium = Readability.content(html)
+    medium = Readability.article(html)
 
     medium_html = Readability.raw_html(medium)
 
@@ -47,7 +47,7 @@ defmodule ReadabilityTest do
 
   test "readability for buzzfeed" do
     html = TestHelper.read_fixture("buzzfeed.html")
-    buzzfeed = Readability.content(html)
+    buzzfeed = Readability.article(html)
 
     buzzfeed_html = Readability.raw_html(buzzfeed)
 
@@ -59,10 +59,4 @@ defmodule ReadabilityTest do
     assert buzzfeed_text =~ ~r/^The FBI no longer needs Apple’s help/
     assert buzzfeed_text =~ ~r/issue of court orders and encrypted devices.$/
   end
-
-  test "readability elixir blog" do
-    html = TestHelper.read_fixture("elixir.html")
-    html =  Readability.content(html)
-    IO.inspect Readability.readable_text(html)
-  end
 end