This commit is contained in:
keepcosmos 2016-04-24 16:14:31 +09:00
parent d8677a599c
commit 46ac9dddde
9 changed files with 559 additions and 2356 deletions

View File

@ -1,6 +1,10 @@
# Readability
Port of arc90's readability project to Elixir
[![Build Status](https://travis-ci.org/keepcosmos/readability.svg?branch=master)](https://travis-ci.org/keepcosmos/readability)
[![Readability version](https://img.shields.io/hexpm/v/readability.svg)](https://hex.pm/packages/readability)
Readability library for extracting and curating articles.
Check out The [Documentation](https://hexdocs.pm/readability/Readability.html) for full and detailed guides
## Installation
@ -8,12 +12,80 @@ If [available in Hex](https://hex.pm/docs/publish), the package can be installed
1. Add readability to your list of dependencies in `mix.exs`:
def deps do
[{:readability, "~> 0.0.1"}]
end
```elixir
def deps do
[{:readability, "~> 0.3"}]
end
```
2. Ensure readability is started before your application:
def application do
[applications: [:readability]]
end
```elixir
def application do
[applications: [:readability]]
end
```
## Usage
To parse document, you must prepare html string.
The below example below, `html` variable is the html code of page from [Elixir Design Goals](http://elixir-lang.org/blog/2013/08/08/elixir-design-goals/)
### Examples
```elixir
### Extract the title
Readability.title(html)
#=> Elixir Design Goals
### Extract the content with transformed html.
content = Readability.content(html)
Readability.raw_html(content)
#=>
# <div><div class=\"entry-content\"><p>During the last year,
# ...
# ...
# or check out our sidebar for other learning resources.</p></div></div>
### Extract the text only content.
Readability.readable_text(content)
#=>
# During the last year, we have spoken at many conferences spreading the word about Elixir. We usually s.....
# ...
# ...
# started guide, or check out our sidebar for other learning resources.
```
### Options
You may provide options(Keyword type) to `Readability.content`, including:
* retry_length: 250(default),
* min_text_length: 25(default),
* remove_unlikely_candidates: true(default),
* weight_classes: true(default),
* clean_conditionally: true(default),
* remove_empty_nodes: true(default),
## Test
To run the test suite:
$ mix test
## TODO
* [ ] Extract a author
* [ ] Extract Images
* [ ] Convert relative paths into absolute paths of `img#src` and `a#href`
* [ ] More configurable
* [ ] Command line interface
## Related and Inpired Projects
* [readability.js](https://github.com/mozilla/readability) is a standalone version of the readability library used for Firefox Reader View.
* [newspaper](https://github.com/codelucas/newspaper) is an advanced news extraction, article extraction, and content curation library for Python.
* [ruby-readability](https://github.com/cantino/ruby-readability) is a tool for extracting the primary readable content of a webpage.
## LICENSE
This code is under the Apache License 2.0. See <http://www.apache.org/licenses/LICENSE-2.0>.

View File

@ -1,5 +1,25 @@
defmodule Readability do
@moduledoc """
Readability library for extracting & curating articles.
## Example
```elixir
@type html :: binary
# extract title
Readability.title(html)
# extract only text from content
content = html
|> Readability.content
|> Readability.readable_text
# extract content with transformed html
content = html
|> Readability.content
|> Readability.raw_html
```
"""
alias Readability.TitleFinder
@ -40,7 +60,7 @@ defmodule Readability do
most likely to be the stuff a user wants to read
"""
@spec content(binary, options) :: binary
def content(raw_html, opts \\ @default_options) do
def content(raw_html, opts \\ []) do
opts = Keyword.merge(@default_options, opts)
raw_html
|> parse
@ -72,7 +92,7 @@ defmodule Readability do
return only text binary from html tree tuple
"""
@spec raw_html(html_tree) :: binary
def readabl_text(html_tree) do
def readable_text(html_tree) do
# TODO: Remove image caption when extract only text
tags_to_br = ~r/<\/(p|div|article|h\d)/i
html_str = html_tree |> raw_html

View File

@ -13,11 +13,19 @@ defmodule Readability.TitleFinder do
"""
@spec title(html_tree) :: binary
def title(html_tree) do
maybe_title = tag_title(html_tree)
if length(String.split(maybe_title, " ")) <= 4 do
maybe_title = og_title(html_tree)
maybe_title = og_title(html_tree)
if String.length(String.strip(maybe_title)) == 0 do
maybe_title = tag_title(html_tree)
end
maybe_title || h_tag_title(html_tree)
unless good_title?(maybe_title) do
h_title = h_tag_title(html_tree)
if good_title?(h_title) do
maybe_title = h_title
end
end
maybe_title
end
@doc """
@ -59,4 +67,8 @@ defmodule Readability.TitleFinder do
|> hd
|> String.strip
end
defp good_title?(title) do
length(String.split(title, " ")) >= 4
end
end

10
mix.exs
View File

@ -6,7 +6,7 @@ defmodule Readability.Mixfile do
def project do
[app: :readability,
version: "0.0.1",
version: "0.3.1",
elixir: "~> 1.2",
description: description,
package: package,
@ -20,8 +20,7 @@ defmodule Readability.Mixfile do
# Type "mix help compile.app" for more information
def application do
[applications: [:logger,
:floki,
:httpoison
:floki
]]
end
@ -36,7 +35,6 @@ defmodule Readability.Mixfile do
# Type "mix help deps" for more examples and options
defp deps do
[{:floki, "~> 0.8.0"},
{:httpoison, "~> 0.8.0"},
{:earmark, "~> 0.1", only: :dev},
{:ex_doc, "~> 0.11", only: :dev},
{:credo, "~> 0.3", only: [:dev, :test]},
@ -46,7 +44,7 @@ defmodule Readability.Mixfile do
defp description do
"""
Readability to elixir. This project is heavily based on Arc90's project.
Readability library for extracting and curating articles.
"""
end
@ -55,6 +53,6 @@ defmodule Readability.Mixfile do
maintainers: ["Jaehyun Shin"],
licenses: ["Apache 2.0"],
links: %{"GitHub" => "https://github.com/keepcosmos/readability",
"Docs" => "http://https://hexdocs.pm/readability"}]
"Docs" => "https://hexdocs.pm/readability/Readability.html"}]
end
end

View File

@ -1,13 +0,0 @@
<html>
<body>
<code><pre>
root
indented
</pre></code>
<pre><code>
second
indented
</code></pre>
</body>
</html>

428
test/fixtures/elixir.html vendored Normal file
View File

@ -0,0 +1,428 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
<title>Elixir Design Goals - Elixir</title>
<link href="http://feeds.feedburner.com/ElixirLang" rel="alternate" title="Elixir's Blog" type="application/atom+xml" />
<link rel="stylesheet" type="text/css" href="/css/style.css" />
<link rel="stylesheet" type="text/css" href="/css/syntax.css" />
<link rel="stylesheet" href="/js/icons/style.css">
<!--[if lt IE 8]><!-->
<link rel="stylesheet" href="/js/icons/ie7/ie7.css">
<!--<![endif]-->
<meta name="viewport" content="width=device-width,initial-scale=1" />
<link rel="stylesheet" id="font-bitter-css" href="http://fonts.googleapis.com/css?family=Bitter:400,700" type="text/css" media="screen" />
<link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" title="elixir-lang.org" href="/opensearch.xml" />
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-8268430-6', 'auto');
ga('send', 'pageview');
</script>
</head>
<body class="">
<div id="container">
<div class="wrap">
<div id="header">
<div id="branding">
<h1 id="site-title">
<a href="/" title="Elixir" rel="Home">
<img class="logo" src="/images/logo/logo.png" alt="Elixir Logo" />
</a>
</h1>
</div>
<div id="menu-primary" class="menu-container">
<div class="menu">
<ul id="menu-primary-items">
<li class="menu-item home"><a class="spec" href="/">Home</a></li>
<li class="menu-item install"><a class="spec" href="/install.html">Install</a></li>
<li class="menu-item getting-started"><a class="spec" href="/getting-started/introduction.html">Getting Started</a></li>
<li class="menu-item learning"><a class="spec" href="/learning.html">Learning</a></li>
<li class="menu-item docs"><a class="spec" href="/docs.html">Docs</a></li>
<li class="menu-item blog"><a class="spec" href="/blog/">Blog</a></li>
<li class="menu-item packages"><a class="spec" href="https://hex.pm/">Packages</a></li>
</ul>
</div>
</div>
<div class="clear"></div>
</div>
<div id="main">
<div id="content">
<div class="hfeed">
<div class="hentry post">
<div class="post-content">
<div class="sticky-header">
<h1>Elixir Design Goals</h1>
<div class="byline">August 08, 2013 · by José Valim . in <a href="/blog/categories.html#Internals" class="category">Internals</a></div>
</div>
<div class="entry-content">
<p>During the last year, we have spoken at many conferences spreading the word about Elixir. We <a href="https://vimeo.com/53221562">usually started with introducing the Erlang VM</a>, then went on to talk about Elixir goals, saving some time at the end to do a live demo, showing some goodies like exchanging information between remote nodes and even hot code swapping.</p>
<p>This post is a summary of those talks, focusing on the language goals: compatibility, productivity and extensibility.</p>
<h2 id="compatibility">Compatibility</h2>
<p>Elixir is meant to be compatible with the Erlang VM and the existing ecosystem. When we talk about Erlang, we can break it into three parts:</p>
<ul>
<li>A functional programming language, called Erlang</li>
<li>A set of design principles, called OTP</li>
<li>The Erlang Virtual Machine, referred to as EVM or BEAM</li>
</ul>
<p>Elixir runs in the same virtual machine and is compatible with OTP. Not only that, all the tools and libraries available in the Erlang ecosystem are also available in Elixir, simply because there is no conversion cost from calling Erlang from Elixir and vice-versa.</p>
<p>We frequently say that <strong>the Erlang VM is Elixirs strongest asset</strong>.</p>
<p>All Elixir code is executed inside light-weight processes (actors), each with its own state, that exchange messages between each other. The Erlang VM multiplexes those processes onto many cores, making it trivial to run code concurrently.</p>
<p>In fact if you compile any Elixir code, including the Elixir source, you will see all cores on your machine being used out of the box. With <a href="http://www.parallella.org/board/">technologies like Parallella</a> becoming more accessible and affordable, it is hard to ignore the power you can get out of the Erlang VM.</p>
<p>Finally, the Erlang VM was designed to build systems that run forever, self-heal and scale. Joe Armstrong, one of Erlangs creators, has recently given an excellent talk <a href="http://www.infoq.com/presentations/self-heal-scalable-system">about the design decisions behind OTP and the VM</a>.</p>
<p>Nothing that we are describing here is particularly new. Open source projects like CouchDB, Riak, RabbitMQ, Chef11 and companies like Ericsson, Heroku, Basho, Klarna and Wooga are already enjoying the benefits provided by the Erlang VM, some of them for quite a long time.</p>
<h2 id="productivity">Productivity</h2>
<blockquote>
<p>Now we need to go meta. We should now think of a language design as being a pattern for language designs. A tool for making more tools of the same kind. […] A language design can no longer be a thing. It must be a pattern, a pattern for growth. A pattern for growing a pattern, for defining the patterns that programmers can use for their real work and main goals.</p>
</blockquote>
<ul>
<li>Guy Steele, keynote at the 1998 ACM OOPSLA conference on “Growing a Language”</li>
</ul>
<p>Productivity is, in general, a hard goal to measure. A language productive for creating desktop applications may not be productive for mathematical computing. Productivity depends directly on the field in which you intend to use the language, the available tools in the ecosystem and how easy it is to create and extend those tools.</p>
<p>For this reason, we have opted for a small language core. For example, while some languages have <code class="highlighter-rouge">if</code>, <code class="highlighter-rouge">case</code>, <code class="highlighter-rouge">try</code> and so on as language keywords, each with its own rules in the parser, <strong>in Elixir they are just macros</strong>. This allows us to implement most of Elixir in Elixir and also allows developers to extend the language using the same tools we used to build the language itself, often extending the language to the specific domains they are working on.</p>
<p>Here is an example of how someone would implement <code class="highlighter-rouge">unless</code>, which is a keyword in many languages, in Elixir:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="k">defmacro</span> <span class="k">unless</span><span class="p">(</span><span class="n">expr</span><span class="p">,</span> <span class="n">opts</span><span class="p">)</span> <span class="k">do</span>
<span class="kn">quote</span> <span class="k">do</span>
<span class="k">if</span><span class="p">(</span><span class="n">!unquote</span><span class="p">(</span><span class="n">expr</span><span class="p">),</span> <span class="kn">unquote</span><span class="p">(</span><span class="n">opts</span><span class="p">))</span>
<span class="k">end</span>
<span class="k">end</span>
<span class="k">unless</span> <span class="no">true</span> <span class="k">do</span>
<span class="no">IO</span><span class="o">.</span><span class="n">puts</span> <span class="sd">"</span><span class="s2">this will never be seen"</span>
<span class="k">end</span>
</code></pre>
</div>
<p>Since a macro receives the code representation as arguments, we can simply convert an <code class="highlighter-rouge">unless</code> into an <code class="highlighter-rouge">if</code> at compile time.</p>
<p>Macros are also the base construct for meta-programming in Elixir: the ability to write code that generates code. Meta-programming allows developers to easily get rid of boilerplate and create powerful tools. A common example mentioned in talks is how our test framework uses macros for expressiveness. Lets see an example:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="no">ExUnit</span><span class="o">.</span><span class="n">start</span>
<span class="k">defmodule</span> <span class="no">MathTest</span> <span class="k">do</span>
<span class="kn">use</span> <span class="no">ExUnit</span><span class="o">.</span><span class="no">Case</span><span class="p">,</span> <span class="ss">async:</span> <span class="no">true</span>
<span class="n">test</span> <span class="sd">"</span><span class="s2">adding two numbers"</span> <span class="k">do</span>
<span class="n">assert</span> <span class="m">1</span> <span class="o">+</span> <span class="m">2</span> <span class="o">==</span> <span class="m">4</span>
<span class="k">end</span>
<span class="k">end</span>
</code></pre>
</div>
<p>The first thing to notice is the <code class="highlighter-rouge">async: true</code> option. When your tests do not have any side-effects, you can run them concurrently by passing the <code class="highlighter-rouge">async: true</code> option.</p>
<p>Next we define a test case and we do an assertion with the <code class="highlighter-rouge">assert</code> macro. Simply calling <code class="highlighter-rouge">assert</code> would be a bad practice in many languages as it would provide a poor error report. In such languages, functions/methods like <code class="highlighter-rouge">assertEqual</code> or <code class="highlighter-rouge">assert_equal</code> would be the recommended way of performing such assertion.</p>
<p>In Elixir, however, <code class="highlighter-rouge">assert</code> is a macro and as such it can look into the code being asserted and infer that a comparison is being made. This code is then transformed to provide a detailed error report when the test runs:</p>
<div class="highlighter-rouge"><pre class="highlight"><code>1) test adding two numbers (MathTest)
** (ExUnit.ExpectationError)
expected: 3
to be equal to (==): 4
at test.exs:7
</code></pre>
</div>
<p>This simple example illustrates how a developer can leverage macros to provide a concise but powerful API. Macros have access to the whole compilation environment, being able to check the imported functions, macros, defined variables and more.</p>
<p>Those examples are just scratching the surface of what can be achieved with macros in Elixir. For example, we are currently using macros to compile routes from a web application into a bunch of patterns that are highly optimizable by the VM, providing an expressive but heavily optimized routing algorithm.</p>
<p>The macro system also caused a huge impact on the syntax, which we will discuss briefly before moving to the last goal.</p>
<h3 id="syntax">Syntax</h3>
<p>Although syntax is usually one of the first topics that comes up when Elixir is being discussed, it was never a goal to simply provide a different syntax. Since we wanted to provide a macro system, we knew that the macro system would only be sane if we could represent Elixir syntax in terms of Elixirs own data structures in a straight-forward fashion. With this goal in mind, we set out to design the first Elixir version, which looked like this:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="k">defmodule</span><span class="p">(</span><span class="no">Hello</span><span class="p">,</span> <span class="k">do</span><span class="p">:</span> <span class="p">(</span>
<span class="k">def</span><span class="p">(</span><span class="n">calculate</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">),</span> <span class="k">do</span><span class="p">:</span> <span class="p">(</span>
<span class="o">=</span><span class="p">(</span><span class="n">temp</span><span class="p">,</span> <span class="o">*</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">))</span>
<span class="o">+</span><span class="p">(</span><span class="n">temp</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span>
<span class="p">))</span>
<span class="p">))</span>
</code></pre>
</div>
<p>In the snippet above, we represent everything, except variables, as a function or a macro call. Notice keyword arguments like <code class="highlighter-rouge">do:</code> have been present since the first version. To this, we slowly added new syntax, making some common patterns more elegant while keeping the same underlying data representation. We soon added infix notation for operators:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="k">defmodule</span><span class="p">(</span><span class="no">Hello</span><span class="p">,</span> <span class="k">do</span><span class="p">:</span> <span class="p">(</span>
<span class="k">def</span><span class="p">(</span><span class="n">calculate</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">),</span> <span class="k">do</span><span class="p">:</span> <span class="p">(</span>
<span class="n">temp</span> <span class="o">=</span> <span class="n">a</span> <span class="o">*</span> <span class="n">b</span>
<span class="n">temp</span> <span class="o">+</span> <span class="n">c</span>
<span class="p">))</span>
<span class="p">))</span>
</code></pre>
</div>
<p>The next step was to make parentheses optional:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="k">defmodule</span> <span class="no">Hello</span><span class="p">,</span> <span class="k">do</span><span class="p">:</span> <span class="p">(</span>
<span class="k">def</span> <span class="n">calculate</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">),</span> <span class="k">do</span><span class="p">:</span> <span class="p">(</span>
<span class="n">temp</span> <span class="o">=</span> <span class="n">a</span> <span class="o">*</span> <span class="n">b</span>
<span class="n">temp</span> <span class="o">+</span> <span class="n">c</span>
<span class="p">)</span>
<span class="p">)</span>
</code></pre>
</div>
<p>And finally we added <code class="highlighter-rouge">do/end</code> as convenience for the common <code class="highlighter-rouge">do: (...)</code> construct:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="k">defmodule</span> <span class="no">Hello</span> <span class="k">do</span>
<span class="k">def</span> <span class="n">calculate</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span> <span class="k">do</span>
<span class="n">temp</span> <span class="o">=</span> <span class="n">a</span> <span class="o">*</span> <span class="n">b</span>
<span class="n">temp</span> <span class="o">+</span> <span class="n">c</span>
<span class="k">end</span>
<span class="k">end</span>
</code></pre>
</div>
<p>Given my previous background in Ruby, it is natural that some of the constructs added were borrowed from Ruby. However, those additions were a by-product, never a language goal.</p>
<p>Many language constructs are also inspired by their Erlang counter-parts, like some of the control-flow macros, operators and containers. Notice how some Elixir code:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="c1"># A tuple</span>
<span class="n">tuple</span> <span class="o">=</span> <span class="p">{</span> <span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">3</span> <span class="p">}</span>
<span class="c1"># Adding two lists</span>
<span class="p">[</span><span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">3</span><span class="p">]</span> <span class="o">++</span> <span class="p">[</span><span class="m">4</span><span class="p">,</span> <span class="m">5</span><span class="p">,</span> <span class="m">6</span><span class="p">]</span>
<span class="c1"># Case</span>
<span class="k">case</span> <span class="n">expr</span> <span class="k">do</span>
<span class="p">{</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span> <span class="p">}</span> <span class="o">-&gt;</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span>
<span class="n">other</span> <span class="ow">when</span> <span class="n">is_integer</span><span class="p">(</span><span class="n">other</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">other</span>
<span class="k">end</span>
</code></pre>
</div>
<p>maps to Erlang:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="c">% A tuple
</span><span class="nv">Tuple</span> <span class="o">=</span> <span class="p">{</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span> <span class="p">}.</span>
<span class="c">% Adding two lists
</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">]</span> <span class="o">++</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">].</span>
<span class="c">% Case
</span><span class="k">case</span> <span class="nv">Expr</span> <span class="k">of</span>
<span class="p">{</span> <span class="nv">X</span><span class="p">,</span> <span class="nv">Y</span> <span class="p">}</span> <span class="o">-&gt;</span> <span class="nv">X</span> <span class="o">+</span> <span class="nv">Y</span><span class="p">;</span>
<span class="nv">Other</span> <span class="k">when</span> <span class="nb">is_integer</span><span class="p">(</span><span class="nv">Other</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nv">Other</span>
<span class="k">end</span><span class="p">.</span>
</code></pre>
</div>
<h2 id="extensibility">Extensibility</h2>
<p>By building on top of a small core, most of the constructs in the language can be replaced and extended as required by developers to target specific domains. However, there is a particular domain that Elixir is inherently good at, which is building concurrent, distributed applications, thanks to OTP and the Erlang VM.</p>
<p>Elixir complements this domain by providing a standard library with:</p>
<ul>
<li>Unicode strings and unicode operations</li>
<li>A powerful unit test framework</li>
<li>More data structures like ranges, including novel implementations for sets and dictionaries</li>
<li>Polymorphic records (in contrast to Erlangs compilation-time only records)</li>
<li>Strict and lazy enumeration APIs</li>
<li>Convenience functions for scripting, like working with paths and the filesystem</li>
<li>A project management tool to compile and test Elixir code</li>
</ul>
<p>And much more.</p>
<p>Most of the features above provide their own extensibility mechanisms, too. For example, take the <code class="highlighter-rouge">Enum</code> module. The <code class="highlighter-rouge">Enum</code> module allow us to enumerate the built-in ranges, lists, sets, etc:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="n">list</span> <span class="o">=</span> <span class="p">[</span><span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">3</span><span class="p">]</span>
<span class="no">Enum</span><span class="o">.</span><span class="n">map</span> <span class="n">list</span><span class="p">,</span> <span class="k">fn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">x</span> <span class="o">*</span> <span class="m">2</span> <span class="k">end</span>
<span class="c1">#=&gt; [2, 4, 6]</span>
<span class="n">range</span> <span class="o">=</span> <span class="m">1</span><span class="o">..</span><span class="m">3</span>
<span class="no">Enum</span><span class="o">.</span><span class="n">map</span> <span class="n">range</span><span class="p">,</span> <span class="k">fn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">x</span> <span class="o">*</span> <span class="m">2</span> <span class="k">end</span>
<span class="c1">#=&gt; [2, 4, 6]</span>
<span class="n">set</span> <span class="o">=</span> <span class="no">HashSet</span><span class="o">.</span><span class="n">new</span> <span class="p">[</span><span class="m">1</span><span class="p">,</span> <span class="m">2</span><span class="p">,</span> <span class="m">3</span><span class="p">]</span>
<span class="no">Enum</span><span class="o">.</span><span class="n">map</span> <span class="n">set</span><span class="p">,</span> <span class="k">fn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">x</span> <span class="o">*</span> <span class="m">2</span> <span class="k">end</span>
<span class="c1">#=&gt; [2, 4, 6]</span>
</code></pre>
</div>
<p>Not only that, any developer can <strong>extend</strong> the <code class="highlighter-rouge">Enum</code> module to work with any data type as long as the data type implements <a href="/docs/stable/elixir/Enumerable.html">the <code class="highlighter-rouge">Enumerable</code> protocol</a> (protocols in Elixir are based on Clojures protocol). This is extremely convenient because the developer needs to know only the <code class="highlighter-rouge">Enum</code> API for enumeration, instead of memorizing specific APIs for sets, lists, dicts, etc.</p>
<p>There are many other protocols exposed by the language, like <a href="/docs/stable/elixir/Inspect.html">the <code class="highlighter-rouge">Inspect</code> protocol</a> for pretty printing data structures and <a href="/docs/stable/elixir/Access.html">the <code class="highlighter-rouge">Access</code> protocol</a> for accessing key-value data by key. By being extensible, Elixir ensures developers can work <strong>with</strong> the language, instead of <strong>against</strong> the language.</p>
<h2 id="summing-up">Summing up</h2>
<p>The goal of this post was to sumarize the language goals: compatibility, productivity and extensibility. By being compatibile with the Erlang VM, we are providing developers another toolset for building concurrent, distributed and fault-tolerant systems.</p>
<p>We also hope to have clarified what Elixir brings to the Erlang VM, in particular, meta-programming through macros, polymorphic constructs for extensibility and a data-focused standard library with extensible and consistent APIs for diverse types, including strict and lazy enumeration, unicode handling, a test framework and more.</p>
<p>Give Elixir a try! You can start with our <a href="/getting-started/introduction.html">getting started guide</a>, or check out our sidebar for other learning resources.</p>
</div>
<!-- <div class="entry-meta">
</div> -->
</div>
</div>
</div>
</div><!-- #content -->
<div id="sidebar-primary" class="sidebar">
<div class="widget news">
<h3>
News: <a href="/blog/2016/01/03/elixir-v1-2-0-released/">Elixir v1.2 released</a>
</h3>
</div>
<div class="widget search">
<form method="get" id="search-form" class="search-form" action="https://www.google.com/search">
<div>
<input class="search-text" type="text" name="q" placeholder="Search..." id="searchfield">
<input type="hidden" name="sitesearch" value="elixir-lang.org">
<input class="search-submit button" name="submit" type="submit" value="Search">
</div>
</form>
</div>
<div class="rss-button">
<a href="http://feeds.feedburner.com/ElixirLang" target="_blank"><img src="/images/social/RSSButton.png" alt="RSS button" /></a>
</div>
<div class="widget categories-list">
<h3 class="widget-title">Blog Categories</h3>
<ul>
<li><a class="spec" href="/blog/categories.html#Internals">Internals</a></li>
<li><a class="spec" href="/blog/categories.html#Releases">Releases</a></li>
<li><a class="spec" href="/blog/categories.html#Announcements">Announcements</a></li>
</ul>
</div>
<div class="widget">
<h3 class="widget-title">Join the Community</h3>
<ul>
<li><a class="spec" href="irc://irc.freenode.net/elixir-lang">#elixir-lang on freenode IRC</a></li>
<li><a class="spec" href="https://elixir-slackin.herokuapp.com/">Elixir on Slack</a></li>
<li><a class="spec" href="http://elixirforum.com">Elixir Forum</a></li>
<li><a class="spec" href="https://groups.google.com/group/elixir-lang-talk">elixir-talk mailing list</a></li>
<li><a class="spec" href="https://twitter.com/elixirlang">@elixirlang on Twitter</a></li>
<li><a class="spec" href="http://elixir.meetup.com">Meetups around the world</a></li>
<li><a class="spec" href="https://github.com/elixir-lang/elixir/wiki">Wiki with events, resources and talks organized by the community</a></li>
</ul>
</div>
<div class="widget">
<h3 class="widget-title">Important links</h3>
<ul>
<li><a class="spec" href="https://github.com/elixir-lang/elixir">Source Code</a></li>
<li><a class="spec" href="https://github.com/elixir-lang/elixir/issues">Issue tracker</a></li>
<li><a class="spec" href="https://groups.google.com/group/elixir-lang-core">elixir-core mailing list (development)</a></li>
<li><a class="spec" href="/crash-course.html">Crash course for Erlang developers</a></li>
</ul>
</div>
<div class="widget">
<h3 class="widget-title">Code editor support</h3>
<ul>
<li><a class="spec" href="https://github.com/elixir-lang/emacs-elixir">Emacs Mode</a></li>
<li><a class="spec" href="http://www.alchemist-elixir.org/">Alchemist (Emacs Elixir Tooling)</a></li>
<li><a class="spec" href="https://github.com/elixir-lang/elixir-tmbundle">TextMate / Sublime Text Bundle</a></li>
<li><a class="spec" href="https://github.com/vishnevskiy/ElixirSublime">Sublime Plugin</a></li>
<li><a class="spec" href="https://github.com/elixir-lang/vim-elixir">Vim Elixir</a></li>
<li><a class="spec" href="https://github.com/SteffenBauer/elixir-gtksourceview">GtkSourceView (gedit)</a></li>
<li><a class="spec" href="https://github.com/lucasmazza/language-elixir">Atom Package</a></li>
<li><a class="spec" href="https://github.com/KronicDeth/intellij-elixir">IntelliJ Elixir</a></li>
<li><a class="spec" href="https://marketplace.visualstudio.com/items?itemName=mjmcloug.vscode-elixir">Visual Studio Elixir</a></li>
</ul>
</div>
<div id="sponsors" class="widget">
<h3 class="widget-title">Sponsors</h3>
<ul>
<li class="image"><a href="http://plataformatec.com.br" title="Plataformatec"><img src="/images/logo/plataformatec.png" alt="Plataformatec Logo" width="190" height="74" /></a></li>
</ul>
</div>
<div id="elixir-radar" class="widget">
<h3 class="widget-title">Elixir Radar</h3>
<p>
A weekly Elixir email newsletter with content curated by Plataformatec. <strong>Subscribe below</strong>.
</p>
<div class="elixir-radar-cta">
<div class="cta-copy">
<div class="cta-title">
Elixir Radar
</div>
<div class="cta-subtitle">
weekly newsletter
</div>
</div>
<div class="cta-button-container">
<a href="http://plataformatec.com.br/elixir-radar?utm_campaign=elixir_lang_cta&utm_medium=cta&utm_source=elixir_lang_website" class="cta-button">
Subscribe now
</a>
</div>
</div>
</div>
</div>
</div><!-- #main -->
<div class="clear"></div>
<div id="copyright">
&copy; 2012-2016 <a href="http://plataformatec.com.br/">Plataformatec</a>. All rights reserved.
</div>
</div><!-- .wrap -->
</div><!-- #container -->
<script src="//ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
<script src="/js/toc/toc.js" type="text/javascript" charset="utf-8"></script>
<script type="text/javascript">
$(document).ready(function() {
$('.toc').toc({
title: '',
listType: 'ol',
minimumHeaders: 2,
headers: 'h2, h3, h4, h5, h6',
linkHere: true,
linkHereTitle: 'Link here',
backToTop: true,
backToTopId: 'toc',
backToTopTitle: 'Back to Table of Contents',
});
$('.jekyll-toc-header a.jekyll-toc-link-here span.jekyll-toc-icon').addClass('icon icon-link');
$('.jekyll-toc-header a.jekyll-toc-back-to-top span.jekyll-toc-icon').addClass('icon icon-chevron-up');
});
</script>
</body>
</html>

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -10,7 +10,7 @@ defmodule ReadabilityTest do
assert nytimes_html =~ ~r/^<div><div class=\"story-body\">/
assert nytimes_html =~ ~r/major priorities.<\/p><\/div><\/div>$/
nytimes_text = Readability.readabl_text(nytimes)
nytimes_text = Readability.readable_text(nytimes)
assert nytimes_text =~ ~r/^Buddhist monks performing as part of/
assert nytimes_text =~ ~r/one of her major priorities.$/
end
@ -24,7 +24,7 @@ defmodule ReadabilityTest do
assert bbc_html =~ ~r/^<div><div class=\"story-body__inner\" property=\"articleBody\">/
assert bbc_html =~ ~r/connected computing devices\".<\/p><\/div><\/div>$/
bbc_text = Readability.readabl_text(bbc)
bbc_text = Readability.readable_text(bbc)
# TODO: Remove image caption when extract only text
# assert bbc_text =~ ~r/^Microsoft\'s quarterly profit has missed analysts/
assert bbc_text =~ ~r/connected computing devices\".$/
@ -39,7 +39,7 @@ defmodule ReadabilityTest do
assert medium_html =~ ~r/^<div><div class=\"section-inner layoutSingleColumn\">/
assert medium_html =~ ~r/recommend button!<\/em><\/h3><\/div><\/div>$/
medium_text = Readability.readabl_text(medium)
medium_text = Readability.readable_text(medium)
assert medium_text =~ ~r/^Background: Ive spent the past 6/
assert medium_text =~ ~r/a lot to me if you hit the recommend button!$/
@ -54,9 +54,15 @@ defmodule ReadabilityTest do
assert buzzfeed_html =~ ~r/^<div><div class=\"buzz_superlist_item_text\"><p>/
assert buzzfeed_html =~ ~r/encrypted devices.<\/p><hr\/><hr\/><hr\/><hr\/><\/div><\/div>$/
buzzfeed_text = Readability.readabl_text(buzzfeed)
buzzfeed_text = Readability.readable_text(buzzfeed)
assert buzzfeed_text =~ ~r/^The FBI no longer needs Apples help/
assert buzzfeed_text =~ ~r/issue of court orders and encrypted devices.$/
end
test "readability elixir blog" do
html = TestHelper.read_fixture("elixir.html")
html = Readability.content(html)
IO.inspect Readability.readable_text(html)
end
end