From 4e4a712718e2a89a4457702af6e1e17a88923443 Mon Sep 17 00:00:00 2001 From: keepcosmos Date: Sun, 17 Apr 2016 15:28:33 +0900 Subject: [PATCH] add filter algorithms --- lib/readability.ex | 2 +- lib/readability/content_finder.ex | 94 + lib/readability/helper.ex | 25 + lib/{ => readability}/title_finder.ex | 6 + test/content_finder_test.ex | 65 + test/fixtures/bbc.html | 2066 +++++++++++++++++ test/fixtures/code.html | 13 + test/fixtures/nytimes.html | 58 + .../nytimes.html => fixtures/sample.html} | 0 test/fixtures/thesun.html | 1122 +++++++++ test/helper_text.exs | 31 + test/title_finder_test.exs | 2 +- 12 files changed, 3482 insertions(+), 2 deletions(-) create mode 100644 lib/readability/content_finder.ex create mode 100644 lib/readability/helper.ex rename lib/{ => readability}/title_finder.ex (94%) create mode 100644 test/content_finder_test.ex create mode 100644 test/fixtures/bbc.html create mode 100644 test/fixtures/code.html create mode 100644 test/fixtures/nytimes.html rename test/{features/nytimes.html => fixtures/sample.html} (100%) create mode 100644 test/fixtures/thesun.html create mode 100644 test/helper_text.exs diff --git a/lib/readability.ex b/lib/readability.ex index fc7121b..87e2840 100644 --- a/lib/readability.ex +++ b/lib/readability.ex @@ -6,5 +6,5 @@ defmodule Readability do def title(html) when is_binary(html), do: parse(html) |> title def title(html_tree), do: TitleFinder.title(html_tree) - defp parse(raw_html), do: Floki.parse(raw_html) + def parse(raw_html), do: Floki.parse(raw_html) end diff --git a/lib/readability/content_finder.ex b/lib/readability/content_finder.ex new file mode 100644 index 0000000..43c5991 --- /dev/null +++ b/lib/readability/content_finder.ex @@ -0,0 +1,94 @@ +defmodule Readability.ContentFinder do + @moduledoc """ + ContentFinder uses a variety of metrics for finding the content + that is most likely to be the stuff a user wants to read. + Then return it wrapped up in a div. 
+ """ + + @regexes [ unlikelyCandidatesRe: ~r/combx|comment|community|disqus|extra|foot|header|lightbox|modal|menu|meta|nav|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i, + okMaybeItsACandidateRe: ~r/and|article|body|column|main|shadow/i, + positiveRe: ~r/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i, + negativeRe: ~r/combx|comment|com-|contact|foot|footer|footnote|link|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|utility|widget/i, + divToPElementsRe: ~r/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i, + replaceBrsRe: ~r/(]*>[ \n\r\t]*){2,}/i, + replaceFontsRe: ~r/<(\/?)font[^>]*>/i, + trimRe: ~r/^\s+|\s+$/, + normalizeRe: ~r/\s{2,}/, + killBreaksRe: ~r/((\s| ?)*){1,}/, + videoRe: ~r/http:\/\/(www\.)?(youtube|vimeo)\.com/i + ] + + @type html_tree :: tuple | list + + @spec content(html_tree) :: html_tree + + def content(html_tree, options \\ []) do + candidate = html_tree + |> preapre_cadidates + + best_candidate = candidate + |> select_best_candidate + + candidate + |> fix_relative_uris + end + + defp preapre_cadidates(html_tree) do + html_tree + |> Floki.filter_out("script") + |> Floki.filter_out("style") + |> remove_unlikely_candidates + |> transform_misused_divs_into_paragraphs + end + + @doc """ + Remove unlikely tag nodes + """ + + @spec remove_unlikely_candidates(html_tree) :: html_tree + + def remove_unlikely_candidates(content) when is_binary(content), do: content + def remove_unlikely_candidates([]), do: [] + def remove_unlikely_candidates([h|t]) do + case remove_unlikely_candidates(h) do + nil -> remove_unlikely_candidates(t) + html_tree -> [html_tree|remove_unlikely_candidates(t)] + end + end + def remove_unlikely_candidates({tag_name, attrs, inner_tree}) do + cond do + unlikely_candidate?(tag_name, attrs) -> nil + true -> {tag_name, attrs, remove_unlikely_candidates(inner_tree)} + end + end + defp unlikely_candidate?(tag_name, attrs) do + 
idclass_str = attrs + |> Enum.filter_map(fn(attr) -> elem(attr, 0) =~ ~r/id|class/i end, + fn(attr) -> elem(attr, 1) end) + |> Enum.join("") + str = tag_name <> idclass_str + str =~ @regexes[:unlikelyCandidatesRe] && !(str =~ @regexes[:okMaybeItsACandidateRe]) && tag_name != "html" + end + + def transform_misused_divs_into_paragraphs(content) when is_binary(content), do: content + def transform_misused_divs_into_paragraphs([]), do: [] + def transform_misused_divs_into_paragraphs([h|t]) do + [transform_misused_divs_into_paragraphs(h)|transform_misused_divs_into_paragraphs(t)] + end + def transform_misused_divs_into_paragraphs({tag_name, attrs, inner_tree} = html_tree) do + if misused_divs?(tag_name, inner_tree), do: tag_name = "p" + {tag_name, attrs, transform_misused_divs_into_paragraphs(inner_tree)} + end + defp misused_divs?("div", inner_tree) do + !(Floki.raw_html(inner_tree) =~ @regexes[:divToPElementsRe]) + end + defp misused_divs?(_, _), do: false + + defp select_best_candidate(html_tree) do + html_tree + end + + defp fix_relative_uris(html_tree) do + html_tree + end +end diff --git a/lib/readability/helper.ex b/lib/readability/helper.ex new file mode 100644 index 0000000..3650cf6 --- /dev/null +++ b/lib/readability/helper.ex @@ -0,0 +1,25 @@ +defmodule Readability.Helper do + @moduledoc """ + Utilities + """ + + @type html_tree :: tuple | list + + @doc """ + change existing tags by selector + """ + + @spec change_tag(html_tree, String.t, String.t) :: html_tree + + def change_tag({tag_name, attrs, inner_tree}, tag_name, tag) do + {tag, attrs, change_tag(inner_tree, tag_name, tag)} + end + def change_tag({tag_name, attrs, html_tree}, selector, tag) do + {tag_name, attrs, change_tag(html_tree, selector, tag)} + end + def change_tag([h|t], selector, tag) do + [change_tag(h, selector, tag)|change_tag(t, selector, tag)] + end + def change_tag([], selector, tag), do: [] + def change_tag(content, selector, tag) when is_binary(content), do: content +end diff --git 
a/lib/title_finder.ex b/lib/readability/title_finder.ex similarity index 94% rename from lib/title_finder.ex rename to lib/readability/title_finder.ex index 8a1305f..ce26a17 100644 --- a/lib/title_finder.ex +++ b/lib/readability/title_finder.ex @@ -8,6 +8,12 @@ defmodule Readability.TitleFinder do @type html_tree :: tuple | list + @doc """ + Find proper title + """ + + @spec title(html_tree) :: binary + def title(html_tree) do maybe_title = tag_title(html_tree) if length(String.split(maybe_title, " ")) <= 4 do diff --git a/test/content_finder_test.ex b/test/content_finder_test.ex new file mode 100644 index 0000000..73061e3 --- /dev/null +++ b/test/content_finder_test.ex @@ -0,0 +1,65 @@ +defmodule Readability.ContentFinderTest do + use ExUnit.Case, async: true + + doctest Readability.ContentFinder + + + @unlikey_sample """ + + +
HEADER
+ +
ARTICLE
+
SOCIAL
+ + + """ + + test "remove unlikely tag nodes" do + expected = {"html", [], [ {"body", [], [ {"article", [{"class", "community"}], ["ARTICLE"]} ]} ]} + result = @unlikey_sample + |> Readability.parse + |> Readability.ContentFinder.remove_unlikely_candidates + assert expected == result + end + + @misused_sample """ + + +
+ here +
+
+

not here

+
+ + + """ + + test "transform misused div tag" do + expected = {"html", + [], + [{"body", + [], + [{"p", + [], + [{"span", [], ["here"]}] + }, {"div", + [], + [{"p", [], ["not here"]}] + }] + }] + } + + result = @misused_sample + |> Readability.parse + |> Readability.ContentFinder.transform_misused_divs_into_paragraphs + assert expected == result + end + + + def read_html(name) do + {:ok, body} = File.read("./test/fixtures/#{name}.html") + body + end +end diff --git a/test/fixtures/bbc.html b/test/fixtures/bbc.html new file mode 100644 index 0000000..80d39bf --- /dev/null +++ b/test/fixtures/bbc.html @@ -0,0 +1,2066 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BBC News - Submarine escape: A WWII survival tale from Kefalonia + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+
+ + + + + +
+ +
+ + + + +
+ + + +
+ + +
+ +
+ + + + +
+ + + + + + + + + + + + + +
+
+ + + + + + + + + +

Submarine escape: A WWII survival tale from Kefalonia

+ + + + + + + +
+ Launch of HMS Perseus in May 1929 + + HMS Perseus was launched in May 1929 +
+
+ + + + + +
+ + + + + + + + + +

In today's Magazine

+ + + + + + + + + + +
+ +

Seventy years ago, off the Greek island of Kefalonia, the British submarine HMS Perseus hit an Italian mine, sparking one of the greatest and most controversial survival stories of World War II.

+

The clear waters of the Mediterranean were a death trap for British submarines in World War II.

+

Some were bombed from the air, others hunted with sonar and depth charges, and many, perhaps most, collided with mines.

+

Two fifths of the subs that ventured into the Mediterranean were sunk and when a submarine sank it became a communal coffin - everyone on board died. That was the rule.

+

In fact, during the whole of the war there were only four escapes from stricken British submarines. And the most remarkable of these took place on 6 December 1941, when HMS Perseus plummeted to the seabed.

+ Enigma +

When she left the British submarine base at Malta at the end of November 1941, HMS Perseus had on board her 59 crew and two passengers, one of whom was John Capes, a 31-year-old Navy stoker en route to Alexandria.

+
+ John Capes + + John Capes: Stoker on the Perseus +
+

Tall, dark, handsome and a bit of an enigma, Capes had been educated at Dulwich College, and as the son of a diplomat he would naturally have been officer class rather than one of the lowliest of the mechanics who looked after the engines.

+

On the rough winter night of 6 December, Perseus was on the surface of the sea 3km (two miles) off the coast of Kefalonia, recharging her batteries under cover of darkness in preparation for another day underwater.

+

According to newspaper articles Capes later wrote or contributed to, he was relaxing in a makeshift bunk converted from a spare torpedo tube when, with no warning, there was a devastating explosion.

+

The boat twisted, plunged, and hit the bottom with what Capes described as a "nerve-shattering jolt".

+

His bunk reared up and threw him across the compartment. The lights went out.

+
+

Escape from the Deep

+ + + +
    +
  • Louis de Bernieres returns to Kefalonia to tell the story of John Capes and HMS Perseus
  • +
  • Tim Clayton acted as a programme consultant
  • +
  • Broadcast on Friday 2 December 2011 at 1100 GMT on BBC Radio 4, or listen again on iPlayer
  • +
+ +

Capes guessed they had hit a mine. Finding that he could stand, he groped for a torch. In the increasingly foul air and rising water of the engine room he found "the mangled bodies of a dozen dead".

+

But that was as far as he could get. The engine room door was forced shut by the pressure of water on the other side. "It was creaking under the great pressure. Jets and trickles from the rubber joint were seeping through," said Capes.

+

He dragged any stokers who showed signs of life towards the escape hatch and fitted them and himself with Davis Submarine Escape Apparatus, a rubber lung with an oxygen bottle, mouthpiece and goggles.

+
+

British WWII submarine escapes

+ +
+ Graphic showing the depth at which British WWII submariners escaped + +
+ + +
    +
  • HMS Umpire sank near Norfolk, England on 19 July 1941. Escapees: 14-15
  • +
  • HMS Stratagem sank near Malacca, Malaysia on 22 November 1944. Escapees: 10
  • +
  • HMS Perseus sank near Kefalonia, Greece on 6 December 1941. Escapees: 1
  • +
  • HMS P32 sank near Tripoli, Libya on 18 August 1941 (but the wreck was discovered only in 1999). Escapees: 2
  • +
+ +

This equipment had only been tested to a depth of 100ft (30m). The depth gauge showed just over 270ft, and as far as Capes knew, no-one had ever made an escape from such a depth.

+

In fact the gauge was broken, over-estimating the depth by 100ft, but time was running out. It was difficult to breathe now.

+

He flooded the compartment, lowered the canvas trunk beneath the escape hatch and with great difficulty released the damaged bolts on the hatch.

+

He pushed his injured companions into the trunk, up through the hatch and away into the cold sea above. Then he took a last swig of rum from his blitz bottle, ducked under and passed through the hatch himself.

+

"I let go, and the buoyant oxygen lifted me quickly upward. Suddenly I was alone in the middle of the great ocean.

+

"The pain became frantic, my lungs and whole body as fit to burst apart. Agony made me dizzy. How long can I last?

+

"Then, with the suddenness of certainty, I burst to the surface and wallowed in a slight swell with whitecaps here and there."

+

But having made the deepest escape yet recorded, his ordeal was not over.

+

His fellow injured stokers had not made it to the surface with him so he found himself alone in the middle of a cold December sea.

+

In the darkness he spotted a band of white cliffs and realised he had no choice but to strike out for those.

+ Story doubted +

The next morning, Capes was found unconscious by two fishermen on the shore of Kefalonia.

+

For the following 18 months he was passed from house to house, to evade the Italian occupiers. He lost 70lb (32kg) in weight and dyed his hair black in an effort to blend in.

+

He recalled later: "Always, at the moment of despair, some utterly poor but friendly and patriotic islander would risk the lives of all his family for my sake.

+
+ Kostas Thoctarides swimming next to the wreck of HMS Perseus + + Kostas Thoctarides and his dive team found the wreck of HMS Perseus in 1997 +
+

"They even gave me one of their prize possessions, a donkey called Mareeka. There was one condition attached to her - I had to take a solemn vow not to eat her."

+

He was finally taken off the island on a fishing boat in May 1943, in a clandestine operation organised by the Royal Navy.

+

A dangerous, roundabout journey of 640km took him to Turkey and from there back to the submarine service in Alexandria.

+

Despite being awarded a medal for his escape, Capes's story was so extraordinary that many people, both within and outside the Navy, doubted it.

+

Was he really on the boat at all? After all, he was not on the crew list. And submarine commanders had been ordered to bolt escape hatches shut from the outside to prevent them lifting during depth charge attacks.

+

There were no witnesses, he had a reputation as a great storyteller, and his own written accounts after the war varied in their details.

+

And the depth gauge reading 270ft made his story all the harder to believe.

+

John Capes died in 1985 but it was not until 1997 that his story was finally verified.

+

In a series of dives to the wreck of Perseus, Kostas Thoctarides discovered Capes's empty torpedo tube bunk, the hatch and compartment exactly as he had described it, and finally, his blitz bottle from which he had taken that last fortifying swig of rum.

+

Tim Clayton is the author of Sea Wolves: the Extraordinary Story of Britain's WW2 Submarines.

+

BBC Radio 4's Escape from the Deep is broadcast on Friday 2 December 2011 at 1100 GMT. Or listen again on BBC iPlayer.

+ + +
+ +
+ +
+

More on This Story

+ + + + + +
+ +
+ +

In today's Magazine + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + + +

The BBC is not responsible for the content of external Internet sites

+
+ + +
+ + + + + + +
+ +
+ + + + +
+ +
+ + + + + +
+ +
+ +
+ + + + + + +
+ +

Top Stories

+ + + + + + + + + +
+ + + + +
+ +
+ + + + +
+ +

Features & Analysis

+ +
    + + + + + + + + + +
  • + + + + + + + + + + + + + + +

    + Meryl StreepIt's quiz time! + +

    + + +

    Meryl Streep's last Oscar was for which film? +

    + +
    +
  • + + +
  • + + + + + + + + + + + + + + +

    + Qatari prime ministerLeague of its own + +

    + + +

    How Arab leaders embraced revolution +

    + +
    +
  • + + +
  • + + + + + + + + + + + + + + +

    + Delegates are seen beneath a ceiling painted by Spanish artist Miquel Barcelo during a special session of the UN Human Rights Council, GenevaDay in pictures + +

    + + +

    24 hours of news photos from around the world +

    + +
    +
  • + + + + +
+
+ + + + + + + + +
+ + + + + + + + +
+
+ +

Elsewhere on BBC News

+ +
    + + + +
  • + + + + + + + + + + + + + +

    + Zhai MeiqingGiving a bit back + +

    + +

    The entrepreneur and now multi-millionaire at the forefront of China's new-found philanthropic thinking

    +
  • +
+ +
+ +
+ + +
+ + + +
+ + + +
+ +
+ + + + + + + + +
+ + + +
+

Programmes

+ + + + +
    + + + +
  • + + + + + + + + + + + + + +

    + Toyota Fun ViiClick Watch + +

    + +

    Toyota's futuristic car that changes colour and other tech news in Click's weekly bulletin

    + + +
    + +
    + + +
  • +
+
+ + +
+ + + +
+ +
+ + + +
+ + +
+ + +
+ + + + + + + +
+ + + + + + +

bbc.co.uk navigation

BBC © 2011 The BBC is not responsible for the content of external sites. Read more.

This page is best viewed in an up-to-date web browser with style sheets (CSS) enabled. While you will be able to view the content of this page in your current browser, you will not be able to get the full visual experience. Please consider upgrading your browser software or enabling style sheets (CSS) if you are able to do so.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + diff --git a/test/fixtures/code.html b/test/fixtures/code.html new file mode 100644 index 0000000..a82a92c --- /dev/null +++ b/test/fixtures/code.html @@ -0,0 +1,13 @@ + + +
+root
+  indented
+    
+ +

+second
+  indented
+    
+ + diff --git a/test/fixtures/nytimes.html b/test/fixtures/nytimes.html new file mode 100644 index 0000000..cf2d529 --- /dev/null +++ b/test/fixtures/nytimes.html @@ -0,0 +1,58 @@ +Health Care for a Changing Work Force - NYTimes.com +
The New York Times


December 1, 2011, 9:15 pm

Health Care for a Changing Work Force

Fixes

Fixes looks at solutions to social problems and why they work.

Sara Horowitz addressed members of the Freelancers Union during a 2009 forum which invited candidates for Public Advocate and Comptroller to discuss the issues affecting independent workers.Carolyn SilveiraSara Horowitz, the founder of the Freelancers Union, addressed union members at a 2009 forum on the issues affecting independent workers.

Big institutions are often slow to awaken to major social transformations. Microsoft was famously late to grasp the importance of the Internet. American auto manufacturers were slow to identify the demand for fuel-efficient cars. And today, the United States government is making a similar mistake: it still doesn’t seem to recognize that Americans no longer work the way they used to.

Today, some 42 million people — about a third of the United States work force — do not have jobs in the traditional sense. They fall into a catchall category the government calls “contingent” workers. These people — independent contractors, freelancers, temp workers, part-timers, people between jobs — typically work on a project-to-project basis for a variety of clients, and most are outcasts from the traditional system of benefits that provide economic security to Americans. Even as the economy has changed, employment benefits are still based on an outdated industrial-era model in which workers are expected to stay with a single company for years, if not their whole careers.

The industrial-era model of employer-based health care no longer applies.

For most of the 20th century, it was efficient to link benefits to jobs this way. But today, more and more work falls outside the one-to-one, employee-to-employer relationship. Work is decentralized, workers are mobile, and working arrangements are fluid. However, the risks of life haven’t gone away: people still need protections. They just need a different system to distribute them. They need benefits that they can carry around, like their laptops. As things stand, millions of independent workers go without health and unemployment insurance, protection against discrimination and unpaid wages, and pension plans. It makes no sense.

One of the social innovators to recognize this problem early and act on it was Sara Horowitz, the founder of the Freelancers Union, which has more than 165,000 members across all 50 states. At Fixes, we highlight practical applications of ideas that have the potential to achieve widespread impact. That means looking at how ideas take root in institutions that become part of the fabric of society.

In the early 20th century, a landscape of new institutions — including the early labor unions and hundreds of civil society organizations like Rotary International, the Boy and Girl Scouts, and the N.A.A.C.P. — reshaped the American landscape. Today, the Freelancers Union offers a glimpse of the kind of social enterprise — mission-driven and pragmatic, market-savvy and cooperative — that is likely to proliferate in the coming years to meet the needs of a fast-changing work force and society.

Horowitz had been a labor lawyer and union organizer when, in the early 1990s, she recognized that the number of people turning to independent work was on the rise. It was also clear that institutions had not yet been built to represent them in any meaningful way. (On one occasion, Horowitz found herself misclassified by an employer as an independent contractor — and quickly discovered that she received no job benefits.) Horowitz had the idea to create an organization to bring freelancers together so they could wield their power in the marketplace and in the political arena, much like AARP does for retirees.

She quickly discovered that their biggest concern was the cost of health insurance. But there were other problems, too. Unlike traditional workers who receive unemployment benefits, independent contractors have to rely on their own resources to get through hard times. In 2009, Freelancers Union surveyed 3,000 members and found that more than 80 percent had gone jobless or underemployed during the year. More than 60 percent had used their credit cards or borrowed from friends and family to make ends meet, and 12 percent had to turn to food stamps. Close to 40 percent had given up, or downgraded, their health insurance protection.

Another problem was getting paid. Some companies, like Time Inc., actually charge freelancers penalties if they request payment within 25 days. Freelancers Union found that 77 percent of its members had been cheated by a client during their careers and 40 percent had had trouble getting paid in 2009. The average wage loss was $6,000. The Department of Labor protects traditional workers from unpaid wages, but freelancers have no equivalent recourse. Then there were difficulties obtaining mortgages, the lack of access to 401(k) plans, and other issues.

An insurance provider that stays viable by not seeking to maximize profits.

Horowitz saw that she could attract a large membership if she could figure out how to provide health insurance at lower cost. Health insurance companies don’t have much love for freelancers. They prefer to serve large groups because it’s easier to deal with one corporate benefits manager than a multitude of individuals. And because insurers often lack reliable information about independent workers, they tend to assume that they are risky. As a result, premiums in the open marketplace for health insurance are higher and more volatile than those for employees. (The Affordable Care Act is designed to address this problem beginning in 2014 by subsidizing private insurance, but it applies only to people with low and moderate incomes.)

Horowitz got the idea of grouping freelancers in New York State so they could purchase their health insurance together. It made sense in theory, but it had never been done. She worked closely with officials in Albany, notably Assemblyman Sheldon Silver, who was a strong ally, and Gregory Serio, the former superintendent of insurance for New York State, who had the authority to grant approval for “discretionary” insurance groups.

“A lot of health insurers have looked at individual and sole proprietors as very expensive and risky to underwrite,” explained Serio. “Sara was able to foresee a trend [in the rise of independent work] before a lot of other people did. She went and found out that these people were not bad risks. Her creativity was in using existing concepts of insurance risk sharing and applying it to a community that has been ignored by the marketplace and, in fact, almost vilified by the marketplace.”

Serio and Horowitz made an interesting team. “I was a conservative Republican from Nassau County working for George Pataki,” he told me. “And she was my liberal friend from Brooklyn.” But Serio found the idea of protecting freelancers appealing because his father had been a dentist who operated out of a second-floor walk-up office on Jamaica Avenue, in Woodhaven, Queens. “I grew up in a sole proprietor household,” he said. “If my father didn’t work, he didn’t get paid. And I knew what it was like seeing health insurance rates go up and up.”

Today, the Freelancers Insurance Company (F.I.C.), which is wholly owned by the Freelancers Union (a nonprofit), has revenues of roughly $100 million and covers 25,000 independent workers and their family members in New York State, offering them premiums that the company calculates are more than a third below the open market rate. Close to 60 percent of its clients were previously uninsured or on COBRA (a temporary extension of their previous insurance). The renewal rate last year was 97 percent. (Disclosure: I have purchased health insurance from F.I.C. for a number of years.) The company was financed with $17 million in loans and grants from social investors, including the Rockefeller Foundation, the Robert Wood Johnson Foundation and the New York City Investment Fund. “Our freelancers have access to the best doctors and hospitals,” says Horowitz. “We have skilled human resource people, just like Fortune 500 companies. We’re able to watch out for our members.”

How can the F.I.C. undercut market rates and still be a viable enterprise? The key is that while it seeks to be profitable, it does not seek to maximize profits. Its executives receive salaries that are below industry averages, and it has only one shareholder (the Freelancers Union) to satisfy. Those are fundamental differences. Silver, who is the speaker of the State Assembly, notes that the success of the F.I.C. makes it more difficult for traditional insurers to contend that they can’t deliver insurance at lower cost. “Duplicating the model and showing the ability of [the F.I.C.] to keep costs under control is something that we will be looking at,” he adds.

Like many social goods, health insurance is often seen through a binary lens: either it must be handled by the government or it must be handled by the free market. But the F.I.C. is demonstrating that a middle way can work, too, and that it may be preferable to provide vital services like insurance through social-benefit companies, at least to certain customer groups. In fact, the Affordable Care Act has a provision to finance a new type of nonprofit health insurance company that would be run by its customers. It would be called a Consumer Operated and Oriented Plan (CO-OP). The Freelancers Union has proposed to establish CO-OPs in Florida, New Jersey, New York, Oregon and Washington.

Because the F.I.C. has a close connection with freelancers, it can be more effective helping its members make good health care decisions. “We’re moving away from fee-for-service medicine to one where a primary care doctor aggressively coordinates care,” explains Horowitz. “We’re also trying to innovate with alternative care — promoting meditation, yoga, and nutrition which can have long-term beneficial effects.” In 2012, the organization will be opening up the Brooklyn Freelancers Medical Practice, a health center modeled on the medical-home approach and designed in partnership with a physician named Rushika Fernandopulle, who pioneered a team-based model of care that is attracting attention across the country.

For now, the United States government doesn’t keep an accurate count of the independent work force. This is an oversight. It appears likely that this way of working will continue to grow. In cities with concentrations of knowledge workers, you find a proliferation of co-working spaces designed specifically for freelancers. And online marketplaces for freelancers like Etsy, oDesk and Elance are expanding rapidly.

Related
More From Fixes

Read previous contributions to this series.

It’s not just hipsters who work like this. Forty-five percent of Freelancers Union members are over 40 years old. Not all follow this path by choice. Many freelancers are former employees, like journalists, who lost jobs. Recent college graduates, discovering that a degree is far from a job guarantee, are forced to be more entrepreneurial. And many companies, seeking to hold costs down, engage freelancers rather than hire full-time workers. All of these workers deserve the same protections accorded to others.

“The industrial workers of the 20th century helped bring about the New Deal,” says Horowitz. “But the New Deal hasn’t evolved to include independent workers. I think this work force will help bring about the next New Deal — a framework of economic security that is parallel in its goals but led by a network of new institutions.”

“The government can’t replace civil society,” she added. “So if the civil society organizations have control, it will be harder to have your benefits taken away — if you happen to lose an election.”

On Wednesday, I’ll report on some of the other ways the Freelancers Union is helping to make independent work more secure. In the meantime, if you are a freelancer, or know someone who works this way, let us know about your experiences.
Join Fixes on Facebook and follow updates on twitter.com/nytimesfixes.


David Bornstein

David Bornstein is the author of “How to Change the World,” which has been published in 20 languages, and “The Price of a Dream: The Story of the Grameen Bank,” and is co-author of “Social Entrepreneurship: What Everyone Needs to Know.” He is the founder of dowser.org, a media site that reports on social innovation.


Inside Opinionator

December 1, 2011
Health Care for a Changing Work Force

With 42 million independent workers in the United States, the Freelancers Union’s health plan may be a model for the future.

November 29, 2011
Giving Where It Works

Most social enterprises struggle to survive. These seven programs make the most of the charitable dollars they receive.

More From Fixes »

December 1, 2011
Gun Nuts in a Rut

Despite all evidence to the contrary, the N.R.A. is determined to see President Obama as anti-gun.

November 24, 2011
My End of the Food Chain

Food from the wild is the way to go, even if it means having to remove the odd shotgun pellet.

More From Timothy Egan »

December 1, 2011
Lincoln’s P.R. Coup

Why did the president release scores of State Department correspondence to the public in 1861?

November 29, 2011
Beyond ‘Glory’

The complicated story of African-American soldiers goes back much further than the Civil War.

More From Disunion »

November 30, 2011
My Bridge to Nowhere

We thought that once we decided on adoption, out of the ether, a child would appear. I have been wrong before, but never quite this wrong.

November 19, 2011
My Coney Island Crime

I came upon a man stealing sand from the beach, and then I helped him.

More From Townies »

November 30, 2011
Sins of the Parents

Florida is treating residents who have lived their entire lives there as non-residents for tuition purposes if they can’t prove their parents are in the United States legally.

November 16, 2011
Reasonable Expectations

The Supreme Court will explore the permissible limits of government watchfulness over our daily lives.

More From Linda Greenhouse »

November 30, 2011
On Abortion and Defining a ‘Person’

A recent referendum in Mississippi suggests important consequences for the logic of the abortion debate.

November 30, 2011
Stone Links

A gathering of recent philosophy-related links.

More From The Stone »

November 30, 2011
The Not-Romneys

What drives the anybody-but-Mitt phenomenon?

November 16, 2011
Lightning Round

There are a lot of things to talk about: Cain, Gingrich, Occupy Wall Street — and Cincinnati.

More From The Conversation »

November 29, 2011
Making Local Food Real

In Vermont, community-supported agriculture that’s working.

November 19, 2011
No Turkeys Here

It’s easy to find signs of hope in the people and organizations who’ve been prodding American food back on a natural, sustainable, beautiful track.

More From Mark Bittman »

November 28, 2011
A Not-So-Straight Story

The American-Canadian border, famously said to run straight across the 49th parallel for hundreds of miles, is neither straight nor along the 49th parallel.

November 21, 2011
The Way We Were

In an Age of Lead, it’s easy for countries to dream of a Golden Era when they were stronger — and much bigger.

More From Borderlines »

November 28, 2011
Looking at Dogs and Cars

Is having a generous impulse enough, or do you have to follow through?

November 14, 2011
The Tobacco Horror Show

A court case involving cigarette ads raises murky issues about the relative impact of words and images.

More From Stanley Fish »

November 11, 2011
Up Against the Wall

A college ritual remembered: peculiar, official and all in the name of posture.

October 21, 2011
Tough Sell

Even a brilliant visionary had a hard time getting this author even to unpack his computer.

More From Dick Cavett »

November 10, 2011
Lives During Wartime, Vol. 3

A collection of reader photographs and remembrances of veterans and their service.

November 9, 2011
Checkpoints: A U.S. Veteran in Baghdad

A poet and Iraq war veteran journeys to Baghdad’s “Street of the Dead” and beyond to try to understand life in the city today.

More From Home Fires »

October 28, 2011
The Cain Enigma

For many analysts, the success of Herman Cain defies logic.

October 21, 2011
Battle of the Borders

While Republicans talked tough on immigration, the Obama administration deported a record number of illegal immigrants.

More From The Thread »

October 18, 2011
Prophecy of Machines

Technology has surpassed art, not only in its power to influence public imagination, but also in prophetic vision.

August 3, 2011
Scoring Outside the Lines

Writing music that goes beyond notes and clefs, into the realm of visual art.

More From The Score »

October 18, 2011
Prophecy of Machines

Technology has surpassed art, not only in its power to influence public imagination, but also in prophetic vision.

August 3, 2011
Scoring Outside the Lines

Writing music that goes beyond notes and clefs, into the realm of visual art.

More From The Score »

Opinionator Highlights

Health Care for a Changing Work Force

With 42 million independent workers in the United States, the Freelancers Union’s health plan may be a model for the future.

On Abortion and Defining a ‘Person’

A recent referendum in Mississippi suggests important consequences for the logic of the abortion debate.

Thumbnail
Giving Where It Works

Most social enterprises struggle to survive. These seven programs make the most of the charitable dollars they receive.

Thumbnail
Scaling the ‘Wall in the Head’

Walls and fences, electrified or not, protect people not from nameless barbarians, but from their own anxieties and fears.

An Electronic Eye on Hospital Hand-Washing

A video monitoring system is helping to increase hand-washing rates and reduce deadly hospital-acquired infections.

+ + diff --git a/test/features/nytimes.html b/test/fixtures/sample.html similarity index 100% rename from test/features/nytimes.html rename to test/fixtures/sample.html diff --git a/test/fixtures/thesun.html b/test/fixtures/thesun.html new file mode 100644 index 0000000..a044fe4 --- /dev/null +++ b/test/fixtures/thesun.html @@ -0,0 +1,1122 @@ + + + + + +Manchester United news: Dimitar Berbatov and Carling Cup flops warned | The Sun |Sport|Football + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Spacer
+
+
+
+ +
+ + +
+
+
+ + + + +
+
+
+
+
+LOOKING DIM ... Dimitar Berbatov was poor against Crystal Palace +
+
LOOKING DIM ... Dimitar Berbatov was poor against Crystal Palace
+
+
+
+
+ +
+

Top Stories

+ + + +
+ +
+ +Danger - Your business cannot ignore this + +
+
+
+
+
+
+
+
+

+United cup flops warned +

+
+
+ +
+
+ +
+
+
+
+
+
+ +
+
+
+

+MANCHESTER UNITED'S Carling Cup flops are set to pay the price for their KO +against Crystal Palace. +

+

+Alex Ferguson was forced to apologise to the Old Trafford faithful and now the +likes of Dimitar Berbatov, Darron Gibson, Kiko Macheda and Mame Biram Diouf +could be moved on. +

+

+There is a big question mark over the future of record £30.75million buy +Berbatov who limped off at half-time after another poor display. +

+

+Boss Fergie has shown he is not afraid to get ruthless with players who do not +cut it in the League Cup. +

+

+Four years ago United crashed out to Coventry at Old Trafford and some never +recovered. Of the 14 players on duty, nine have since left while Tomasz +Kuszczak is frozen out. +

+

+Ferguson's men were also humbled at West Ham 12 months ago and four players +from that squad have gone. +

+

+So that could spell bad news for the likes of Italian Macheda, 20, and Senegal +ace Diouf, 23. +

+

+Skipper for the night Jonny Evans said: "The lads have got to work hard on +their games. +

+

+"We're very disappointed not to be in the semi-finals. But the games are +coming thick and fast and the manager will need his squad. So we need to +pick ourselves up." +

+
+ + + +
+
+
+
+
+
+
+ +
+RSS +Share it +Email +Print +
+
+
+
+ +
+ +
+ +
+
+
Sign up to SunSport's breaking email alerts | Sign up to SunSport's breaking text alerts
+
+ + +
+
+
+
+ + + +
+
+
+
+ + +
+ +
+
+
+ + + + + + + + + diff --git a/test/helper_text.exs b/test/helper_text.exs new file mode 100644 index 0000000..57aced2 --- /dev/null +++ b/test/helper_text.exs @@ -0,0 +1,31 @@ +defmodule Readability.HelperTest do + use ExUnit.Case, async: true + + import Readability, only: :functions + alias Readability.Helper + + @sample """ + + +

+ a +

+ abc +

+

+

+ b +

+ + + """ + + test "change font tag to span" do + expectred = @sample + |> String.replace(~r/font/, "span") + |> Floki.parse + + result = Helper.change_tag(parse(@sample), "font", "span") + assert expectred == result + end +end diff --git a/test/title_finder_test.exs b/test/title_finder_test.exs index e9a8b2d..ea5098d 100644 --- a/test/title_finder_test.exs +++ b/test/title_finder_test.exs @@ -1,7 +1,7 @@ defmodule Readability.TitleFinderTest do use ExUnit.Case, async: true - doctest Readability + doctest Readability.TitleFinder @html """