From 49d21b71dc2575a68f47a413ef7a37fa7eac0e38 Mon Sep 17 00:00:00 2001 From: Adlan Razalan Date: Sun, 29 Oct 2017 15:09:00 +0800 Subject: [PATCH] Do a case-insensitive content-type check --- lib/readability.ex | 2 +- test/readability_http_test.exs | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/readability.ex b/lib/readability.ex index a869165..c50c715 100644 --- a/lib/readability.ex +++ b/lib/readability.ex @@ -113,7 +113,7 @@ defmodule Readability do headers |> Enum.find( {"Content-Type", "text/plain"}, # default - fn({key, _}) -> key == "Content-Type" end) + fn({key, _}) -> String.downcase(key) == "content-type" end) |> elem(1) end diff --git a/test/readability_http_test.exs b/test/readability_http_test.exs index ccb14f2..bb626fa 100644 --- a/test/readability_http_test.exs +++ b/test/readability_http_test.exs @@ -66,4 +66,20 @@ defmodule ReadabilityHttpTest do assert result_html =~ ~r/connected computing devices\".<\/p><\/div><\/div>$/ end end + + test "response with content-type in different case is parsed correctly" do + # HTTP header keys are case insensitive (RFC2616 - Section 4.2) + url = "https://news.bbc.co.uk/test.html" + content = TestHelper.read_fixture("bbc.html") + response = %HTTPoison.Response{ + status_code: 200, + headers: [{"content-Type", "text/html; charset=UTF-8"}], + body: content} + + with_mock HTTPoison, [get!: fn(_url, _headers, _opts) -> response end] do + %Readability.Summary{article_html: result_html} = Readability.summarize(url) + + assert result_html =~ ~r/connected computing devices\".<\/p><\/div><\/div>$/ + end + end end