Remove old code
This commit is contained in:
parent
50af019c6f
commit
4cccab8df0
|
@ -43,26 +43,4 @@ defmodule Frenzy.HTTP do
|
||||||
{:error, reason}
|
{:error, reason}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def resolve_uri(uri, site_uri) when is_binary(site_uri) do
|
|
||||||
resolve_uri(uri, URI.parse(site_uri))
|
|
||||||
end
|
|
||||||
|
|
||||||
def resolve_uri(%URI{host: nil, path: path}, %URI{} = site_uri) do
|
|
||||||
%URI{site_uri | path: path}
|
|
||||||
|> resolve_uri(site_uri)
|
|
||||||
end
|
|
||||||
|
|
||||||
def resolve_uri(%URI{scheme: nil} = uri, %URI{scheme: scheme} = site_uri) do
|
|
||||||
scheme =
|
|
||||||
case scheme do
|
|
||||||
nil -> "https"
|
|
||||||
_ -> scheme
|
|
||||||
end
|
|
||||||
|
|
||||||
%URI{uri | scheme: scheme}
|
|
||||||
|> resolve_uri(site_uri)
|
|
||||||
end
|
|
||||||
|
|
||||||
def resolve_uri(uri, _), do: uri
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -60,7 +60,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
||||||
@impl Stage
|
@impl Stage
|
||||||
def validate_opts(_), do: {:error, "options must be a map"}
|
def validate_opts(_), do: {:error, "options must be a map"}
|
||||||
|
|
||||||
@spec get_article_content(String.t(), String.t()) :: {:ok, String.t()} | {:error, String.t()}
|
@spec get_article_content(String.t(), map()) :: {:ok, String.t()} | {:error, String.t()}
|
||||||
defp get_article_content(url, opts) when is_binary(url) and url != "" do
|
defp get_article_content(url, opts) when is_binary(url) and url != "" do
|
||||||
Logger.debug("Getting article from #{url}")
|
Logger.debug("Getting article from #{url}")
|
||||||
|
|
||||||
|
@ -75,9 +75,9 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
defp get_article_content(_url), do: {:error, "URL must be a non-empty string"}
|
defp get_article_content(_url, _opts), do: {:error, "URL must be a non-empty string"}
|
||||||
|
|
||||||
@spec handle_response(String.t(), HTTPoison.Response.t(), String.t()) ::
|
@spec handle_response(String.t(), HTTPoison.Response.t(), map()) ::
|
||||||
{:ok, String.t()} | {:error, String.t()}
|
{:ok, String.t()} | {:error, String.t()}
|
||||||
defp handle_response(url, %HTTPoison.Response{body: body}, opts) do
|
defp handle_response(url, %HTTPoison.Response{body: body}, opts) do
|
||||||
case opts["extractor"] do
|
case opts["extractor"] do
|
||||||
|
@ -114,7 +114,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
||||||
# Generates a helper function for the article with the given URI that takes an HTML element and,
|
# Generates a helper function for the article with the given URI that takes an HTML element and,
|
||||||
# if it's an <img> element whose src attribute does not have a hostname, adds the hostname and
|
# if it's an <img> element whose src attribute does not have a hostname, adds the hostname and
|
||||||
# scheme to the element.
|
# scheme to the element.
|
||||||
defp rewrite_image_urls(convert_to_data_uris, %URI{host: host, scheme: scheme} = site_uri) do
|
defp rewrite_image_urls(convert_to_data_uris, site_uri) do
|
||||||
fn
|
fn
|
||||||
{"img", attrs} ->
|
{"img", attrs} ->
|
||||||
new_attrs =
|
new_attrs =
|
||||||
|
@ -137,10 +137,7 @@ defmodule Frenzy.Pipeline.ScrapeStage do
|
||||||
|
|
||||||
# convert images to data URIs so that they're stored by clients as part of the body
|
# convert images to data URIs so that they're stored by clients as part of the body
|
||||||
defp image_to_data_uri(src, site_uri, true) do
|
defp image_to_data_uri(src, site_uri, true) do
|
||||||
absolute_url =
|
absolute_url = URI.merge(site_uri, src) |> to_string()
|
||||||
src
|
|
||||||
|> URI.parse()
|
|
||||||
|> HTTP.resolve_uri(site_uri)
|
|
||||||
|
|
||||||
case HTTP.get(absolute_url) do
|
case HTTP.get(absolute_url) do
|
||||||
{:ok, %HTTPoison.Response{body: body, headers: headers}} ->
|
{:ok, %HTTPoison.Response{body: body, headers: headers}} ->
|
||||||
|
|
Loading…
Reference in New Issue