diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/pleroma/web/rich_media/backfill.ex | 24 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/helpers.ex | 44 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parser.ex | 23 | ||||
-rw-r--r-- | lib/pleroma/web/rich_media/parsers/o_embed.ex | 2 | ||||
-rw-r--r-- | lib/pleroma/workers/rich_media_worker.ex | 16 |
5 files changed, 66 insertions, 43 deletions
diff --git a/lib/pleroma/web/rich_media/backfill.ex b/lib/pleroma/web/rich_media/backfill.ex index f1ee83bf0..a66422e71 100644 --- a/lib/pleroma/web/rich_media/backfill.ex +++ b/lib/pleroma/web/rich_media/backfill.ex @@ -4,6 +4,7 @@ defmodule Pleroma.Web.RichMedia.Backfill do alias Pleroma.Web.RichMedia.Card + alias Pleroma.Web.RichMedia.Helpers alias Pleroma.Web.RichMedia.Parser alias Pleroma.Web.RichMedia.Parser.TTL alias Pleroma.Workers.RichMediaWorker @@ -16,8 +17,7 @@ defmodule Pleroma.Web.RichMedia.Backfill do Pleroma.Web.ActivityPub.ActivityPub ) - @spec run(map()) :: - :ok | {:error, {:invalid_metadata, any()} | :body_too_large | {:content, any()} | any()} + @spec run(map()) :: :ok | Parser.parse_errors() | Helpers.get_errors() def run(%{"url" => url} = args) do url_hash = Card.url_to_hash(url) @@ -33,22 +33,16 @@ defmodule Pleroma.Web.RichMedia.Backfill do end warm_cache(url_hash, card) + :ok - {:error, {:invalid_metadata, fields}} -> - Logger.debug("Rich media incomplete or invalid metadata for #{url}: #{inspect(fields)}") - negative_cache(url_hash) - - {:error, :body_too_large} -> - Logger.error("Rich media error for #{url}: :body_too_large") - negative_cache(url_hash) - - {:error, {:content_type, type}} -> - Logger.debug("Rich media error for #{url}: :content_type is #{type}") + {:error, type} = error + when type in [:invalid_metadata, :body_too_large, :content_type, :validate] -> negative_cache(url_hash) + error - e -> - Logger.debug("Rich media error for #{url}: #{inspect(e)}") - {:error, e} + {:error, type} = error + when type in [:get, :head] -> + error end end diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index ea41bd285..fba23c657 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -5,26 +5,38 @@ defmodule Pleroma.Web.RichMedia.Helpers do alias Pleroma.Config + require Logger + + @type get_errors :: {:error, :body_too_large | :content_type | :head | :get} + + @spec rich_media_get(String.t()) :: {:ok, String.t()} | get_errors() def rich_media_get(url) do headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}] - head_check = - case Pleroma.HTTP.head(url, headers, http_options()) do - # If the HEAD request didn't reach the server for whatever reason, - # we assume the GET that comes right after won't either - {:error, _} = e -> - e + with {_, {:ok, %Tesla.Env{status: 200, headers: headers}}} <- + {:head, Pleroma.HTTP.head(url, headers, http_options())}, + {_, :ok} <- {:content_type, check_content_type(headers)}, + {_, :ok} <- {:content_length, check_content_length(headers)}, + {_, {:ok, %Tesla.Env{status: 200, body: body}}} <- + {:get, Pleroma.HTTP.get(url, headers, http_options())} do + {:ok, body} + else + {:head, _} -> + Logger.debug("Rich media error for #{url}: HTTP HEAD failed") + {:error, :head} - {:ok, %Tesla.Env{status: 200, headers: headers}} -> - with :ok <- check_content_type(headers), - :ok <- check_content_length(headers), - do: :ok + {:content_type, {_, type}} -> + Logger.debug("Rich media error for #{url}: content-type is #{type}") + {:error, :content_type} - _ -> - :ok - end + {:content_length, {_, length}} -> + Logger.debug("Rich media error for #{url}: content-length is #{length}") + {:error, :body_too_large} - with :ok <- head_check, do: Pleroma.HTTP.get(url, headers, http_options()) + {:get, _} -> + Logger.debug("Rich media error for #{url}: HTTP GET failed") + {:error, :get} + end end defp check_content_type(headers) do @@ -32,7 +44,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do {_, content_type} -> case Plug.Conn.Utils.media_type(content_type) do {:ok, "text", "html", _} -> :ok - _ -> {:error, {:content_type, content_type}} + _ -> {:error, content_type} end _ -> @@ -47,7 +59,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do {_, maybe_content_length} -> case Integer.parse(maybe_content_length) do {content_length, ""} when content_length <= max_body -> :ok - {_, ""} -> {:error, :body_too_large} + {_, ""} -> {:error, maybe_content_length} _ -> :ok end diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index 7f6b5d388..a3a522d7a 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -3,6 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.Parser do + alias Pleroma.Web.RichMedia.Helpers require Logger @config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config) @@ -11,24 +12,26 @@ defmodule Pleroma.Web.RichMedia.Parser do Pleroma.Config.get([:rich_media, :parsers]) end - def parse(nil), do: nil + @type parse_errors :: {:error, :rich_media_disabled | :validate} - @spec parse(String.t()) :: {:ok, map()} | {:error, any()} - def parse(url) do + @spec parse(String.t()) :: + {:ok, map()} | parse_errors() | Helpers.get_errors() + def parse(url) when is_binary(url) do with {_, true} <- {:config, @config_impl.get([:rich_media, :enabled])}, - :ok <- validate_page_url(url), - {:ok, data} <- parse_url(url) do + {_, :ok} <- {:validate, validate_page_url(url)}, + {_, {:ok, data}} <- {:parse, parse_url(url)} do data = Map.put(data, "url", url) {:ok, data} else {:config, _} -> {:error, :rich_media_disabled} - e -> e + {:validate, _} -> {:error, :validate} + {:parse, error} -> error end end defp parse_url(url) do - with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url), - {:ok, html} <- Floki.parse_document(html) do + with {:ok, body} <- Helpers.rich_media_get(url), + {:ok, html} <- Floki.parse_document(body) do html |> maybe_parse() |> clean_parsed_data() @@ -50,8 +53,8 @@ defmodule Pleroma.Web.RichMedia.Parser do {:ok, data} end - defp check_parsed_data(data) do - {:error, {:invalid_metadata, data}} + defp check_parsed_data(_data) do + {:error, :invalid_metadata} end defp clean_parsed_data(data) do diff --git a/lib/pleroma/web/rich_media/parsers/o_embed.ex b/lib/pleroma/web/rich_media/parsers/o_embed.ex index 0f303176c..35ec14426 100644 --- a/lib/pleroma/web/rich_media/parsers/o_embed.ex +++ b/lib/pleroma/web/rich_media/parsers/o_embed.ex @@ -22,7 +22,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do end defp get_oembed_data(url) do - with {:ok, %Tesla.Env{body: json}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url) do + with {:ok, json} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url) do Jason.decode(json) end end diff --git a/lib/pleroma/workers/rich_media_worker.ex b/lib/pleroma/workers/rich_media_worker.ex index ecc980a28..30f9d9e9e 100644 --- a/lib/pleroma/workers/rich_media_worker.ex +++ b/lib/pleroma/workers/rich_media_worker.ex @@ -14,7 +14,21 @@ defmodule Pleroma.Workers.RichMediaWorker do end def perform(%Job{args: %{"op" => "backfill", "url" => _url} = args}) do - Backfill.run(args) + case Backfill.run(args) do + :ok -> + :ok + + {:error, type} + when type in [:invalid_metadata, :body_too_large, :content_type, :validate] -> + {:cancel, type} + + {:error, type} + when type in [:get, :head] -> + {:error, type} + + error -> + {:error, error} + end end @impl Oban.Worker |