 CHANGELOG.md                                       |  3
 config/config.exs                                  |  1
 config/description.exs                             |  7
 docs/configuration/cheatsheet.md                   |  1
 lib/pleroma/html.ex                                |  5
 lib/pleroma/web/mastodon_api/views/status_view.ex  | 16
 lib/pleroma/web/rich_media/parser.ex               | 54
 test/web/rich_media/aws_signed_url_test.exs        |  2
 8 files changed, 63 insertions, 26 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8ff00c161..c87aff851 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,11 +13,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - **Breaking:** The metadata providers RelMe and Feed are no longer configurable. RelMe should always be activated and Feed only provides a <link> header tag for the actual RSS/Atom feed when the instance is public.
 
 ### Added
-
 - Rich media failure tracking (along with `:failure_backoff` option)
 
 ### Fixed
 
 - Mastodon API: Search parameter `following` now correctly returns the followings rather than the followers
+- Mastodon API: Timelines hanging for (`number of posts with links * rich media timeout`) in the worst case.
+  Reduced to just rich media timeout.
 
 ## [2.1.0] - 2020-08-28
diff --git a/config/config.exs b/config/config.exs
index 694909bfd..99bbb74e9 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -412,6 +412,7 @@ config :pleroma, :rich_media,
     Pleroma.Web.RichMedia.Parsers.TwitterCard,
     Pleroma.Web.RichMedia.Parsers.OEmbed
   ],
+  failure_backoff: 60_000,
   ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
 
 config :pleroma, :media_proxy,
diff --git a/config/description.exs b/config/description.exs
index 29a657333..5e08ba109 100644
--- a/config/description.exs
+++ b/config/description.exs
@@ -2385,6 +2385,13 @@ config :pleroma, :config_description, [
         suggestions: [
           Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl
         ]
+      },
+      %{
+        key: :failure_backoff,
+        type: :integer,
+        description:
+          "Amount of milliseconds after request failure, during which the request will not be retried.",
+        suggestions: [60_000]
       }
     ]
   },
diff --git a/docs/configuration/cheatsheet.md b/docs/configuration/cheatsheet.md
index b4504d1d7..b2980793d 100644
--- a/docs/configuration/cheatsheet.md
+++ b/docs/configuration/cheatsheet.md
@@ -359,6 +359,7 @@ config :pleroma, Pleroma.Web.MediaProxy.Invalidation.Http,
 * `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`.
 * `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"].
 * `parsers`: list of Rich Media parsers.
+* `failure_backoff`: Amount of milliseconds after request failure, during which the request will not be retried.
 
 ## HTTP server
 
diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
index dc1b9b840..20b02f091 100644
--- a/lib/pleroma/html.ex
+++ b/lib/pleroma/html.ex
@@ -109,8 +109,9 @@ defmodule Pleroma.HTML do
       result =
         content
         |> Floki.parse_fragment!()
-        |> Floki.filter_out("a.mention,a.hashtag,a.attachment,a[rel~=\"tag\"]")
-        |> Floki.attribute("a", "href")
+        |> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
+        |> Enum.take(1)
+        |> Floki.attribute("href")
         |> Enum.at(0)
 
       {:commit, {:ok, result}}
diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex
index 01b8bb6bb..3fe1967be 100644
--- a/lib/pleroma/web/mastodon_api/views/status_view.ex
+++ b/lib/pleroma/web/mastodon_api/views/status_view.ex
@@ -23,6 +23,17 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
 
   import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2]
 
+  # This is a naive way to do this, just spawning a process per activity
+  # to fetch the preview. However it should be fine considering
+  # pagination is restricted to 40 activities at a time
+  defp fetch_rich_media_for_activities(activities) do
+    Enum.each(activities, fn activity ->
+      spawn(fn ->
+        Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
+      end)
+    end)
+  end
+
   # TODO: Add cached version.
   defp get_replied_to_activities([]), do: %{}
 
@@ -80,6 +91,11 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
 
     # To do: check AdminAPIControllerTest on the reasons behind nil activities in the list
     activities = Enum.filter(opts.activities, & &1)
+
+    # Start fetching rich media before doing anything else, so that later calls to get the cards
+    # only block for timeout in the worst case, as opposed to
+    # length(activities_with_links) * timeout
+    fetch_rich_media_for_activities(activities)
     replied_to_activities = get_replied_to_activities(activities)
 
     parent_activities =
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index e9aa2dd03..e98c743ca 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -17,14 +17,25 @@ defmodule Pleroma.Web.RichMedia.Parser do
   else
     @spec parse(String.t()) :: {:ok, map()} | {:error, any()}
     def parse(url) do
-      Cachex.fetch!(:rich_media_cache, url, fn _ ->
-        with {:ok, data} <- parse_url(url) do
-          {:commit, {:ok, data}}
-        else
-          error -> {:ignore, error}
-        end
-      end)
-      |> set_ttl_based_on_image(url)
+      with {:ok, data} <- get_cached_or_parse(url),
+           {:ok, _} <- set_ttl_based_on_image(data, url) do
+        {:ok, data}
+      else
+        error ->
+          Logger.error(fn -> "Rich media error: #{inspect(error)}" end)
+      end
+    end
+
+    defp get_cached_or_parse(url) do
+      case Cachex.fetch!(:rich_media_cache, url, fn _ -> {:commit, parse_url(url)} end) do
+        {:ok, _data} = res ->
+          res
+
+        {:error, _} = e ->
+          ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000)
+          Cachex.expire(:rich_media_cache, url, ttl)
+          e
+      end
     end
   end
 
@@ -50,24 +61,23 @@ defmodule Pleroma.Web.RichMedia.Parser do
       config :pleroma, :rich_media,
         ttl_setters: [MyModule]
   """
-  @spec set_ttl_based_on_image({:ok, map()} | {:error, any()}, String.t()) ::
-          {:ok, map()} | {:error, any()}
-  def set_ttl_based_on_image({:ok, data}, url) do
-    with {:ok, nil} <- Cachex.ttl(:rich_media_cache, url),
-         {:ok, ttl} when is_number(ttl) <- get_ttl_from_image(data, url) do
-      Cachex.expire_at(:rich_media_cache, url, ttl * 1000)
-      {:ok, data}
-    else
+  @spec set_ttl_based_on_image(map(), String.t()) ::
+          {:ok, Integer.t() | :noop} | {:error, :no_key}
+  def set_ttl_based_on_image(data, url) do
+    case get_ttl_from_image(data, url) do
+      {:ok, ttl} when is_number(ttl) ->
+        ttl = ttl * 1000
+
+        case Cachex.expire_at(:rich_media_cache, url, ttl) do
+          {:ok, true} -> {:ok, ttl}
+          {:ok, false} -> {:error, :no_key}
+        end
+
       _ ->
-        {:ok, data}
+        {:ok, :noop}
     end
   end
 
-  def set_ttl_based_on_image({:error, _} = error, _) do
-    Logger.error("parsing error: #{inspect(error)}")
-    error
-  end
-
   defp get_ttl_from_image(data, url) do
     [:rich_media, :ttl_setters]
     |> Pleroma.Config.get()
diff --git a/test/web/rich_media/aws_signed_url_test.exs b/test/web/rich_media/aws_signed_url_test.exs
index a21f3c935..1ceae1a31 100644
--- a/test/web/rich_media/aws_signed_url_test.exs
+++ b/test/web/rich_media/aws_signed_url_test.exs
@@ -55,7 +55,7 @@ defmodule Pleroma.Web.RichMedia.TTL.AwsSignedUrlTest do
     Cachex.put(:rich_media_cache, url, metadata)
 
-    Pleroma.Web.RichMedia.Parser.set_ttl_based_on_image({:ok, metadata}, url)
+    Pleroma.Web.RichMedia.Parser.set_ttl_based_on_image(metadata, url)
 
     {:ok, cache_ttl} = Cachex.ttl(:rich_media_cache, url)
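
The core of the parser.ex change above is that a failed fetch is now cached like a success, but only for a bounded backoff window, so repeated requests for the same URL no longer each wait out the full rich media timeout. Below is a minimal standalone sketch of that pattern, assuming a Cachex cache named :example_cache has already been started (e.g. via Cachex.start_link(:example_cache)) and a caller-supplied fetch_fun that returns {:ok, data} or {:error, reason}; module, cache and function names here are illustrative, not Pleroma's:

defmodule BackoffCacheSketch do
  # Hypothetical helper illustrating the cache-with-failure-backoff approach.
  @failure_backoff 60_000

  def get_cached_or_fetch(key, fetch_fun) do
    # Cachex.fetch!/3 runs the fallback only on a cache miss and commits
    # whatever it returns, including {:error, _} tuples.
    case Cachex.fetch!(:example_cache, key, fn _ -> {:commit, fetch_fun.(key)} end) do
      {:ok, _data} = ok ->
        ok

      {:error, _} = error ->
        # Keep the failure cached for a bounded time so the expensive fetch is
        # not retried on every request, then let the entry expire.
        Cachex.expire(:example_cache, key, @failure_backoff)
        error
    end
  end
end

In the real parser.ex the backoff comes from Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) rather than a module attribute, and successful entries additionally get a TTL via set_ttl_based_on_image/2.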

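Instance operators can tune the new option like any other :rich_media key. A hypothetical override in an instance's own config file (the key and its 60_000 ms default come from the config.exs and cheatsheet changes above; the five-minute value is only an example):

config :pleroma, :rich_media,
  failure_backoff: 300_000

A larger value keeps known-failing URLs out of the fetch path for longer; a smaller one retries them sooner.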