From 637f5bc4311474c125bc1e3a5bbf3984079f5d43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?marcin=20miko=C5=82ajczak?= Date: Sat, 27 Apr 2024 16:29:36 +0200 Subject: Fix type in description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: marcin mikołajczak --- config/description.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'config') diff --git a/config/description.exs b/config/description.exs index 7a9c027de..9cc3d469e 100644 --- a/config/description.exs +++ b/config/description.exs @@ -3522,7 +3522,7 @@ config :pleroma, :config_description, [ }, %{ key: :initial_indexing_chunk_size, - type: :int, + type: :integer, description: "Amount of posts in a batch when running the initial indexing operation. Should probably not be more than 100000" <> " since there's a limit on maximum insert size", -- cgit v1.2.3 From ede414094f7b196d3ff129b8a23ba461ef80d29f Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Sun, 11 Feb 2024 16:11:52 -0500 Subject: RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved. --- config/config.exs | 3 ++- config/test.exs | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'config') diff --git a/config/config.exs b/config/config.exs index 32c8509be..bf8671145 100644 --- a/config/config.exs +++ b/config/config.exs @@ -575,7 +575,8 @@ config :pleroma, Oban, attachments_cleanup: 1, new_users_digest: 1, mute_expire: 5, - search_indexing: 10 + search_indexing: 10, + rich_media_expiration: 2 ], plugins: [Oban.Plugins.Pruner], crontab: [ diff --git a/config/test.exs b/config/test.exs index 80b01932c..5e642a112 100644 --- a/config/test.exs +++ b/config/test.exs @@ -174,6 +174,8 @@ config :pleroma, Pleroma.Uploaders.Uploader, timeout: 1_000 config :pleroma, Pleroma.Emoji.Loader, test_emoji: true +config :pleroma, Pleroma.Web.RichMedia.Backfill, provider: Pleroma.Web.RichMedia.Backfill + if File.exists?("./config/test.secret.exs") do import_config "test.secret.exs" else -- cgit v1.2.3 From df0734fcbf7adcd98e9bce38cc7aa18345aaf78d Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Sun, 11 Feb 2024 16:53:21 -0500 Subject: Increase the :max_body for Rich Media to 5MB Websites are increasingly getting more bloated with tricks like inlining content (e.g., CNN.com) which puts pages at or above 5MB. This value may still be too low. --- config/config.exs | 3 ++- config/test.exs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'config') diff --git a/config/config.exs b/config/config.exs index bf8671145..796e9073b 100644 --- a/config/config.exs +++ b/config/config.exs @@ -428,7 +428,8 @@ config :pleroma, :rich_media, Pleroma.Web.RichMedia.Parsers.OEmbed ], failure_backoff: 60_000, - ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl] + ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl], + max_body: 5_000_000 config :pleroma, :media_proxy, enabled: false, diff --git a/config/test.exs b/config/test.exs index 5e642a112..9b4113dd5 100644 --- a/config/test.exs +++ b/config/test.exs @@ -61,7 +61,8 @@ config :tesla, adapter: Tesla.Mock config :pleroma, :rich_media, enabled: false, ignore_hosts: [], - ignore_tld: ["local", "localdomain", "lan"] + ignore_tld: ["local", "localdomain", "lan"], + max_body: 2_000_000 config :pleroma, :instance, multi_factor_authentication: [ -- cgit v1.2.3 From d21aa1a77cbda323ae2e82ea7910e076b6011571 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Sun, 18 Feb 2024 22:24:27 -0500 Subject: Respect the TTL returned in OpenGraph tags --- config/config.exs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'config') diff --git a/config/config.exs b/config/config.exs index 796e9073b..b69044a2b 100644 --- a/config/config.exs +++ b/config/config.exs @@ -428,7 +428,10 @@ config :pleroma, :rich_media, Pleroma.Web.RichMedia.Parsers.OEmbed ], failure_backoff: 60_000, - ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl], + ttl_setters: [ + Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl, + Pleroma.Web.RichMedia.Parser.TTL.Opengraph + ], max_body: 5_000_000 config :pleroma, :media_proxy, -- cgit v1.2.3