summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Felder <feld@feld.me>2024-10-29 16:00:18 -0400
committerMark Felder <feld@feld.me>2024-10-29 16:00:18 -0400
commitd2de251c4d018c7d517d399d7d5e0e20d853972f (patch)
tree919ca21f13782085fe6a0ec0cafecf96a4733023
parent6099a94dbc26c9f86e340769af34c7b28725d831 (diff)
downloadpleroma-d2de251c4d018c7d517d399d7d5e0e20d853972f.tar.gz
pleroma-d2de251c4d018c7d517d399d7d5e0e20d853972f.zip
Pleroma.Upload.Filter.Dedupe: sharding directory structure
Dedupe now uses a three-level sharding directory structure to improve performance when many files are uploaded and stored on a filesystem instead of an object store. (Note: Minio is still affected, as it still uses a traditional filesystem.) This does not help if you already have hundreds of thousands of files uploaded. The media URLs are permanently part of the activity, so the files cannot be relocated. A motivated user could write a tool to move the files and perhaps write an Nginx or equivalent redirect to keep the files accessible, but that is beyond the scope of this change.
-rw-r--r--changelog.d/dedupe-sharding.change1
-rw-r--r--lib/pleroma/upload/filter/dedupe.ex10
-rw-r--r--test/pleroma/object_test.exs8
-rw-r--r--test/pleroma/upload/filter/dedupe_test.exs4
-rw-r--r--test/pleroma/upload_test.exs6
5 files changed, 21 insertions, 8 deletions
diff --git a/changelog.d/dedupe-sharding.change b/changelog.d/dedupe-sharding.change
new file mode 100644
index 000000000..2e140d8a2
--- /dev/null
+++ b/changelog.d/dedupe-sharding.change
@@ -0,0 +1 @@
+Dedupe upload filter now uses a three-level sharding directory structure
diff --git a/lib/pleroma/upload/filter/dedupe.ex b/lib/pleroma/upload/filter/dedupe.ex
index ef793d390..7b278d299 100644
--- a/lib/pleroma/upload/filter/dedupe.ex
+++ b/lib/pleroma/upload/filter/dedupe.ex
@@ -17,8 +17,16 @@ defmodule Pleroma.Upload.Filter.Dedupe do
|> Base.encode16(case: :lower)
filename = shasum <> "." <> extension
- {:ok, :filtered, %Upload{upload | id: shasum, path: filename}}
+
+ {:ok, :filtered, %Upload{upload | id: shasum, path: shard_path(filename)}}
end
def filter(_), do: {:ok, :noop}
+
+ @spec shard_path(String.t()) :: String.t()
+ def shard_path(
+ <<a::binary-size(2), b::binary-size(2), c::binary-size(2), _::binary>> = filename
+ ) do
+ Path.join([a, b, c, filename])
+ end
end
diff --git a/test/pleroma/object_test.exs b/test/pleroma/object_test.exs
index b3c528e32..ed5c2b6c8 100644
--- a/test/pleroma/object_test.exs
+++ b/test/pleroma/object_test.exs
@@ -174,8 +174,9 @@ defmodule Pleroma.ObjectTest do
filename = Path.basename(href)
- assert {:ok, files} = File.ls(uploads_dir)
- assert filename in files
+ expected_path = Path.join([uploads_dir, Pleroma.Upload.Filter.Dedupe.shard_path(filename)])
+
+ assert File.exists?(expected_path)
Object.delete(note)
@@ -183,8 +184,7 @@ defmodule Pleroma.ObjectTest do
assert Object.get_by_id(note.id).data["deleted"]
assert Object.get_by_id(attachment.id) == nil
- assert {:ok, files} = File.ls(uploads_dir)
- refute filename in files
+ refute File.exists?(expected_path)
end
test "with objects that have legacy data.url attribute" do
diff --git a/test/pleroma/upload/filter/dedupe_test.exs b/test/pleroma/upload/filter/dedupe_test.exs
index 29c181509..cd5ce121b 100644
--- a/test/pleroma/upload/filter/dedupe_test.exs
+++ b/test/pleroma/upload/filter/dedupe_test.exs
@@ -23,10 +23,12 @@ defmodule Pleroma.Upload.Filter.DedupeTest do
tempfile: Path.absname("test/fixtures/image_tmp.jpg")
}
+ expected_path = Dedupe.shard_path(@shasum <> ".jpg")
+
assert {
:ok,
:filtered,
- %Pleroma.Upload{id: @shasum, path: @shasum <> ".jpg"}
+ %Pleroma.Upload{id: @shasum, path: ^expected_path}
} = Dedupe.filter(upload)
end
end
diff --git a/test/pleroma/upload_test.exs b/test/pleroma/upload_test.exs
index facb634c3..5fd62fa43 100644
--- a/test/pleroma/upload_test.exs
+++ b/test/pleroma/upload_test.exs
@@ -149,6 +149,9 @@ defmodule Pleroma.UploadTest do
test "copies the file to the configured folder with deduping" do
File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
+ expected_filename = "e30397b58d226d6583ab5b8b3c5defb0c682bda5c31ef07a9f57c1c4986e3781.jpg"
+
+ expected_path = Pleroma.Upload.Filter.Dedupe.shard_path(expected_filename)
file = %Plug.Upload{
content_type: "image/jpeg",
@@ -159,8 +162,7 @@ defmodule Pleroma.UploadTest do
{:ok, data} = Upload.store(file, filters: [Pleroma.Upload.Filter.Dedupe])
assert List.first(data["url"])["href"] ==
- Pleroma.Upload.base_url() <>
- "e30397b58d226d6583ab5b8b3c5defb0c682bda5c31ef07a9f57c1c4986e3781.jpg"
+ Path.join([Pleroma.Upload.base_url(), expected_path])
end
test "copies the file to the configured folder without deduping" do