From e4ac2a7cd69aa5e87d9dc277c0271e15466e3215 Mon Sep 17 00:00:00 2001 From: tusooa Date: Fri, 16 Dec 2022 02:56:32 -0500 Subject: Detail backup states --- lib/pleroma/ecto_enums.ex | 8 +++ lib/pleroma/user/backup.ex | 116 +++++++++++++++++++++++++++-------- lib/pleroma/workers/backup_worker.ex | 2 +- 3 files changed, 101 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/ecto_enums.ex b/lib/pleroma/ecto_enums.ex index a4890b489..b346b39d6 100644 --- a/lib/pleroma/ecto_enums.ex +++ b/lib/pleroma/ecto_enums.ex @@ -27,3 +27,11 @@ defenum(Pleroma.DataMigration.State, failed: 4, manual: 5 ) + +defenum(Pleroma.User.Backup.State, + pending: 1, + running: 2, + complete: 3, + failed: 4, + invalid: 5 +) diff --git a/lib/pleroma/user/backup.ex b/lib/pleroma/user/backup.ex index 9df010605..74001f9c3 100644 --- a/lib/pleroma/user/backup.ex +++ b/lib/pleroma/user/backup.ex @@ -15,6 +15,7 @@ defmodule Pleroma.User.Backup do alias Pleroma.Bookmark alias Pleroma.Repo alias Pleroma.User + alias Pleroma.User.Backup.State alias Pleroma.Web.ActivityPub.ActivityPub alias Pleroma.Web.ActivityPub.Transmogrifier alias Pleroma.Web.ActivityPub.UserView @@ -25,12 +26,16 @@ defmodule Pleroma.User.Backup do field(:file_name, :string) field(:file_size, :integer, default: 0) field(:processed, :boolean, default: false) + field(:state, State, default: :invalid) + field(:processed_number, :integer, default: 0) belongs_to(:user, User, type: FlakeId.Ecto.CompatType) timestamps() end + @report_every 100 + def create(user, admin_id \\ nil) do with :ok <- validate_limit(user, admin_id), {:ok, backup} <- user |> new() |> Repo.insert() do @@ -46,7 +51,8 @@ defmodule Pleroma.User.Backup do %__MODULE__{ user_id: user.id, content_type: "application/zip", - file_name: name + file_name: name, + state: :pending } end @@ -109,27 +115,75 @@ defmodule Pleroma.User.Backup do def get(id), do: Repo.get(__MODULE__, id) + defp set_state(backup, state, processed_number \\ nil) do + struct = + %{state: state} + |> Pleroma.Maps.put_if_present(:processed_number, processed_number) + + backup + |> cast(struct, [:state, :processed_number]) + |> Repo.update() + end + def process(%__MODULE__{} = backup) do - with {:ok, zip_file} <- export(backup), - {:ok, %{size: size}} <- File.stat(zip_file), - {:ok, _upload} <- upload(backup, zip_file) do - backup - |> cast(%{file_size: size, processed: true}, [:file_size, :processed]) - |> Repo.update() + set_state(backup, :running, 0) + + current_pid = self() + + Task.Supervisor.async_nolink( + Pleroma.TaskSupervisor, + fn -> + with {:ok, zip_file} <- export(backup, current_pid), + {:ok, %{size: size}} <- File.stat(zip_file), + {:ok, _upload} <- upload(backup, zip_file) do + backup + |> cast( + %{ + file_size: size, + processed: true, + state: :complete + }, + [:file_size, :processed, :state] + ) + |> Repo.update() + + send(current_pid, :completed) + end + end + ) + + wait_backup(backup, backup.processed_number) + end + + defp wait_backup(backup, current_processed) do + receive do + {:progress, new_processed} -> + total_processed = current_processed + new_processed + + with {:ok, updated_backup} <- set_state(backup, :running, total_processed) do + wait_backup(updated_backup, total_processed) + else + _ -> wait_backup(backup, total_processed) + end + + :completed -> + {:ok, get(backup.id)} + after + 30_000 -> set_state(backup, :failed) end end @files ['actor.json', 'outbox.json', 'likes.json', 'bookmarks.json'] - def export(%__MODULE__{} = backup) do + def export(%__MODULE__{} = backup, caller_pid \\ nil) do backup = Repo.preload(backup, :user) name = String.trim_trailing(backup.file_name, ".zip") dir = dir(name) with :ok <- File.mkdir(dir), - :ok <- actor(dir, backup.user), - :ok <- statuses(dir, backup.user), - :ok <- likes(dir, backup.user), - :ok <- bookmarks(dir, backup.user), + :ok <- actor(dir, backup.user, caller_pid), + :ok <- statuses(dir, backup.user, caller_pid), + :ok <- likes(dir, backup.user, caller_pid), + :ok <- bookmarks(dir, backup.user, caller_pid), {:ok, zip_path} <- :zip.create(String.to_charlist(dir <> ".zip"), @files, cwd: dir), {:ok, _} <- File.rm_rf(dir) do {:ok, to_string(zip_path)} @@ -157,11 +211,12 @@ defmodule Pleroma.User.Backup do end end - defp actor(dir, user) do + defp actor(dir, user, caller_pid) do with {:ok, json} <- UserView.render("user.json", %{user: user}) |> Map.merge(%{"likes" => "likes.json", "bookmarks" => "bookmarks.json"}) |> Jason.encode() do + send(caller_pid, {:progress, 1}) File.write(Path.join(dir, "actor.json"), json) end end @@ -180,7 +235,9 @@ defmodule Pleroma.User.Backup do ) end - defp write(query, dir, name, fun) do + defp should_report?(num), do: rem(num, @report_every) == 0 + + defp write(query, dir, name, fun, caller_pid) do path = Path.join(dir, "#{name}.json") with {:ok, file} <- File.open(path, [:write, :utf8]), @@ -192,35 +249,41 @@ defmodule Pleroma.User.Backup do with {:ok, data} <- fun.(i), {:ok, str} <- Jason.encode(data), :ok <- IO.write(file, str <> ",\n") do + if should_report?(acc + 1) do + send(caller_pid, {:progress, @report_every}) + end + acc + 1 else _ -> acc end end) + send(caller_pid, {:progress, rem(total, @report_every)}) + with :ok <- :file.pwrite(file, {:eof, -2}, "\n],\n \"totalItems\": #{total}}") do File.close(file) end end end - defp bookmarks(dir, %{id: user_id} = _user) do + defp bookmarks(dir, %{id: user_id} = _user, caller_pid) do Bookmark |> where(user_id: ^user_id) |> join(:inner, [b], activity in assoc(b, :activity)) |> select([b, a], %{id: b.id, object: fragment("(?)->>'object'", a.data)}) - |> write(dir, "bookmarks", fn a -> {:ok, a.object} end) + |> write(dir, "bookmarks", fn a -> {:ok, a.object} end, caller_pid) end - defp likes(dir, user) do + defp likes(dir, user, caller_pid) do user.ap_id |> Activity.Queries.by_actor() |> Activity.Queries.by_type("Like") |> select([like], %{id: like.id, object: fragment("(?)->>'object'", like.data)}) - |> write(dir, "likes", fn a -> {:ok, a.object} end) + |> write(dir, "likes", fn a -> {:ok, a.object} end, caller_pid) end - defp statuses(dir, user) do + defp statuses(dir, user, caller_pid) do opts = %{} |> Map.put(:type, ["Create", "Announce"]) @@ -233,10 +296,15 @@ defmodule Pleroma.User.Backup do ] |> Enum.concat() |> ActivityPub.fetch_activities_query(opts) - |> write(dir, "outbox", fn a -> - with {:ok, activity} <- Transmogrifier.prepare_outgoing(a.data) do - {:ok, Map.delete(activity, "@context")} - end - end) + |> write( + dir, + "outbox", + fn a -> + with {:ok, activity} <- Transmogrifier.prepare_outgoing(a.data) do + {:ok, Map.delete(activity, "@context")} + end + end, + caller_pid + ) end end diff --git a/lib/pleroma/workers/backup_worker.ex b/lib/pleroma/workers/backup_worker.ex index 12ee70f00..a485ddb4b 100644 --- a/lib/pleroma/workers/backup_worker.ex +++ b/lib/pleroma/workers/backup_worker.ex @@ -51,7 +51,7 @@ defmodule Pleroma.Workers.BackupWorker do end @impl Oban.Worker - def timeout(_job), do: :timer.seconds(900) + def timeout(_job), do: :infinity defp has_email?(user) do not is_nil(user.email) and user.email != "" -- cgit v1.2.3 From bdd63d2a3a5fe76f8a8ac1f71cb8698be85bfef4 Mon Sep 17 00:00:00 2001 From: tusooa Date: Fri, 16 Dec 2022 14:10:48 -0500 Subject: Expose backup status via Pleroma API --- .../web/api_spec/operations/pleroma_backup_operation.ex | 8 ++++++-- lib/pleroma/web/pleroma_api/views/backup_view.ex | 10 ++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex b/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex index 45fa2b058..5655527e0 100644 --- a/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex +++ b/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex @@ -64,7 +64,9 @@ defmodule Pleroma.Web.ApiSpec.PleromaBackupOperation do content_type: %Schema{type: :string}, file_name: %Schema{type: :string}, file_size: %Schema{type: :integer}, - processed: %Schema{type: :boolean} + processed: %Schema{type: :boolean, description: "whether this backup has succeeded"}, + state: %Schema{type: :string, description: "the state of the backup", enum: ["pending", "running", "complete", "failed"]}, + processed_number: %Schema{type: :integer, description: "the number of records processed"} }, example: %{ "content_type" => "application/zip", @@ -72,7 +74,9 @@ defmodule Pleroma.Web.ApiSpec.PleromaBackupOperation do "https://cofe.fe:4000/media/backups/archive-foobar-20200908T164207-Yr7vuT5Wycv-sN3kSN2iJ0k-9pMo60j9qmvRCdDqIew.zip", "file_size" => 4105, "inserted_at" => "2020-09-08T16:42:07.000Z", - "processed" => true + "processed" => true, + "state" => "complete", + "processed_number": 20 } } end diff --git a/lib/pleroma/web/pleroma_api/views/backup_view.ex b/lib/pleroma/web/pleroma_api/views/backup_view.ex index d778590f0..02d891791 100644 --- a/lib/pleroma/web/pleroma_api/views/backup_view.ex +++ b/lib/pleroma/web/pleroma_api/views/backup_view.ex @@ -9,12 +9,22 @@ defmodule Pleroma.Web.PleromaAPI.BackupView do alias Pleroma.Web.CommonAPI.Utils def render("show.json", %{backup: %Backup{} = backup}) do + # To deal with records before the migration + state = + if backup.state == :invalid do + if backup.processed, do: :complete, else: :failed + else + backup.state + end + %{ id: backup.id, content_type: backup.content_type, url: download_url(backup), file_size: backup.file_size, processed: backup.processed, + state: to_string(state), + processed_number: backup.processed_number, inserted_at: Utils.to_masto_date(backup.inserted_at) } end -- cgit v1.2.3 From 46ab97d72145a122cc0e0ada3cd9f0af5449bdf3 Mon Sep 17 00:00:00 2001 From: tusooa Date: Fri, 16 Dec 2022 14:13:46 -0500 Subject: Simplify backup update clause --- lib/pleroma/user/backup.ex | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/user/backup.ex b/lib/pleroma/user/backup.ex index 74001f9c3..97b8718c1 100644 --- a/lib/pleroma/user/backup.ex +++ b/lib/pleroma/user/backup.ex @@ -160,11 +160,8 @@ defmodule Pleroma.User.Backup do {:progress, new_processed} -> total_processed = current_processed + new_processed - with {:ok, updated_backup} <- set_state(backup, :running, total_processed) do - wait_backup(updated_backup, total_processed) - else - _ -> wait_backup(backup, total_processed) - end + set_state(backup, :running, total_processed) + wait_backup(backup, total_processed) :completed -> {:ok, get(backup.id)} -- cgit v1.2.3 From a1b95922c5c4a4351b52c0f7a484a96efed08be9 Mon Sep 17 00:00:00 2001 From: tusooa Date: Fri, 16 Dec 2022 14:16:07 -0500 Subject: Fix compile error --- lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex b/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex index 5655527e0..ea3fc230a 100644 --- a/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex +++ b/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex @@ -76,7 +76,7 @@ defmodule Pleroma.Web.ApiSpec.PleromaBackupOperation do "inserted_at" => "2020-09-08T16:42:07.000Z", "processed" => true, "state" => "complete", - "processed_number": 20 + "processed_number" => 20 } } end -- cgit v1.2.3 From 070fbb89e1bc51669b03555b72c6852a769f1e15 Mon Sep 17 00:00:00 2001 From: tusooa Date: Fri, 16 Dec 2022 14:29:21 -0500 Subject: Lint --- .../web/api_spec/operations/pleroma_backup_operation.ex | 6 +++++- lib/pleroma/web/pleroma_api/views/backup_view.ex | 10 +++++----- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex b/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex index ea3fc230a..400f3825d 100644 --- a/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex +++ b/lib/pleroma/web/api_spec/operations/pleroma_backup_operation.ex @@ -65,7 +65,11 @@ defmodule Pleroma.Web.ApiSpec.PleromaBackupOperation do file_name: %Schema{type: :string}, file_size: %Schema{type: :integer}, processed: %Schema{type: :boolean, description: "whether this backup has succeeded"}, - state: %Schema{type: :string, description: "the state of the backup", enum: ["pending", "running", "complete", "failed"]}, + state: %Schema{ + type: :string, + description: "the state of the backup", + enum: ["pending", "running", "complete", "failed"] + }, processed_number: %Schema{type: :integer, description: "the number of records processed"} }, example: %{ diff --git a/lib/pleroma/web/pleroma_api/views/backup_view.ex b/lib/pleroma/web/pleroma_api/views/backup_view.ex index 02d891791..20403aeee 100644 --- a/lib/pleroma/web/pleroma_api/views/backup_view.ex +++ b/lib/pleroma/web/pleroma_api/views/backup_view.ex @@ -11,11 +11,11 @@ defmodule Pleroma.Web.PleromaAPI.BackupView do def render("show.json", %{backup: %Backup{} = backup}) do # To deal with records before the migration state = - if backup.state == :invalid do - if backup.processed, do: :complete, else: :failed - else - backup.state - end + if backup.state == :invalid do + if backup.processed, do: :complete, else: :failed + else + backup.state + end %{ id: backup.id, -- cgit v1.2.3 From 7d3e4eaeb94a5381d56a3281e5813b7e0c63c8dd Mon Sep 17 00:00:00 2001 From: tusooa Date: Sun, 18 Dec 2022 15:55:52 -0500 Subject: Log errors more extensively --- lib/pleroma/user/backup.ex | 105 ++++++++++++++++++++++++++++++++------------- 1 file changed, 75 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/user/backup.ex b/lib/pleroma/user/backup.ex index 97b8718c1..cb9a40ba1 100644 --- a/lib/pleroma/user/backup.ex +++ b/lib/pleroma/user/backup.ex @@ -9,6 +9,7 @@ defmodule Pleroma.User.Backup do import Ecto.Query import Pleroma.Web.Gettext + require Logger require Pleroma.Constants alias Pleroma.Activity @@ -130,48 +131,79 @@ defmodule Pleroma.User.Backup do current_pid = self() - Task.Supervisor.async_nolink( - Pleroma.TaskSupervisor, - fn -> - with {:ok, zip_file} <- export(backup, current_pid), - {:ok, %{size: size}} <- File.stat(zip_file), - {:ok, _upload} <- upload(backup, zip_file) do - backup - |> cast( - %{ - file_size: size, - processed: true, - state: :complete - }, - [:file_size, :processed, :state] - ) - |> Repo.update() - - send(current_pid, :completed) - end - end - ) + task = + Task.Supervisor.async_nolink( + Pleroma.TaskSupervisor, + __MODULE__, + :do_process, + [backup, current_pid] + ) - wait_backup(backup, backup.processed_number) + wait_backup(backup, backup.processed_number, task) end - defp wait_backup(backup, current_processed) do + def do_process(backup, current_pid) do + with {:ok, zip_file} <- export(backup, current_pid), + {:ok, %{size: size}} <- File.stat(zip_file), + {:ok, _upload} <- upload(backup, zip_file) do + backup + |> cast( + %{ + file_size: size, + processed: true, + state: :complete + }, + [:file_size, :processed, :state] + ) + |> Repo.update() + end + end + + defp wait_backup(backup, current_processed, task) do receive do {:progress, new_processed} -> total_processed = current_processed + new_processed set_state(backup, :running, total_processed) - wait_backup(backup, total_processed) + wait_backup(backup, total_processed, task) + + {:DOWN, _ref, _proc, _pid, reason} -> + backup = get(backup.id) + + if reason != :normal do + Logger.error("Backup #{backup.id} process ended abnormally: #{inspect(reason)}") + + {:ok, backup} = set_state(backup, :failed) - :completed -> - {:ok, get(backup.id)} + {:error, + %{ + backup: backup, + reason: :exit, + details: reason + }} + else + {:ok, backup} + end after - 30_000 -> set_state(backup, :failed) + 30_000 -> + Logger.error( + "Backup #{backup.id} timed out after no response for 30 seconds, terminating" + ) + + Task.Supervisor.terminate_child(Pleroma.TaskSupervisor, task.pid) + + {:ok, backup} = set_state(backup, :failed) + + {:error, + %{ + backup: backup, + reason: :timeout + }} end end @files ['actor.json', 'outbox.json', 'likes.json', 'bookmarks.json'] - def export(%__MODULE__{} = backup, caller_pid \\ nil) do + def export(%__MODULE__{} = backup, caller_pid) do backup = Repo.preload(backup, :user) name = String.trim_trailing(backup.file_name, ".zip") dir = dir(name) @@ -243,7 +275,12 @@ defmodule Pleroma.User.Backup do query |> Pleroma.Repo.chunk_stream(100) |> Enum.reduce(0, fn i, acc -> - with {:ok, data} <- fun.(i), + with {:ok, data} <- + (try do + fun.(i) + rescue + e -> {:error, e} + end), {:ok, str} <- Jason.encode(data), :ok <- IO.write(file, str <> ",\n") do if should_report?(acc + 1) do @@ -252,7 +289,15 @@ defmodule Pleroma.User.Backup do acc + 1 else - _ -> acc + {:error, e} -> + Logger.warn( + "Error processing backup item: #{inspect(e)}\n The item is: #{inspect(i)}" + ) + + acc + + _ -> + acc end end) -- cgit v1.2.3 From 179efd94677d1d30bdbbbbaafc899c8c908181d2 Mon Sep 17 00:00:00 2001 From: tusooa Date: Sat, 24 Dec 2022 00:17:17 -0500 Subject: Make backup parameters configurable --- lib/pleroma/user/backup.ex | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/user/backup.ex b/lib/pleroma/user/backup.ex index cb9a40ba1..447fca2a1 100644 --- a/lib/pleroma/user/backup.ex +++ b/lib/pleroma/user/backup.ex @@ -35,8 +35,6 @@ defmodule Pleroma.User.Backup do timestamps() end - @report_every 100 - def create(user, admin_id \\ nil) do with :ok <- validate_limit(user, admin_id), {:ok, backup} <- user |> new() |> Repo.insert() do @@ -160,6 +158,8 @@ defmodule Pleroma.User.Backup do end defp wait_backup(backup, current_processed, task) do + wait_time = Pleroma.Config.get([__MODULE__, :process_wait_time]) + receive do {:progress, new_processed} -> total_processed = current_processed + new_processed @@ -175,6 +175,8 @@ defmodule Pleroma.User.Backup do {:ok, backup} = set_state(backup, :failed) + cleanup(backup) + {:error, %{ backup: backup, @@ -185,15 +187,17 @@ defmodule Pleroma.User.Backup do {:ok, backup} end after - 30_000 -> + wait_time -> Logger.error( - "Backup #{backup.id} timed out after no response for 30 seconds, terminating" + "Backup #{backup.id} timed out after no response for #{wait_time}ms, terminating" ) Task.Supervisor.terminate_child(Pleroma.TaskSupervisor, task.pid) {:ok, backup} = set_state(backup, :failed) + cleanup(backup) + {:error, %{ backup: backup, @@ -205,8 +209,7 @@ defmodule Pleroma.User.Backup do @files ['actor.json', 'outbox.json', 'likes.json', 'bookmarks.json'] def export(%__MODULE__{} = backup, caller_pid) do backup = Repo.preload(backup, :user) - name = String.trim_trailing(backup.file_name, ".zip") - dir = dir(name) + dir = backup_tempdir(backup) with :ok <- File.mkdir(dir), :ok <- actor(dir, backup.user, caller_pid), @@ -264,16 +267,28 @@ defmodule Pleroma.User.Backup do ) end - defp should_report?(num), do: rem(num, @report_every) == 0 + defp should_report?(num, chunk_size), do: rem(num, chunk_size) == 0 + + defp backup_tempdir(backup) do + name = String.trim_trailing(backup.file_name, ".zip") + dir(name) + end + + defp cleanup(backup) do + dir = backup_tempdir(backup) + File.rm_rf(dir) + end defp write(query, dir, name, fun, caller_pid) do path = Path.join(dir, "#{name}.json") + chunk_size = Pleroma.Config.get([__MODULE__, :process_chunk_size]) + with {:ok, file} <- File.open(path, [:write, :utf8]), :ok <- write_header(file, name) do total = query - |> Pleroma.Repo.chunk_stream(100) + |> Pleroma.Repo.chunk_stream(chunk_size, _returns_as = :one, timeout: :infinity) |> Enum.reduce(0, fn i, acc -> with {:ok, data} <- (try do @@ -283,8 +298,8 @@ defmodule Pleroma.User.Backup do end), {:ok, str} <- Jason.encode(data), :ok <- IO.write(file, str <> ",\n") do - if should_report?(acc + 1) do - send(caller_pid, {:progress, @report_every}) + if should_report?(acc + 1, chunk_size) do + send(caller_pid, {:progress, chunk_size}) end acc + 1 @@ -301,7 +316,7 @@ defmodule Pleroma.User.Backup do end end) - send(caller_pid, {:progress, rem(total, @report_every)}) + send(caller_pid, {:progress, rem(total, chunk_size)}) with :ok <- :file.pwrite(file, {:eof, -2}, "\n],\n \"totalItems\": #{total}}") do File.close(file) -- cgit v1.2.3