summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/3907.skip0
-rw-r--r--changelog.d/logger-metadata.add1
-rw-r--r--changelog.d/qdrant_search.add1
-rw-r--r--config/config.exs17
-rw-r--r--config/dev.exs4
-rw-r--r--docs/configuration/search.md24
-rw-r--r--lib/mix/tasks/pleroma/search/indexer.ex80
-rw-r--r--lib/pleroma/search/database_search.ex6
-rw-r--r--lib/pleroma/search/meilisearch.ex6
-rw-r--r--lib/pleroma/search/qdrant_search.ex182
-rw-r--r--lib/pleroma/search/search_backend.ex10
-rw-r--r--lib/pleroma/web/activity_pub/activity_pub_controller.ex8
-rw-r--r--lib/pleroma/web/endpoint.ex2
-rw-r--r--lib/pleroma/web/plugs/logger_metadata_path.ex12
-rw-r--r--lib/pleroma/web/plugs/logger_metadata_user.ex18
-rw-r--r--lib/pleroma/web/router.ex8
-rw-r--r--priv/gettext/config_descriptions.pot84
-rw-r--r--priv/gettext/errors.pot36
-rw-r--r--priv/gettext/oauth_scopes.pot40
-rw-r--r--supplemental/search/fastembed-api/Dockerfile9
-rw-r--r--supplemental/search/fastembed-api/README.md6
-rw-r--r--supplemental/search/fastembed-api/compose.yml5
-rw-r--r--supplemental/search/fastembed-api/fastembed-server.py27
-rw-r--r--supplemental/search/fastembed-api/requirements.txt4
-rw-r--r--test/pleroma/search/qdrant_search_test.exs199
25 files changed, 777 insertions, 12 deletions
diff --git a/changelog.d/3907.skip b/changelog.d/3907.skip
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/changelog.d/3907.skip
diff --git a/changelog.d/logger-metadata.add b/changelog.d/logger-metadata.add
new file mode 100644
index 000000000..6c627a972
--- /dev/null
+++ b/changelog.d/logger-metadata.add
@@ -0,0 +1 @@
+Logger metadata is now attached to some logs to help with troubleshooting and analysis
diff --git a/changelog.d/qdrant_search.add b/changelog.d/qdrant_search.add
new file mode 100644
index 000000000..9801131d1
--- /dev/null
+++ b/changelog.d/qdrant_search.add
@@ -0,0 +1 @@
+Add Qdrant/OpenAI embedding search
diff --git a/config/config.exs b/config/config.exs
index f861daf04..956d067f3 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -135,13 +135,13 @@ config :pleroma, Pleroma.Web.Endpoint,
config :logger, :console,
level: :debug,
format: "\n$time $metadata[$level] $message\n",
- metadata: [:request_id]
+ metadata: [:actor, :path, :type, :user]
config :logger, :ex_syslogger,
level: :debug,
ident: "pleroma",
format: "$metadata[$level] $message",
- metadata: [:request_id]
+ metadata: [:actor, :path, :type, :user]
config :mime, :types, %{
"application/xml" => ["xml"],
@@ -934,6 +934,19 @@ config :pleroma, Pleroma.Application,
config :pleroma, Pleroma.Uploaders.Uploader, timeout: 30_000
+config :pleroma, Pleroma.Search.QdrantSearch,
+ qdrant_url: "http://127.0.0.1:6333/",
+ qdrant_api_key: "",
+ openai_url: "http://127.0.0.1:11345",
+ # The healthcheck url has to be set to nil when used with the real openai
+ # API, as it doesn't have a healthcheck endpoint.
+ openai_healthcheck_url: "http://127.0.0.1:11345/health",
+ openai_model: "snowflake/snowflake-arctic-embed-xs",
+ openai_api_key: "",
+ qdrant_index_configuration: %{
+ vectors: %{size: 384, distance: "Cosine"}
+ }
+
# Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above.
import_config "#{Mix.env()}.exs"
diff --git a/config/dev.exs b/config/dev.exs
index fe8de5045..f23719fe3 100644
--- a/config/dev.exs
+++ b/config/dev.exs
@@ -35,8 +35,8 @@ config :pleroma, Pleroma.Emails.Mailer, adapter: Swoosh.Adapters.Local
# configured to run both http and https servers on
# different ports.
-# Do not include metadata nor timestamps in development logs
-config :logger, :console, format: "[$level] $message\n"
+# Do not include timestamps in development logs
+config :logger, :console, format: "$metadata[$level] $message\n"
# Set a higher stacktrace during development. Avoid configuring such
# in production as building large stacktraces may be expensive.
diff --git a/docs/configuration/search.md b/docs/configuration/search.md
index 0316c9bf4..d34f84d4f 100644
--- a/docs/configuration/search.md
+++ b/docs/configuration/search.md
@@ -10,6 +10,30 @@ To use built-in search that has no external dependencies, set the search module
While it has no external dependencies, it has problems with performance and relevancy.
+## QdrantSearch
+
+This uses the vector search engine [Qdrant](https://qdrant.tech) to search the posts in a vector space. This needs a way to generate embeddings and uses the [OpenAI API](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings). This is implemented by several project besides OpenAI itself, including the python-based fastembed-server found in `supplemental/search/fastembed-api`.
+
+The default settings will support a setup where both the fastembed server and Qdrant run on the same system as pleroma. To use it, set the search provider and run the fastembed server, see the README in `supplemental/search/fastembed-api`:
+
+> config :pleroma, Pleroma.Search, module: Pleroma.Search.QdrantSearch
+
+Then, start the Qdrant server, see [here](https://qdrant.tech/documentation/quick-start/) for instructions.
+
+You will also need to create the Qdrant index once by running `mix pleroma.search.indexer create_index`. Running `mix pleroma.search.indexer index` will retroactively index the last 100_000 activities.
+
+### Indexing and model options
+
+To see the available configuration options, check out the QdrantSearch section in `config/config.exs`.
+
+The default indexing option work for the default model (`snowflake-arctic-embed-xs`). To optimize for a low memory footprint, adjust the index configuration as described in the [Qdrant docs](https://qdrant.tech/documentation/guides/optimize/). See also [this blog post](https://qdrant.tech/articles/memory-consumption/) that goes into detail.
+
+Different embedding models will need different vector size settings. You can see a list of the models supported by the fastembed server [here](https://qdrant.github.io/fastembed/examples/Supported_Models), including their vector dimensions. These vector dimensions need to be set in the `qdrant_index_configuration`.
+
+E.g, If you want to use `sentence-transformers/all-MiniLM-L6-v2` as a model, you will not need to adjust things, because it and `snowflake-arctic-embed-xs` are both 384 dimensional models. If you want to use `snowflake/snowflake-arctic-embed-l`, you will need to adjust the `size` parameter in the `qdrant_index_configuration` to 1024, as it has a dimension of 1024.
+
+When using a different model, you will need do drop the index and recreate it (`mix pleroma.search.indexer drop_index` and `mix pleroma.search.indexer create_index`), as the different embeddings are not compatible with each other.
+
## Meilisearch
Note that it's quite a bit more memory hungry than PostgreSQL (around 4-5G for ~1.2 million
diff --git a/lib/mix/tasks/pleroma/search/indexer.ex b/lib/mix/tasks/pleroma/search/indexer.ex
new file mode 100644
index 000000000..81a9fced6
--- /dev/null
+++ b/lib/mix/tasks/pleroma/search/indexer.ex
@@ -0,0 +1,80 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Mix.Tasks.Pleroma.Search.Indexer do
+ import Mix.Pleroma
+ import Ecto.Query
+
+ alias Pleroma.Workers.SearchIndexingWorker
+
+ def run(["create_index"]) do
+ start_pleroma()
+
+ with :ok <- Pleroma.Config.get([Pleroma.Search, :module]).create_index() do
+ IO.puts("Index created")
+ else
+ e -> IO.puts("Could not create index: #{inspect(e)}")
+ end
+ end
+
+ def run(["drop_index"]) do
+ start_pleroma()
+
+ with :ok <- Pleroma.Config.get([Pleroma.Search, :module]).drop_index() do
+ IO.puts("Index dropped")
+ else
+ e -> IO.puts("Could not drop index: #{inspect(e)}")
+ end
+ end
+
+ def run(["index" | options]) do
+ {options, [], []} =
+ OptionParser.parse(
+ options,
+ strict: [
+ limit: :integer
+ ]
+ )
+
+ start_pleroma()
+
+ limit = Keyword.get(options, :limit, 100_000)
+
+ per_step = 1000
+ chunks = max(div(limit, per_step), 1)
+
+ 1..chunks
+ |> Enum.each(fn step ->
+ q =
+ from(a in Pleroma.Activity,
+ limit: ^per_step,
+ offset: ^per_step * (^step - 1),
+ select: [:id],
+ order_by: [desc: :id]
+ )
+
+ {:ok, ids} =
+ Pleroma.Repo.transaction(fn ->
+ Pleroma.Repo.stream(q, timeout: :infinity)
+ |> Enum.map(fn a ->
+ a.id
+ end)
+ end)
+
+ IO.puts("Got #{length(ids)} activities, adding to indexer")
+
+ ids
+ |> Enum.chunk_every(100)
+ |> Enum.each(fn chunk ->
+ IO.puts("Adding #{length(chunk)} activities to indexing queue")
+
+ chunk
+ |> Enum.map(fn id ->
+ SearchIndexingWorker.new(%{"op" => "add_to_index", "activity" => id})
+ end)
+ |> Oban.insert_all()
+ end)
+ end)
+ end
+end
diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex
index 11e99e7f1..c6fe8a9bd 100644
--- a/lib/pleroma/search/database_search.ex
+++ b/lib/pleroma/search/database_search.ex
@@ -49,6 +49,12 @@ defmodule Pleroma.Search.DatabaseSearch do
def remove_from_index(_object), do: :ok
@impl true
+ def create_index, do: :ok
+
+ @impl true
+ def drop_index, do: :ok
+
+ @impl true
def healthcheck_endpoints, do: nil
def maybe_restrict_author(query, %User{} = author) do
diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex
index 08c2f3d86..9bba5b30f 100644
--- a/lib/pleroma/search/meilisearch.ex
+++ b/lib/pleroma/search/meilisearch.ex
@@ -10,6 +10,12 @@ defmodule Pleroma.Search.Meilisearch do
@behaviour Pleroma.Search.SearchBackend
+ @impl true
+ def create_index, do: :ok
+
+ @impl true
+ def drop_index, do: :ok
+
defp meili_headers do
private_key = Config.get([Pleroma.Search.Meilisearch, :private_key])
diff --git a/lib/pleroma/search/qdrant_search.ex b/lib/pleroma/search/qdrant_search.ex
new file mode 100644
index 000000000..b659bb682
--- /dev/null
+++ b/lib/pleroma/search/qdrant_search.ex
@@ -0,0 +1,182 @@
+defmodule Pleroma.Search.QdrantSearch do
+ @behaviour Pleroma.Search.SearchBackend
+ import Ecto.Query
+
+ alias Pleroma.Activity
+ alias Pleroma.Config.Getting, as: Config
+
+ alias __MODULE__.OpenAIClient
+ alias __MODULE__.QdrantClient
+
+ import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]
+ import Pleroma.Search.DatabaseSearch, only: [maybe_fetch: 3]
+
+ @impl true
+ def create_index do
+ payload = Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration])
+
+ with {:ok, %{status: 200}} <- QdrantClient.put("/collections/posts", payload) do
+ :ok
+ else
+ e -> {:error, e}
+ end
+ end
+
+ @impl true
+ def drop_index do
+ with {:ok, %{status: 200}} <- QdrantClient.delete("/collections/posts") do
+ :ok
+ else
+ e -> {:error, e}
+ end
+ end
+
+ def get_embedding(text) do
+ with {:ok, %{body: %{"data" => [%{"embedding" => embedding}]}}} <-
+ OpenAIClient.post("/v1/embeddings", %{
+ input: text,
+ model: Config.get([Pleroma.Search.QdrantSearch, :openai_model])
+ }) do
+ {:ok, embedding}
+ else
+ _ ->
+ {:error, "Failed to get embedding"}
+ end
+ end
+
+ defp actor_from_activity(%{data: %{"actor" => actor}}) do
+ actor
+ end
+
+ defp actor_from_activity(_), do: nil
+
+ defp build_index_payload(activity, embedding) do
+ actor = actor_from_activity(activity)
+ published_at = activity.data["published"]
+
+ %{
+ points: [
+ %{
+ id: activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!(),
+ vector: embedding,
+ payload: %{actor: actor, published_at: published_at}
+ }
+ ]
+ }
+ end
+
+ defp build_search_payload(embedding, options) do
+ base = %{
+ vector: embedding,
+ limit: options[:limit] || 20,
+ offset: options[:offset] || 0
+ }
+
+ if author = options[:author] do
+ Map.put(base, :filter, %{
+ must: [%{key: "actor", match: %{value: author.ap_id}}]
+ })
+ else
+ base
+ end
+ end
+
+ @impl true
+ def add_to_index(activity) do
+ # This will only index public or unlisted notes
+ maybe_search_data = object_to_search_data(activity.object)
+
+ if activity.data["type"] == "Create" and maybe_search_data do
+ with {:ok, embedding} <- get_embedding(maybe_search_data.content),
+ {:ok, %{status: 200}} <-
+ QdrantClient.put(
+ "/collections/posts/points",
+ build_index_payload(activity, embedding)
+ ) do
+ :ok
+ else
+ e -> {:error, e}
+ end
+ else
+ :ok
+ end
+ end
+
+ @impl true
+ def remove_from_index(object) do
+ activity = Activity.get_by_object_ap_id_with_object(object.data["id"])
+ id = activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()
+
+ with {:ok, %{status: 200}} <-
+ QdrantClient.post("/collections/posts/points/delete", %{"points" => [id]}) do
+ :ok
+ else
+ e -> {:error, e}
+ end
+ end
+
+ @impl true
+ def search(user, original_query, options) do
+ query = "Represent this sentence for searching relevant passages: #{original_query}"
+
+ with {:ok, embedding} <- get_embedding(query),
+ {:ok, %{body: %{"result" => result}}} <-
+ QdrantClient.post(
+ "/collections/posts/points/search",
+ build_search_payload(embedding, options)
+ ) do
+ ids =
+ Enum.map(result, fn %{"id" => id} ->
+ Ecto.UUID.dump!(id)
+ end)
+
+ from(a in Activity, where: a.id in ^ids)
+ |> Activity.with_preloaded_object()
+ |> Activity.restrict_deactivated_users()
+ |> Ecto.Query.order_by([a], fragment("array_position(?, ?)", ^ids, a.id))
+ |> Pleroma.Repo.all()
+ |> maybe_fetch(user, original_query)
+ else
+ _ ->
+ []
+ end
+ end
+
+ @impl true
+ def healthcheck_endpoints do
+ qdrant_health =
+ Config.get([Pleroma.Search.QdrantSearch, :qdrant_url])
+ |> URI.parse()
+ |> Map.put(:path, "/healthz")
+ |> URI.to_string()
+
+ openai_health = Config.get([Pleroma.Search.QdrantSearch, :openai_healthcheck_url])
+
+ [qdrant_health, openai_health] |> Enum.filter(& &1)
+ end
+end
+
+defmodule Pleroma.Search.QdrantSearch.OpenAIClient do
+ use Tesla
+ alias Pleroma.Config.Getting, as: Config
+
+ plug(Tesla.Middleware.BaseUrl, Config.get([Pleroma.Search.QdrantSearch, :openai_url]))
+ plug(Tesla.Middleware.JSON)
+
+ plug(Tesla.Middleware.Headers, [
+ {"Authorization",
+ "Bearer #{Pleroma.Config.get([Pleroma.Search.QdrantSearch, :openai_api_key])}"}
+ ])
+end
+
+defmodule Pleroma.Search.QdrantSearch.QdrantClient do
+ use Tesla
+ alias Pleroma.Config.Getting, as: Config
+
+ plug(Tesla.Middleware.BaseUrl, Config.get([Pleroma.Search.QdrantSearch, :qdrant_url]))
+ plug(Tesla.Middleware.JSON)
+
+ plug(Tesla.Middleware.Headers, [
+ {"api-key", Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_api_key])}
+ ])
+end
diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex
index 13c887bc2..f4ed13c36 100644
--- a/lib/pleroma/search/search_backend.ex
+++ b/lib/pleroma/search/search_backend.ex
@@ -23,6 +23,16 @@ defmodule Pleroma.Search.SearchBackend do
@callback remove_from_index(object :: Pleroma.Object.t()) :: :ok | {:error, any()}
@doc """
+ Create the index
+ """
+ @callback create_index() :: :ok | {:error, any()}
+
+ @doc """
+ Drop the index
+ """
+ @callback drop_index() :: :ok | {:error, any()}
+
+ @doc """
Healthcheck endpoints of search backend infrastructure to monitor for controlling
processing of jobs in the Oban queue.
diff --git a/lib/pleroma/web/activity_pub/activity_pub_controller.ex b/lib/pleroma/web/activity_pub/activity_pub_controller.ex
index e38a94966..d2b2cae0b 100644
--- a/lib/pleroma/web/activity_pub/activity_pub_controller.ex
+++ b/lib/pleroma/web/activity_pub/activity_pub_controller.ex
@@ -52,6 +52,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubController do
when action in [:activity, :object]
)
+ plug(:log_inbox_metadata when action in [:inbox])
plug(:set_requester_reachable when action in [:inbox])
plug(:relay_active? when action in [:relay])
@@ -521,6 +522,13 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubController do
conn
end
+ defp log_inbox_metadata(conn = %{params: %{"actor" => actor, "type" => type}}, _) do
+ Logger.metadata(actor: actor, type: type)
+ conn
+ end
+
+ defp log_inbox_metadata(conn, _), do: conn
+
def upload_media(%{assigns: %{user: %User{} = user}} = conn, %{"file" => file} = data) do
with {:ok, object} <-
ActivityPub.upload(
diff --git a/lib/pleroma/web/endpoint.ex b/lib/pleroma/web/endpoint.ex
index 2e2104904..fef907ace 100644
--- a/lib/pleroma/web/endpoint.ex
+++ b/lib/pleroma/web/endpoint.ex
@@ -38,6 +38,8 @@ defmodule Pleroma.Web.Endpoint do
plug(Plug.Telemetry, event_prefix: [:phoenix, :endpoint])
+ plug(Pleroma.Web.Plugs.LoggerMetadataPath)
+
plug(Pleroma.Web.Plugs.SetLocalePlug)
plug(CORSPlug)
plug(Pleroma.Web.Plugs.HTTPSecurityPlug)
diff --git a/lib/pleroma/web/plugs/logger_metadata_path.ex b/lib/pleroma/web/plugs/logger_metadata_path.ex
new file mode 100644
index 000000000..a5553cfc8
--- /dev/null
+++ b/lib/pleroma/web/plugs/logger_metadata_path.ex
@@ -0,0 +1,12 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.Plugs.LoggerMetadataPath do
+ def init(opts), do: opts
+
+ def call(conn, _) do
+ Logger.metadata(path: conn.request_path)
+ conn
+ end
+end
diff --git a/lib/pleroma/web/plugs/logger_metadata_user.ex b/lib/pleroma/web/plugs/logger_metadata_user.ex
new file mode 100644
index 000000000..6a5c0041d
--- /dev/null
+++ b/lib/pleroma/web/plugs/logger_metadata_user.ex
@@ -0,0 +1,18 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.Plugs.LoggerMetadataUser do
+ alias Pleroma.User
+
+ def init(opts), do: opts
+
+ def call(%{assigns: %{user: user = %User{}}} = conn, _) do
+ Logger.metadata(user: user.nickname)
+ conn
+ end
+
+ def call(conn, _) do
+ conn
+ end
+end
diff --git a/lib/pleroma/web/router.ex b/lib/pleroma/web/router.ex
index 368a04df0..56c457e90 100644
--- a/lib/pleroma/web/router.ex
+++ b/lib/pleroma/web/router.ex
@@ -29,6 +29,7 @@ defmodule Pleroma.Web.Router do
pipeline :browser do
plug(:accepts, ["html"])
plug(:fetch_session)
+ plug(Pleroma.Web.Plugs.LoggerMetadataUser)
end
pipeline :oauth do
@@ -67,12 +68,14 @@ defmodule Pleroma.Web.Router do
plug(:fetch_session)
plug(:authenticate)
plug(OpenApiSpex.Plug.PutApiSpec, module: Pleroma.Web.ApiSpec)
+ plug(Pleroma.Web.Plugs.LoggerMetadataUser)
end
pipeline :no_auth_or_privacy_expectations_api do
plug(:base_api)
plug(:after_auth)
plug(Pleroma.Web.Plugs.IdempotencyPlug)
+ plug(Pleroma.Web.Plugs.LoggerMetadataUser)
end
# Pipeline for app-related endpoints (no user auth checks — app-bound tokens must be supported)
@@ -83,12 +86,14 @@ defmodule Pleroma.Web.Router do
pipeline :api do
plug(:expect_public_instance_or_user_authentication)
plug(:no_auth_or_privacy_expectations_api)
+ plug(Pleroma.Web.Plugs.LoggerMetadataUser)
end
pipeline :authenticated_api do
plug(:expect_user_authentication)
plug(:no_auth_or_privacy_expectations_api)
plug(Pleroma.Web.Plugs.EnsureAuthenticatedPlug)
+ plug(Pleroma.Web.Plugs.LoggerMetadataUser)
end
pipeline :admin_api do
@@ -99,6 +104,7 @@ defmodule Pleroma.Web.Router do
plug(Pleroma.Web.Plugs.EnsureAuthenticatedPlug)
plug(Pleroma.Web.Plugs.UserIsStaffPlug)
plug(Pleroma.Web.Plugs.IdempotencyPlug)
+ plug(Pleroma.Web.Plugs.LoggerMetadataUser)
end
pipeline :require_admin do
@@ -179,6 +185,7 @@ defmodule Pleroma.Web.Router do
plug(:browser)
plug(:authenticate)
plug(Pleroma.Web.Plugs.EnsureUserTokenAssignsPlug)
+ plug(Pleroma.Web.Plugs.LoggerMetadataUser)
end
pipeline :well_known do
@@ -193,6 +200,7 @@ defmodule Pleroma.Web.Router do
pipeline :pleroma_api do
plug(:accepts, ["html", "json"])
plug(OpenApiSpex.Plug.PutApiSpec, module: Pleroma.Web.ApiSpec)
+ plug(Pleroma.Web.Plugs.LoggerMetadataUser)
end
pipeline :mailbox_preview do
diff --git a/priv/gettext/config_descriptions.pot b/priv/gettext/config_descriptions.pot
index 4f60e1c85..b4792868b 100644
--- a/priv/gettext/config_descriptions.pot
+++ b/priv/gettext/config_descriptions.pot
@@ -5973,3 +5973,87 @@ msgstr ""
msgctxt "config label at :pleroma-:instance > :languages"
msgid "Languages"
msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config description at :pleroma-:mrf_emoji"
+msgid "Reject or force-unlisted emojis whose URLs or names match a keyword or [Regex](https://hexdocs.pm/elixir/Regex.html)."
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config description at :pleroma-:mrf_emoji > :federated_timeline_removal_shortcode"
+msgid " A list of patterns which result in message with emojis whose shortcodes match being removed from federated timelines (a.k.a unlisted). This will apply only to statuses.\n\n Each pattern can be a string or [Regex](https://hexdocs.pm/elixir/Regex.html) in the format of `~r/PATTERN/`.\n"
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config description at :pleroma-:mrf_emoji > :federated_timeline_removal_url"
+msgid " A list of patterns which result in message with emojis whose URLs match being removed from federated timelines (a.k.a unlisted). This will apply only to statuses.\n\n Each pattern can be a string or [Regex](https://hexdocs.pm/elixir/Regex.html) in the format of `~r/PATTERN/`.\n"
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config description at :pleroma-:mrf_emoji > :remove_shortcode"
+msgid " A list of patterns which result in emoji whose shortcode matches being removed from the message. This will apply to statuses, emoji reactions, and user profiles.\n\n Each pattern can be a string or [Regex](https://hexdocs.pm/elixir/Regex.html) in the format of `~r/PATTERN/`.\n"
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config description at :pleroma-:mrf_emoji > :remove_url"
+msgid " A list of patterns which result in emoji whose URL matches being removed from the message. This will apply to statuses, emoji reactions, and user profiles.\n\n Each pattern can be a string or [Regex](https://hexdocs.pm/elixir/Regex.html) in the format of `~r/PATTERN/`.\n"
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config description at :pleroma-Pleroma.User.Backup > :process_chunk_size"
+msgid "The number of activities to fetch in the backup job for each chunk."
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config description at :pleroma-Pleroma.User.Backup > :process_wait_time"
+msgid "The amount of time to wait for backup to report progress, in milliseconds. If no progress is received from the backup job for that much time, terminate it and deem it failed."
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config label at :pleroma-:mrf_emoji"
+msgid "MRF Emoji"
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config label at :pleroma-:mrf_emoji > :federated_timeline_removal_shortcode"
+msgid "Federated timeline removal shortcode"
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config label at :pleroma-:mrf_emoji > :federated_timeline_removal_url"
+msgid "Federated timeline removal url"
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config label at :pleroma-:mrf_emoji > :remove_shortcode"
+msgid "Remove shortcode"
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config label at :pleroma-:mrf_emoji > :remove_url"
+msgid "Remove url"
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config label at :pleroma-Pleroma.User.Backup > :process_chunk_size"
+msgid "Process Chunk Size"
+msgstr ""
+
+#: lib/pleroma/docs/translator.ex:5
+#, elixir-autogen, elixir-format
+msgctxt "config label at :pleroma-Pleroma.User.Backup > :process_wait_time"
+msgid "Process Wait Time"
+msgstr ""
diff --git a/priv/gettext/errors.pot b/priv/gettext/errors.pot
index d320ee1bd..aca77f8fa 100644
--- a/priv/gettext/errors.pot
+++ b/priv/gettext/errors.pot
@@ -110,7 +110,7 @@ msgstr ""
msgid "Can't display this activity"
msgstr ""
-#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:334
+#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:346
#, elixir-autogen, elixir-format
msgid "Can't find user"
msgstr ""
@@ -198,7 +198,7 @@ msgstr ""
msgid "Invalid password."
msgstr ""
-#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:267
+#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:279
#, elixir-autogen, elixir-format
msgid "Invalid request"
msgstr ""
@@ -225,7 +225,7 @@ msgstr ""
#: lib/pleroma/web/feed/tag_controller.ex:16
#: lib/pleroma/web/feed/user_controller.ex:69
#: lib/pleroma/web/o_status/o_status_controller.ex:132
-#: lib/pleroma/web/plugs/uploaded_media.ex:104
+#: lib/pleroma/web/plugs/uploaded_media.ex:84
#, elixir-autogen, elixir-format
msgid "Not found"
msgstr ""
@@ -235,7 +235,7 @@ msgstr ""
msgid "Poll's author can't vote"
msgstr ""
-#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:499
+#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:511
#: lib/pleroma/web/mastodon_api/controllers/fallback_controller.ex:20
#: lib/pleroma/web/mastodon_api/controllers/poll_controller.ex:39
#: lib/pleroma/web/mastodon_api/controllers/poll_controller.ex:51
@@ -341,7 +341,7 @@ msgstr ""
msgid "CAPTCHA expired"
msgstr ""
-#: lib/pleroma/web/plugs/uploaded_media.ex:77
+#: lib/pleroma/web/plugs/uploaded_media.ex:57
#, elixir-autogen, elixir-format
msgid "Failed"
msgstr ""
@@ -361,7 +361,7 @@ msgstr ""
msgid "Insufficient permissions: %{permissions}."
msgstr ""
-#: lib/pleroma/web/plugs/uploaded_media.ex:131
+#: lib/pleroma/web/plugs/uploaded_media.ex:111
#, elixir-autogen, elixir-format
msgid "Internal Error"
msgstr ""
@@ -557,7 +557,7 @@ msgstr ""
msgid "Access denied"
msgstr ""
-#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:331
+#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:343
#, elixir-autogen, elixir-format
msgid "This API requires an authenticated user"
msgstr ""
@@ -567,7 +567,7 @@ msgstr ""
msgid "User is not an admin."
msgstr ""
-#: lib/pleroma/user/backup.ex:73
+#: lib/pleroma/user/backup.ex:78
#, elixir-format
msgid "Last export was less than a day ago"
msgid_plural "Last export was less than %{days} days ago"
@@ -607,3 +607,23 @@ msgstr ""
#, elixir-autogen, elixir-format
msgid "User isn't privileged."
msgstr ""
+
+#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:267
+#, elixir-autogen, elixir-format
+msgid "Bio is too long"
+msgstr ""
+
+#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:270
+#, elixir-autogen, elixir-format
+msgid "Name is too long"
+msgstr ""
+
+#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:273
+#, elixir-autogen, elixir-format
+msgid "One or more field entries are too long"
+msgstr ""
+
+#: lib/pleroma/web/mastodon_api/controllers/account_controller.ex:276
+#, elixir-autogen, elixir-format
+msgid "Too many field entries"
+msgstr ""
diff --git a/priv/gettext/oauth_scopes.pot b/priv/gettext/oauth_scopes.pot
index 50ad0dd9e..83328770e 100644
--- a/priv/gettext/oauth_scopes.pot
+++ b/priv/gettext/oauth_scopes.pot
@@ -219,3 +219,43 @@ msgstr ""
#, elixir-autogen, elixir-format
msgid "read:mutes"
msgstr ""
+
+#: lib/pleroma/web/api_spec/scopes/translator.ex:5
+#, elixir-autogen, elixir-format
+msgid "push"
+msgstr ""
+
+#: lib/pleroma/web/api_spec/scopes/translator.ex:5
+#, elixir-autogen, elixir-format
+msgid "read:backups"
+msgstr ""
+
+#: lib/pleroma/web/api_spec/scopes/translator.ex:5
+#, elixir-autogen, elixir-format
+msgid "read:chats"
+msgstr ""
+
+#: lib/pleroma/web/api_spec/scopes/translator.ex:5
+#, elixir-autogen, elixir-format
+msgid "read:media"
+msgstr ""
+
+#: lib/pleroma/web/api_spec/scopes/translator.ex:5
+#, elixir-autogen, elixir-format
+msgid "read:reports"
+msgstr ""
+
+#: lib/pleroma/web/api_spec/scopes/translator.ex:5
+#, elixir-autogen, elixir-format
+msgid "write:chats"
+msgstr ""
+
+#: lib/pleroma/web/api_spec/scopes/translator.ex:5
+#, elixir-autogen, elixir-format
+msgid "write:follow"
+msgstr ""
+
+#: lib/pleroma/web/api_spec/scopes/translator.ex:5
+#, elixir-autogen, elixir-format
+msgid "write:reports"
+msgstr ""
diff --git a/supplemental/search/fastembed-api/Dockerfile b/supplemental/search/fastembed-api/Dockerfile
new file mode 100644
index 000000000..c1e0ef51f
--- /dev/null
+++ b/supplemental/search/fastembed-api/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.9
+
+WORKDIR /code
+COPY fastembed-server.py /workdir/fastembed-server.py
+COPY requirements.txt /workdir/requirements.txt
+
+RUN pip install -r /workdir/requirements.txt
+
+CMD ["python", "/workdir/fastembed-server.py"]
diff --git a/supplemental/search/fastembed-api/README.md b/supplemental/search/fastembed-api/README.md
new file mode 100644
index 000000000..63a037207
--- /dev/null
+++ b/supplemental/search/fastembed-api/README.md
@@ -0,0 +1,6 @@
+# About
+This is a minimal implementation of the [OpenAI Embeddings API](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) meant to be used with the QdrantSearch backend.
+
+# Usage
+
+The easiest way to run it is to just use docker compose with `docker compose up`. This starts the server on the default configured port. Different models can be used, for a full list of supported models, check the [fastembed documentation](https://qdrant.github.io/fastembed/examples/Supported_Models/). The first time a model is requested it will be downloaded, which can take a few seconds.
diff --git a/supplemental/search/fastembed-api/compose.yml b/supplemental/search/fastembed-api/compose.yml
new file mode 100644
index 000000000..d4cb31722
--- /dev/null
+++ b/supplemental/search/fastembed-api/compose.yml
@@ -0,0 +1,5 @@
+services:
+ web:
+ build: .
+ ports:
+ - "11345:11345"
diff --git a/supplemental/search/fastembed-api/fastembed-server.py b/supplemental/search/fastembed-api/fastembed-server.py
new file mode 100644
index 000000000..02da69db2
--- /dev/null
+++ b/supplemental/search/fastembed-api/fastembed-server.py
@@ -0,0 +1,27 @@
+from fastembed import TextEmbedding
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+models = {}
+
+app = FastAPI()
+
+class EmbeddingRequest(BaseModel):
+ model: str
+ input: str
+
+@app.post("/v1/embeddings")
+def embeddings(request: EmbeddingRequest):
+ model = models.get(request.model) or TextEmbedding(request.model)
+ models[request.model] = model
+ embeddings = next(model.embed(request.input)).tolist()
+ return {"data": [{"embedding": embeddings}]}
+
+@app.get("/health")
+def health():
+ return {"status": "ok"}
+
+if __name__ == "__main__":
+ import uvicorn
+
+ uvicorn.run(app, host="0.0.0.0", port=11345)
diff --git a/supplemental/search/fastembed-api/requirements.txt b/supplemental/search/fastembed-api/requirements.txt
new file mode 100644
index 000000000..db67a8402
--- /dev/null
+++ b/supplemental/search/fastembed-api/requirements.txt
@@ -0,0 +1,4 @@
+fastapi==0.111.0
+fastembed==0.2.7
+pydantic==1.10.15
+uvicorn==0.29.0
diff --git a/test/pleroma/search/qdrant_search_test.exs b/test/pleroma/search/qdrant_search_test.exs
new file mode 100644
index 000000000..47a77a391
--- /dev/null
+++ b/test/pleroma/search/qdrant_search_test.exs
@@ -0,0 +1,199 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Search.QdrantSearchTest do
+ use Pleroma.DataCase, async: true
+ use Oban.Testing, repo: Pleroma.Repo
+
+ import Pleroma.Factory
+ import Mox
+
+ alias Pleroma.Search.QdrantSearch
+ alias Pleroma.UnstubbedConfigMock, as: Config
+ alias Pleroma.Web.CommonAPI
+ alias Pleroma.Workers.SearchIndexingWorker
+
+ describe "Qdrant search" do
+ test "returns the correct healthcheck endpoints" do
+ # No openai healthcheck URL
+ Config
+ |> expect(:get, 2, fn
+ [Pleroma.Search.QdrantSearch, key], nil ->
+ %{qdrant_url: "https://qdrant.url"}[key]
+ end)
+
+ [health_endpoint] = QdrantSearch.healthcheck_endpoints()
+
+ assert "https://qdrant.url/healthz" == health_endpoint
+
+ # Set openai healthcheck URL
+ Config
+ |> expect(:get, 2, fn
+ [Pleroma.Search.QdrantSearch, key], nil ->
+ %{qdrant_url: "https://qdrant.url", openai_healthcheck_url: "https://openai.url/health"}[
+ key
+ ]
+ end)
+
+ [_, health_endpoint] = QdrantSearch.healthcheck_endpoints()
+
+ assert "https://openai.url/health" == health_endpoint
+ end
+
+ test "searches for a term by encoding it and sending it to qdrant" do
+ user = insert(:user)
+
+ {:ok, activity} =
+ CommonAPI.post(user, %{
+ status: "guys i just don't wanna leave the swamp",
+ visibility: "public"
+ })
+
+ Config
+ |> expect(:get, 3, fn
+ [Pleroma.Search, :module], nil ->
+ QdrantSearch
+
+ [Pleroma.Search.QdrantSearch, key], nil ->
+ %{
+ openai_model: "a_model",
+ openai_url: "https://openai.url",
+ qdrant_url: "https://qdrant.url"
+ }[key]
+ end)
+
+ Tesla.Mock.mock(fn
+ %{url: "https://openai.url/v1/embeddings", method: :post} ->
+ Tesla.Mock.json(%{
+ data: [%{embedding: [1, 2, 3]}]
+ })
+
+ %{url: "https://qdrant.url/collections/posts/points/search", method: :post, body: body} ->
+ data = Jason.decode!(body)
+ refute data["filter"]
+
+ Tesla.Mock.json(%{
+ result: [%{"id" => activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()}]
+ })
+ end)
+
+ results = QdrantSearch.search(nil, "guys i just don't wanna leave the swamp", %{})
+
+ assert results == [activity]
+ end
+
+ test "for a given actor, ask for only relevant matches" do
+ user = insert(:user)
+
+ {:ok, activity} =
+ CommonAPI.post(user, %{
+ status: "guys i just don't wanna leave the swamp",
+ visibility: "public"
+ })
+
+ Config
+ |> expect(:get, 3, fn
+ [Pleroma.Search, :module], nil ->
+ QdrantSearch
+
+ [Pleroma.Search.QdrantSearch, key], nil ->
+ %{
+ openai_model: "a_model",
+ openai_url: "https://openai.url",
+ qdrant_url: "https://qdrant.url"
+ }[key]
+ end)
+
+ Tesla.Mock.mock(fn
+ %{url: "https://openai.url/v1/embeddings", method: :post} ->
+ Tesla.Mock.json(%{
+ data: [%{embedding: [1, 2, 3]}]
+ })
+
+ %{url: "https://qdrant.url/collections/posts/points/search", method: :post, body: body} ->
+ data = Jason.decode!(body)
+
+ assert data["filter"] == %{
+ "must" => [%{"key" => "actor", "match" => %{"value" => user.ap_id}}]
+ }
+
+ Tesla.Mock.json(%{
+ result: [%{"id" => activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!()}]
+ })
+ end)
+
+ results =
+ QdrantSearch.search(nil, "guys i just don't wanna leave the swamp", %{author: user})
+
+ assert results == [activity]
+ end
+
+ test "indexes a public post on creation, deletes from the index on deletion" do
+ user = insert(:user)
+
+ Tesla.Mock.mock(fn
+ %{method: :post, url: "https://openai.url/v1/embeddings"} ->
+ send(self(), "posted_to_openai")
+
+ Tesla.Mock.json(%{
+ data: [%{embedding: [1, 2, 3]}]
+ })
+
+ %{method: :put, url: "https://qdrant.url/collections/posts/points", body: body} ->
+ send(self(), "posted_to_qdrant")
+
+ data = Jason.decode!(body)
+ %{"points" => [%{"vector" => vector, "payload" => payload}]} = data
+
+ assert vector == [1, 2, 3]
+ assert payload["actor"]
+ assert payload["published_at"]
+
+ Tesla.Mock.json("ok")
+
+ %{method: :post, url: "https://qdrant.url/collections/posts/points/delete"} ->
+ send(self(), "deleted_from_qdrant")
+ Tesla.Mock.json("ok")
+ end)
+
+ Config
+ |> expect(:get, 6, fn
+ [Pleroma.Search, :module], nil ->
+ QdrantSearch
+
+ [Pleroma.Search.QdrantSearch, key], nil ->
+ %{
+ openai_model: "a_model",
+ openai_url: "https://openai.url",
+ qdrant_url: "https://qdrant.url"
+ }[key]
+ end)
+
+ {:ok, activity} =
+ CommonAPI.post(user, %{
+ status: "guys i just don't wanna leave the swamp",
+ visibility: "public"
+ })
+
+ args = %{"op" => "add_to_index", "activity" => activity.id}
+
+ assert_enqueued(
+ worker: SearchIndexingWorker,
+ args: args
+ )
+
+ assert :ok = perform_job(SearchIndexingWorker, args)
+ assert_received("posted_to_openai")
+ assert_received("posted_to_qdrant")
+
+ {:ok, _} = CommonAPI.delete(activity.id, user)
+
+ delete_args = %{"op" => "remove_from_index", "object" => activity.object.id}
+ assert_enqueued(worker: SearchIndexingWorker, args: delete_args)
+ assert :ok = perform_job(SearchIndexingWorker, delete_args)
+
+ assert_received("deleted_from_qdrant")
+ end
+ end
+end