summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config/config.exs16
-rw-r--r--config/description.exs43
-rw-r--r--config/test.exs4
-rw-r--r--docs/configuration/search.md123
-rw-r--r--lib/mix/tasks/pleroma/search/meilisearch.ex145
-rw-r--r--lib/pleroma/activity.ex2
-rw-r--r--lib/pleroma/application.ex6
-rw-r--r--lib/pleroma/search.ex17
-rw-r--r--lib/pleroma/search/database_search.ex (renamed from lib/pleroma/activity/search.ex)33
-rw-r--r--lib/pleroma/search/meilisearch.ex176
-rw-r--r--lib/pleroma/search/search_backend.ex24
-rw-r--r--lib/pleroma/web/activity_pub/activity_pub.ex3
-rw-r--r--lib/pleroma/web/activity_pub/side_effects.ex9
-rw-r--r--lib/pleroma/web/mastodon_api/controllers/search_controller.ex3
-rw-r--r--lib/pleroma/workers/search_indexing_worker.ex21
-rw-r--r--priv/scrubbers/search_indexing.ex24
-rw-r--r--test/pleroma/search/database_search_test.ex (renamed from test/pleroma/activity/search_test.exs)10
-rw-r--r--test/pleroma/search/meilisearch_test.exs129
18 files changed, 758 insertions, 30 deletions
diff --git a/config/config.exs b/config/config.exs
index 821166fd9..52319ce49 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -567,7 +567,8 @@ config :pleroma, Oban,
remote_fetcher: 2,
attachments_cleanup: 1,
new_users_digest: 1,
- mute_expire: 5
+ mute_expire: 5,
+ search_indexing: 10
],
plugins: [Oban.Plugins.Pruner],
crontab: [
@@ -578,7 +579,8 @@ config :pleroma, Oban,
config :pleroma, :workers,
retries: [
federator_incoming: 5,
- federator_outgoing: 5
+ federator_outgoing: 5,
+ search_indexing: 2
]
config :pleroma, Pleroma.Formatter,
@@ -862,11 +864,19 @@ config :pleroma, Pleroma.User.Backup,
config :pleroma, ConcurrentLimiter, [
{Pleroma.Web.RichMedia.Helpers, [max_running: 5, max_waiting: 5]},
- {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]}
+ {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]},
+ {Pleroma.Search, [max_running: 30, max_waiting: 50]}
]
config :pleroma, Pleroma.Web.WebFinger, domain: nil, update_nickname_on_user_fetch: true
+config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch
+
+config :pleroma, Pleroma.Search.Meilisearch,
+ url: "http://127.0.0.1:7700/",
+ private_key: nil,
+ initial_indexing_chunk_size: 100_000
+
# Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above.
import_config "#{Mix.env()}.exs"
diff --git a/config/description.exs b/config/description.exs
index 3848cb449..6280665b4 100644
--- a/config/description.exs
+++ b/config/description.exs
@@ -3443,5 +3443,48 @@ config :pleroma, :config_description, [
]
}
]
+ },
+ %{
+ group: :pleroma,
+ key: Pleroma.Search,
+ type: :group,
+ description: "General search settings.",
+ children: [
+ %{
+ key: :module,
+ type: :keyword,
+ description: "Selected search module.",
+ suggestion: [Pleroma.Search.DatabaseSearch, Pleroma.Search.Meilisearch]
+ }
+ ]
+ },
+ %{
+ group: :pleroma,
+ key: Pleroma.Search.Meilisearch,
+ type: :group,
+ description: "Meilisearch settings.",
+ children: [
+ %{
+ key: :url,
+ type: :string,
+ description: "Meilisearch URL.",
+ suggestion: ["http://127.0.0.1:7700/"]
+ },
+ %{
+ key: :private_key,
+ type: :string,
+ description:
+ "Private key for meilisearch authentication, or `nil` to disable private key authentication.",
+ suggestion: [nil]
+ },
+ %{
+ key: :initial_indexing_chunk_size,
+ type: :int,
+ description:
+ "Amount of posts in a batch when running the initial indexing operation. Should probably not be more than 100000" <>
+ " since there's a limit on maximum insert size",
+ suggestion: [100_000]
+ }
+ ]
}
]
diff --git a/config/test.exs b/config/test.exs
index 78303eb1d..23489d452 100644
--- a/config/test.exs
+++ b/config/test.exs
@@ -133,6 +133,10 @@ config :pleroma, :side_effects,
ap_streamer: Pleroma.Web.ActivityPub.ActivityPubMock,
logger: Pleroma.LoggerMock
+config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch
+
+config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", private_key: nil
+
# Reduce recompilation time
# https://dashbit.co/blog/speeding-up-re-compilation-of-elixir-projects
config :phoenix, :plug_init_mode, :runtime
diff --git a/docs/configuration/search.md b/docs/configuration/search.md
new file mode 100644
index 000000000..f131948a7
--- /dev/null
+++ b/docs/configuration/search.md
@@ -0,0 +1,123 @@
+# Configuring search
+
+{! backend/administration/CLI_tasks/general_cli_task_info.include !}
+
+## Built-in search
+
+To use built-in search that has no external dependencies, set the search module to `Pleroma.Activity`:
+
+> config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch
+
+While it has no external dependencies, it has problems with performance and relevancy.
+
+## Meilisearch
+
+Note that it's quite a bit more memory hungry than PostgreSQL (around 4-5G for ~1.2 million
+posts while idle and up to 7G while indexing initially). The disk usage for this additional index is also
+around 4 gigabytes. Like [RUM](./cheatsheet.md#rum-indexing-for-full-text-search) indexes, it offers considerably
+higher performance and ordering by timestamp in a reasonable amount of time.
+Additionally, the search results seem to be more accurate.
+
+Due to high memory usage, it may be best to set it up on a different machine, if running pleroma on a low-resource
+computer, and use private key authentication to secure the remote search instance.
+
+To use [meilisearch](https://www.meilisearch.com/), set the search module to `Pleroma.Search.Meilisearch`:
+
+> config :pleroma, Pleroma.Search, module: Pleroma.Search.Meilisearch
+
+You then need to set the address of the meilisearch instance, and optionally the private key for authentication. You might
+also want to change the `initial_indexing_chunk_size` to be smaller if you're server is not very powerful, but not higher than `100_000`,
+because meilisearch will refuse to process it if it's too big. However, in general you want this to be as big as possible, because meilisearch
+indexes faster when it can process many posts in a single batch.
+
+> config :pleroma, Pleroma.Search.Meilisearch,
+> url: "http://127.0.0.1:7700/",
+> private_key: "private key",
+> initial_indexing_chunk_size: 100_000
+
+Information about setting up meilisearch can be found in the
+[official documentation](https://docs.meilisearch.com/learn/getting_started/installation.html).
+You probably want to start it with `MEILI_NO_ANALYTICS=true` environment variable to disable analytics.
+At least version 0.25.0 is required, but you are strongly adviced to use at least 0.26.0, as it introduces
+the `--enable-auto-batching` option which drastically improves performance. Without this option, the search
+is hardly usable on a somewhat big instance.
+
+### Private key authentication (optional)
+
+To set the private key, use the `MEILI_MASTER_KEY` environment variable when starting. After setting the _master key_,
+you have to get the _private key_, which is actually used for authentication.
+
+=== "OTP"
+ ```sh
+ ./bin/pleroma_ctl search.meilisearch show-keys <your master key here>
+ ```
+
+=== "From Source"
+ ```sh
+ mix pleroma.search.meilisearch show-keys <your master key here>
+ ```
+
+You will see a "Default Admin API Key", this is the key you actually put into your configuration file.
+
+### Initial indexing
+
+After setting up the configuration, you'll want to index all of your already existsing posts. Only public posts are indexed. You'll only
+have to do it one time, but it might take a while, depending on the amount of posts your instance has seen. This is also a fairly RAM
+consuming process for `meilisearch`, and it will take a lot of RAM when running if you have a lot of posts (seems to be around 5G for ~1.2
+million posts while idle and up to 7G while indexing initially, but your experience may be different).
+
+The sequence of actions is as follows:
+
+1. First, change the configuration to use `Pleroma.Search.Meilisearch` as the search backend
+2. Restart your instance, at this point it can be used while the search indexing is running, though search won't return anything
+3. Start the initial indexing process (as described below with `index`),
+ and wait until the task says it sent everything from the database to index
+4. Wait until everything is actually indexed (by checking with `stats` as described below),
+ at this point you don't have to do anything, just wait a while.
+
+To start the initial indexing, run the `index` command:
+
+=== "OTP"
+ ```sh
+ ./bin/pleroma_ctl search.meilisearch index
+ ```
+
+=== "From Source"
+ ```sh
+ mix pleroma.search.meilisearch index
+ ```
+
+This will show you the total amount of posts to index, and then show you the amount of posts indexed currently, until the numbers eventually
+become the same. The posts are indexed in big batches and meilisearch will take some time to actually index them, even after you have
+inserted all the posts into it. Depending on the amount of posts, this may be as long as several hours. To get information about the status
+of indexing and how many posts have actually been indexed, use the `stats` command:
+
+=== "OTP"
+ ```sh
+ ./bin/pleroma_ctl search.meilisearch stats
+ ```
+
+=== "From Source"
+ ```sh
+ mix pleroma.search.meilisearch stats
+ ```
+
+### Clearing the index
+
+In case you need to clear the index (for example, to re-index from scratch, if that needs to happen for some reason), you can
+use the `clear` command:
+
+=== "OTP"
+ ```sh
+ ./bin/pleroma_ctl search.meilisearch clear
+ ```
+
+=== "From Source"
+ ```sh
+ mix pleroma.search.meilisearch clear
+ ```
+
+This will clear **all** the posts from the search index. Note, that deleted posts are also removed from index by the instance itself, so
+there is no need to actually clear the whole index, unless you want **all** of it gone. That said, the index does not hold any information
+that cannot be re-created from the database, it should also generally be a lot smaller than the size of your database. Still, the size
+depends on the amount of text in posts.
diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex
new file mode 100644
index 000000000..8379a0c25
--- /dev/null
+++ b/lib/mix/tasks/pleroma/search/meilisearch.ex
@@ -0,0 +1,145 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
+ require Pleroma.Constants
+
+ import Mix.Pleroma
+ import Ecto.Query
+
+ import Pleroma.Search.Meilisearch,
+ only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete: 1]
+
+ def run(["index"]) do
+ start_pleroma()
+ Pleroma.HTML.compile_scrubbers()
+
+ meili_version =
+ (
+ {:ok, result} = meili_get("/version")
+
+ result["pkgVersion"]
+ )
+
+ # The ranking rule syntax was changed but nothing about that is mentioned in the changelog
+ if not Version.match?(meili_version, ">= 0.25.0") do
+ raise "Meilisearch <0.24.0 not supported"
+ end
+
+ {:ok, _} =
+ meili_post(
+ "/indexes/objects/settings/ranking-rules",
+ [
+ "published:desc",
+ "words",
+ "exactness",
+ "proximity",
+ "typo",
+ "attribute",
+ "sort"
+ ]
+ )
+
+ {:ok, _} =
+ meili_post(
+ "/indexes/objects/settings/searchable-attributes",
+ [
+ "content"
+ ]
+ )
+
+ IO.puts("Created indices. Starting to insert posts.")
+
+ chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size])
+
+ Pleroma.Repo.transaction(
+ fn ->
+ query =
+ from(Pleroma.Object,
+ # Only index public and unlisted posts which are notes and have some text
+ where:
+ fragment("data->>'type' = 'Note'") and
+ (fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) or
+ fragment("data->'cc' \\? ?", ^Pleroma.Constants.as_public())),
+ order_by: [desc: fragment("data->'published'")]
+ )
+
+ count = query |> Pleroma.Repo.aggregate(:count, :data)
+ IO.puts("Entries to index: #{count}")
+
+ Pleroma.Repo.stream(
+ query,
+ timeout: :infinity
+ )
+ |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
+ |> Stream.filter(fn o -> not is_nil(o) end)
+ |> Stream.chunk_every(chunk_size)
+ |> Stream.transform(0, fn objects, acc ->
+ new_acc = acc + Enum.count(objects)
+
+ # Reset to the beginning of the line and rewrite it
+ IO.write("\r")
+ IO.write("Indexed #{new_acc} entries")
+
+ {[objects], new_acc}
+ end)
+ |> Stream.each(fn objects ->
+ result =
+ meili_put(
+ "/indexes/objects/documents",
+ objects
+ )
+
+ with {:ok, res} <- result do
+ if not Map.has_key?(res, "uid") do
+ IO.puts("\nFailed to index: #{inspect(result)}")
+ end
+ else
+ e -> IO.puts("\nFailed to index due to network error: #{inspect(e)}")
+ end
+ end)
+ |> Stream.run()
+ end,
+ timeout: :infinity
+ )
+
+ IO.write("\n")
+ end
+
+ def run(["clear"]) do
+ start_pleroma()
+
+ meili_delete("/indexes/objects/documents")
+ end
+
+ def run(["show-keys", master_key]) do
+ start_pleroma()
+
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+ {:ok, result} =
+ Pleroma.HTTP.get(
+ Path.join(endpoint, "/keys"),
+ [{"Authorization", "Bearer #{master_key}"}]
+ )
+
+ decoded = Jason.decode!(result.body)
+
+ if decoded["results"] do
+ Enum.each(decoded["results"], fn %{"description" => desc, "key" => key} ->
+ IO.puts("#{desc}: #{key}")
+ end)
+ else
+ IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}")
+ end
+ end
+
+ def run(["stats"]) do
+ start_pleroma()
+
+ {:ok, result} = meili_get("/indexes/objects/stats")
+ IO.puts("Number of entries: #{result["numberOfDocuments"]}")
+ IO.puts("Indexing? #{result["isIndexing"]}")
+ end
+end
diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex
index 3556aaf9e..8a512dc57 100644
--- a/lib/pleroma/activity.ex
+++ b/lib/pleroma/activity.ex
@@ -368,7 +368,7 @@ defmodule Pleroma.Activity do
)
end
- defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search
+ defdelegate search(user, query, options \\ []), to: Pleroma.Search.DatabaseSearch
def direct_conversation_id(activity, for_user) do
alias Pleroma.Conversation.Participation
diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex
index 1c1db8c10..62d1b8b39 100644
--- a/lib/pleroma/application.ex
+++ b/lib/pleroma/application.ex
@@ -321,7 +321,11 @@ defmodule Pleroma.Application do
def limiters_setup do
config = Config.get(ConcurrentLimiter, [])
- [Pleroma.Web.RichMedia.Helpers, Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy]
+ [
+ Pleroma.Web.RichMedia.Helpers,
+ Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy,
+ Pleroma.Search
+ ]
|> Enum.each(fn module ->
mod_config = Keyword.get(config, module, [])
diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex
new file mode 100644
index 000000000..3b266e59b
--- /dev/null
+++ b/lib/pleroma/search.ex
@@ -0,0 +1,17 @@
+defmodule Pleroma.Search do
+ alias Pleroma.Workers.SearchIndexingWorker
+
+ def add_to_index(%Pleroma.Activity{id: activity_id}) do
+ SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity_id})
+ end
+
+ def remove_from_index(%Pleroma.Object{id: object_id}) do
+ SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object_id})
+ end
+
+ def search(query, options) do
+ search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity)
+
+ search_module.search(options[:for_user], query, options)
+ end
+end
diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/search/database_search.ex
index 0b9b24aa4..9a340abf1 100644
--- a/lib/pleroma/activity/search.ex
+++ b/lib/pleroma/search/database_search.ex
@@ -1,8 +1,8 @@
# Pleroma: A lightweight social networking server
-# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
-defmodule Pleroma.Activity.Search do
+defmodule Pleroma.Search.DatabaseSearch do
alias Pleroma.Activity
alias Pleroma.Object.Fetcher
alias Pleroma.Pagination
@@ -13,6 +13,9 @@ defmodule Pleroma.Activity.Search do
import Ecto.Query
+ @behaviour Pleroma.Search.SearchBackend
+
+ @impl true
def search(user, search_query, options \\ []) do
index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin
limit = Enum.min([Keyword.get(options, :limit), 40])
@@ -30,7 +33,7 @@ defmodule Pleroma.Activity.Search do
Activity
|> Activity.with_preloaded_object()
|> Activity.restrict_deactivated_users()
- |> restrict_public(user)
+ |> restrict_public()
|> query_with(index_type, search_query, search_function)
|> maybe_restrict_local(user)
|> maybe_restrict_author(author)
@@ -45,6 +48,12 @@ defmodule Pleroma.Activity.Search do
end
end
+ @impl true
+ def add_to_index(_activity), do: nil
+
+ @impl true
+ def remove_from_index(_object), do: nil
+
def maybe_restrict_author(query, %User{} = author) do
Activity.Queries.by_author(query, author)
end
@@ -57,19 +66,7 @@ defmodule Pleroma.Activity.Search do
def maybe_restrict_blocked(query, _), do: query
- defp restrict_public(q, user) when not is_nil(user) do
- intended_recipients = [
- Pleroma.Constants.as_public(),
- Pleroma.Web.ActivityPub.Utils.as_local_public()
- ]
-
- from([a, o] in q,
- where: fragment("?->>'type' = 'Create'", a.data),
- where: fragment("? && ?", ^intended_recipients, a.recipients)
- )
- end
-
- defp restrict_public(q, _user) do
+ def restrict_public(q) do
from([a, o] in q,
where: fragment("?->>'type' = 'Create'", a.data),
where: ^Pleroma.Constants.as_public() in a.recipients
@@ -136,7 +133,7 @@ defmodule Pleroma.Activity.Search do
)
end
- defp maybe_restrict_local(q, user) do
+ def maybe_restrict_local(q, user) do
limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated)
case {limit, user} do
@@ -149,7 +146,7 @@ defmodule Pleroma.Activity.Search do
defp restrict_local(q), do: where(q, local: true)
- defp maybe_fetch(activities, user, search_query) do
+ def maybe_fetch(activities, user, search_query) do
with true <- Regex.match?(~r/https?:/, search_query),
{:ok, object} <- Fetcher.fetch_object_from_id(search_query),
%Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]),
diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex
new file mode 100644
index 000000000..7af7f460a
--- /dev/null
+++ b/lib/pleroma/search/meilisearch.ex
@@ -0,0 +1,176 @@
+defmodule Pleroma.Search.Meilisearch do
+ require Logger
+ require Pleroma.Constants
+
+ alias Pleroma.Activity
+
+ import Pleroma.Search.DatabaseSearch
+ import Ecto.Query
+
+ @behaviour Pleroma.Search.SearchBackend
+
+ defp meili_headers do
+ private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key])
+
+ [{"Content-Type", "application/json"}] ++
+ if is_nil(private_key), do: [], else: [{"Authorization", "Bearer #{private_key}"}]
+ end
+
+ def meili_get(path) do
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+ result =
+ Pleroma.HTTP.get(
+ Path.join(endpoint, path),
+ meili_headers()
+ )
+
+ with {:ok, res} <- result do
+ {:ok, Jason.decode!(res.body)}
+ end
+ end
+
+ def meili_post(path, params) do
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+ result =
+ Pleroma.HTTP.post(
+ Path.join(endpoint, path),
+ Jason.encode!(params),
+ meili_headers()
+ )
+
+ with {:ok, res} <- result do
+ {:ok, Jason.decode!(res.body)}
+ end
+ end
+
+ def meili_put(path, params) do
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+ result =
+ Pleroma.HTTP.request(
+ :put,
+ Path.join(endpoint, path),
+ Jason.encode!(params),
+ meili_headers(),
+ []
+ )
+
+ with {:ok, res} <- result do
+ {:ok, Jason.decode!(res.body)}
+ end
+ end
+
+ def meili_delete(path) do
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+ Pleroma.HTTP.request(
+ :delete,
+ Path.join(endpoint, path),
+ "",
+ meili_headers(),
+ []
+ )
+ end
+
+ @impl true
+ def search(user, query, options \\ []) do
+ limit = Enum.min([Keyword.get(options, :limit), 40])
+ offset = Keyword.get(options, :offset, 0)
+ author = Keyword.get(options, :author)
+
+ res =
+ meili_post(
+ "/indexes/objects/search",
+ %{q: query, offset: offset, limit: limit}
+ )
+
+ with {:ok, result} <- res do
+ hits = result["hits"] |> Enum.map(& &1["ap"])
+
+ try do
+ hits
+ |> Activity.create_by_object_ap_id()
+ |> Activity.with_preloaded_object()
+ |> Activity.restrict_deactivated_users()
+ |> maybe_restrict_local(user)
+ |> maybe_restrict_author(author)
+ |> maybe_restrict_blocked(user)
+ |> maybe_fetch(user, query)
+ |> order_by([object: obj], desc: obj.data["published"])
+ |> Pleroma.Repo.all()
+ rescue
+ _ -> maybe_fetch([], user, query)
+ end
+ end
+ end
+
+ def object_to_search_data(object) do
+ # Only index public or unlisted Notes
+ if not is_nil(object) and object.data["type"] == "Note" and
+ not is_nil(object.data["content"]) and
+ (Pleroma.Constants.as_public() in object.data["to"] or
+ Pleroma.Constants.as_public() in object.data["cc"]) and
+ object.data["content"] not in ["", "."] do
+ data = object.data
+
+ content_str =
+ case data["content"] do
+ [nil | rest] -> to_string(rest)
+ str -> str
+ end
+
+ content =
+ with {:ok, scrubbed} <-
+ FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
+ trimmed <- String.trim(scrubbed) do
+ trimmed
+ end
+
+ # Make sure we have a non-empty string
+ if content != "" do
+ {:ok, published, _} = DateTime.from_iso8601(data["published"])
+
+ %{
+ id: object.id,
+ content: content,
+ ap: data["id"],
+ published: published |> DateTime.to_unix()
+ }
+ end
+ end
+ end
+
+ @impl true
+ def add_to_index(activity) do
+ maybe_search_data = object_to_search_data(activity.object)
+
+ if activity.data["type"] == "Create" and maybe_search_data do
+ result =
+ meili_put(
+ "/indexes/objects/documents",
+ [maybe_search_data]
+ )
+
+ with {:ok, res} <- result,
+ true <- Map.has_key?(res, "updateId") do
+ # Added successfully
+ :ok
+ else
+ _ ->
+ # There was an error, report it
+ Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}")
+ {:error, result}
+ end
+ else
+ # The post isn't something we can search, that's ok
+ :ok
+ end
+ end
+
+ @impl true
+ def remove_from_index(object) do
+ meili_delete("/indexes/objects/documents/#{object.id}")
+ end
+end
diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex
new file mode 100644
index 000000000..a42e2f5f6
--- /dev/null
+++ b/lib/pleroma/search/search_backend.ex
@@ -0,0 +1,24 @@
+defmodule Pleroma.Search.SearchBackend do
+ @doc """
+ Search statuses with a query, restricting to only those the user should have access to.
+ """
+ @callback search(user :: Pleroma.User.t(), query :: String.t(), options :: [any()]) :: [
+ Pleroma.Activity.t()
+ ]
+
+ @doc """
+ Add the object associated with the activity to the search index.
+
+ The whole activity is passed, to allow filtering on things such as scope.
+ """
+ @callback add_to_index(activity :: Pleroma.Activity.t()) :: :ok | {:error, any()}
+
+ @doc """
+ Remove the object from the index.
+
+ Just the object, as opposed to the whole activity, is passed, since the object
+ is what contains the actual content and there is no need for fitlering when removing
+ from index.
+ """
+ @callback remove_from_index(object :: Pleroma.Object.t()) :: {:ok, any()} | {:error, any()}
+end
diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex
index fa251394b..6887710dc 100644
--- a/lib/pleroma/web/activity_pub/activity_pub.ex
+++ b/lib/pleroma/web/activity_pub/activity_pub.ex
@@ -140,6 +140,9 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)
end)
+ # Add local posts to search index
+ if local, do: Pleroma.Search.add_to_index(activity)
+
{:ok, activity}
else
%Activity{} = activity ->
diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex
index a2152b945..c3879f638 100644
--- a/lib/pleroma/web/activity_pub/side_effects.ex
+++ b/lib/pleroma/web/activity_pub/side_effects.ex
@@ -197,6 +197,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
# - Increase replies count
# - Set up ActivityExpiration
# - Set up notifications
+ # - Index incoming posts for search (if needed)
@impl true
def handle(%{data: %{"type" => "Create"}} = activity, meta) do
with {:ok, object, meta} <- handle_object_creation(meta[:object_data], activity, meta),
@@ -226,6 +227,8 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)
end)
+ Pleroma.Search.add_to_index(Map.put(activity, :object, object))
+
meta =
meta
|> add_notifications(notifications)
@@ -285,6 +288,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
# - Reduce the user note count
# - Reduce the reply count
# - Stream out the activity
+ # - Removes posts from search index (if needed)
@impl true
def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, meta) do
deleted_object =
@@ -323,6 +327,11 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
end
if result == :ok do
+ # Only remove from index when deleting actual objects, not users or anything else
+ with %Pleroma.Object{} <- deleted_object do
+ Pleroma.Search.remove_from_index(deleted_object)
+ end
+
{:ok, object, meta}
else
{:error, result}
diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex
index 5e6e04734..e4acba226 100644
--- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex
+++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex
@@ -5,7 +5,6 @@
defmodule Pleroma.Web.MastodonAPI.SearchController do
use Pleroma.Web, :controller
- alias Pleroma.Activity
alias Pleroma.Repo
alias Pleroma.User
alias Pleroma.Web.ControllerHelper
@@ -100,7 +99,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do
end
defp resource_search(_, "statuses", query, options) do
- statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end)
+ statuses = with_fallback(fn -> Pleroma.Search.search(query, options) end)
StatusView.render("index.json",
activities: statuses,
diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex
new file mode 100644
index 000000000..43b7bad1e
--- /dev/null
+++ b/lib/pleroma/workers/search_indexing_worker.ex
@@ -0,0 +1,21 @@
+defmodule Pleroma.Workers.SearchIndexingWorker do
+ use Pleroma.Workers.WorkerHelper, queue: "search_indexing"
+
+ @impl Oban.Worker
+
+ def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do
+ activity = Pleroma.Activity.get_by_id_with_object(activity_id)
+
+ search_module = Pleroma.Config.get([Pleroma.Search, :module])
+
+ search_module.add_to_index(activity)
+ end
+
+ def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do
+ object = Pleroma.Object.get_by_id(object_id)
+
+ search_module = Pleroma.Config.get([Pleroma.Search, :module])
+
+ search_module.remove_from_index(object)
+ end
+end
diff --git a/priv/scrubbers/search_indexing.ex b/priv/scrubbers/search_indexing.ex
new file mode 100644
index 000000000..02756ab79
--- /dev/null
+++ b/priv/scrubbers/search_indexing.ex
@@ -0,0 +1,24 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.HTML.Scrubber.SearchIndexing do
+ @moduledoc """
+ An HTML scrubbing policy that scrubs things for searching.
+ """
+
+ require FastSanitize.Sanitizer.Meta
+ alias FastSanitize.Sanitizer.Meta
+
+ # Explicitly remove mentions
+ def scrub({:a, attrs, children}) do
+ if(Enum.any?(attrs, fn {att, val} -> att == "class" and String.contains?(val, "mention") end),
+ do: nil,
+ # Strip the tag itself, leave only children (text, presumably)
+ else: children
+ )
+ end
+
+ Meta.strip_comments()
+ Meta.strip_everything_not_covered()
+end
diff --git a/test/pleroma/activity/search_test.exs b/test/pleroma/search/database_search_test.ex
index 3b5fd2c3c..c123d0b84 100644
--- a/test/pleroma/activity/search_test.exs
+++ b/test/pleroma/search/database_search_test.ex
@@ -2,8 +2,8 @@
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
-defmodule Pleroma.Activity.SearchTest do
- alias Pleroma.Activity.Search
+defmodule Pleroma.Search.DatabaseSearchTest do
+ alias Pleroma.Search.DatabaseSearch
alias Pleroma.Web.CommonAPI
import Pleroma.Factory
@@ -13,7 +13,7 @@ defmodule Pleroma.Activity.SearchTest do
user = insert(:user)
{:ok, post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"})
- [result] = Search.search(nil, "wednesday")
+ [result] = DatabaseSearch.search(nil, "wednesday")
assert result.id == post.id
end
@@ -45,7 +45,7 @@ defmodule Pleroma.Activity.SearchTest do
{:ok, _post2} = CommonAPI.post(user, %{status: "it's wednesday my bros"})
# plainto doesn't understand complex queries
- assert [result] = Search.search(nil, "wednesday -dudes")
+ assert [result] = DatabaseSearch.search(nil, "wednesday -dudes")
assert result.id == post.id
end
@@ -55,7 +55,7 @@ defmodule Pleroma.Activity.SearchTest do
{:ok, _post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"})
{:ok, other_post} = CommonAPI.post(user, %{status: "it's wednesday my bros"})
- assert [result] = Search.search(nil, "wednesday -dudes")
+ assert [result] = DatabaseSearch.search(nil, "wednesday -dudes")
assert result.id == other_post.id
end
diff --git a/test/pleroma/search/meilisearch_test.exs b/test/pleroma/search/meilisearch_test.exs
new file mode 100644
index 000000000..04a2d75d9
--- /dev/null
+++ b/test/pleroma/search/meilisearch_test.exs
@@ -0,0 +1,129 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Search.MeilisearchTest do
+ require Pleroma.Constants
+
+ use Pleroma.DataCase
+ use Oban.Testing, repo: Pleroma.Repo
+
+ import Pleroma.Factory
+ import Tesla.Mock
+ import Mock
+
+ alias Pleroma.Search.Meilisearch
+ alias Pleroma.Web.CommonAPI
+ alias Pleroma.Workers.SearchIndexingWorker
+
+ setup_all do
+ Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
+ :ok
+ end
+
+ describe "meilisearch" do
+ setup do: clear_config([Pleroma.Search, :module], Meilisearch)
+
+ setup_with_mocks(
+ [
+ {Meilisearch, [:passthrough],
+ [
+ add_to_index: fn a -> passthrough([a]) end,
+ remove_from_index: fn a -> passthrough([a]) end,
+ meili_put: fn u, a -> passthrough([u, a]) end
+ ]}
+ ],
+ context,
+ do: {:ok, context}
+ )
+
+ test "indexes a local post on creation" do
+ user = insert(:user)
+
+ mock_global(fn
+ %{method: :put, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} ->
+ assert match?(
+ [%{"content" => "guys i just don&#39;t wanna leave the swamp"}],
+ Jason.decode!(body)
+ )
+
+ json(%{updateId: 1})
+ end)
+
+ {:ok, activity} =
+ CommonAPI.post(user, %{
+ status: "guys i just don't wanna leave the swamp",
+ visibility: "public"
+ })
+
+ args = %{"op" => "add_to_index", "activity" => activity.id}
+
+ assert_enqueued(
+ worker: SearchIndexingWorker,
+ args: args
+ )
+
+ assert :ok = perform_job(SearchIndexingWorker, args)
+
+ assert_called(Meilisearch.add_to_index(activity))
+ end
+
+ test "doesn't index posts that are not public" do
+ user = insert(:user)
+
+ Enum.each(["private", "direct"], fn visibility ->
+ {:ok, activity} =
+ CommonAPI.post(user, %{
+ status: "guys i just don't wanna leave the swamp",
+ visibility: visibility
+ })
+
+ args = %{"op" => "add_to_index", "activity" => activity.id}
+
+ assert_enqueued(worker: SearchIndexingWorker, args: args)
+ assert :ok = perform_job(SearchIndexingWorker, args)
+
+ assert_not_called(Meilisearch.meili_put(:_))
+ end)
+
+ history = call_history(Meilisearch)
+ assert Enum.count(history) == 2
+ end
+
+ test "deletes posts from index when deleted locally" do
+ user = insert(:user)
+
+ mock_global(fn
+ %{method: :put, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} ->
+ assert match?(
+ [%{"content" => "guys i just don&#39;t wanna leave the swamp"}],
+ Jason.decode!(body)
+ )
+
+ json(%{updateId: 1})
+
+ %{method: :delete, url: "http://127.0.0.1:7700/indexes/objects/documents/" <> id} ->
+ assert String.length(id) > 1
+ json(%{updateId: 2})
+ end)
+
+ {:ok, activity} =
+ CommonAPI.post(user, %{
+ status: "guys i just don't wanna leave the swamp",
+ visibility: "public"
+ })
+
+ args = %{"op" => "add_to_index", "activity" => activity.id}
+ assert_enqueued(worker: SearchIndexingWorker, args: args)
+ assert :ok = perform_job(SearchIndexingWorker, args)
+
+ {:ok, _} = CommonAPI.delete(activity.id, user)
+
+ delete_args = %{"op" => "remove_from_index", "object" => activity.object.id}
+ assert_enqueued(worker: SearchIndexingWorker, args: delete_args)
+ assert :ok = perform_job(SearchIndexingWorker, delete_args)
+
+ assert_called(Meilisearch.remove_from_index(:_))
+ end
+ end
+end