summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/mix/tasks/pleroma/search/meilisearch.ex145
-rw-r--r--lib/pleroma/activity.ex2
-rw-r--r--lib/pleroma/application.ex6
-rw-r--r--lib/pleroma/search.ex17
-rw-r--r--lib/pleroma/search/database_search.ex (renamed from lib/pleroma/activity/search.ex)33
-rw-r--r--lib/pleroma/search/meilisearch.ex176
-rw-r--r--lib/pleroma/search/search_backend.ex24
-rw-r--r--lib/pleroma/web/activity_pub/activity_pub.ex3
-rw-r--r--lib/pleroma/web/activity_pub/side_effects.ex9
-rw-r--r--lib/pleroma/web/mastodon_api/controllers/search_controller.ex3
-rw-r--r--lib/pleroma/workers/search_indexing_worker.ex21
11 files changed, 417 insertions, 22 deletions
diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex
new file mode 100644
index 000000000..8379a0c25
--- /dev/null
+++ b/lib/mix/tasks/pleroma/search/meilisearch.ex
@@ -0,0 +1,145 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
+ require Pleroma.Constants
+
+ import Mix.Pleroma
+ import Ecto.Query
+
+ import Pleroma.Search.Meilisearch,
+ only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete: 1]
+
+ def run(["index"]) do
+ start_pleroma()
+ Pleroma.HTML.compile_scrubbers()
+
+ meili_version =
+ (
+ {:ok, result} = meili_get("/version")
+
+ result["pkgVersion"]
+ )
+
+ # The ranking rule syntax was changed but nothing about that is mentioned in the changelog
+ if not Version.match?(meili_version, ">= 0.25.0") do
+ raise "Meilisearch <0.24.0 not supported"
+ end
+
+ {:ok, _} =
+ meili_post(
+ "/indexes/objects/settings/ranking-rules",
+ [
+ "published:desc",
+ "words",
+ "exactness",
+ "proximity",
+ "typo",
+ "attribute",
+ "sort"
+ ]
+ )
+
+ {:ok, _} =
+ meili_post(
+ "/indexes/objects/settings/searchable-attributes",
+ [
+ "content"
+ ]
+ )
+
+ IO.puts("Created indices. Starting to insert posts.")
+
+ chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size])
+
+ Pleroma.Repo.transaction(
+ fn ->
+ query =
+ from(Pleroma.Object,
+ # Only index public and unlisted posts which are notes and have some text
+ where:
+ fragment("data->>'type' = 'Note'") and
+ (fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) or
+ fragment("data->'cc' \\? ?", ^Pleroma.Constants.as_public())),
+ order_by: [desc: fragment("data->'published'")]
+ )
+
+ count = query |> Pleroma.Repo.aggregate(:count, :data)
+ IO.puts("Entries to index: #{count}")
+
+ Pleroma.Repo.stream(
+ query,
+ timeout: :infinity
+ )
+ |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
+ |> Stream.filter(fn o -> not is_nil(o) end)
+ |> Stream.chunk_every(chunk_size)
+ |> Stream.transform(0, fn objects, acc ->
+ new_acc = acc + Enum.count(objects)
+
+ # Reset to the beginning of the line and rewrite it
+ IO.write("\r")
+ IO.write("Indexed #{new_acc} entries")
+
+ {[objects], new_acc}
+ end)
+ |> Stream.each(fn objects ->
+ result =
+ meili_put(
+ "/indexes/objects/documents",
+ objects
+ )
+
+ with {:ok, res} <- result do
+ if not Map.has_key?(res, "uid") do
+ IO.puts("\nFailed to index: #{inspect(result)}")
+ end
+ else
+ e -> IO.puts("\nFailed to index due to network error: #{inspect(e)}")
+ end
+ end)
+ |> Stream.run()
+ end,
+ timeout: :infinity
+ )
+
+ IO.write("\n")
+ end
+
+ def run(["clear"]) do
+ start_pleroma()
+
+ meili_delete("/indexes/objects/documents")
+ end
+
+ def run(["show-keys", master_key]) do
+ start_pleroma()
+
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+ {:ok, result} =
+ Pleroma.HTTP.get(
+ Path.join(endpoint, "/keys"),
+ [{"Authorization", "Bearer #{master_key}"}]
+ )
+
+ decoded = Jason.decode!(result.body)
+
+ if decoded["results"] do
+ Enum.each(decoded["results"], fn %{"description" => desc, "key" => key} ->
+ IO.puts("#{desc}: #{key}")
+ end)
+ else
+ IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}")
+ end
+ end
+
+ def run(["stats"]) do
+ start_pleroma()
+
+ {:ok, result} = meili_get("/indexes/objects/stats")
+ IO.puts("Number of entries: #{result["numberOfDocuments"]}")
+ IO.puts("Indexing? #{result["isIndexing"]}")
+ end
+end
diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex
index 3556aaf9e..8a512dc57 100644
--- a/lib/pleroma/activity.ex
+++ b/lib/pleroma/activity.ex
@@ -368,7 +368,7 @@ defmodule Pleroma.Activity do
)
end
- defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search
+ defdelegate search(user, query, options \\ []), to: Pleroma.Search.DatabaseSearch
def direct_conversation_id(activity, for_user) do
alias Pleroma.Conversation.Participation
diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex
index e68a3c57e..7bbc132f1 100644
--- a/lib/pleroma/application.ex
+++ b/lib/pleroma/application.ex
@@ -322,7 +322,11 @@ defmodule Pleroma.Application do
def limiters_setup do
config = Config.get(ConcurrentLimiter, [])
- [Pleroma.Web.RichMedia.Helpers, Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy]
+ [
+ Pleroma.Web.RichMedia.Helpers,
+ Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy,
+ Pleroma.Search
+ ]
|> Enum.each(fn module ->
mod_config = Keyword.get(config, module, [])
diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex
new file mode 100644
index 000000000..3b266e59b
--- /dev/null
+++ b/lib/pleroma/search.ex
@@ -0,0 +1,17 @@
+defmodule Pleroma.Search do
+ alias Pleroma.Workers.SearchIndexingWorker
+
+ def add_to_index(%Pleroma.Activity{id: activity_id}) do
+ SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity_id})
+ end
+
+ def remove_from_index(%Pleroma.Object{id: object_id}) do
+ SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object_id})
+ end
+
+ def search(query, options) do
+ search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity)
+
+ search_module.search(options[:for_user], query, options)
+ end
+end
diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/search/database_search.ex
index 0b9b24aa4..9a340abf1 100644
--- a/lib/pleroma/activity/search.ex
+++ b/lib/pleroma/search/database_search.ex
@@ -1,8 +1,8 @@
# Pleroma: A lightweight social networking server
-# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
-defmodule Pleroma.Activity.Search do
+defmodule Pleroma.Search.DatabaseSearch do
alias Pleroma.Activity
alias Pleroma.Object.Fetcher
alias Pleroma.Pagination
@@ -13,6 +13,9 @@ defmodule Pleroma.Activity.Search do
import Ecto.Query
+ @behaviour Pleroma.Search.SearchBackend
+
+ @impl true
def search(user, search_query, options \\ []) do
index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin
limit = Enum.min([Keyword.get(options, :limit), 40])
@@ -30,7 +33,7 @@ defmodule Pleroma.Activity.Search do
Activity
|> Activity.with_preloaded_object()
|> Activity.restrict_deactivated_users()
- |> restrict_public(user)
+ |> restrict_public()
|> query_with(index_type, search_query, search_function)
|> maybe_restrict_local(user)
|> maybe_restrict_author(author)
@@ -45,6 +48,12 @@ defmodule Pleroma.Activity.Search do
end
end
+ @impl true
+ def add_to_index(_activity), do: nil
+
+ @impl true
+ def remove_from_index(_object), do: nil
+
def maybe_restrict_author(query, %User{} = author) do
Activity.Queries.by_author(query, author)
end
@@ -57,19 +66,7 @@ defmodule Pleroma.Activity.Search do
def maybe_restrict_blocked(query, _), do: query
- defp restrict_public(q, user) when not is_nil(user) do
- intended_recipients = [
- Pleroma.Constants.as_public(),
- Pleroma.Web.ActivityPub.Utils.as_local_public()
- ]
-
- from([a, o] in q,
- where: fragment("?->>'type' = 'Create'", a.data),
- where: fragment("? && ?", ^intended_recipients, a.recipients)
- )
- end
-
- defp restrict_public(q, _user) do
+ def restrict_public(q) do
from([a, o] in q,
where: fragment("?->>'type' = 'Create'", a.data),
where: ^Pleroma.Constants.as_public() in a.recipients
@@ -136,7 +133,7 @@ defmodule Pleroma.Activity.Search do
)
end
- defp maybe_restrict_local(q, user) do
+ def maybe_restrict_local(q, user) do
limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated)
case {limit, user} do
@@ -149,7 +146,7 @@ defmodule Pleroma.Activity.Search do
defp restrict_local(q), do: where(q, local: true)
- defp maybe_fetch(activities, user, search_query) do
+ def maybe_fetch(activities, user, search_query) do
with true <- Regex.match?(~r/https?:/, search_query),
{:ok, object} <- Fetcher.fetch_object_from_id(search_query),
%Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]),
diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex
new file mode 100644
index 000000000..7af7f460a
--- /dev/null
+++ b/lib/pleroma/search/meilisearch.ex
@@ -0,0 +1,176 @@
+defmodule Pleroma.Search.Meilisearch do
+ require Logger
+ require Pleroma.Constants
+
+ alias Pleroma.Activity
+
+ import Pleroma.Search.DatabaseSearch
+ import Ecto.Query
+
+ @behaviour Pleroma.Search.SearchBackend
+
+ defp meili_headers do
+ private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key])
+
+ [{"Content-Type", "application/json"}] ++
+ if is_nil(private_key), do: [], else: [{"Authorization", "Bearer #{private_key}"}]
+ end
+
+ def meili_get(path) do
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+ result =
+ Pleroma.HTTP.get(
+ Path.join(endpoint, path),
+ meili_headers()
+ )
+
+ with {:ok, res} <- result do
+ {:ok, Jason.decode!(res.body)}
+ end
+ end
+
+ def meili_post(path, params) do
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+ result =
+ Pleroma.HTTP.post(
+ Path.join(endpoint, path),
+ Jason.encode!(params),
+ meili_headers()
+ )
+
+ with {:ok, res} <- result do
+ {:ok, Jason.decode!(res.body)}
+ end
+ end
+
+ def meili_put(path, params) do
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+ result =
+ Pleroma.HTTP.request(
+ :put,
+ Path.join(endpoint, path),
+ Jason.encode!(params),
+ meili_headers(),
+ []
+ )
+
+ with {:ok, res} <- result do
+ {:ok, Jason.decode!(res.body)}
+ end
+ end
+
+ def meili_delete(path) do
+ endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+ Pleroma.HTTP.request(
+ :delete,
+ Path.join(endpoint, path),
+ "",
+ meili_headers(),
+ []
+ )
+ end
+
+ @impl true
+ def search(user, query, options \\ []) do
+ limit = Enum.min([Keyword.get(options, :limit), 40])
+ offset = Keyword.get(options, :offset, 0)
+ author = Keyword.get(options, :author)
+
+ res =
+ meili_post(
+ "/indexes/objects/search",
+ %{q: query, offset: offset, limit: limit}
+ )
+
+ with {:ok, result} <- res do
+ hits = result["hits"] |> Enum.map(& &1["ap"])
+
+ try do
+ hits
+ |> Activity.create_by_object_ap_id()
+ |> Activity.with_preloaded_object()
+ |> Activity.restrict_deactivated_users()
+ |> maybe_restrict_local(user)
+ |> maybe_restrict_author(author)
+ |> maybe_restrict_blocked(user)
+ |> maybe_fetch(user, query)
+ |> order_by([object: obj], desc: obj.data["published"])
+ |> Pleroma.Repo.all()
+ rescue
+ _ -> maybe_fetch([], user, query)
+ end
+ end
+ end
+
+ def object_to_search_data(object) do
+ # Only index public or unlisted Notes
+ if not is_nil(object) and object.data["type"] == "Note" and
+ not is_nil(object.data["content"]) and
+ (Pleroma.Constants.as_public() in object.data["to"] or
+ Pleroma.Constants.as_public() in object.data["cc"]) and
+ object.data["content"] not in ["", "."] do
+ data = object.data
+
+ content_str =
+ case data["content"] do
+ [nil | rest] -> to_string(rest)
+ str -> str
+ end
+
+ content =
+ with {:ok, scrubbed} <-
+ FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
+ trimmed <- String.trim(scrubbed) do
+ trimmed
+ end
+
+ # Make sure we have a non-empty string
+ if content != "" do
+ {:ok, published, _} = DateTime.from_iso8601(data["published"])
+
+ %{
+ id: object.id,
+ content: content,
+ ap: data["id"],
+ published: published |> DateTime.to_unix()
+ }
+ end
+ end
+ end
+
+ @impl true
+ def add_to_index(activity) do
+ maybe_search_data = object_to_search_data(activity.object)
+
+ if activity.data["type"] == "Create" and maybe_search_data do
+ result =
+ meili_put(
+ "/indexes/objects/documents",
+ [maybe_search_data]
+ )
+
+ with {:ok, res} <- result,
+ true <- Map.has_key?(res, "updateId") do
+ # Added successfully
+ :ok
+ else
+ _ ->
+ # There was an error, report it
+ Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}")
+ {:error, result}
+ end
+ else
+ # The post isn't something we can search, that's ok
+ :ok
+ end
+ end
+
+ @impl true
+ def remove_from_index(object) do
+ meili_delete("/indexes/objects/documents/#{object.id}")
+ end
+end
diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex
new file mode 100644
index 000000000..a42e2f5f6
--- /dev/null
+++ b/lib/pleroma/search/search_backend.ex
@@ -0,0 +1,24 @@
+defmodule Pleroma.Search.SearchBackend do
+ @doc """
+ Search statuses with a query, restricting to only those the user should have access to.
+ """
+ @callback search(user :: Pleroma.User.t(), query :: String.t(), options :: [any()]) :: [
+ Pleroma.Activity.t()
+ ]
+
+ @doc """
+ Add the object associated with the activity to the search index.
+
+ The whole activity is passed, to allow filtering on things such as scope.
+ """
+ @callback add_to_index(activity :: Pleroma.Activity.t()) :: :ok | {:error, any()}
+
+ @doc """
+ Remove the object from the index.
+
+ Just the object, as opposed to the whole activity, is passed, since the object
+ is what contains the actual content and there is no need for fitlering when removing
+ from index.
+ """
+ @callback remove_from_index(object :: Pleroma.Object.t()) :: {:ok, any()} | {:error, any()}
+end
diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex
index 3979d418e..54a77a228 100644
--- a/lib/pleroma/web/activity_pub/activity_pub.ex
+++ b/lib/pleroma/web/activity_pub/activity_pub.ex
@@ -140,6 +140,9 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)
end)
+ # Add local posts to search index
+ if local, do: Pleroma.Search.add_to_index(activity)
+
{:ok, activity}
else
%Activity{} = activity ->
diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex
index 098c177c7..7a28a7c97 100644
--- a/lib/pleroma/web/activity_pub/side_effects.ex
+++ b/lib/pleroma/web/activity_pub/side_effects.ex
@@ -197,6 +197,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
# - Increase replies count
# - Set up ActivityExpiration
# - Set up notifications
+ # - Index incoming posts for search (if needed)
@impl true
def handle(%{data: %{"type" => "Create"}} = activity, meta) do
with {:ok, object, meta} <- handle_object_creation(meta[:object_data], activity, meta),
@@ -226,6 +227,8 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)
end)
+ Pleroma.Search.add_to_index(Map.put(activity, :object, object))
+
meta =
meta
|> add_notifications(notifications)
@@ -285,6 +288,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
# - Reduce the user note count
# - Reduce the reply count
# - Stream out the activity
+ # - Removes posts from search index (if needed)
@impl true
def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, meta) do
deleted_object =
@@ -323,6 +327,11 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
end
if result == :ok do
+ # Only remove from index when deleting actual objects, not users or anything else
+ with %Pleroma.Object{} <- deleted_object do
+ Pleroma.Search.remove_from_index(deleted_object)
+ end
+
{:ok, object, meta}
else
{:error, result}
diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex
index 5e6e04734..e4acba226 100644
--- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex
+++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex
@@ -5,7 +5,6 @@
defmodule Pleroma.Web.MastodonAPI.SearchController do
use Pleroma.Web, :controller
- alias Pleroma.Activity
alias Pleroma.Repo
alias Pleroma.User
alias Pleroma.Web.ControllerHelper
@@ -100,7 +99,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do
end
defp resource_search(_, "statuses", query, options) do
- statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end)
+ statuses = with_fallback(fn -> Pleroma.Search.search(query, options) end)
StatusView.render("index.json",
activities: statuses,
diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex
new file mode 100644
index 000000000..43b7bad1e
--- /dev/null
+++ b/lib/pleroma/workers/search_indexing_worker.ex
@@ -0,0 +1,21 @@
+defmodule Pleroma.Workers.SearchIndexingWorker do
+ use Pleroma.Workers.WorkerHelper, queue: "search_indexing"
+
+ @impl Oban.Worker
+
+ def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do
+ activity = Pleroma.Activity.get_by_id_with_object(activity_id)
+
+ search_module = Pleroma.Config.get([Pleroma.Search, :module])
+
+ search_module.add_to_index(activity)
+ end
+
+ def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do
+ object = Pleroma.Object.get_by_id(object_id)
+
+ search_module = Pleroma.Config.get([Pleroma.Search, :module])
+
+ search_module.remove_from_index(object)
+ end
+end