diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/mix/tasks/pleroma/search/meilisearch.ex | 145 | ||||
| -rw-r--r-- | lib/pleroma/activity.ex | 2 | ||||
| -rw-r--r-- | lib/pleroma/application.ex | 6 | ||||
| -rw-r--r-- | lib/pleroma/search.ex | 17 | ||||
| -rw-r--r-- | lib/pleroma/search/database_search.ex (renamed from lib/pleroma/activity/search.ex) | 33 | ||||
| -rw-r--r-- | lib/pleroma/search/meilisearch.ex | 176 | ||||
| -rw-r--r-- | lib/pleroma/search/search_backend.ex | 24 | ||||
| -rw-r--r-- | lib/pleroma/web/activity_pub/activity_pub.ex | 3 | ||||
| -rw-r--r-- | lib/pleroma/web/activity_pub/side_effects.ex | 9 | ||||
| -rw-r--r-- | lib/pleroma/web/mastodon_api/controllers/search_controller.ex | 3 | ||||
| -rw-r--r-- | lib/pleroma/workers/search_indexing_worker.ex | 21 | 
11 files changed, 417 insertions, 22 deletions
diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex new file mode 100644 index 000000000..8379a0c25 --- /dev/null +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -0,0 +1,145 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Mix.Tasks.Pleroma.Search.Meilisearch do +  require Pleroma.Constants + +  import Mix.Pleroma +  import Ecto.Query + +  import Pleroma.Search.Meilisearch, +    only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete: 1] + +  def run(["index"]) do +    start_pleroma() +    Pleroma.HTML.compile_scrubbers() + +    meili_version = +      ( +        {:ok, result} = meili_get("/version") + +        result["pkgVersion"] +      ) + +    # The ranking rule syntax was changed but nothing about that is mentioned in the changelog +    if not Version.match?(meili_version, ">= 0.25.0") do +      raise "Meilisearch <0.24.0 not supported" +    end + +    {:ok, _} = +      meili_post( +        "/indexes/objects/settings/ranking-rules", +        [ +          "published:desc", +          "words", +          "exactness", +          "proximity", +          "typo", +          "attribute", +          "sort" +        ] +      ) + +    {:ok, _} = +      meili_post( +        "/indexes/objects/settings/searchable-attributes", +        [ +          "content" +        ] +      ) + +    IO.puts("Created indices. Starting to insert posts.") + +    chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size]) + +    Pleroma.Repo.transaction( +      fn -> +        query = +          from(Pleroma.Object, +            # Only index public and unlisted posts which are notes and have some text +            where: +              fragment("data->>'type' = 'Note'") and +                (fragment("data->'to' \\? 
?", ^Pleroma.Constants.as_public()) or +                   fragment("data->'cc' \\? ?", ^Pleroma.Constants.as_public())), +            order_by: [desc: fragment("data->'published'")] +          ) + +        count = query |> Pleroma.Repo.aggregate(:count, :data) +        IO.puts("Entries to index: #{count}") + +        Pleroma.Repo.stream( +          query, +          timeout: :infinity +        ) +        |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1) +        |> Stream.filter(fn o -> not is_nil(o) end) +        |> Stream.chunk_every(chunk_size) +        |> Stream.transform(0, fn objects, acc -> +          new_acc = acc + Enum.count(objects) + +          # Reset to the beginning of the line and rewrite it +          IO.write("\r") +          IO.write("Indexed #{new_acc} entries") + +          {[objects], new_acc} +        end) +        |> Stream.each(fn objects -> +          result = +            meili_put( +              "/indexes/objects/documents", +              objects +            ) + +          with {:ok, res} <- result do +            if not Map.has_key?(res, "uid") do +              IO.puts("\nFailed to index: #{inspect(result)}") +            end +          else +            e -> IO.puts("\nFailed to index due to network error: #{inspect(e)}") +          end +        end) +        |> Stream.run() +      end, +      timeout: :infinity +    ) + +    IO.write("\n") +  end + +  def run(["clear"]) do +    start_pleroma() + +    meili_delete("/indexes/objects/documents") +  end + +  def run(["show-keys", master_key]) do +    start_pleroma() + +    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + +    {:ok, result} = +      Pleroma.HTTP.get( +        Path.join(endpoint, "/keys"), +        [{"Authorization", "Bearer #{master_key}"}] +      ) + +    decoded = Jason.decode!(result.body) + +    if decoded["results"] do +      Enum.each(decoded["results"], fn %{"description" => desc, "key" => key} -> +        IO.puts("#{desc}: 
#{key}") +      end) +    else +      IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}") +    end +  end + +  def run(["stats"]) do +    start_pleroma() + +    {:ok, result} = meili_get("/indexes/objects/stats") +    IO.puts("Number of entries: #{result["numberOfDocuments"]}") +    IO.puts("Indexing? #{result["isIndexing"]}") +  end +end diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index 3556aaf9e..8a512dc57 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -368,7 +368,7 @@ defmodule Pleroma.Activity do      )    end -  defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search +  defdelegate search(user, query, options \\ []), to: Pleroma.Search.DatabaseSearch    def direct_conversation_id(activity, for_user) do      alias Pleroma.Conversation.Participation diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index e68a3c57e..7bbc132f1 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -322,7 +322,11 @@ defmodule Pleroma.Application do    def limiters_setup do      config = Config.get(ConcurrentLimiter, []) -    [Pleroma.Web.RichMedia.Helpers, Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy] +    [ +      Pleroma.Web.RichMedia.Helpers, +      Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, +      Pleroma.Search +    ]      |> Enum.each(fn module ->        mod_config = Keyword.get(config, module, []) diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex new file mode 100644 index 000000000..3b266e59b --- /dev/null +++ b/lib/pleroma/search.ex @@ -0,0 +1,17 @@ +defmodule Pleroma.Search do +  alias Pleroma.Workers.SearchIndexingWorker + +  def add_to_index(%Pleroma.Activity{id: activity_id}) do +    SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity_id}) +  end + +  def remove_from_index(%Pleroma.Object{id: object_id}) do +    SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object_id}) +  
end + +  def search(query, options) do +    search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + +    search_module.search(options[:for_user], query, options) +  end +end diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/search/database_search.ex index 0b9b24aa4..9a340abf1 100644 --- a/lib/pleroma/activity/search.ex +++ b/lib/pleroma/search/database_search.ex @@ -1,8 +1,8 @@  # Pleroma: A lightweight social networking server -# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/> +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>  # SPDX-License-Identifier: AGPL-3.0-only -defmodule Pleroma.Activity.Search do +defmodule Pleroma.Search.DatabaseSearch do    alias Pleroma.Activity    alias Pleroma.Object.Fetcher    alias Pleroma.Pagination @@ -13,6 +13,9 @@ defmodule Pleroma.Activity.Search do    import Ecto.Query +  @behaviour Pleroma.Search.SearchBackend + +  @impl true    def search(user, search_query, options \\ []) do      index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin      limit = Enum.min([Keyword.get(options, :limit), 40]) @@ -30,7 +33,7 @@ defmodule Pleroma.Activity.Search do        Activity        |> Activity.with_preloaded_object()        |> Activity.restrict_deactivated_users() -      |> restrict_public(user) +      |> restrict_public()        |> query_with(index_type, search_query, search_function)        |> maybe_restrict_local(user)        |> maybe_restrict_author(author) @@ -45,6 +48,12 @@ defmodule Pleroma.Activity.Search do      end    end +  @impl true +  def add_to_index(_activity), do: nil + +  @impl true +  def remove_from_index(_object), do: nil +    def maybe_restrict_author(query, %User{} = author) do      Activity.Queries.by_author(query, author)    end @@ -57,19 +66,7 @@ defmodule Pleroma.Activity.Search do    def maybe_restrict_blocked(query, _), do: query -  defp restrict_public(q, user) when not is_nil(user) do -    intended_recipients = [ 
-      Pleroma.Constants.as_public(), -      Pleroma.Web.ActivityPub.Utils.as_local_public() -    ] - -    from([a, o] in q, -      where: fragment("?->>'type' = 'Create'", a.data), -      where: fragment("? && ?", ^intended_recipients, a.recipients) -    ) -  end - -  defp restrict_public(q, _user) do +  def restrict_public(q) do      from([a, o] in q,        where: fragment("?->>'type' = 'Create'", a.data),        where: ^Pleroma.Constants.as_public() in a.recipients @@ -136,7 +133,7 @@ defmodule Pleroma.Activity.Search do      )    end -  defp maybe_restrict_local(q, user) do +  def maybe_restrict_local(q, user) do      limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated)      case {limit, user} do @@ -149,7 +146,7 @@ defmodule Pleroma.Activity.Search do    defp restrict_local(q), do: where(q, local: true) -  defp maybe_fetch(activities, user, search_query) do +  def maybe_fetch(activities, user, search_query) do      with true <- Regex.match?(~r/https?:/, search_query),           {:ok, object} <- Fetcher.fetch_object_from_id(search_query),           %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex new file mode 100644 index 000000000..7af7f460a --- /dev/null +++ b/lib/pleroma/search/meilisearch.ex @@ -0,0 +1,176 @@ +defmodule Pleroma.Search.Meilisearch do +  require Logger +  require Pleroma.Constants + +  alias Pleroma.Activity + +  import Pleroma.Search.DatabaseSearch +  import Ecto.Query + +  @behaviour Pleroma.Search.SearchBackend + +  defp meili_headers do +    private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) + +    [{"Content-Type", "application/json"}] ++ +      if is_nil(private_key), do: [], else: [{"Authorization", "Bearer #{private_key}"}] +  end + +  def meili_get(path) do +    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + +    result = +      
Pleroma.HTTP.get( +        Path.join(endpoint, path), +        meili_headers() +      ) + +    with {:ok, res} <- result do +      {:ok, Jason.decode!(res.body)} +    end +  end + +  def meili_post(path, params) do +    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + +    result = +      Pleroma.HTTP.post( +        Path.join(endpoint, path), +        Jason.encode!(params), +        meili_headers() +      ) + +    with {:ok, res} <- result do +      {:ok, Jason.decode!(res.body)} +    end +  end + +  def meili_put(path, params) do +    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + +    result = +      Pleroma.HTTP.request( +        :put, +        Path.join(endpoint, path), +        Jason.encode!(params), +        meili_headers(), +        [] +      ) + +    with {:ok, res} <- result do +      {:ok, Jason.decode!(res.body)} +    end +  end + +  def meili_delete(path) do +    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + +    Pleroma.HTTP.request( +      :delete, +      Path.join(endpoint, path), +      "", +      meili_headers(), +      [] +    ) +  end + +  @impl true +  def search(user, query, options \\ []) do +    limit = Enum.min([Keyword.get(options, :limit), 40]) +    offset = Keyword.get(options, :offset, 0) +    author = Keyword.get(options, :author) + +    res = +      meili_post( +        "/indexes/objects/search", +        %{q: query, offset: offset, limit: limit} +      ) + +    with {:ok, result} <- res do +      hits = result["hits"] |> Enum.map(& &1["ap"]) + +      try do +        hits +        |> Activity.create_by_object_ap_id() +        |> Activity.with_preloaded_object() +        |> Activity.restrict_deactivated_users() +        |> maybe_restrict_local(user) +        |> maybe_restrict_author(author) +        |> maybe_restrict_blocked(user) +        |> maybe_fetch(user, query) +        |> order_by([object: obj], desc: obj.data["published"]) +        |> Pleroma.Repo.all() +      rescue 
+        _ -> maybe_fetch([], user, query) +      end +    end +  end + +  def object_to_search_data(object) do +    # Only index public or unlisted Notes +    if not is_nil(object) and object.data["type"] == "Note" and +         not is_nil(object.data["content"]) and +         (Pleroma.Constants.as_public() in object.data["to"] or +            Pleroma.Constants.as_public() in object.data["cc"]) and +         object.data["content"] not in ["", "."] do +      data = object.data + +      content_str = +        case data["content"] do +          [nil | rest] -> to_string(rest) +          str -> str +        end + +      content = +        with {:ok, scrubbed} <- +               FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing), +             trimmed <- String.trim(scrubbed) do +          trimmed +        end + +      # Make sure we have a non-empty string +      if content != "" do +        {:ok, published, _} = DateTime.from_iso8601(data["published"]) + +        %{ +          id: object.id, +          content: content, +          ap: data["id"], +          published: published |> DateTime.to_unix() +        } +      end +    end +  end + +  @impl true +  def add_to_index(activity) do +    maybe_search_data = object_to_search_data(activity.object) + +    if activity.data["type"] == "Create" and maybe_search_data do +      result = +        meili_put( +          "/indexes/objects/documents", +          [maybe_search_data] +        ) + +      with {:ok, res} <- result, +           true <- Map.has_key?(res, "updateId") do +        # Added successfully +        :ok +      else +        _ -> +          # There was an error, report it +          Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") +          {:error, result} +      end +    else +      # The post isn't something we can search, that's ok +      :ok +    end +  end + +  @impl true +  def remove_from_index(object) do +    
meili_delete("/indexes/objects/documents/#{object.id}") +  end +end diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex new file mode 100644 index 000000000..a42e2f5f6 --- /dev/null +++ b/lib/pleroma/search/search_backend.ex @@ -0,0 +1,24 @@ +defmodule Pleroma.Search.SearchBackend do +  @doc """ +  Search statuses with a query, restricting to only those the user should have access to. +  """ +  @callback search(user :: Pleroma.User.t(), query :: String.t(), options :: [any()]) :: [ +              Pleroma.Activity.t() +            ] + +  @doc """ +  Add the object associated with the activity to the search index. + +  The whole activity is passed, to allow filtering on things such as scope. +  """ +  @callback add_to_index(activity :: Pleroma.Activity.t()) :: :ok | {:error, any()} + +  @doc """ +  Remove the object from the index. + +  Just the object, as opposed to the whole activity, is passed, since the object +  is what contains the actual content and there is no need for filtering when removing +  from index. 
+  """ +  @callback remove_from_index(object :: Pleroma.Object.t()) :: {:ok, any()} | {:error, any()} +end diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 3979d418e..54a77a228 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -140,6 +140,9 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do          Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)        end) +      # Add local posts to search index +      if local, do: Pleroma.Search.add_to_index(activity) +        {:ok, activity}      else        %Activity{} = activity -> diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 098c177c7..7a28a7c97 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -197,6 +197,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do    # - Increase replies count    # - Set up ActivityExpiration    # - Set up notifications +  # - Index incoming posts for search (if needed)    @impl true    def handle(%{data: %{"type" => "Create"}} = activity, meta) do      with {:ok, object, meta} <- handle_object_creation(meta[:object_data], activity, meta), @@ -226,6 +227,8 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do          Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)        end) +      Pleroma.Search.add_to_index(Map.put(activity, :object, object)) +        meta =          meta          |> add_notifications(notifications) @@ -285,6 +288,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do    # - Reduce the user note count    # - Reduce the reply count    # - Stream out the activity +  # - Removes posts from search index (if needed)    @impl true    def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, meta) do      deleted_object = @@ -323,6 +327,11 
@@ defmodule Pleroma.Web.ActivityPub.SideEffects do        end      if result == :ok do +      # Only remove from index when deleting actual objects, not users or anything else +      with %Pleroma.Object{} <- deleted_object do +        Pleroma.Search.remove_from_index(deleted_object) +      end +        {:ok, object, meta}      else        {:error, result} diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 5e6e04734..e4acba226 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -5,7 +5,6 @@  defmodule Pleroma.Web.MastodonAPI.SearchController do    use Pleroma.Web, :controller -  alias Pleroma.Activity    alias Pleroma.Repo    alias Pleroma.User    alias Pleroma.Web.ControllerHelper @@ -100,7 +99,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do    end    defp resource_search(_, "statuses", query, options) do -    statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end) +    statuses = with_fallback(fn -> Pleroma.Search.search(query, options) end)      StatusView.render("index.json",        activities: statuses, diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex new file mode 100644 index 000000000..43b7bad1e --- /dev/null +++ b/lib/pleroma/workers/search_indexing_worker.ex @@ -0,0 +1,21 @@ +defmodule Pleroma.Workers.SearchIndexingWorker do +  use Pleroma.Workers.WorkerHelper, queue: "search_indexing" + +  @impl Oban.Worker + +  def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do +    activity = Pleroma.Activity.get_by_id_with_object(activity_id) + +    search_module = Pleroma.Config.get([Pleroma.Search, :module]) + +    search_module.add_to_index(activity) +  end + +  def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do +    
object = Pleroma.Object.get_by_id(object_id) + +    search_module = Pleroma.Config.get([Pleroma.Search, :module]) + +    search_module.remove_from_index(object) +  end +end  | 
