From e154ebbf7933123e91d5b5c6f5070e78eb3e383b Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 15 Aug 2021 21:53:04 +0300 Subject: Initial meilisearch implementation, doesn't delete posts yet --- lib/mix/tasks/pleroma/search/meilisearch.ex | 38 ++++++++++++++ lib/pleroma/activity.ex | 1 + lib/pleroma/activity/search.ex | 4 +- lib/pleroma/application.ex | 6 ++- lib/pleroma/search/meilisearch.ex | 60 ++++++++++++++++++++++ lib/pleroma/web/activity_pub/activity_pub.ex | 6 +++ .../mastodon_api/controllers/search_controller.ex | 5 +- 7 files changed, 115 insertions(+), 5 deletions(-) create mode 100644 lib/mix/tasks/pleroma/search/meilisearch.ex create mode 100644 lib/pleroma/search/meilisearch.ex (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex new file mode 100644 index 000000000..2af8e5853 --- /dev/null +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -0,0 +1,38 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Mix.Tasks.Pleroma.Search.Meilisearch do + import Mix.Pleroma + + import Ecto.Query + + def run(["index"]) do + start_pleroma() + + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + Pleroma.Repo.chunk_stream( + from(Pleroma.Object, + limit: 200, + where: fragment("data->>'type' = 'Note'") and fragment("LENGTH(data->>'source') > 0") + ), + 100, + :batches + ) + |> Stream.map(fn objects -> + Enum.map(objects, fn object -> + data = object.data + %{id: object.id, source: data["source"], ap: data["id"]} + end) + end) + |> Stream.each(fn activities -> + {:ok, _} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/documents", + Jason.encode!(activities) + ) + end) + |> Stream.run() + end +end diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index ebfd4ed45..9563136f9 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -367,6 
+367,7 @@ defmodule Pleroma.Activity do end defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search + def add_to_index(_activity), do: nil def direct_conversation_id(activity, for_user) do alias Pleroma.Conversation.Participation diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/activity/search.ex index 0b9b24aa4..3dce9d355 100644 --- a/lib/pleroma/activity/search.ex +++ b/lib/pleroma/activity/search.ex @@ -136,7 +136,7 @@ defmodule Pleroma.Activity.Search do ) end - defp maybe_restrict_local(q, user) do + def maybe_restrict_local(q, user) do limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) case {limit, user} do @@ -149,7 +149,7 @@ defmodule Pleroma.Activity.Search do defp restrict_local(q), do: where(q, local: true) - defp maybe_fetch(activities, user, search_query) do + def maybe_fetch(activities, user, search_query) do with true <- Regex.match?(~r/https?:/, search_query), {:ok, object} <- Fetcher.fetch_object_from_id(search_query), %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index 1c1db8c10..62d1b8b39 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -321,7 +321,11 @@ defmodule Pleroma.Application do def limiters_setup do config = Config.get(ConcurrentLimiter, []) - [Pleroma.Web.RichMedia.Helpers, Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy] + [ + Pleroma.Web.RichMedia.Helpers, + Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, + Pleroma.Search + ] |> Enum.each(fn module -> mod_config = Keyword.get(config, module, []) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex new file mode 100644 index 000000000..92e0d3429 --- /dev/null +++ b/lib/pleroma/search/meilisearch.ex @@ -0,0 +1,60 @@ +defmodule Pleroma.Search.Meilisearch do + require Logger + + alias Pleroma.Activity + + import Pleroma.Activity.Search + import Ecto.Query + + 
def search(user, query, options \\ []) do + limit = Enum.min([Keyword.get(options, :limit), 40]) + offset = Keyword.get(options, :offset, 0) + author = Keyword.get(options, :author) + + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, result} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/search", + Jason.encode!(%{q: query, offset: offset, limit: limit}) + ) + + hits = Jason.decode!(result.body)["hits"] |> Enum.map(& &1["ap"]) + + try do + hits + |> Activity.create_by_object_ap_id() + |> Activity.with_preloaded_object() + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> maybe_restrict_local(user) + |> maybe_restrict_author(author) + |> maybe_restrict_blocked(user) + |> maybe_fetch(user, query) + |> order_by([activity], desc: activity.id) + |> Pleroma.Repo.all() + rescue + _ -> maybe_fetch([], user, query) + end + end + + def add_to_index(activity) do + object = activity.object + + if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" do + data = object.data + + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, result} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/documents", + Jason.encode!([%{id: object.id, source: data["source"], ap: data["id"]}]) + ) + + if not Map.has_key?(Jason.decode!(result.body), "updateId") do + Logger.error("Failed to add activity #{activity.id} to index: #{result.body}") + end + end + end +end diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index a5d7036d9..034c3b185 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -140,6 +140,12 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn 
-> + Task.start(fn -> search_module.add_to_index(activity) end) + end) + {:ok, activity} else %Activity{} = activity -> diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 5e6e04734..99c33eba6 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -5,7 +5,6 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do use Pleroma.Web, :controller - alias Pleroma.Activity alias Pleroma.Repo alias Pleroma.User alias Pleroma.Web.ControllerHelper @@ -100,7 +99,9 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do end defp resource_search(_, "statuses", query, options) do - statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end) + search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + + statuses = with_fallback(fn -> search_module.search(options[:for_user], query, options) end) StatusView.render("index.json", activities: statuses, -- cgit v1.2.3 From 0318e9a59945d7a5625111157867f0f9ebaffd91 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 16 Aug 2021 10:18:01 +0300 Subject: Add logging to milisiearch index and make it use desc(id) --- lib/mix/tasks/pleroma/search/meilisearch.ex | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 2af8e5853..1fece96e5 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -3,8 +3,9 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Mix.Tasks.Pleroma.Search.Meilisearch do - import Mix.Pleroma + require Logger + import Mix.Pleroma import Ecto.Query def run(["index"]) do @@ -12,12 +13,25 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do endpoint = 
Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + {:ok, _} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/settings/ranking-rules", + Jason.encode!([ + "desc(id)", + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ]) + ) + Pleroma.Repo.chunk_stream( from(Pleroma.Object, - limit: 200, where: fragment("data->>'type' = 'Note'") and fragment("LENGTH(data->>'source') > 0") ), - 100, + 200, :batches ) |> Stream.map(fn objects -> @@ -26,12 +40,14 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do %{id: object.id, source: data["source"], ap: data["id"]} end) end) - |> Stream.each(fn activities -> + |> Stream.each(fn objects -> {:ok, _} = Pleroma.HTTP.post( "#{endpoint}/indexes/objects/documents", - Jason.encode!(activities) + Jason.encode!(objects) ) + + IO.puts("Indexed #{Enum.count(objects)} entries") end) |> Stream.run() end -- cgit v1.2.3 From 365024abec905e427babb5403f0fccbde65f4bcd Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 16 Aug 2021 22:24:31 +0300 Subject: Ensure only indexing public posts and implement clearing and delete --- lib/mix/tasks/pleroma/search/meilisearch.ex | 15 ++++++++++++++- lib/pleroma/activity.ex | 1 + lib/pleroma/search/meilisearch.ex | 17 ++++++++++++++++- lib/pleroma/web/common_api.ex | 7 +++++++ 4 files changed, 38 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 1fece96e5..0b86fdece 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -4,6 +4,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do require Logger + require Pleroma.Constants import Mix.Pleroma import Ecto.Query @@ -29,7 +30,11 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do Pleroma.Repo.chunk_stream( from(Pleroma.Object, - where: fragment("data->>'type' = 'Note'") and fragment("LENGTH(data->>'source') > 0") + # Only index public posts which 
are notes and have some text + where: + fragment("data->>'type' = 'Note'") and + fragment("LENGTH(data->>'source') > 0") and + fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) ), 200, :batches @@ -51,4 +56,12 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do end) |> Stream.run() end + + def run(["clear"]) do + start_pleroma() + + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, _} = Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], []) + end end diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index 9563136f9..2c168fd41 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -368,6 +368,7 @@ defmodule Pleroma.Activity do defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search def add_to_index(_activity), do: nil + def remove_from_index(_object), do: nil def direct_conversation_id(activity, for_user) do alias Pleroma.Conversation.Participation diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 92e0d3429..dbe6b2d67 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -1,5 +1,6 @@ defmodule Pleroma.Search.Meilisearch do require Logger + require Pleroma.Constants alias Pleroma.Activity @@ -41,7 +42,8 @@ defmodule Pleroma.Search.Meilisearch do def add_to_index(activity) do object = activity.object - if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" do + if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" and + Pleroma.Constants.as_public() in object.data["to"] do data = object.data endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) @@ -57,4 +59,17 @@ defmodule Pleroma.Search.Meilisearch do end end end + + def remove_from_index(object) do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, _} = + Pleroma.HTTP.request( + :delete, + 
"#{endpoint}/indexes/objects/documents/#{object.id}", + "", + [], + [] + ) + end end diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index 89f5dd606..54a8aa213 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -147,6 +147,13 @@ defmodule Pleroma.Web.CommonAPI do true <- User.superuser?(user) || user.ap_id == object.data["actor"], {:ok, delete_data, _} <- Builder.delete(user, object.data["id"]), {:ok, delete, _} <- Pipeline.common_pipeline(delete_data, local: true) do + # Also delete from search index + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.remove_from_index(object) end) + end) + {:ok, delete} else {:find_activity, _} -> -- cgit v1.2.3 From ea6a6a128712e81c4f298b2bb2cedfadf2295cff Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 16 Aug 2021 22:30:56 +0300 Subject: Make the indexing batch differently and more, show number indexed --- lib/mix/tasks/pleroma/search/meilisearch.ex | 63 +++++++++++++++++------------ 1 file changed, 38 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 0b86fdece..2a6438528 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -28,33 +28,46 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do ]) ) - Pleroma.Repo.chunk_stream( - from(Pleroma.Object, - # Only index public posts which are notes and have some text - where: - fragment("data->>'type' = 'Note'") and - fragment("LENGTH(data->>'source') > 0") and - fragment("data->'to' \\? 
?", ^Pleroma.Constants.as_public()) - ), - 200, - :batches - ) - |> Stream.map(fn objects -> - Enum.map(objects, fn object -> - data = object.data - %{id: object.id, source: data["source"], ap: data["id"]} - end) - end) - |> Stream.each(fn objects -> - {:ok, _} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/documents", - Jason.encode!(objects) + chunk_size = 100_000 + + Pleroma.Repo.transaction( + fn -> + Pleroma.Repo.stream( + from(Pleroma.Object, + # Only index public posts which are notes and have some text + where: + fragment("data->>'type' = 'Note'") and + fragment("LENGTH(data->>'source') > 0") and + fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()), + order_by: fragment("data->'published' DESC") + ), + timeout: :infinity ) + |> Stream.chunk_every(chunk_size) + |> Stream.transform(0, fn objects, acc -> + new_acc = acc + Enum.count(objects) - IO.puts("Indexed #{Enum.count(objects)} entries") - end) - |> Stream.run() + IO.puts("Indexed #{new_acc} entries") + + {[objects], new_acc} + end) + |> Stream.map(fn objects -> + Enum.map(objects, fn object -> + data = object.data + %{id: object.id, source: data["source"], ap: data["id"]} + end) + end) + |> Stream.each(fn objects -> + {:ok, _} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/documents", + Jason.encode!(objects) + ) + end) + |> Stream.run() + end, + timeout: :infinity + ) end def run(["clear"]) do -- cgit v1.2.3 From 38996f551a4ec014e9f4cb4a691d31beecab43ba Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 17 Aug 2021 00:06:32 +0300 Subject: Make meilisearch sort on publish date converted to unix time --- lib/mix/tasks/pleroma/search/meilisearch.ex | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 2a6438528..2dd9c0a62 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -18,7 
+18,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do Pleroma.HTTP.post( "#{endpoint}/indexes/objects/settings/ranking-rules", Jason.encode!([ - "desc(id)", + "desc(published)", "typo", "words", "proximity", @@ -54,7 +54,15 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do |> Stream.map(fn objects -> Enum.map(objects, fn object -> data = object.data - %{id: object.id, source: data["source"], ap: data["id"]} + + {:ok, published, _} = DateTime.from_iso8601(data["published"]) + + %{ + id: object.id, + source: data["source"], + ap: data["id"], + published: published |> DateTime.to_unix() + } end) end) |> Stream.each(fn objects -> -- cgit v1.2.3 From 9beaebd97e1746df010aecfcc01d9e2e9a4c60ac Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 17 Aug 2021 00:30:14 +0300 Subject: Tweak search ordering to hopefully return newer results --- lib/mix/tasks/pleroma/search/meilisearch.ex | 15 ++++++++++++--- lib/pleroma/search/meilisearch.ex | 13 +++++++++++-- 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 2dd9c0a62..dcecbd7cf 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -39,7 +39,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do fragment("data->>'type' = 'Note'") and fragment("LENGTH(data->>'source') > 0") and fragment("data->'to' \\? 
?", ^Pleroma.Constants.as_public()), - order_by: fragment("data->'published' DESC") + order_by: [desc: fragment("data->'published'")] ), timeout: :infinity ) @@ -66,11 +66,15 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do end) end) |> Stream.each(fn objects -> - {:ok, _} = + {:ok, result} = Pleroma.HTTP.post( "#{endpoint}/indexes/objects/documents", Jason.encode!(objects) ) + + if not Map.has_key?(Jason.decode!(result.body), "updateId") do + IO.puts("Failed to index: #{result}") + end end) |> Stream.run() end, @@ -83,6 +87,11 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, _} = Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], []) + {:ok, result} = + Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], []) + + if not Map.has_key?(Jason.decode!(result.body), "updateId") do + IO.puts("Failed to clear: #{result}") + end end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index dbe6b2d67..9fdb0a07f 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -32,7 +32,7 @@ defmodule Pleroma.Search.Meilisearch do |> maybe_restrict_author(author) |> maybe_restrict_blocked(user) |> maybe_fetch(user, query) - |> order_by([activity], desc: activity.id) + |> order_by([object: obj], desc: obj.data["published"]) |> Pleroma.Repo.all() rescue _ -> maybe_fetch([], user, query) @@ -48,10 +48,19 @@ defmodule Pleroma.Search.Meilisearch do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + {:ok, published, _} = DateTime.from_iso8601(data["published"]) + {:ok, result} = Pleroma.HTTP.post( "#{endpoint}/indexes/objects/documents", - Jason.encode!([%{id: object.id, source: data["source"], ap: data["id"]}]) + Jason.encode!([ + %{ + id: object.id, + source: data["source"], + ap: data["id"], + published: published |> DateTime.to_unix() + } + ]) ) if not 
Map.has_key?(Jason.decode!(result.body), "updateId") do -- cgit v1.2.3 From 00c48a33acf0bd59fa7e7b58a67b049e4f4adc31 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 17 Aug 2021 00:57:53 +0300 Subject: Use content instead of source and scrub it --- lib/mix/tasks/pleroma/search/meilisearch.ex | 12 ++++-------- lib/pleroma/search/meilisearch.ex | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index dcecbd7cf..5270de255 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -37,7 +37,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do # Only index public posts which are notes and have some text where: fragment("data->>'type' = 'Note'") and - fragment("LENGTH(data->>'source') > 0") and + fragment("LENGTH(data->>'content') > 0") and fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()), order_by: [desc: fragment("data->'published'")] ), @@ -56,10 +56,11 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do data = object.data {:ok, published, _} = DateTime.from_iso8601(data["published"]) + {:ok, content} = FastSanitize.strip_tags(data["content"]) %{ id: object.id, - source: data["source"], + content: content, ap: data["id"], published: published |> DateTime.to_unix() } @@ -87,11 +88,6 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, result} = - Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], []) - - if not Map.has_key?(Jason.decode!(result.body), "updateId") do - IO.puts("Failed to clear: #{result}") - end + {:ok, _} = Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects", "", [], []) end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 9fdb0a07f..87fdeaf5e 100644 --- a/lib/pleroma/search/meilisearch.ex +++ 
b/lib/pleroma/search/meilisearch.ex @@ -56,7 +56,7 @@ defmodule Pleroma.Search.Meilisearch do Jason.encode!([ %{ id: object.id, - source: data["source"], + content: data["content"] |> Pleroma.HTML.filter_tags(), ap: data["id"], published: published |> DateTime.to_unix() } -- cgit v1.2.3 From e35d87ea54f70a39206f6103ef0e7334e2a428cc Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 17 Aug 2021 01:37:43 +0300 Subject: Make the chunk size smaller --- lib/mix/tasks/pleroma/search/meilisearch.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 5270de255..44af25f3e 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -28,7 +28,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do ]) ) - chunk_size = 100_000 + chunk_size = 10_000 Pleroma.Repo.transaction( fn -> -- cgit v1.2.3 From 2b2e409ad72862967cabf06344874ae9bff9860f Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 16:37:52 +0300 Subject: Also index incoming federated posts --- lib/pleroma/search/search.ex | 18 ++++++++++++++++++ lib/pleroma/web/activity_pub/activity_pub.ex | 7 ++----- lib/pleroma/web/activity_pub/side_effects.ex | 7 +++++++ lib/pleroma/web/common_api.ex | 8 ++------ 4 files changed, 29 insertions(+), 11 deletions(-) create mode 100644 lib/pleroma/search/search.ex (limited to 'lib') diff --git a/lib/pleroma/search/search.ex b/lib/pleroma/search/search.ex new file mode 100644 index 000000000..e363abf19 --- /dev/null +++ b/lib/pleroma/search/search.ex @@ -0,0 +1,18 @@ +defmodule Pleroma.Search do + def add_to_index(activity) do + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.add_to_index(activity) end) + end) + end + + def remove_from_index(object) do + # Also delete from search index + 
search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.remove_from_index(object) end) + end) + end +end diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 034c3b185..7178cf9eb 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -140,11 +140,8 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) - search_module = Pleroma.Config.get([Pleroma.Search, :module]) - - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.add_to_index(activity) end) - end) + # Add local posts to search index + Pleroma.Search.add_to_index(activity) {:ok, activity} else diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 5eefd2824..15e006b18 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -197,6 +197,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do # - Increase replies count # - Set up ActivityExpiration # - Set up notifications + # - Index incoming posts for search (if needed) @impl true def handle(%{data: %{"type" => "Create"}} = activity, meta) do with {:ok, object, meta} <- handle_object_creation(meta[:object_data], activity, meta), @@ -226,6 +227,8 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) + Pleroma.Search.add_to_index(Map.put(activity, :object, object)) + meta = meta |> add_notifications(notifications) @@ -286,6 +289,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do # - Reduce the user note count # - Reduce the reply count # - Stream out the activity + # - Removes posts from search index (if needed) @impl true def handle(%{data: %{"type" 
=> "Delete", "object" => deleted_object}} = object, meta) do deleted_object = @@ -325,6 +329,9 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do if result == :ok do Notification.create_notifications(object) + + Pleroma.Search.remove_from_index(object) + {:ok, object, meta} else {:error, result} diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index 54a8aa213..ba6c07975 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -147,12 +147,8 @@ defmodule Pleroma.Web.CommonAPI do true <- User.superuser?(user) || user.ap_id == object.data["actor"], {:ok, delete_data, _} <- Builder.delete(user, object.data["id"]), {:ok, delete, _} <- Pipeline.common_pipeline(delete_data, local: true) do - # Also delete from search index - search_module = Pleroma.Config.get([Pleroma.Search, :module]) - - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.remove_from_index(object) end) - end) + # Remove from search index for local posts + Pleroma.Search.remove_from_index(object) {:ok, delete} else -- cgit v1.2.3 From 9f16ca80e0fe60b8b0e3e8ddb9b06ca0bec31002 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 18:47:41 +0300 Subject: Mark only content as searchable for meilisearch --- lib/mix/tasks/pleroma/search/meilisearch.ex | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 44af25f3e..ebd3cc81f 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -28,6 +28,14 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do ]) ) + {:ok, _} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/settings/searchable-attributes", + Jason.encode!([ + "content" + ]) + ) + chunk_size = 10_000 Pleroma.Repo.transaction( @@ -55,8 +63,14 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do Enum.map(objects, fn 
object -> data = object.data + content_str = + case data["content"] do + [nil | rest] -> to_string(rest) + str -> str + end + {:ok, published, _} = DateTime.from_iso8601(data["published"]) - {:ok, content} = FastSanitize.strip_tags(data["content"]) + {:ok, content} = FastSanitize.strip_tags(content_str) %{ id: object.id, -- cgit v1.2.3 From 3dedadf192a3acd0c1dfc2b11eba5a247ae7f61c Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 19:38:03 +0300 Subject: Adjust content indexing to skip more unneeded stuff --- lib/mix/tasks/pleroma/search/meilisearch.ex | 47 +++++++++++++++++++---------- 1 file changed, 31 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index ebd3cc81f..3704e0bdc 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -52,13 +52,6 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do timeout: :infinity ) |> Stream.chunk_every(chunk_size) - |> Stream.transform(0, fn objects, acc -> - new_acc = acc + Enum.count(objects) - - IO.puts("Indexed #{new_acc} entries") - - {[objects], new_acc} - end) |> Stream.map(fn objects -> Enum.map(objects, fn object -> data = object.data @@ -70,15 +63,34 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do end {:ok, published, _} = DateTime.from_iso8601(data["published"]) - {:ok, content} = FastSanitize.strip_tags(content_str) - - %{ - id: object.id, - content: content, - ap: data["id"], - published: published |> DateTime.to_unix() - } + + content = + with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), + trimmed <- String.trim(scrubbed) do + trimmed + end + + # Only index if there is anything in the string. 
If there is a single symbol, + # it's probably a dot from mastodon posts with just the picture + if String.length(content) > 1 do + %{ + id: object.id, + content: content, + ap: data["id"], + published: published |> DateTime.to_unix() + } + else + nil + end end) + |> Enum.filter(fn o -> not is_nil(o) end) + end) + |> Stream.transform(0, fn objects, acc -> + new_acc = acc + Enum.count(objects) + + IO.puts("Indexed #{new_acc} entries") + + {[objects], new_acc} end) |> Stream.each(fn objects -> {:ok, result} = @@ -102,6 +114,9 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, _} = Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects", "", [], []) + {:ok, _} = + Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], + timeout: :infinity + ) end end -- cgit v1.2.3 From 35e9192cedcbc56fb07c9933e2988bf900256b53 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 22:53:18 +0300 Subject: Rework task indexing to share code with the main module The code in the main module now scrubs new posts too --- lib/mix/tasks/pleroma/search/meilisearch.ex | 35 ++-------------------- lib/pleroma/search/meilisearch.ex | 46 ++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 47 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 3704e0bdc..b5a394e34 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -51,40 +51,9 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do ), timeout: :infinity ) + |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1) + |> Stream.filter(fn o -> not is_nil(o) end) |> Stream.chunk_every(chunk_size) - |> Stream.map(fn objects -> - Enum.map(objects, fn object -> - data = object.data - - content_str = - case data["content"] do - [nil | rest] -> to_string(rest) - str -> str - 
end - - {:ok, published, _} = DateTime.from_iso8601(data["published"]) - - content = - with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), - trimmed <- String.trim(scrubbed) do - trimmed - end - - # Only index if there is anything in the string. If there is a single symbol, - # it's probably a dot from mastodon posts with just the picture - if String.length(content) > 1 do - %{ - id: object.id, - content: content, - ap: data["id"], - published: published |> DateTime.to_unix() - } - else - nil - end - end) - |> Enum.filter(fn o -> not is_nil(o) end) - end) |> Stream.transform(0, fn objects, acc -> new_acc = acc + Enum.count(objects) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 87fdeaf5e..10468e36c 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -39,28 +39,46 @@ defmodule Pleroma.Search.Meilisearch do end end - def add_to_index(activity) do - object = activity.object - - if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" and + def object_to_search_data(object) do + if not is_nil(object) and object.data["type"] == "Note" and Pleroma.Constants.as_public() in object.data["to"] do data = object.data - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + content_str = + case data["content"] do + [nil | rest] -> to_string(rest) + str -> str + end + + content = + with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), + trimmed <- String.trim(scrubbed) do + trimmed + end + + if String.length(content) > 1 do + {:ok, published, _} = DateTime.from_iso8601(data["published"]) + + %{ + id: object.id, + content: content, + ap: data["id"], + published: published |> DateTime.to_unix() + } + end + end + end - {:ok, published, _} = DateTime.from_iso8601(data["published"]) + def add_to_index(activity) do + maybe_search_data = object_to_search_data(activity) + + if activity.data["type"] == "Create" and maybe_search_data do + 
endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) {:ok, result} = Pleroma.HTTP.post( "#{endpoint}/indexes/objects/documents", - Jason.encode!([ - %{ - id: object.id, - content: data["content"] |> Pleroma.HTML.filter_tags(), - ap: data["id"], - published: published |> DateTime.to_unix() - } - ]) + Jason.encode!([maybe_search_data]) ) if not Map.has_key?(Jason.decode!(result.body), "updateId") do -- cgit v1.2.3 From 410c8cb765bbec1014cb2bbdbcc44d3a25f834e1 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 23:47:43 +0300 Subject: Make indexing logs rewrite themselves --- lib/mix/tasks/pleroma/search/meilisearch.ex | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index b5a394e34..2485a441d 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -57,7 +57,9 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do |> Stream.transform(0, fn objects, acc -> new_acc = acc + Enum.count(objects) - IO.puts("Indexed #{new_acc} entries") + # Reset to the beginning of the line and rewrite it + IO.write("\r") + IO.write("Indexed #{new_acc} entries") {[objects], new_acc} end) @@ -76,6 +78,8 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do end, timeout: :infinity ) + + IO.write("\n") end def run(["clear"]) do -- cgit v1.2.3 From 2c7d973af7797ae860829c1764ade521a17e7263 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 19:35:21 +0300 Subject: Implement meilisearch auth --- lib/mix/tasks/pleroma/search/meilisearch.ex | 75 +++++++++++++++++------------ lib/pleroma/search/meilisearch.ex | 69 ++++++++++++++++---------- 2 files changed, 88 insertions(+), 56 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 2485a441d..230be5aa1 100644 --- 
a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -9,32 +9,30 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do import Mix.Pleroma import Ecto.Query + import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1] + def run(["index"]) do start_pleroma() - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - - {:ok, _} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/settings/ranking-rules", - Jason.encode!([ - "desc(published)", - "typo", - "words", - "proximity", - "attribute", - "wordsPosition", - "exactness" - ]) - ) + meili_post!( + "/indexes/objects/settings/ranking-rules", + [ + "desc(published)", + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ] + ) - {:ok, _} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/settings/searchable-attributes", - Jason.encode!([ - "content" - ]) - ) + meili_post!( + "/indexes/objects/settings/searchable-attributes", + [ + "content" + ] + ) chunk_size = 10_000 @@ -64,14 +62,14 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do {[objects], new_acc} end) |> Stream.each(fn objects -> - {:ok, result} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/documents", - Jason.encode!(objects) + result = + meili_post!( + "/indexes/objects/documents", + objects ) - if not Map.has_key?(Jason.decode!(result.body), "updateId") do - IO.puts("Failed to index: #{result}") + if not Map.has_key?(result, "updateId") do + IO.puts("Failed to index: #{inspect(result)}") end end) |> Stream.run() @@ -85,11 +83,26 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do def run(["clear"]) do start_pleroma() + meili_delete!("/indexes/objects/documents") + end + + def run(["show-private-key", master_key]) do + start_pleroma() + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, _} = - Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], - timeout: :infinity + {:ok, result} = + 
Pleroma.HTTP.get( + Path.join(endpoint, "/keys"), + [{"X-Meili-API-Key", master_key}] ) + + decoded = Jason.decode!(result.body) + + if decoded["private"] do + IO.puts(decoded["private"]) + else + IO.puts("Error fetching the key, check the master key is correct: #{inspect(decoded)}") + end end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 10468e36c..8745d539d 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -7,20 +7,50 @@ defmodule Pleroma.Search.Meilisearch do import Pleroma.Activity.Search import Ecto.Query - def search(user, query, options \\ []) do - limit = Enum.min([Keyword.get(options, :limit), 40]) - offset = Keyword.get(options, :offset, 0) - author = Keyword.get(options, :author) + defp meili_headers() do + private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) + if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] + end + + def meili_post!(path, params) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) {:ok, result} = Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/search", - Jason.encode!(%{q: query, offset: offset, limit: limit}) + Path.join(endpoint, path), + Jason.encode!(params), + meili_headers() + ) + + Jason.decode!(result.body) + end + + def meili_delete!(path) do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, _} = + Pleroma.HTTP.request( + :delete, + Path.join(endpoint, path), + "", + meili_headers(), + timeout: :infinity + ) + end + + def search(user, query, options \\ []) do + limit = Enum.min([Keyword.get(options, :limit), 40]) + offset = Keyword.get(options, :offset, 0) + author = Keyword.get(options, :author) + + result = + meili_post!( + "/indexes/objects/search", + %{q: query, offset: offset, limit: limit} ) - hits = Jason.decode!(result.body)["hits"] |> Enum.map(& &1["ap"]) + hits = result["hits"] |> Enum.map(& &1["ap"]) try do hits @@ -73,30 +103,19 @@ 
defmodule Pleroma.Search.Meilisearch do maybe_search_data = object_to_search_data(activity) if activity.data["type"] == "Create" and maybe_search_data do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - - {:ok, result} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/documents", - Jason.encode!([maybe_search_data]) + result = + meili_post!( + "/indexes/objects/documents", + [maybe_search_data] ) - if not Map.has_key?(Jason.decode!(result.body), "updateId") do - Logger.error("Failed to add activity #{activity.id} to index: #{result.body}") + if not Map.has_key?(result, "updateId") do + Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") end end end def remove_from_index(object) do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - - {:ok, _} = - Pleroma.HTTP.request( - :delete, - "#{endpoint}/indexes/objects/documents/#{object.id}", - "", - [], - [] - ) + meili_delete!("/indexes/objects/documents/#{object.id}") end end -- cgit v1.2.3 From a67f9da5cc46b4e184aa1afe3dd1bd1df31de15b Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 20:02:34 +0300 Subject: Add a message with a count of posts to index --- lib/mix/tasks/pleroma/search/meilisearch.ex | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 230be5aa1..557b06182 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -38,7 +38,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do Pleroma.Repo.transaction( fn -> - Pleroma.Repo.stream( + query = from(Pleroma.Object, # Only index public posts which are notes and have some text where: @@ -46,7 +46,13 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do fragment("LENGTH(data->>'content') > 0") and fragment("data->'to' \\? 
?", ^Pleroma.Constants.as_public()), order_by: [desc: fragment("data->'published'")] - ), + ) + + count = query |> Pleroma.Repo.aggregate(:count, :data) + IO.puts("Entries to index: #{count}") + + Pleroma.Repo.stream( + query, timeout: :infinity ) |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1) -- cgit v1.2.3 From 09a1ae1b6eca4efbb935aa1c0da950009d110fb2 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 20:21:46 +0300 Subject: Add the meilisearch.stats command --- lib/mix/tasks/pleroma/search/meilisearch.ex | 10 +++++++++- lib/pleroma/search/meilisearch.ex | 12 ++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 557b06182..f2d9fe312 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -9,7 +9,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do import Mix.Pleroma import Ecto.Query - import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1] + import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1, meili_get!: 1] def run(["index"]) do start_pleroma() @@ -111,4 +111,12 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do IO.puts("Error fetching the key, check the master key is correct: #{inspect(decoded)}") end end + + def run(["stats"]) do + start_pleroma() + + result = meili_get!("/indexes/objects/stats") + IO.puts("Number of entries: #{result["numberOfDocuments"]}") + IO.puts("Indexing? 
#{result["isIndexing"]}") + end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 8745d539d..1ad17bf9f 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -13,6 +13,18 @@ defmodule Pleroma.Search.Meilisearch do if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] end + def meili_get!(path) do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, result} = + Pleroma.HTTP.get( + Path.join(endpoint, path), + meili_headers() + ) + + Jason.decode!(result.body) + end + def meili_post!(path, params) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) -- cgit v1.2.3 From d9ef7e075880ba39dd4ca8e21566c680070faa42 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 21:15:15 +0300 Subject: Fix activity being passed to objec_to_search_data --- lib/pleroma/search/meilisearch.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 1ad17bf9f..212bdd473 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -112,7 +112,7 @@ defmodule Pleroma.Search.Meilisearch do end def add_to_index(activity) do - maybe_search_data = object_to_search_data(activity) + maybe_search_data = object_to_search_data(activity.object) if activity.data["type"] == "Create" and maybe_search_data do result = -- cgit v1.2.3 From 40280cc273ad7f2b355846e2f41b9873a8d5ff2c Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 28 Aug 2021 15:59:13 +0300 Subject: Reorder ranking rules for (maybe) better results --- lib/mix/tasks/pleroma/search/meilisearch.ex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index f2d9fe312..cdf9ab0bd 100644 --- 
a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -18,12 +18,12 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do "/indexes/objects/settings/ranking-rules", [ "desc(published)", - "typo", "words", + "exactness", "proximity", - "attribute", "wordsPosition", - "exactness" + "typo", + "attribute" ] ) -- cgit v1.2.3 From 6beef2d1179ab9a377e87872b7fbe2997bbbbebd Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Fri, 8 Oct 2021 12:24:37 +0300 Subject: Move add_to_index / remove_from_index to Pleroma.Actitivy.Search --- lib/pleroma/activity.ex | 2 -- lib/pleroma/activity/search.ex | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index 2c168fd41..ebfd4ed45 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -367,8 +367,6 @@ defmodule Pleroma.Activity do end defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search - def add_to_index(_activity), do: nil - def remove_from_index(_object), do: nil def direct_conversation_id(activity, for_user) do alias Pleroma.Conversation.Participation diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/activity/search.ex index 3dce9d355..47ab5208c 100644 --- a/lib/pleroma/activity/search.ex +++ b/lib/pleroma/activity/search.ex @@ -45,6 +45,9 @@ defmodule Pleroma.Activity.Search do end end + def add_to_index(_activity), do: nil + def remove_from_index(_object), do: nil + def maybe_restrict_author(query, %User{} = author) do Activity.Queries.by_author(query, author) end -- cgit v1.2.3 From 95cb2bb694e3f8857895b21331b02b9277d65d9b Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 12 Oct 2021 19:17:37 +0300 Subject: Don't try removing from index again in common_api It's already removed in the side effects of the pipeline --- lib/pleroma/web/common_api.ex | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib') diff --git 
a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index ba6c07975..89f5dd606 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -147,9 +147,6 @@ defmodule Pleroma.Web.CommonAPI do true <- User.superuser?(user) || user.ap_id == object.data["actor"], {:ok, delete_data, _} <- Builder.delete(user, object.data["id"]), {:ok, delete, _} <- Pipeline.common_pipeline(delete_data, local: true) do - # Remove from search index for local posts - Pleroma.Search.remove_from_index(object) - {:ok, delete} else {:find_activity, _} -> -- cgit v1.2.3 From cf558208c202d5188954e26077d35bcc1ae02fce Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 12 Oct 2021 19:34:57 +0300 Subject: Use proper deleted object for removing from index --- lib/pleroma/web/activity_pub/side_effects.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 15e006b18..4762b5ac6 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -330,7 +330,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do if result == :ok do Notification.create_notifications(object) - Pleroma.Search.remove_from_index(object) + Pleroma.Search.remove_from_index(deleted_object) {:ok, object, meta} else -- cgit v1.2.3 From e4b7a3f51f270f468c15cc4ce850c847633c030b Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Fri, 29 Oct 2021 00:38:00 +0300 Subject: Modify some meilisearch variables --- lib/pleroma/search/meilisearch.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 212bdd473..b8248e40c 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -47,7 +47,7 @@ defmodule Pleroma.Search.Meilisearch do Path.join(endpoint, path), "", meili_headers(), - timeout: 
:infinity + [] ) end -- cgit v1.2.3 From 0b4fd0d342e3ced073e82355b380cbfee5478c60 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Fri, 29 Oct 2021 13:58:24 +0300 Subject: Set content-type to application/json --- lib/pleroma/search/meilisearch.ex | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index b8248e40c..d94ab8b64 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -10,7 +10,8 @@ defmodule Pleroma.Search.Meilisearch do defp meili_headers() do private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) - if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] + [{"Content-Type", "application/json"}] ++ + if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] end def meili_get!(path) do -- cgit v1.2.3 From 4445421297f4a4375ce9df4857a66ad08e984507 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Fri, 29 Oct 2021 21:04:59 +0300 Subject: Only add local posts to index in activity_pub Remote ones are already added in another place --- lib/pleroma/web/activity_pub/activity_pub.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 7178cf9eb..cdc70aacf 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -141,7 +141,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do end) # Add local posts to search index - Pleroma.Search.add_to_index(activity) + if local, do: Pleroma.Search.add_to_index(activity) {:ok, activity} else -- cgit v1.2.3 From e928e307f34542b0a0af8b615c986aeac478b637 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Wed, 10 Nov 2021 21:25:12 +0300 Subject: Add a reindex option Signed-off-by: Ekaterina Vaartis --- lib/mix/tasks/pleroma/search/meilisearch.ex 
| 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index cdf9ab0bd..2a3c3a8b9 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -11,9 +11,11 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1, meili_get!: 1] - def run(["index"]) do + def run(["index" | args]) do start_pleroma() + is_reindex = "--reindex" in args + meili_post!( "/indexes/objects/settings/ranking-rules", [ @@ -68,6 +70,19 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do {[objects], new_acc} end) |> Stream.each(fn objects -> + objects = + objects + |> Enum.filter(fn o -> + if is_reindex do + result = meili_get!("/indexes/objects/documents/#{o.id}") + + # Filter out the already indexed documents. This is true when the document does not exist + result["errorCode"] == "document_not_found" + else + true + end + end) + result = meili_post!( "/indexes/objects/documents", -- cgit v1.2.3 From 9c1a9307079c8d007ae7cbf3e089d2bc5ea6b733 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 13 Nov 2021 15:07:51 +0300 Subject: Support reindexing meilisearch >=0.24.0 It has has a different error code key --- lib/mix/tasks/pleroma/search/meilisearch.ex | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 2a3c3a8b9..3b134ad3f 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -76,8 +76,14 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do if is_reindex do result = meili_get!("/indexes/objects/documents/#{o.id}") + # With >= 0.24.0 the name for "errorCode" is just "code" + error_code_key = + if meili_get!("/version")["pkgVersion"] |> 
Version.match?(">= 0.24.0"), + do: "code", + else: "errorCode" + # Filter out the already indexed documents. This is true when the document does not exist - result["errorCode"] == "document_not_found" + result[error_code_key] == "document_not_found" else true end -- cgit v1.2.3 From 7009ef5672ad20f92374d218cd614a38cd70515e Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 14 Nov 2021 20:24:05 +0300 Subject: Move the search.ex file so credo doesn't complain --- lib/pleroma/search.ex | 18 ++++++++++++++++++ lib/pleroma/search/search.ex | 18 ------------------ 2 files changed, 18 insertions(+), 18 deletions(-) create mode 100644 lib/pleroma/search.ex delete mode 100644 lib/pleroma/search/search.ex (limited to 'lib') diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex new file mode 100644 index 000000000..e363abf19 --- /dev/null +++ b/lib/pleroma/search.ex @@ -0,0 +1,18 @@ +defmodule Pleroma.Search do + def add_to_index(activity) do + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.add_to_index(activity) end) + end) + end + + def remove_from_index(object) do + # Also delete from search index + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.remove_from_index(object) end) + end) + end +end diff --git a/lib/pleroma/search/search.ex b/lib/pleroma/search/search.ex deleted file mode 100644 index e363abf19..000000000 --- a/lib/pleroma/search/search.ex +++ /dev/null @@ -1,18 +0,0 @@ -defmodule Pleroma.Search do - def add_to_index(activity) do - search_module = Pleroma.Config.get([Pleroma.Search, :module]) - - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.add_to_index(activity) end) - end) - end - - def remove_from_index(object) do - # Also delete from search index - search_module = Pleroma.Config.get([Pleroma.Search, :module]) - - 
ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.remove_from_index(object) end) - end) - end -end -- cgit v1.2.3 From 39e596a5b51c0c86b6d6bd5f23177a1e6a64cf0b Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 14 Nov 2021 21:42:18 +0300 Subject: Style fixes --- lib/mix/tasks/pleroma/search/meilisearch.ex | 3 ++- lib/pleroma/search/meilisearch.ex | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 3b134ad3f..62ace7e39 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -82,7 +82,8 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do do: "code", else: "errorCode" - # Filter out the already indexed documents. This is true when the document does not exist + # Filter out the already indexed documents. + # This is true when the document does not exist result[error_code_key] == "document_not_found" else true diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index d94ab8b64..41f99ad9f 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -7,7 +7,7 @@ defmodule Pleroma.Search.Meilisearch do import Pleroma.Activity.Search import Ecto.Query - defp meili_headers() do + defp meili_headers do private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) [{"Content-Type", "application/json"}] ++ -- cgit v1.2.3 From 0fae71f88d142f64ec18a49ff4292db816dacdc8 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 16 Nov 2021 21:54:26 +0300 Subject: Rename search.ex to database_search.ex and add search/2 --- lib/pleroma/search.ex | 18 ---------------- lib/pleroma/search/database_search.ex | 24 ++++++++++++++++++++++ lib/pleroma/web/activity_pub/activity_pub.ex | 2 +- lib/pleroma/web/activity_pub/side_effects.ex | 4 ++-- .../mastodon_api/controllers/search_controller.ex | 4 
+--- 5 files changed, 28 insertions(+), 24 deletions(-) delete mode 100644 lib/pleroma/search.ex create mode 100644 lib/pleroma/search/database_search.ex (limited to 'lib') diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex deleted file mode 100644 index e363abf19..000000000 --- a/lib/pleroma/search.ex +++ /dev/null @@ -1,18 +0,0 @@ -defmodule Pleroma.Search do - def add_to_index(activity) do - search_module = Pleroma.Config.get([Pleroma.Search, :module]) - - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.add_to_index(activity) end) - end) - end - - def remove_from_index(object) do - # Also delete from search index - search_module = Pleroma.Config.get([Pleroma.Search, :module]) - - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.remove_from_index(object) end) - end) - end -end diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex new file mode 100644 index 000000000..be0e19be0 --- /dev/null +++ b/lib/pleroma/search/database_search.ex @@ -0,0 +1,24 @@ +defmodule Pleroma.Search.DatabaseSearch do + def add_to_index(activity) do + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.add_to_index(activity) end) + end) + end + + def remove_from_index(object) do + # Also delete from search index + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.remove_from_index(object) end) + end) + end + + def search(query, options) do + search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + + search_module.search(options[:for_user], query, options) + end +end diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index cdc70aacf..7e3444676 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ 
b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -141,7 +141,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do end) # Add local posts to search index - if local, do: Pleroma.Search.add_to_index(activity) + if local, do: Pleroma.Search.DatabaseSearch.add_to_index(activity) {:ok, activity} else diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 4762b5ac6..fa57eab69 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -227,7 +227,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) - Pleroma.Search.add_to_index(Map.put(activity, :object, object)) + Pleroma.Search.DatabaseSearch.add_to_index(Map.put(activity, :object, object)) meta = meta @@ -330,7 +330,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do if result == :ok do Notification.create_notifications(object) - Pleroma.Search.remove_from_index(deleted_object) + Pleroma.Search.DatabaseSearch.remove_from_index(deleted_object) {:ok, object, meta} else diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 99c33eba6..10f1aa532 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -99,9 +99,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do end defp resource_search(_, "statuses", query, options) do - search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) - - statuses = with_fallback(fn -> search_module.search(options[:for_user], query, options) end) + statuses = with_fallback(fn -> Pleroma.Search.DatabaseSearch.search(query, options) end) StatusView.render("index.json", activities: statuses, -- cgit v1.2.3 From a6946048fbe049aa223d094d36eb767739ab5ff2 Mon Sep 17 00:00:00 2001 From: Ekaterina 
Vaartis Date: Wed, 17 Nov 2021 22:29:49 +0300 Subject: Rename Activity.Search to Search.DatabaseSearch --- lib/pleroma/activity.ex | 2 +- lib/pleroma/activity/search.ex | 165 --------------------- lib/pleroma/search.ex | 24 +++ lib/pleroma/search/database_search.ex | 157 ++++++++++++++++++-- lib/pleroma/search/meilisearch.ex | 2 +- lib/pleroma/web/activity_pub/activity_pub.ex | 2 +- lib/pleroma/web/activity_pub/side_effects.ex | 4 +- .../mastodon_api/controllers/search_controller.ex | 2 +- 8 files changed, 173 insertions(+), 185 deletions(-) delete mode 100644 lib/pleroma/activity/search.ex create mode 100644 lib/pleroma/search.ex (limited to 'lib') diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index ebfd4ed45..389c80691 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -366,7 +366,7 @@ defmodule Pleroma.Activity do from(activity in query, where: activity.actor not in subquery(deactivated_users_query)) end - defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search + defdelegate search(user, query, options \\ []), to: Pleroma.Search.DatabaseSearch def direct_conversation_id(activity, for_user) do alias Pleroma.Conversation.Participation diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/activity/search.ex deleted file mode 100644 index 47ab5208c..000000000 --- a/lib/pleroma/activity/search.ex +++ /dev/null @@ -1,165 +0,0 @@ -# Pleroma: A lightweight social networking server -# Copyright © 2017-2022 Pleroma Authors -# SPDX-License-Identifier: AGPL-3.0-only - -defmodule Pleroma.Activity.Search do - alias Pleroma.Activity - alias Pleroma.Object.Fetcher - alias Pleroma.Pagination - alias Pleroma.User - alias Pleroma.Web.ActivityPub.Visibility - - require Pleroma.Constants - - import Ecto.Query - - def search(user, search_query, options \\ []) do - index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin - limit = Enum.min([Keyword.get(options, :limit), 40]) - offset = 
Keyword.get(options, :offset, 0) - author = Keyword.get(options, :author) - - search_function = - if :persistent_term.get({Pleroma.Repo, :postgres_version}) >= 11 do - :websearch - else - :plain - end - - try do - Activity - |> Activity.with_preloaded_object() - |> Activity.restrict_deactivated_users() - |> restrict_public(user) - |> query_with(index_type, search_query, search_function) - |> maybe_restrict_local(user) - |> maybe_restrict_author(author) - |> maybe_restrict_blocked(user) - |> Pagination.fetch_paginated( - %{"offset" => offset, "limit" => limit, "skip_order" => index_type == :rum}, - :offset - ) - |> maybe_fetch(user, search_query) - rescue - _ -> maybe_fetch([], user, search_query) - end - end - - def add_to_index(_activity), do: nil - def remove_from_index(_object), do: nil - - def maybe_restrict_author(query, %User{} = author) do - Activity.Queries.by_author(query, author) - end - - def maybe_restrict_author(query, _), do: query - - def maybe_restrict_blocked(query, %User{} = user) do - Activity.Queries.exclude_authors(query, User.blocked_users_ap_ids(user)) - end - - def maybe_restrict_blocked(query, _), do: query - - defp restrict_public(q, user) when not is_nil(user) do - intended_recipients = [ - Pleroma.Constants.as_public(), - Pleroma.Web.ActivityPub.Utils.as_local_public() - ] - - from([a, o] in q, - where: fragment("?->>'type' = 'Create'", a.data), - where: fragment("? 
&& ?", ^intended_recipients, a.recipients) - ) - end - - defp restrict_public(q, _user) do - from([a, o] in q, - where: fragment("?->>'type' = 'Create'", a.data), - where: ^Pleroma.Constants.as_public() in a.recipients - ) - end - - defp query_with(q, :gin, search_query, :plain) do - %{rows: [[tsc]]} = - Ecto.Adapters.SQL.query!( - Pleroma.Repo, - "select current_setting('default_text_search_config')::regconfig::oid;" - ) - - from([a, o] in q, - where: - fragment( - "to_tsvector(?::oid::regconfig, ?->>'content') @@ plainto_tsquery(?)", - ^tsc, - o.data, - ^search_query - ) - ) - end - - defp query_with(q, :gin, search_query, :websearch) do - %{rows: [[tsc]]} = - Ecto.Adapters.SQL.query!( - Pleroma.Repo, - "select current_setting('default_text_search_config')::regconfig::oid;" - ) - - from([a, o] in q, - where: - fragment( - "to_tsvector(?::oid::regconfig, ?->>'content') @@ websearch_to_tsquery(?)", - ^tsc, - o.data, - ^search_query - ) - ) - end - - defp query_with(q, :rum, search_query, :plain) do - from([a, o] in q, - where: - fragment( - "? @@ plainto_tsquery(?)", - o.fts_content, - ^search_query - ), - order_by: [fragment("? <=> now()::date", o.inserted_at)] - ) - end - - defp query_with(q, :rum, search_query, :websearch) do - from([a, o] in q, - where: - fragment( - "? @@ websearch_to_tsquery(?)", - o.fts_content, - ^search_query - ), - order_by: [fragment("? 
<=> now()::date", o.inserted_at)] - ) - end - - def maybe_restrict_local(q, user) do - limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) - - case {limit, user} do - {:all, _} -> restrict_local(q) - {:unauthenticated, %User{}} -> q - {:unauthenticated, _} -> restrict_local(q) - {false, _} -> q - end - end - - defp restrict_local(q), do: where(q, local: true) - - def maybe_fetch(activities, user, search_query) do - with true <- Regex.match?(~r/https?:/, search_query), - {:ok, object} <- Fetcher.fetch_object_from_id(search_query), - %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), - true <- Visibility.visible_for_user?(activity, user) do - [activity | activities] - else - _ -> activities - end - end -end diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex new file mode 100644 index 000000000..ae0b28c54 --- /dev/null +++ b/lib/pleroma/search.ex @@ -0,0 +1,24 @@ +defmodule Pleroma.Search do + def add_to_index(activity) do + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.add_to_index(activity) end) + end) + end + + def remove_from_index(object) do + # Also delete from search index + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.remove_from_index(object) end) + end) + end + + def search(query, options) do + search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + + search_module.search(options[:for_user], query, options) + end +end diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index be0e19be0..5a8b8ca67 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -1,24 +1,153 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: 
AGPL-3.0-only + defmodule Pleroma.Search.DatabaseSearch do - def add_to_index(activity) do - search_module = Pleroma.Config.get([Pleroma.Search, :module]) + alias Pleroma.Activity + alias Pleroma.Object.Fetcher + alias Pleroma.Pagination + alias Pleroma.User + alias Pleroma.Web.ActivityPub.Visibility + + require Pleroma.Constants + + import Ecto.Query + + def search(user, search_query, options \\ []) do + index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin + limit = Enum.min([Keyword.get(options, :limit), 40]) + offset = Keyword.get(options, :offset, 0) + author = Keyword.get(options, :author) + + search_function = + if :persistent_term.get({Pleroma.Repo, :postgres_version}) >= 11 do + :websearch + else + :plain + end + + try do + Activity + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> restrict_public() + |> query_with(index_type, search_query, search_function) + |> maybe_restrict_local(user) + |> maybe_restrict_author(author) + |> maybe_restrict_blocked(user) + |> Pagination.fetch_paginated( + %{"offset" => offset, "limit" => limit, "skip_order" => index_type == :rum}, + :offset + ) + |> maybe_fetch(user, search_query) + rescue + _ -> maybe_fetch([], user, search_query) + end + end + + def add_to_index(_activity), do: nil + def remove_from_index(_object), do: nil + + def maybe_restrict_author(query, %User{} = author) do + Activity.Queries.by_author(query, author) + end + + def maybe_restrict_author(query, _), do: query + + def maybe_restrict_blocked(query, %User{} = user) do + Activity.Queries.exclude_authors(query, User.blocked_users_ap_ids(user)) + end + + def maybe_restrict_blocked(query, _), do: query + + def restrict_public(q) do + from([a, o] in q, + where: fragment("?->>'type' = 'Create'", a.data), + where: ^Pleroma.Constants.as_public() in a.recipients + ) + end + + defp query_with(q, :gin, search_query, :plain) do + %{rows: [[tsc]]} = + Ecto.Adapters.SQL.query!( + Pleroma.Repo, + 
"select current_setting('default_text_search_config')::regconfig::oid;" + ) + + from([a, o] in q, + where: + fragment( + "to_tsvector(?::oid::regconfig, ?->>'content') @@ plainto_tsquery(?)", + ^tsc, + o.data, + ^search_query + ) + ) + end + + defp query_with(q, :gin, search_query, :websearch) do + %{rows: [[tsc]]} = + Ecto.Adapters.SQL.query!( + Pleroma.Repo, + "select current_setting('default_text_search_config')::regconfig::oid;" + ) + + from([a, o] in q, + where: + fragment( + "to_tsvector(?::oid::regconfig, ?->>'content') @@ websearch_to_tsquery(?)", + ^tsc, + o.data, + ^search_query + ) + ) + end + + defp query_with(q, :rum, search_query, :plain) do + from([a, o] in q, + where: + fragment( + "? @@ plainto_tsquery(?)", + o.fts_content, + ^search_query + ), + order_by: [fragment("? <=> now()::date", o.inserted_at)] + ) + end - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.add_to_index(activity) end) - end) + defp query_with(q, :rum, search_query, :websearch) do + from([a, o] in q, + where: + fragment( + "? @@ websearch_to_tsquery(?)", + o.fts_content, + ^search_query + ), + order_by: [fragment("? 
<=> now()::date", o.inserted_at)] + ) end - def remove_from_index(object) do - # Also delete from search index - search_module = Pleroma.Config.get([Pleroma.Search, :module]) + def maybe_restrict_local(q, user) do + limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.remove_from_index(object) end) - end) + case {limit, user} do + {:all, _} -> restrict_local(q) + {:unauthenticated, %User{}} -> q + {:unauthenticated, _} -> restrict_local(q) + {false, _} -> q + end end - def search(query, options) do - search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + defp restrict_local(q), do: where(q, local: true) - search_module.search(options[:for_user], query, options) + def maybe_fetch(activities, user, search_query) do + with true <- Regex.match?(~r/https?:/, search_query), + {:ok, object} <- Fetcher.fetch_object_from_id(search_query), + %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), + true <- Visibility.visible_for_user?(activity, user) do + [activity | activities] + else + _ -> activities + end end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 41f99ad9f..fa9e27b03 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -4,7 +4,7 @@ defmodule Pleroma.Search.Meilisearch do alias Pleroma.Activity - import Pleroma.Activity.Search + import Pleroma.Search.DatabaseSearch import Ecto.Query defp meili_headers do diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 7e3444676..cdc70aacf 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -141,7 +141,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do end) # Add local posts to search index - if local, do: Pleroma.Search.DatabaseSearch.add_to_index(activity) + if 
local, do: Pleroma.Search.add_to_index(activity) {:ok, activity} else diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index fa57eab69..4762b5ac6 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -227,7 +227,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) - Pleroma.Search.DatabaseSearch.add_to_index(Map.put(activity, :object, object)) + Pleroma.Search.add_to_index(Map.put(activity, :object, object)) meta = meta @@ -330,7 +330,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do if result == :ok do Notification.create_notifications(object) - Pleroma.Search.DatabaseSearch.remove_from_index(deleted_object) + Pleroma.Search.remove_from_index(deleted_object) {:ok, object, meta} else diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 10f1aa532..e4acba226 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -99,7 +99,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do end defp resource_search(_, "statuses", query, options) do - statuses = with_fallback(fn -> Pleroma.Search.DatabaseSearch.search(query, options) end) + statuses = with_fallback(fn -> Pleroma.Search.search(query, options) end) StatusView.render("index.json", activities: statuses, -- cgit v1.2.3 From a12f63bc81481e3f852934e8cc1269e16a57cf0a Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 22 Nov 2021 21:39:54 +0300 Subject: Implement suggestions from the Meilisearch MR - Index unlisted posts - Move version check outside of the streaming and only do it once - Use a PUT request instead of checking manually if there is need to insert - Add error handling, sort of --- 
lib/mix/tasks/pleroma/search/meilisearch.ex | 86 ++++++++++++---------------- lib/pleroma/search/meilisearch.ex | 87 +++++++++++++++++++---------- 2 files changed, 95 insertions(+), 78 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 62ace7e39..6730a99a9 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -3,38 +3,40 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Mix.Tasks.Pleroma.Search.Meilisearch do - require Logger require Pleroma.Constants import Mix.Pleroma import Ecto.Query - import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1, meili_get!: 1] + import Pleroma.Search.Meilisearch, + only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete!: 1] - def run(["index" | args]) do + def run(["index"]) do start_pleroma() - is_reindex = "--reindex" in args - - meili_post!( - "/indexes/objects/settings/ranking-rules", - [ - "desc(published)", - "words", - "exactness", - "proximity", - "wordsPosition", - "typo", - "attribute" - ] - ) + {:ok, _} = + meili_post( + "/indexes/objects/settings/ranking-rules", + [ + "desc(published)", + "words", + "exactness", + "proximity", + "wordsPosition", + "typo", + "attribute" + ] + ) - meili_post!( - "/indexes/objects/settings/searchable-attributes", - [ - "content" - ] - ) + {:ok, _} = + meili_post( + "/indexes/objects/settings/searchable-attributes", + [ + "content" + ] + ) + + IO.puts("Created indices. Starting to insert posts.") chunk_size = 10_000 @@ -42,11 +44,11 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do fn -> query = from(Pleroma.Object, - # Only index public posts which are notes and have some text + # Only index public and unlisted posts which are notes and have some text where: fragment("data->>'type' = 'Note'") and - fragment("LENGTH(data->>'content') > 0") and - fragment("data->'to' \\? 
?", ^Pleroma.Constants.as_public()), + (fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) or + fragment("data->'cc' \\? ?", ^Pleroma.Constants.as_public())), order_by: [desc: fragment("data->'published'")] ) @@ -70,34 +72,18 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do {[objects], new_acc} end) |> Stream.each(fn objects -> - objects = - objects - |> Enum.filter(fn o -> - if is_reindex do - result = meili_get!("/indexes/objects/documents/#{o.id}") - - # With >= 0.24.0 the name for "errorCode" is just "code" - error_code_key = - if meili_get!("/version")["pkgVersion"] |> Version.match?(">= 0.24.0"), - do: "code", - else: "errorCode" - - # Filter out the already indexed documents. - # This is true when the document does not exist - result[error_code_key] == "document_not_found" - else - true - end - end) - result = - meili_post!( + meili_put( "/indexes/objects/documents", objects ) - if not Map.has_key?(result, "updateId") do - IO.puts("Failed to index: #{inspect(result)}") + with {:ok, res} <- result do + if not Map.has_key?(res, "updateId") do + IO.puts("\nFailed to index: #{inspect(result)}") + end + else + e -> IO.puts("\nFailed to index due to network error: #{inspect(e)}") end end) |> Stream.run() @@ -137,7 +123,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do def run(["stats"]) do start_pleroma() - result = meili_get!("/indexes/objects/stats") + {:ok, result} = meili_get("/indexes/objects/stats") IO.puts("Number of entries: #{result["numberOfDocuments"]}") IO.puts("Indexing? 
#{result["isIndexing"]}") end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index fa9e27b03..21b44de86 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -14,29 +14,50 @@ defmodule Pleroma.Search.Meilisearch do if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] end - def meili_get!(path) do + def meili_get(path) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, result} = + result = Pleroma.HTTP.get( Path.join(endpoint, path), meili_headers() ) - Jason.decode!(result.body) + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end end - def meili_post!(path, params) do + def meili_post(path, params) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, result} = + result = Pleroma.HTTP.post( Path.join(endpoint, path), Jason.encode!(params), meili_headers() ) - Jason.decode!(result.body) + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end + end + + def meili_put(path, params) do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + result = + Pleroma.HTTP.request( + :put, + Path.join(endpoint, path), + Jason.encode!(params), + meili_headers(), + [] + ) + + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end end def meili_delete!(path) do @@ -57,34 +78,40 @@ defmodule Pleroma.Search.Meilisearch do offset = Keyword.get(options, :offset, 0) author = Keyword.get(options, :author) - result = - meili_post!( + res = + meili_post( "/indexes/objects/search", %{q: query, offset: offset, limit: limit} ) - hits = result["hits"] |> Enum.map(& &1["ap"]) - - try do - hits - |> Activity.create_by_object_ap_id() - |> Activity.with_preloaded_object() - |> Activity.with_preloaded_object() - |> Activity.restrict_deactivated_users() - |> maybe_restrict_local(user) - |> maybe_restrict_author(author) - |> maybe_restrict_blocked(user) - |> maybe_fetch(user, query) - |> 
order_by([object: obj], desc: obj.data["published"]) - |> Pleroma.Repo.all() - rescue - _ -> maybe_fetch([], user, query) + with {:ok, result} <- res do + hits = result["hits"] |> Enum.map(& &1["ap"]) + + try do + hits + |> Activity.create_by_object_ap_id() + |> Activity.with_preloaded_object() + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> maybe_restrict_local(user) + |> maybe_restrict_author(author) + |> maybe_restrict_blocked(user) + |> maybe_fetch(user, query) + |> order_by([object: obj], desc: obj.data["published"]) + |> Pleroma.Repo.all() + rescue + _ -> maybe_fetch([], user, query) + end end end def object_to_search_data(object) do + # Only index public or unlisted Notes if not is_nil(object) and object.data["type"] == "Note" and - Pleroma.Constants.as_public() in object.data["to"] do + not is_nil(object.data["content"]) and + (Pleroma.Constants.as_public() in object.data["to"] or + Pleroma.Constants.as_public() in object.data["cc"]) and + String.length(object.data["content"]) > 1 do data = object.data content_str = @@ -117,13 +144,17 @@ defmodule Pleroma.Search.Meilisearch do if activity.data["type"] == "Create" and maybe_search_data do result = - meili_post!( + meili_put( "/indexes/objects/documents", [maybe_search_data] ) - if not Map.has_key?(result, "updateId") do - Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") + with {:ok, res} <- result, + true <- Map.has_key?(res, "updateId") do + # Do nothing + else + _ -> + Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") end end end -- cgit v1.2.3 From 3179ed0921197a8a8f32a519c7d41dc09011024d Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 20 Dec 2021 18:48:52 +0300 Subject: Make chunk size configurable --- lib/mix/tasks/pleroma/search/meilisearch.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex 
b/lib/mix/tasks/pleroma/search/meilisearch.ex index 6730a99a9..021552f7b 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -38,7 +38,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do IO.puts("Created indices. Starting to insert posts.") - chunk_size = 10_000 + chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size]) Pleroma.Repo.transaction( fn -> -- cgit v1.2.3 From 571533ae2618478f26db312e52265e143356debd Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 20 Dec 2021 19:05:59 +0300 Subject: Don't support meilisearch < 0.24.0, since it breaks things --- lib/mix/tasks/pleroma/search/meilisearch.ex | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 021552f7b..5098668ad 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -14,17 +14,29 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do def run(["index"]) do start_pleroma() + meili_version = + ( + {:ok, result} = meili_get("/version") + + result["pkgVersion"] + ) + + # The ranking rule syntax was changed but nothing about that is mentioned in the changelog + if not Version.match?(meili_version, ">= 0.24.0") do + raise "Meilisearch <0.24.0 not supported" + end + {:ok, _} = meili_post( "/indexes/objects/settings/ranking-rules", [ - "desc(published)", + "published:desc", "words", "exactness", "proximity", - "wordsPosition", "typo", - "attribute" + "attribute", + "sort" ] ) -- cgit v1.2.3 From 6f2f457751ea09507045e6dd5d5869a14befd3d1 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 20 Dec 2021 22:38:50 +0300 Subject: Add a search backend behaviour --- lib/pleroma/search/database_search.ex | 5 +++++ lib/pleroma/search/meilisearch.ex | 4 ++++ lib/pleroma/search/search_backend.ex | 17 +++++++++++++++++ 
3 files changed, 26 insertions(+) create mode 100644 lib/pleroma/search/search_backend.ex (limited to 'lib') diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index 5a8b8ca67..3735a5fab 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -13,6 +13,8 @@ defmodule Pleroma.Search.DatabaseSearch do import Ecto.Query + @behaviour Pleroma.Search.SearchBackend + def search(user, search_query, options \\ []) do index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin limit = Enum.min([Keyword.get(options, :limit), 40]) @@ -45,7 +47,10 @@ defmodule Pleroma.Search.DatabaseSearch do end end + @impl true def add_to_index(_activity), do: nil + + @impl true def remove_from_index(_object), do: nil def maybe_restrict_author(query, %User{} = author) do diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 21b44de86..33bbf8392 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -7,6 +7,8 @@ defmodule Pleroma.Search.Meilisearch do import Pleroma.Search.DatabaseSearch import Ecto.Query + @behaviour Pleroma.Search.SearchBackend + defp meili_headers do private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) @@ -139,6 +141,7 @@ defmodule Pleroma.Search.Meilisearch do end end + @impl true def add_to_index(activity) do maybe_search_data = object_to_search_data(activity.object) @@ -159,6 +162,7 @@ defmodule Pleroma.Search.Meilisearch do end end + @impl true def remove_from_index(object) do meili_delete!("/indexes/objects/documents/#{object.id}") end diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex new file mode 100644 index 000000000..ed6bfd329 --- /dev/null +++ b/lib/pleroma/search/search_backend.ex @@ -0,0 +1,17 @@ +defmodule Pleroma.Search.SearchBackend do + @doc """ + Add the object associated with the activity to the search index. 
+ + The whole activity is passed, to allow filtering on things such as scope. + """ + @callback add_to_index(activity :: Pleroma.Activity.t()) :: nil + + @doc """ + Remove the object from the index. + + Just the object, as opposed to the whole activity, is passed, since the object + is what contains the actual content and there is no need for filtering when removing + from index. + """ + @callback remove_from_index(object :: Pleroma.Object.t()) :: nil +end -- cgit v1.2.3 From 2bc21c6f1884bae3226f760ed1da39dd9c5f2958 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 22 Jan 2022 15:23:11 +0300 Subject: Use oban for search indexing --- lib/pleroma/search.ex | 15 ++++----------- lib/pleroma/workers/search_indexing_worker.ex | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 lib/pleroma/workers/search_indexing_worker.ex (limited to 'lib') diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex index ae0b28c54..af858fc46 100644 --- a/lib/pleroma/search.ex +++ b/lib/pleroma/search.ex @@ -1,19 +1,12 @@ defmodule Pleroma.Search do - def add_to_index(activity) do - search_module = Pleroma.Config.get([Pleroma.Search, :module]) + alias Pleroma.Workers.SearchIndexingWorker - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.add_to_index(activity) end) - end) + def add_to_index(activity) do + SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity.id}) end def remove_from_index(object) do - # Also delete from search index - search_module = Pleroma.Config.get([Pleroma.Search, :module]) - - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.remove_from_index(object) end) - end) + SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object.id}) end def search(query, options) do diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex new file mode 100644 index 000000000..43b7bad1e --- /dev/null
+++ b/lib/pleroma/workers/search_indexing_worker.ex @@ -0,0 +1,21 @@ +defmodule Pleroma.Workers.SearchIndexingWorker do + use Pleroma.Workers.WorkerHelper, queue: "search_indexing" + + @impl Oban.Worker + + def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do + activity = Pleroma.Activity.get_by_id_with_object(activity_id) + + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + search_module.add_to_index(activity) + end + + def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do + object = Pleroma.Object.get_by_id(object_id) + + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + search_module.remove_from_index(object) + end +end -- cgit v1.2.3 From d89dc5518b5c0eb232e7ac85ddd538f89c32606d Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 22 Jan 2022 16:31:32 +0300 Subject: Fix meilisearch tests and jobs for oban --- lib/pleroma/workers/search_indexing_worker.ex | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex index 43b7bad1e..70a8d42d0 100644 --- a/lib/pleroma/workers/search_indexing_worker.ex +++ b/lib/pleroma/workers/search_indexing_worker.ex @@ -9,6 +9,8 @@ defmodule Pleroma.Workers.SearchIndexingWorker do search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.add_to_index(activity) + + :ok end def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do @@ -17,5 +19,7 @@ defmodule Pleroma.Workers.SearchIndexingWorker do search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.remove_from_index(object) + + :ok end end -- cgit v1.2.3 From 3387935e8354e32171fe6e28a8f96f49154acbb3 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 22 Jan 2022 16:52:06 +0300 Subject: Don't try removing deleted users and such from index as posts --- lib/pleroma/search.ex | 8 ++++---- 
lib/pleroma/web/activity_pub/side_effects.ex | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex index af858fc46..3b266e59b 100644 --- a/lib/pleroma/search.ex +++ b/lib/pleroma/search.ex @@ -1,12 +1,12 @@ defmodule Pleroma.Search do alias Pleroma.Workers.SearchIndexingWorker - def add_to_index(activity) do - SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity.id}) + def add_to_index(%Pleroma.Activity{id: activity_id}) do + SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity_id}) end - def remove_from_index(object) do - SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object.id}) + def remove_from_index(%Pleroma.Object{id: object_id}) do + SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object_id}) end def search(query, options) do diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 4762b5ac6..644e62630 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -330,7 +330,10 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do if result == :ok do Notification.create_notifications(object) - Pleroma.Search.remove_from_index(deleted_object) + # Only remove from index when deleting actual objects, not users or anything else + with %Pleroma.Object{} <- deleted_object do + Pleroma.Search.remove_from_index(deleted_object) + end {:ok, object, meta} else -- cgit v1.2.3 From 1e23f527e3e22108b402552a0766e488048ed3f4 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 22 Mar 2022 20:29:17 +0300 Subject: Change the meilisearch key auth to conform to 0.25.0 --- lib/mix/tasks/pleroma/search/meilisearch.ex | 14 ++++++++------ lib/pleroma/search/meilisearch.ex | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex 
b/lib/mix/tasks/pleroma/search/meilisearch.ex index 5098668ad..db56876fa 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -22,7 +22,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do ) # The ranking rule syntax was changed but nothing about that is mentioned in the changelog - if not Version.match?(meili_version, ">= 0.24.0") do + if not Version.match?(meili_version, ">= 0.25.0") do raise "Meilisearch <0.24.0 not supported" end @@ -112,7 +112,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do meili_delete!("/indexes/objects/documents") end - def run(["show-private-key", master_key]) do + def run(["show-keys", master_key]) do start_pleroma() endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) @@ -120,15 +120,17 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do {:ok, result} = Pleroma.HTTP.get( Path.join(endpoint, "/keys"), - [{"X-Meili-API-Key", master_key}] + [{"Authorization", "Bearer #{master_key}"}] ) decoded = Jason.decode!(result.body) - if decoded["private"] do - IO.puts(decoded["private"]) + if decoded["results"] do + Enum.each(decoded["results"], fn %{"description" => desc, "key" => key} -> + IO.puts("#{desc}: #{key}") + end) else - IO.puts("Error fetching the key, check the master key is correct: #{inspect(decoded)}") + IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}") end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 33bbf8392..0f9182ffc 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -13,7 +13,7 @@ defmodule Pleroma.Search.Meilisearch do private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) [{"Content-Type", "application/json"}] ++ - if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] + if is_nil(private_key), do: [], else: [{"Authorization", "Bearer #{private_key}"}] end def meili_get(path) do -- 
cgit v1.2.3 From 84608be87e2c5961a4deb9030307c978bf1168e5 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 22 Mar 2022 20:45:49 +0300 Subject: Change updateId to uid because apparently that's the new name --- lib/mix/tasks/pleroma/search/meilisearch.ex | 2 +- lib/pleroma/search/meilisearch.ex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index db56876fa..d4a83c3cd 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -91,7 +91,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do ) with {:ok, res} <- result do - if not Map.has_key?(res, "updateId") do + if not Map.has_key?(res, "uid") do IO.puts("\nFailed to index: #{inspect(result)}") end else diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 0f9182ffc..3db65f261 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -153,7 +153,7 @@ defmodule Pleroma.Search.Meilisearch do ) with {:ok, res} <- result, - true <- Map.has_key?(res, "updateId") do + true <- Map.has_key?(res, "uid") do # Do nothing else _ -> -- cgit v1.2.3 From e20f74c71b078d706bc93632773f9b590d2fb018 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Fri, 26 Aug 2022 23:39:58 +0300 Subject: Remove duplicate function call --- lib/pleroma/search/meilisearch.ex | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 3db65f261..53f8a2544 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -93,7 +93,6 @@ defmodule Pleroma.Search.Meilisearch do hits |> Activity.create_by_object_ap_id() |> Activity.with_preloaded_object() - |> Activity.with_preloaded_object() |> Activity.restrict_deactivated_users() |> maybe_restrict_local(user) |> maybe_restrict_author(author) 
-- cgit v1.2.3 From 119b2b847b76c7300bd71699d9f2e5676bdb0bb4 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 27 Aug 2022 00:09:37 +0300 Subject: Instead of checking string length, explicitly check for "" and "." --- lib/pleroma/search/meilisearch.ex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 53f8a2544..2e13b8407 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -112,7 +112,7 @@ defmodule Pleroma.Search.Meilisearch do not is_nil(object.data["content"]) and (Pleroma.Constants.as_public() in object.data["to"] or Pleroma.Constants.as_public() in object.data["cc"]) and - String.length(object.data["content"]) > 1 do + object.data["content"] not in ["", "."] do data = object.data content_str = @@ -127,7 +127,8 @@ defmodule Pleroma.Search.Meilisearch do trimmed end - if String.length(content) > 1 do + # Make sure we have a non-empty string + if content != "" do {:ok, published, _} = DateTime.from_iso8601(data["published"]) %{ -- cgit v1.2.3 From 102ebb42bdba1673da39a8fa8ed1662bc8565aa4 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 27 Aug 2022 00:19:08 +0300 Subject: Make search a callback --- lib/pleroma/search/database_search.ex | 1 + lib/pleroma/search/meilisearch.ex | 1 + lib/pleroma/search/search_backend.ex | 11 +++++++++-- 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index 3735a5fab..9a340abf1 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -15,6 +15,7 @@ defmodule Pleroma.Search.DatabaseSearch do @behaviour Pleroma.Search.SearchBackend + @impl true def search(user, search_query, options \\ []) do index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin limit = Enum.min([Keyword.get(options, 
:limit), 40]) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 2e13b8407..4e88169d2 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -75,6 +75,7 @@ defmodule Pleroma.Search.Meilisearch do ) end + @impl true def search(user, query, options \\ []) do limit = Enum.min([Keyword.get(options, :limit), 40]) offset = Keyword.get(options, :offset, 0) diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex index ed6bfd329..a42e2f5f6 100644 --- a/lib/pleroma/search/search_backend.ex +++ b/lib/pleroma/search/search_backend.ex @@ -1,10 +1,17 @@ defmodule Pleroma.Search.SearchBackend do + @doc """ + Search statuses with a query, restricting to only those the user should have access to. + """ + @callback search(user :: Pleroma.User.t(), query :: String.t(), options :: [any()]) :: [ + Pleroma.Activity.t() + ] + @doc """ Add the object associated with the activity to the search index. The whole activity is passed, to allow filtering on things such as scope. """ - @callback add_to_index(activity :: Pleroma.Activity.t()) :: nil + @callback add_to_index(activity :: Pleroma.Activity.t()) :: :ok | {:error, any()} @doc """ Remove the object from the index. @@ -13,5 +20,5 @@ defmodule Pleroma.Search.SearchBackend do is what contains the actual content and there is no need for fitlering when removing from index. 
""" - @callback remove_from_index(object :: Pleroma.Object.t()) :: nil + @callback remove_from_index(object :: Pleroma.Object.t()) :: {:ok, any()} | {:error, any()} end -- cgit v1.2.3 From 5ac67632384bfb284ac51f2a450d41cf3913378a Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 27 Aug 2022 00:31:36 +0300 Subject: Make add_to_index and remove_from_index report errors --- lib/mix/tasks/pleroma/search/meilisearch.ex | 4 ++-- lib/pleroma/search/meilisearch.ex | 27 ++++++++++++++++----------- lib/pleroma/workers/search_indexing_worker.ex | 4 ---- 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index d4a83c3cd..72a558228 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -9,7 +9,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do import Ecto.Query import Pleroma.Search.Meilisearch, - only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete!: 1] + only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete: 1] def run(["index"]) do start_pleroma() @@ -109,7 +109,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do def run(["clear"]) do start_pleroma() - meili_delete!("/indexes/objects/documents") + meili_delete("/indexes/objects/documents") end def run(["show-keys", master_key]) do diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 4e88169d2..24789b00c 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -62,17 +62,16 @@ defmodule Pleroma.Search.Meilisearch do end end - def meili_delete!(path) do + def meili_delete(path) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, _} = - Pleroma.HTTP.request( - :delete, - Path.join(endpoint, path), - "", - meili_headers(), - [] - ) + Pleroma.HTTP.request( + :delete, + Path.join(endpoint, path), + "", + meili_headers(), + [] + ) 
end @impl true @@ -155,16 +154,22 @@ defmodule Pleroma.Search.Meilisearch do with {:ok, res} <- result, true <- Map.has_key?(res, "uid") do - # Do nothing + # Added successfully + :ok else _ -> + # There was an error, report it Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") + {:error, result} end + else + # The post isn't something we can search, that's ok + :ok end end @impl true def remove_from_index(object) do - meili_delete!("/indexes/objects/documents/#{object.id}") + meili_delete("/indexes/objects/documents/#{object.id}") end end diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex index 70a8d42d0..43b7bad1e 100644 --- a/lib/pleroma/workers/search_indexing_worker.ex +++ b/lib/pleroma/workers/search_indexing_worker.ex @@ -9,8 +9,6 @@ defmodule Pleroma.Workers.SearchIndexingWorker do search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.add_to_index(activity) - - :ok end def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do @@ -19,7 +17,5 @@ defmodule Pleroma.Workers.SearchIndexingWorker do search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.remove_from_index(object) - - :ok end end -- cgit v1.2.3 From 6256822afd368e5f6b410d47c5ff9b584e50a461 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 27 Aug 2022 01:11:50 +0300 Subject: Check for updateId, not uid --- lib/pleroma/search/meilisearch.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 24789b00c..0b90971b1 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -153,7 +153,7 @@ defmodule Pleroma.Search.Meilisearch do ) with {:ok, res} <- result, - true <- Map.has_key?(res, "uid") do + true <- Map.has_key?(res, "updateId") do # Added successfully :ok else -- cgit v1.2.3 From 
5a39866388c411f2bcee9848352f8c420513f34f Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 27 Aug 2022 01:43:59 +0300 Subject: Specifically strip mentions for search indexing --- lib/mix/tasks/pleroma/search/meilisearch.ex | 1 + lib/pleroma/search/meilisearch.ex | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 72a558228..8379a0c25 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -13,6 +13,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do def run(["index"]) do start_pleroma() + Pleroma.HTML.compile_scrubbers() meili_version = ( diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 0b90971b1..7af7f460a 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -122,7 +122,8 @@ defmodule Pleroma.Search.Meilisearch do end content = - with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), + with {:ok, scrubbed} <- + FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing), trimmed <- String.trim(scrubbed) do trimmed end -- cgit v1.2.3 From c1402af2934219b6ab5dc40a7d87a8c916554647 Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 14:49:15 +0400 Subject: B Getting: Add default implementation, delegate, prepare test support. 
--- lib/pleroma/config/getting.ex | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/pleroma/config/getting.ex b/lib/pleroma/config/getting.ex index f9b66bba6..0de4782ea 100644 --- a/lib/pleroma/config/getting.ex +++ b/lib/pleroma/config/getting.ex @@ -5,4 +5,11 @@ defmodule Pleroma.Config.Getting do @callback get(any()) :: any() @callback get(any(), any()) :: any() + + def get(key), do: get(key, nil) + def get(key, default), do: impl().get(key, default) + + def impl() do + Application.get_env(:pleroma, :config_impl, Pleroma.Config) + end end -- cgit v1.2.3 From d3f8950588b444dfdf46d87d5631720cc14a907c Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 14:49:50 +0400 Subject: B MeiliSearch, SearchIndexingWorker: Use Config.Getting, make tests async. --- lib/pleroma/search/meilisearch.ex | 32 ++++++++++++++++----------- lib/pleroma/workers/search_indexing_worker.ex | 6 +++-- 2 files changed, 23 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 7af7f460a..eed9fca1c 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -3,6 +3,7 @@ defmodule Pleroma.Search.Meilisearch do require Pleroma.Constants alias Pleroma.Activity + alias Pleroma.Config.Getting, as: Config import Pleroma.Search.DatabaseSearch import Ecto.Query @@ -10,14 +11,14 @@ defmodule Pleroma.Search.Meilisearch do @behaviour Pleroma.Search.SearchBackend defp meili_headers do - private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) + private_key = Config.get([Pleroma.Search.Meilisearch, :private_key]) [{"Content-Type", "application/json"}] ++ if is_nil(private_key), do: [], else: [{"Authorization", "Bearer #{private_key}"}] end def meili_get(path) do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) result = Pleroma.HTTP.get( @@ -31,7 +32,7 @@ 
defmodule Pleroma.Search.Meilisearch do end def meili_post(path, params) do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) result = Pleroma.HTTP.post( @@ -46,7 +47,7 @@ defmodule Pleroma.Search.Meilisearch do end def meili_put(path, params) do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) result = Pleroma.HTTP.request( @@ -63,15 +64,20 @@ defmodule Pleroma.Search.Meilisearch do end def meili_delete(path) do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - - Pleroma.HTTP.request( - :delete, - Path.join(endpoint, path), - "", - meili_headers(), - [] - ) + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) + + with {:ok, _} <- + Pleroma.HTTP.request( + :delete, + Path.join(endpoint, path), + "", + meili_headers(), + [] + ) do + :ok + else + _ -> :error + end end @impl true diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex index 43b7bad1e..8476a2be5 100644 --- a/lib/pleroma/workers/search_indexing_worker.ex +++ b/lib/pleroma/workers/search_indexing_worker.ex @@ -3,10 +3,12 @@ defmodule Pleroma.Workers.SearchIndexingWorker do @impl Oban.Worker + alias Pleroma.Config.Getting, as: Config + def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do activity = Pleroma.Activity.get_by_id_with_object(activity_id) - search_module = Pleroma.Config.get([Pleroma.Search, :module]) + search_module = Config.get([Pleroma.Search, :module]) search_module.add_to_index(activity) end @@ -14,7 +16,7 @@ defmodule Pleroma.Workers.SearchIndexingWorker do def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do object = Pleroma.Object.get_by_id(object_id) - search_module = Pleroma.Config.get([Pleroma.Search, :module]) + search_module = Config.get([Pleroma.Search, :module]) 
search_module.remove_from_index(object) end -- cgit v1.2.3 From a1a25029da74949a79c73d400b6f2bc0bf1dc01a Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 16:19:54 +0400 Subject: B DatabaseSearch: Fix local-only search. --- lib/pleroma/search/database_search.ex | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index 9a340abf1..f4c405773 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -8,6 +8,7 @@ defmodule Pleroma.Search.DatabaseSearch do alias Pleroma.Pagination alias Pleroma.User alias Pleroma.Web.ActivityPub.Visibility + alias Pleroma.Config require Pleroma.Constants @@ -17,7 +18,7 @@ defmodule Pleroma.Search.DatabaseSearch do @impl true def search(user, search_query, options \\ []) do - index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin + index_type = if Config.get([:database, :rum_enabled]), do: :rum, else: :gin limit = Enum.min([Keyword.get(options, :limit), 40]) offset = Keyword.get(options, :offset, 0) author = Keyword.get(options, :author) @@ -33,7 +34,7 @@ defmodule Pleroma.Search.DatabaseSearch do Activity |> Activity.with_preloaded_object() |> Activity.restrict_deactivated_users() - |> restrict_public() + |> restrict_public(user) |> query_with(index_type, search_query, search_function) |> maybe_restrict_local(user) |> maybe_restrict_author(author) @@ -49,10 +50,10 @@ defmodule Pleroma.Search.DatabaseSearch do end @impl true - def add_to_index(_activity), do: nil + def add_to_index(_activity), do: :ok @impl true - def remove_from_index(_object), do: nil + def remove_from_index(_object), do: :ok def maybe_restrict_author(query, %User{} = author) do Activity.Queries.by_author(query, author) @@ -66,7 +67,19 @@ defmodule Pleroma.Search.DatabaseSearch do def maybe_restrict_blocked(query, _), do: query - def 
restrict_public(q) do + defp restrict_public(q, user) when not is_nil(user) do + intended_recipients = [ + Pleroma.Constants.as_public(), + Pleroma.Web.ActivityPub.Utils.as_local_public() + ] + + from([a, o] in q, + where: fragment("?->>'type' = 'Create'", a.data), + where: fragment("? && ?", ^intended_recipients, a.recipients) + ) + end + + defp restrict_public(q, _user) do from([a, o] in q, where: fragment("?->>'type' = 'Create'", a.data), where: ^Pleroma.Constants.as_public() in a.recipients @@ -134,7 +147,7 @@ defmodule Pleroma.Search.DatabaseSearch do end def maybe_restrict_local(q, user) do - limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) + limit = Config.get([:instance, :limit_to_local_content], :unauthenticated) case {limit, user} do {:all, _} -> restrict_local(q) -- cgit v1.2.3 From 59018d73c366d9297efe83d290c717d1a3e4756a Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 16:43:50 +0400 Subject: B Meilisearch: Update to current API responses. --- lib/pleroma/search/meilisearch.ex | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index eed9fca1c..2bff663e8 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -76,7 +76,7 @@ defmodule Pleroma.Search.Meilisearch do ) do :ok else - _ -> :error + _ -> {:error, "Could not remove from index"} end end @@ -159,8 +159,7 @@ defmodule Pleroma.Search.Meilisearch do [maybe_search_data] ) - with {:ok, res} <- result, - true <- Map.has_key?(res, "updateId") do + with {:ok, %{"status" => "enqueued"}} <- result do # Added successfully :ok else -- cgit v1.2.3 From 3d62c71edf8782c5ceae5a0ea3ba5ec08dc5b948 Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 17:13:27 +0400 Subject: Credo fixes. 
--- lib/pleroma/config/getting.ex | 2 +- lib/pleroma/search/database_search.ex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/pleroma/config/getting.ex b/lib/pleroma/config/getting.ex index 0de4782ea..ec93fd02a 100644 --- a/lib/pleroma/config/getting.ex +++ b/lib/pleroma/config/getting.ex @@ -9,7 +9,7 @@ defmodule Pleroma.Config.Getting do def get(key), do: get(key, nil) def get(key, default), do: impl().get(key, default) - def impl() do + def impl do Application.get_env(:pleroma, :config_impl, Pleroma.Config) end end diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index f4c405773..c6311e0c7 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -4,11 +4,11 @@ defmodule Pleroma.Search.DatabaseSearch do alias Pleroma.Activity + alias Pleroma.Config alias Pleroma.Object.Fetcher alias Pleroma.Pagination alias Pleroma.User alias Pleroma.Web.ActivityPub.Visibility - alias Pleroma.Config require Pleroma.Constants -- cgit v1.2.3