diff options
author | Lain Soykaf <lain@lain.com> | 2024-05-14 14:13:37 +0400 |
---|---|---|
committer | Lain Soykaf <lain@lain.com> | 2024-05-14 14:13:37 +0400 |
commit | cd7e2138d11901fc7a0c8c2f22b7a5d57383a555 (patch) | |
tree | 7705c3853bb26f882cfd059b2c325f992eb64c66 /lib | |
parent | c954437cc02f92b5c48c29d2c12d21496d13f813 (diff) | |
download | pleroma-cd7e2138d11901fc7a0c8c2f22b7a5d57383a555.tar.gz pleroma-cd7e2138d11901fc7a0c8c2f22b7a5d57383a555.zip |
Search: Basic Qdrant/Ollama search
Diffstat (limited to 'lib')
-rw-r--r-- | lib/mix/tasks/pleroma/search/indexer.ex | 60 | ||||
-rw-r--r-- | lib/pleroma/search/qdrant_search.ex | 117 |
2 files changed, 177 insertions, 0 deletions
diff --git a/lib/mix/tasks/pleroma/search/indexer.ex b/lib/mix/tasks/pleroma/search/indexer.ex new file mode 100644 index 000000000..ffa2f3c94 --- /dev/null +++ b/lib/mix/tasks/pleroma/search/indexer.ex @@ -0,0 +1,60 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Mix.Tasks.Pleroma.Search.Indexer do + import Mix.Pleroma + import Ecto.Query + + alias Pleroma.Workers.SearchIndexingWorker + + def run(["index" | options]) do + {options, [], []} = + OptionParser.parse( + options, + strict: [ + limit: :integer + ] + ) + + start_pleroma() + + limit = Keyword.get(options, :limit, 100_000) + + per_step = 1000 + chunks = max(div(limit, per_step), 1) + + 1..chunks + |> Enum.each(fn step -> + q = + from(a in Pleroma.Activity, + limit: ^per_step, + offset: ^per_step * (^step - 1), + select: [:id], + order_by: [desc: :id] + ) + + {:ok, ids} = + Pleroma.Repo.transaction(fn -> + Pleroma.Repo.stream(q, timeout: :infinity) + |> Enum.map(fn a -> + a.id + end) + end) + + IO.puts("Got #{length(ids)} activities, adding to indexer") + + ids + |> Enum.chunk_every(100) + |> Enum.each(fn chunk -> + IO.puts("Adding #{length(chunk)} activities to indexing queue") + + chunk + |> Enum.map(fn id -> + SearchIndexingWorker.new(%{"op" => "add_to_index", "activity" => id}) + end) + |> Oban.insert_all() + end) + end) + end +end diff --git a/lib/pleroma/search/qdrant_search.ex b/lib/pleroma/search/qdrant_search.ex new file mode 100644 index 000000000..fcaa9e686 --- /dev/null +++ b/lib/pleroma/search/qdrant_search.ex @@ -0,0 +1,117 @@ +defmodule Pleroma.Search.QdrantSearch do + @behaviour Pleroma.Search.SearchBackend + import Ecto.Query + alias Pleroma.Activity + + alias __MODULE__.QdrantClient + alias __MODULE__.OllamaClient + + import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1] + + def initialize_index() do + payload = Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration]) + QdrantClient.put("/collections/posts", payload) + end + + def drop_index() do + QdrantClient.delete("/collections/posts") + end + + def get_embedding(text) do + with {:ok, %{body: %{"embedding" => embedding}}} <- + OllamaClient.post("/api/embeddings", %{ + prompt: text, + model: Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_model]) + }) + |> IO.inspect() do + {:ok, embedding} + else + _ -> + {:error, "Failed to get embedding"} + end + end + + defp build_index_payload(activity, embedding) do + %{ + points: [ + %{ + id: activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!(), + vector: embedding + } + ] + } + end + + defp build_search_payload(embedding) do + %{ + vector: embedding, + limit: 20 + } + end + + @impl true + def add_to_index(activity) do + # This will only index public or unlisted notes + maybe_search_data = object_to_search_data(activity.object) + IO.puts("TRYING TO INDEX\n\n") + + if activity.data["type"] == "Create" and maybe_search_data do + with {:ok, embedding} <- get_embedding(maybe_search_data.content), + {:ok, %{status: 200}} <- + QdrantClient.put( + "/collections/posts/points", + build_index_payload(activity, embedding) + ) do + :ok + else + e -> {:error, e} + end + else + :ok + end + end + + @impl true + def search(_user, query, _options) do + with {:ok, embedding} <- get_embedding(query), + {:ok, %{body: %{"result" => result}}} <- + QdrantClient.post("/collections/posts/points/search", build_search_payload(embedding)) do + ids = + Enum.map(result, fn %{"id" => id} -> + Ecto.UUID.dump!(id) + end) + + from(a in Activity, where: a.id in ^ids) + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> Ecto.Query.order_by([a], fragment("array_position(?, ?)", ^ids, a.id)) + |> Pleroma.Repo.all() + else + _ -> + [] + end + end + + @impl true + def remove_from_index(_object) do + :ok + end +end + +defmodule Pleroma.Search.QdrantSearch.OllamaClient do + use Tesla + + plug(Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_url])) + plug(Tesla.Middleware.JSON) +end + +defmodule Pleroma.Search.QdrantSearch.QdrantClient do + use Tesla + + plug(Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_url])) + plug(Tesla.Middleware.JSON) + + plug(Tesla.Middleware.Headers, [ + {"api-key", Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_api_key])} + ]) +end |