12 files changed, 368 insertions, 0 deletions
diff --git a/changelog.d/qdrant_search.add b/changelog.d/qdrant_search.add
new file mode 100644
index 000000000..6f9e39e23
--- /dev/null
+++ b/changelog.d/qdrant_search.add
@@ -0,0 +1 @@
+Add Qdrant/Ollama search
diff --git a/config/config.exs b/config/config.exs
index b69044a2b..f74eda6b2 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -915,6 +915,15 @@ config :pleroma, Pleroma.Application,
 
 config :pleroma, Pleroma.Uploaders.Uploader, timeout: 30_000
 
+# Vector search: post embeddings are generated by Ollama and stored in Qdrant.
+config :pleroma, Pleroma.Search.QdrantSearch,
+  qdrant_url: "http://127.0.0.1:6333/",
+  qdrant_api_key: nil,
+  ollama_url: "http://127.0.0.1:11434",
+  ollama_model: "snowflake-arctic-embed:xs",
+  # NOTE(review): the vector size presumably has to match the model's output width
+  qdrant_index_configuration: %{vectors: %{size: 384, distance: "Cosine"}}
+
 # Import environment specific config. This must remain at the bottom
 # of this file so it overrides the configuration defined above.
 import_config "#{Mix.env()}.exs"
diff --git a/docs/configuration/search.md b/docs/configuration/search.md
index 0316c9bf4..682d1e52a 100644
--- a/docs/configuration/search.md
+++ b/docs/configuration/search.md
@@ -10,6 +10,12 @@ To use built-in search that has no external dependencies, set the search module
 
 While it has no external dependencies, it has problems with performance and relevancy.
 
+## QdrantSearch
+
+This uses the vector search engine [Qdrant](https://qdrant.tech) to search posts in a vector space. It needs a way to generate embeddings; for now only the [Ollama](https://ollama.com) API is supported.
+
+The default settings support a setup where both Ollama and Qdrant run on the same system as Pleroma. The embedding model used by Ollama needs to be pulled first (e.g. `ollama pull snowflake-arctic-embed:xs`) for the embedding to work.
+
 ## Meilisearch
 
 Note that it's quite a bit more memory hungry than PostgreSQL (around 4-5G for ~1.2 million
diff --git a/lib/mix/tasks/pleroma/search/indexer.ex b/lib/mix/tasks/pleroma/search/indexer.ex
new file mode 100644
index 000000000..81a9fced6
--- /dev/null
+++ b/lib/mix/tasks/pleroma/search/indexer.ex
@@ -0,0 +1,80 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Mix.Tasks.Pleroma.Search.Indexer do
+  @moduledoc "Mix task to create, drop and fill the index of the configured search backend."
+
+  import Mix.Pleroma
+  import Ecto.Query
+
+  alias Pleroma.Workers.SearchIndexingWorker
+
+  # Number of activities fetched per database query.
+  @per_step 1000
+
+  @doc """
+  Runs `create_index`, `drop_index` or `index [--limit N]` for the search backend.
+  `index` queues activities in descending id order, at most N (default 100_000).
+  """
+  def run(["create_index"]) do
+    start_pleroma()
+
+    case Pleroma.Config.get([Pleroma.Search, :module]).create_index() do
+      :ok -> IO.puts("Index created")
+      e -> IO.puts("Could not create index: #{inspect(e)}")
+    end
+  end
+
+  def run(["drop_index"]) do
+    start_pleroma()
+
+    case Pleroma.Config.get([Pleroma.Search, :module]).drop_index() do
+      :ok -> IO.puts("Index dropped")
+      e -> IO.puts("Could not drop index: #{inspect(e)}")
+    end
+  end
+
+  def run(["index" | options]) do
+    {options, [], []} = OptionParser.parse(options, strict: [limit: :integer])
+
+    start_pleroma()
+
+    limit = Keyword.get(options, :limit, 100_000)
+
+    0
+    |> Stream.iterate(&(&1 + @per_step))
+    |> Stream.take_while(&(&1 < limit))
+    |> Enum.reduce_while(:ok, fn offset, :ok ->
+      # Shorten the final page so that no more than `limit` activities are queued.
+      page_size = min(@per_step, limit - offset)
+
+      q =
+        from(a in Pleroma.Activity,
+          order_by: [desc: a.id],
+          limit: ^page_size,
+          offset: ^offset,
+          select: a.id
+        )
+
+      ids = Pleroma.Repo.all(q, timeout: :infinity)
+      IO.puts("Got #{length(ids)} activities, adding to indexer")
+      enqueue(ids)
+
+      # A short page means there is nothing left to fetch at later offsets.
+      if length(ids) < page_size, do: {:halt, :ok}, else: {:cont, :ok}
+    end)
+  end
+
+  # Inserts one "add_to_index" job per activity id, in batches of 100.
+  defp enqueue(ids) do
+    ids
+    |> Enum.chunk_every(100)
+    |> Enum.each(fn chunk ->
+      IO.puts("Adding #{length(chunk)} activities to indexing queue")
+      chunk
+      |> Enum.map(&SearchIndexingWorker.new(%{"op" => "add_to_index", "activity" => &1}))
+      |> Oban.insert_all()
+    end)
+  end
+end
diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex
index 31bfc7e33..24a1ff431 100644
--- a/lib/pleroma/search/database_search.ex
+++ b/lib/pleroma/search/database_search.ex
@@ -48,6 +48,12 @@ defmodule Pleroma.Search.DatabaseSearch do
   @impl true
   def remove_from_index(_object), do: :ok
 
+  # Index lifecycle callbacks are no-ops for this backend; both always return :ok.
+  @impl true
+  def create_index, do: :ok
+  @impl true
+  def drop_index, do: :ok
+
   def maybe_restrict_author(query, %User{} = author) do
     Activity.Queries.by_author(query, author)
   end
diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex
index 2bff663e8..50f5984d6 100644
--- a/lib/pleroma/search/meilisearch.ex
+++ b/lib/pleroma/search/meilisearch.ex
@@ -10,6 +10,12 @@ defmodule Pleroma.Search.Meilisearch do
 
   @behaviour Pleroma.Search.SearchBackend
 
+  # Index lifecycle callbacks are no-ops for this backend; both always return :ok.
+  @impl true
+  def create_index, do: :ok
+  @impl true
+  def drop_index, do: :ok
+
   defp meili_headers do
     private_key = Config.get([Pleroma.Search.Meilisearch, :private_key])
 
diff --git a/lib/pleroma/search/qdrant_search.ex b/lib/pleroma/search/qdrant_search.ex
new file mode 100644
index 000000000..acfaaff52
--- /dev/null
+++ b/lib/pleroma/search/qdrant_search.ex
@@ -0,0 +1,139 @@
+defmodule Pleroma.Search.QdrantSearch do
+  @moduledoc "Search backend storing Ollama embeddings of posts in Qdrant."
+  @behaviour Pleroma.Search.SearchBackend
+  import Ecto.Query
+  alias Pleroma.Activity
+
+  alias __MODULE__.QdrantClient
+  alias __MODULE__.OllamaClient
+  alias Pleroma.Config.Getting, as: Config
+
+  import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]
+
+  @doc "Creates the `posts` collection using the configured `:qdrant_index_configuration`."
+  @impl true
+  def create_index() do
+    payload = Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration])
+
+    qdrant_result(QdrantClient.put("/collections/posts", payload))
+  end
+
+  @doc "Deletes the `posts` collection."
+  @impl true
+  def drop_index(), do: qdrant_result(QdrantClient.delete("/collections/posts"))
+
+  @doc """
+  Requests an embedding for `text` from Ollama, using the configured `:ollama_model`.
+
+  Returns `{:ok, embedding}`, or `{:error, "Failed to get embedding"}` for any other response.
+  """
+  def get_embedding(text) do
+    request = %{prompt: text, model: Config.get([Pleroma.Search.QdrantSearch, :ollama_model])}
+
+    case OllamaClient.post("/api/embeddings", request) do
+      {:ok, %{body: %{"embedding" => embedding}}} -> {:ok, embedding}
+      _ -> {:error, "Failed to get embedding"}
+    end
+  end
+
+  # Translates a Qdrant response into the `:ok | {:error, reason}` backend contract.
+  # Error tuples are passed through as they are instead of being wrapped a second time.
+  defp qdrant_result({:ok, %{status: 200}}), do: :ok
+  defp qdrant_result({:error, _} = error), do: error
+  defp qdrant_result(other), do: {:error, other}
+
+  # Renders an activity's FlakeId as the UUID string that is used as Qdrant point id.
+  defp point_id(activity_id), do: activity_id |> FlakeId.from_string() |> Ecto.UUID.cast!()
+
+  defp build_index_payload(activity, embedding) do
+    %{points: [%{id: point_id(activity.id), vector: embedding}]}
+  end
+
+  defp build_search_payload(embedding), do: %{vector: embedding, limit: 20}
+
+  @doc """
+  Embeds the content of a `Create` activity and stores it as a point in Qdrant.
+
+  Activities of another type, or whose object yields no search data, are skipped
+  and answered with `:ok`.
+  """
+  @impl true
+  def add_to_index(activity) do
+    # This will only index public or unlisted notes
+    maybe_search_data = object_to_search_data(activity.object)
+
+    if activity.data["type"] == "Create" and maybe_search_data do
+      with {:ok, embedding} <- get_embedding(maybe_search_data.content) do
+        "/collections/posts/points"
+        |> QdrantClient.put(build_index_payload(activity, embedding))
+        |> qdrant_result()
+      end
+    else
+      :ok
+    end
+  end
+
+  @doc """
+  Deletes the point that belongs to `object` from Qdrant.
+
+  The point id is derived from the activity found for the object's id; when there
+  is none, `{:error, :activity_not_found}` is returned instead of raising.
+  """
+  @impl true
+  def remove_from_index(object) do
+    case Activity.get_by_object_ap_id_with_object(object.data["id"]) do
+      nil ->
+        {:error, :activity_not_found}
+
+      activity ->
+        "/collections/posts/points/delete"
+        |> QdrantClient.post(%{"points" => [point_id(activity.id)]})
+        |> qdrant_result()
+    end
+  end
+
+  @doc """
+  Embeds `query`, asks Qdrant for at most 20 matching points and loads their activities.
+
+  `user` and `options` are ignored. If embedding or the Qdrant request fails, an
+  empty list is returned.
+  """
+  @impl true
+  def search(_user, query, _options) do
+    query = "Represent this sentence for searching relevant passages: #{query}"
+
+    with {:ok, embedding} <- get_embedding(query),
+         {:ok, %{body: %{"result" => result}}} <-
+           QdrantClient.post("/collections/posts/points/search", build_search_payload(embedding)) do
+      ids = Enum.map(result, fn %{"id" => id} -> Ecto.UUID.dump!(id) end)
+
+      from(a in Activity, where: a.id in ^ids)
+      |> Activity.with_preloaded_object()
+      |> Activity.restrict_deactivated_users()
+      |> Ecto.Query.order_by([a], fragment("array_position(?, ?)", ^ids, a.id))
+      |> Pleroma.Repo.all()
+    else
+      _ -> []
+    end
+  end
+end
+
+defmodule Pleroma.Search.QdrantSearch.OllamaClient do
+  use Tesla
+  alias Pleroma.Config.Getting, as: Config
+  # The base URL is read from the :ollama_url setting; bodies are handled as JSON.
+  plug(Tesla.Middleware.BaseUrl, Config.get([Pleroma.Search.QdrantSearch, :ollama_url]))
+  plug(Tesla.Middleware.JSON)
+end
+
+defmodule Pleroma.Search.QdrantSearch.QdrantClient do
+  use Tesla
+  alias Pleroma.Config.Getting, as: Config
+  # The base URL is read from the :qdrant_url setting; bodies are handled as JSON.
+  plug(Tesla.Middleware.BaseUrl, Config.get([Pleroma.Search.QdrantSearch, :qdrant_url]))
+  plug(Tesla.Middleware.JSON)
+  # Sends :qdrant_api_key as "api-key". NOTE(review): uses Pleroma.Config, not the Config alias.
+  plug(Tesla.Middleware.Headers, [
+    {"api-key", Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_api_key])}
+  ])
+end
diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex
index 68bc48cec..9735ab3f4 100644
--- a/lib/pleroma/search/search_backend.ex
+++ b/lib/pleroma/search/search_backend.ex
@@ -21,4 +21,14 @@ defmodule Pleroma.Search.SearchBackend do
   from index.
   """
   @callback remove_from_index(object :: Pleroma.Object.t()) :: :ok | {:error, any()}
+
+  @doc """
+  Create the index used by the backend; backends without one may simply return `:ok`.
+  """
+  @callback create_index() :: :ok | {:error, any()}
+
+  @doc """
+  Drop the index used by the backend; backends without one may simply return `:ok`.
+  """
+  @callback drop_index() :: :ok | {:error, any()}
 end
diff --git a/python/Dockerfile b/python/Dockerfile
new file mode 100644
index 000000000..f83c1c1b3
--- /dev/null
+++ b/python/Dockerfile
@@ -0,0 +1,8 @@
+FROM python:3.9
+
+WORKDIR /workdir
+# Install dependencies before copying the script so edits to it reuse the cached layer.
+RUN pip install --no-cache-dir --upgrade fastembed fastapi uvicorn
+
+COPY fastembed-server.py /workdir/fastembed-server.py
+CMD ["python", "/workdir/fastembed-server.py"]
diff --git a/python/compose.yml b/python/compose.yml
new file mode 100644
index 000000000..d4cb31722
--- /dev/null
+++ b/python/compose.yml
@@ -0,0 +1,5 @@
+services:
+  web:
+    build: .  # image is built from the Dockerfile in this directory
+    ports:
+      - "11345:11345"  # port fastembed-server.py listens on
diff --git a/python/fastembed-server.py b/python/fastembed-server.py
new file mode 100644
index 000000000..fa3f7c82b
--- /dev/null
+++ b/python/fastembed-server.py
@@ -0,0 +1,21 @@
+from fastembed import TextEmbedding
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+model = TextEmbedding("snowflake/snowflake-arctic-embed-xs")  # built once at import, reused per request
+
+app = FastAPI()
+
+class EmbeddingRequest(BaseModel):
+    model: str  # required in the request body, but not read by the handler below
+    prompt: str  # text to embed
+
+@app.post("/api/embeddings")
+def embeddings(request: EmbeddingRequest):
+    embeddings = next(model.embed(request.prompt)).tolist()
+    return {"embedding": embeddings}
+
+if __name__ == "__main__":
+    import uvicorn
+    # Listens on all interfaces; 11345 is the port published by compose.yml.
+    uvicorn.run(app, host="0.0.0.0", port=11345)
diff --git a/test/pleroma/search/qdrant_search_test.exs b/test/pleroma/search/qdrant_search_test.exs
new file mode 100644
index 000000000..e816311aa
--- /dev/null
+++ b/test/pleroma/search/qdrant_search_test.exs
@@ -0,0 +1,77 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Search.QdrantSearchTest do
+  use Pleroma.DataCase, async: true
+  use Oban.Testing, repo: Pleroma.Repo
+
+  import Pleroma.Factory
+  import Mox
+
+  alias Pleroma.Web.CommonAPI
+  alias Pleroma.UnstubbedConfigMock, as: Config
+  alias Pleroma.Search.QdrantSearch
+  alias Pleroma.Workers.SearchIndexingWorker
+
+  describe "Qdrant search" do
+    test "indexes a public post on creation, deletes from the index on deletion" do
+      user = insert(:user)
+      # Stub the Ollama and Qdrant endpoints; every stub sends a marker message asserted below.
+      Tesla.Mock.mock(fn
+        %{method: :post, url: "https://ollama.url/api/embeddings"} ->
+          send(self(), "posted_to_ollama")
+          Tesla.Mock.json(%{embedding: [1, 2, 3]})
+
+        %{method: :put, url: "https://qdrant.url/collections/posts/points", body: body} ->
+          send(self(), "posted_to_qdrant")
+
+          assert match?(%{"points" => [%{"vector" => [1, 2, 3]}]}, Jason.decode!(body))
+
+          Tesla.Mock.json("ok")
+
+        %{method: :post, url: "https://qdrant.url/collections/posts/points/delete"} ->
+          send(self(), "deleted_from_qdrant")
+          Tesla.Mock.json("ok")
+      end)
+      # Exactly 6 config lookups are expected; they resolve to this backend and the stub URLs.
+      Config
+      |> expect(:get, 6, fn
+        [Pleroma.Search, :module], nil ->
+          QdrantSearch
+
+        [Pleroma.Search.QdrantSearch, key], nil ->
+          %{
+            ollama_model: "a_model",
+            ollama_url: "https://ollama.url",
+            qdrant_url: "https://qdrant.url"
+          }[key]
+      end)
+      # A public post should enqueue an "add_to_index" job for its activity.
+      {:ok, activity} =
+        CommonAPI.post(user, %{
+          status: "guys i just don't wanna leave the swamp",
+          visibility: "public"
+        })
+
+      args = %{"op" => "add_to_index", "activity" => activity.id}
+
+      assert_enqueued(
+        worker: SearchIndexingWorker,
+        args: args
+      )
+
+      assert :ok = perform_job(SearchIndexingWorker, args)
+      assert_received("posted_to_ollama")
+      assert_received("posted_to_qdrant")
+      # Deleting the post should enqueue a "remove_from_index" job for its object.
+      {:ok, _} = CommonAPI.delete(activity.id, user)
+
+      delete_args = %{"op" => "remove_from_index", "object" => activity.object.id}
+      assert_enqueued(worker: SearchIndexingWorker, args: delete_args)
+      assert :ok = perform_job(SearchIndexingWorker, delete_args)
+
+      assert_received("deleted_from_qdrant")
+    end
+  end
+end