5 files changed, 51 insertions, 0 deletions
diff --git a/supplemental/search/fastembed-api/Dockerfile b/supplemental/search/fastembed-api/Dockerfile
new file mode 100644
index 000000000..c1e0ef51f
--- /dev/null
+++ b/supplemental/search/fastembed-api/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.9
+
+WORKDIR /code
+COPY fastembed-server.py /workdir/fastembed-server.py
+COPY requirements.txt /workdir/requirements.txt
+
+RUN pip install -r /workdir/requirements.txt
+
+CMD ["python", "/workdir/fastembed-server.py"]
diff --git a/supplemental/search/fastembed-api/README.md b/supplemental/search/fastembed-api/README.md
new file mode 100644
index 000000000..63a037207
--- /dev/null
+++ b/supplemental/search/fastembed-api/README.md
@@ -0,0 +1,6 @@
+# About
+This is a minimal implementation of the [OpenAI Embeddings API](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) meant to be used with the QdrantSearch backend. 
+
+# Usage
+
+The easiest way to run it is to just use docker compose with `docker compose up`. This starts the server on the default configured port. Different models can be used, for a full list of supported models, check the [fastembed documentation](https://qdrant.github.io/fastembed/examples/Supported_Models/). The first time a model is requested it will be downloaded, which can take a few seconds.
diff --git a/supplemental/search/fastembed-api/compose.yml b/supplemental/search/fastembed-api/compose.yml
new file mode 100644
index 000000000..d4cb31722
--- /dev/null
+++ b/supplemental/search/fastembed-api/compose.yml
@@ -0,0 +1,5 @@
+services:
+  web:
+    build: .
+    ports:
+      - "11345:11345"
diff --git a/supplemental/search/fastembed-api/fastembed-server.py b/supplemental/search/fastembed-api/fastembed-server.py
new file mode 100644
index 000000000..02da69db2
--- /dev/null
+++ b/supplemental/search/fastembed-api/fastembed-server.py
@@ -0,0 +1,27 @@
+from fastembed import TextEmbedding
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+models = {}
+
+app = FastAPI()
+
+class EmbeddingRequest(BaseModel):
+    model: str
+    input: str
+
+@app.post("/v1/embeddings")
+def embeddings(request: EmbeddingRequest):
+    model = models.get(request.model) or TextEmbedding(request.model)
+    models[request.model] = model
+    embeddings = next(model.embed(request.input)).tolist()
+    return {"data": [{"embedding": embeddings}]}
+
+@app.get("/health")
+def health():
+    return {"status": "ok"}
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=11345)
diff --git a/supplemental/search/fastembed-api/requirements.txt b/supplemental/search/fastembed-api/requirements.txt
new file mode 100644
index 000000000..db67a8402
--- /dev/null
+++ b/supplemental/search/fastembed-api/requirements.txt
@@ -0,0 +1,4 @@
+fastapi==0.111.0
+fastembed==0.2.7
+pydantic==1.10.15
+uvicorn==0.29.0