diff --git a/README.md b/README.md index 55144d5..9786b8e 100644 --- a/README.md +++ b/README.md @@ -1,92 +1,60 @@ # embeddingsearch Logo -embeddingsearch is a search server that uses Embedding Similarity Search (similiarly to [Magna](https://github.com/yousef-rafat/Magna/tree/main)) to semantically compare a given input to a database of indexed entries. +embeddingsearch is a self-hosted semantic search server built on vector embeddings. -embeddingsearch offers: -- Privacy and flexibility through self-hosted solutions like: - - ollama +It lets you index and semantically search text using modern embedding models. It's designed to be flexible, extensible, and easy to use. + +Logo + +## What embeddingsearch offers: +- Privacy and flexibility by allowing one to self-host everything, including: + - Ollama - OpenAI-compatible APIs (like LocalAI) -- Great flexibility through deep control over - - the amount of datapoints per entity (i.e. the thing you're trying to find) - - which models are used (multiple per datapoint possible to improve accuracy) - - which models are sourced from where (multiple Ollama/OpenAI-compatible sources possible) - - similarity calculation methods - - aggregation of results (when multiple models are used per datapoint) +- Astonishing accuracy when using multiple models for single indices +- Ease-of-use and ease-of-implementation + - The server offers a front-end for management and status information, as well as a decorated swagger back-end + - The indexer can also be self-hosted and serves as a host for executing indexing scripts + - The client library can be used to develop your own client software that posts queries or creates indices +- Caching & persistency + - Generating embeddings is expensive. So why not cache AND store them? + - Query results can also be cached. + - "Doesn't that eat a lot of precious RAM?" - My own testing showed: embeddings take up around 4200-5200 bytes each depending on the request string size. 
So around 4-5 GB per million cached embeddings. -This repository comes with a -- server (accessible via API calls & swagger) -- clientside library (C#) -- scripting based indexer service that supports the use of +This repository comes with a: +- Server +- Client library (C#) +- Scripting-based indexer service that supports the use of - Python - - CSharp (Roslyn) - - Golang (Planned) + - CSharp (Roslyn - at-runtime evaluation) + - CSharp (Reflection - compiled) + - Lua (Planned) - Javascript (Planned) -# How to set up / use +# How to set up ## Server -(Docker now available! See [Docker installation](docs/Server.md#docker-installation)) -1. Install [ollama](https://ollama.com/download) -2. Pull a few models using ollama (e.g. `paraphrase-multilingual`, `bge-m3`, `mxbai-embed-large`, `nomic-embed-text`) -3. [Install the depencencies](docs/Server.md#installing-the-dependencies) -4. [Set up a local mysql database](docs/Server.md#mysql-database-setup) -5. [Set up the configuration](docs/Server.md#setup) -6. In `src/server` execute `dotnet build && dotnet run` to start the server -7. (optional) [Create a searchdomain using the web interface](docs/Server.md#accessing-the-api) -## Client -1. Download the package and add it to your project (TODO: NuGet) -2. Create a new client by either: - 1. By injecting IConfiguration (e.g. `services.AddSingleton();`) - 2. By specifying the baseUri, apiKey, and searchdomain (e.g. `new Client.Client(baseUri, apiKey, searchdomain)`) +(Docker is also available! See [Docker installation](docs/Server.md#docker-installation)) +1. Install the inference tool of your choice (e.g. [ollama](https://ollama.com/download)) and pull a few models that support generating embeddings. +2. [Install the dependencies](docs/Server.md#installing-the-dependencies) +3. [Set up a MySQL database](docs/Server.md#mysql-database-setup) +4. [Set up the configuration](docs/Server.md#configuration) +5. 
In `src/Server` execute `dotnet build && dotnet run` to start the server +6. (optional) Create a searchdomain using the web interface ## Indexer (Docker now available! See [Docker installation](docs/Indexer.md#docker-installation)) 1. [Install the dependencies](docs/Indexer.md#installing-the-dependencies) -2. [Set up the server](#server) -3. [Configure the indexer](docs/Indexer.md#configuration) -4. [Set up your indexing script(s)](docs/Indexer.md#scripting) -5. Run with `dotnet build && dotnet run` (Or `/usr/bin/dotnet build && /usr/bin/dotnet run`) +2. [Configure the indexer](docs/Indexer.md#configuration) +3. [Set up your indexing script(s)](docs/Indexer.md#scripting) +4. In `src/Indexer` execute `dotnet build && dotnet run` to start the indexer # Known issues | Issue | Solution | | --- | --- | -| Unhandled exception. MySql.Data.MySqlClient.MySqlException (0x80004005): Invalid attempt to access a field before calling Read() | The searchdomain you entered does not exist | -| Unhandled exception. MySql.Data.MySqlClient.MySqlException (0x80004005): Authentication to host 'localhost' for user 'embeddingsearch' using method 'caching_sha2_password' failed with message: Access denied for user 'embeddingsearch'@'localhost' (using password: YES) | TBD | -| System.DllNotFoundException: Could not load libpython3.12.so with flags RTLD_NOW \| RTLD_GLOBAL: libpython3.12.so: cannot open shared object file: No such file or directory | Install python3.12-dev via apt. Also: try running the indexer using `/usr/bin/dotnet` instead of `dotnet` (make sure dotnet is installed via apt) | -# To-do -- (High priority) Add default indexer - - Library - - Processing: - - Text / Markdown documents: file name, full text, paragraphs - - Documents - - PDF: file name, full text, headline?, paragraphs, images? - - odt/docx: file name, full text, headline?, images? - - msg/eml: file name, title, recipients, cc, text - - Images: file name, OCR, image description? - - Videos? 
- - Presentations (Impress/Powerpoint): file name, full text, first slide title, titles, slide texts - - Tables (Calc / Excel): file name, tab/page names?, full text (per tab/page) - - Other? (TBD) - - Server - - ~~Scripting capability (Python; perhaps also lua)~~ (Done with the latest commits) - - ~~Intended sourcing possibilities:~~ - - ~~Local/Remote files (CIFS, SMB, FTP)~~ - - ~~Database contents (MySQL, MSSQL)~~ - - ~~Web requests (E.g. manual crawling)~~ - - ~~Script call management (interval based & event based)~~ -- Implement [ReaderWriterLock](https://learn.microsoft.com/en-us/dotnet/api/system.threading.readerwriterlockslim?view=net-9.0&redirectedfrom=MSDN) for entityCache to allow for multithreaded read access while retaining single-threaded write access. -- NuGet packaging and corresponding README documentation -- Add option for query result detail levels. e.g.: - - Level 0: `{"Name": "...", "Value": 0.53}` - - Level 1: `{"Name": "...", "Value": 0.53, "Datapoints": [{"Name": "title", "Value": 0.65}, {...}]}` - - Level 2: `{"Name": "...", "Value": 0.53, "Datapoints": [{"Name": "title", "Value": 0.65, "Embeddings": [{"Model": "bge-m3", "Value": 0.87}, {...}]}, {...}]}` -- Add "Click-Through" result evaluation (For each entity: store a list of queries that led to the entity being chosen by the user. Then at query-time choose the best-fitting entry and maybe use it as another datapoint? Or use a separate weight function?) -- Reranker/Crossencoder/RAG (or anything else beyond initial retrieval) support -- Remove the `id` collumns from the database tables where the table is actually identified (and should be unique by) the name, which should become the new primary key. -- Improve performance & latency (Create ready-to-go processes where each contain an n'th share of the entity cache, ready to perform a query. Prepare it after creating the entity cache.) 
-- Implement dynamic invocation based database migrations - -# Future features -- Support for other database types (MSSQL, SQLite) +| System.DllNotFoundException: Could not load libpython3.13.so with flags RTLD_NOW \| RTLD_GLOBAL: libpython3.13.so: cannot open shared object file: No such file or directory | Install python3.13-dev via apt. Also: try running the indexer using `/usr/bin/dotnet` instead of `dotnet` (to make sure dotnet is not running as a snap) | +# Planned features and support +- Document processor with automatic chunking (e.g.: .md, .pdf, .docx, .xlsx, .png, .mp4) +- Indexer front-end +- Support for other database types (MSSQL, SQLite, PostgreSQL, MongoDB, Redis) # Community Discord \ No newline at end of file diff --git a/docs/Indexer.md b/docs/Indexer.md index 9c0eb23..f27bf7d 100644 --- a/docs/Indexer.md +++ b/docs/Indexer.md @@ -8,15 +8,18 @@ The indexer by default - Uses HealthChecks (endpoint: `/healthz`) ## Docker installation (On Linux you might need root privileges, thus use `sudo` where necessary) -1. Navigate to the `src` directory -2. Build the docker container: `docker build -t embeddingsearch-indexer -f Indexer/Dockerfile .` -3. Run the docker container: `docker run --net=host -t embeddingsearch-indexer` (the `-t` is optional, but you get more meaningful output. Or use `-d` to run it in the background) +1. [Configure the indexer](#configuration) +2. [Set up your indexing script(s)](#scripting) +3. Navigate to the `src` directory +4. Build the docker container: `docker build -t embeddingsearch-indexer -f Indexer/Dockerfile .` +5. Run the docker container: `docker run --net=host -t embeddingsearch-indexer` (the `-t` is optional, but you get more meaningful output. Or use `-d` to run it in the background) ## Installing the dependencies ## Ubuntu 24.04 -1. Install the .NET SDK: `sudo apt update && sudo apt install dotnet-sdk-8.0 -y` -2. 
Install the python SDK: `sudo apt install python3 python3.12 python3.12-dev` +1. Install the .NET SDK: `sudo apt update && sudo apt install dotnet-sdk-10.0 -y` +2. Install the python SDK: `sudo apt install python3 python3.13 python3.13-dev` + - Note: Python 3.14 is not supported yet ## Windows -Download the [.NET SDK](https://dotnet.microsoft.com/en-us/download) or follow these steps to use WSL: +Download and install the [.NET SDK](https://dotnet.microsoft.com/en-us/download) or follow these steps to use WSL: 1. Install Ubuntu in WSL (`wsl --install` and `wsl --install -d Ubuntu`) 2. Enter your WSL environment `wsl.exe` and configure it 3. Update via `sudo apt update && sudo apt upgrade -y && sudo snap refresh` @@ -26,15 +29,15 @@ The configuration is located in `src/Indexer` and conforms to the [ASP.NET confi If you plan to use multiple environments, create any `appsettings.{YourEnvironment}.json` (e.g. `Development`, `Staging`, `Prod`) and set the environment variable `DOTNET_ENVIRONMENT` accordingly on the target machine. ## Setup -If you just installed the server and want to configure it: -1. Open `src/Server/appsettings.Development.json` +If you just installed the indexer and want to configure it: +1. Open `src/Indexer/appsettings.Development.json` 2. If your search server is not on the same machine as the indexer, update "BaseUri" to reflect the URL to the server. -3. If your search server requires API keys, (i.e. it's operating outside of the "Development" environment) set `"ApiKey": ""` beneath `"BaseUri"` in the `"Embeddingsearch"` section. -4. Create your own indexing script(s) in `src/Indexer/Scripts/` and configure their use as +3. If you configured API keys for the search server, set `"ApiKey": ""` beneath `"BaseUri"` in the `"Server"` section. +4. 
Create your own indexing script(s) in `src/Indexer/Scripts/` and configure them as shown below ## Structure ```json - "EmbeddingsearchIndexer": { - "Worker": + "Indexer": { + "Workers": [ // This is a list; you can have as many "workers" as you want { "Name": "example", @@ -50,7 +53,12 @@ If you just installed the server and want to configure it: "Name": "secondWorker", /* ... */ } - ] + ], + "ApiKeys": ["YourApiKeysHereForTheIndexer"], // API Keys for if you want to protect the indexer's API + "Server": { + "BaseUri": "http://localhost:5000", // URL to the embeddingsearch server + "ApiKey": "ServerApiKeyHere" // API Key set in the server + } } ``` ## Call types @@ -71,6 +79,13 @@ If you just installed the server and want to configure it: - Parameters: - Path (e.g. "Scripts/example_content") # Scripting +Scripts should be put in `src/Indexer/Scripts/`. If you look there, by default you will find some example scripts that can be taken as reference when building your own. + +For configuration of the scripts see: [Structure](#structure) + +The next few sections explain some core concepts/patterns. If you want to skip to explicit code examples, look here: +- [Python](#python) +- [Roslyn](#c-roslyn) ## General Scripts need to define the following functions: - `init()` @@ -186,7 +201,7 @@ from tools import * # Import all tools that are provided for ease of scripting def init(toolset: Toolset): # defining an init() function with 1 parameter is required. pass # Your code would go here. - # DO NOT put a main loop here! Why? + # Don't put a main loop here! Why? # This function prevents the application from initializing and maintains exclusive control over the GIL def update(toolset: Toolset): # defining an update() function with 1 parameter is required. 
@@ -261,7 +276,7 @@ public class ExampleScript : Indexer.Models.IScript // Required: return an instance of your IScript-extending class return new ExampleScript(); ``` -## Golang +## Lua TODO ## Javascript TODO \ No newline at end of file diff --git a/docs/ProjectOutline/ProjectOutlineDiagram.excalidraw.md b/docs/ProjectOutline/ProjectOutlineDiagram.excalidraw.md new file mode 100644 index 0000000..6cf6e5d --- /dev/null +++ b/docs/ProjectOutline/ProjectOutlineDiagram.excalidraw.md @@ -0,0 +1,190 @@ +--- + +excalidraw-plugin: parsed +tags: [excalidraw] + +--- +==⚠ Switch to EXCALIDRAW VIEW in the MORE OPTIONS menu of this document. ⚠== You can decompress Drawing data with the command palette: 'Decompress current Excalidraw file'. For more info check in plugin settings under 'Saving' + + +# Excalidraw Data + +## Text Elements +Server ^TJzgO4nS + +Indexer ^rgrd8gyy + +embeddingsearch ^jB1B7xr7 + +Client ^ZttcBOXC + +embeddings +provider ^mEIPhpn1 + +✔️ Ollama +✔️ OpenAI-compatible + (e.g. LocalAI) ^o6rED2fi + +uses ^QkKnkGvS + +Database ^yaSaChsK + +✔️ MySQL +⚒️ SQLite +⚒️ MSSQL +⚒️ PostgreSQL +⚒️ MongoDB +⚒️ Redis ^LHP4PU6V + +Stores +data in ^FP2xPhxz + +Listens on port 5146 + ^CJG2peC6 + +Listens on port 5210 ^iLZT5hca + +Workers ^33rXJfFb + +- example.py +- example.csx +- ... 
^e1BVqXa2 + +✔️ Front-end +✔️ Swagger +✔️ Elmah ^6UTNDntp + +⚒️ Front-end +✔️ Swagger +✔️ Elmah ^tlLF3R27 + +✔️ Caches embeddings +✔️ Caches queries ^I2lN1U82 + +✔️ C# library +⚒️ NuGet +✔️ Searchdomain CRUD +✔️ Entity CRUD +✔️ Management operations ^4Ab3XHhK + +Uses ^KvuBRV2K + +Accesses ^ikhSH5rs + +✔️ Multiple provider +configuration ^ipkoadg8 + +%% +## Drawing +```compressed-json +N4KAkARALgngDgUwgLgAQQQDwMYEMA2AlgCYBOuA7hADTgQBuCpAzoQPYB2KqATLZMzYBXUtiRoIACyhQ4zZAHoFAc0JRJQgEYA6bGwC2CgF7N6hbEcK4OCtptbErHALRY8RMpWdx8Q1TdIEfARcZgRmBShcZQUebQBGAE5tHho6IIR9BA4oZm4AbXAwUDBSiBJuCBghAHUAeQA1QgAzAE000shYRErA7CiOZWCOssxuZwA2HgntCYAGAHYADh4l + +hYAWAGY5ub4iyBhx9YBWY+1d9cSp+Pi5pdvj/jKKEnVueO2Z6eOrpeO544neLrJ6QSQIQjKaTcRI/WanCabY4TdY8TYLdGgiDWIbiVBzLHMKCkNgAawQAGE2Pg2KRKgBieIIJlMkaQTS4bCk5QkoQcYhUml0iSM5litkQZqEfD4ADKsGGEkEHglRJJ5Jqr0k3Dm2ke+wgarJCHlMEV6GVFSxvKhHHCeTQBINbDgnLUhzQtydnQgPOEcAAksQHah8 + +gBdLHNchZIPcDhCGVYwj8rCVXBzCW8/l25ghko+7p4zb7AC+hIQCGI3DROySgKWWMYLHYXE9sMbTFYnAAcpwxNwlkt1hN7lck8wACIZKCV7jNAhhLGaYT8gCiwSyORD8cTBqEcGIuBnVc9CwmiR4C3ixyvP0SWKIHFJcYT+AfbC5s7Q8/wYSKZaKfNIAqCQAEcGgWZRNlJAAlAAJCVC16BB+hxYYsTGNBnHiFZtE2RFVniHh9R9D1UGcb5tGHBYM + +R4eINhOOZEk2LEXmIN40CRBY9T+ZiFhIspwUhaFOPWdZtCuHgtimQEuImLE0Lxb0yiNclBVpBlmi07SJQ5Lk/T5AVqQ0iRiWsZhXUCHIJSlGVTXNQ1qStA1VIQTV2O1NA9h9Vz7LxRyVWtYRbXtd4sRdN1YHeHYsQMwNgwKCMDSjXAYxPVAdzfA1k2IVMJFweJMxXYgczzfYungItS3LL9UHiRFYUWeqQQNJsu1bOrlg7Zsez7PEbxRY5gQbbLJ2 + +nWqf0XA1l0M9dMmyXICnKoCyhA9AAEFCEkCcAC0AwAIQWNkyiQ/LSBJKhyoAzpinK4D0ogAAxXJlCMGB6HaUETsqtNzrYS6bpLfYkp9fdD2Pd4zwvW96rRbyykfZ80Ey99P3SyaECxSRQgAFSwKAABlkyR1AMf/J4gPKB6cYAKSMZQ6nWDhZUQn7TPxiVMPI4cUh2dYaJ2QXaxa0jxjhvCMTmYd/iF3ZWK1AczmmOZNm2Hha1hTZ4ixiEoSgbgkQ + +UwZzWUgRiWNdThXQeltJ0pdOW5LMjKFSozI4CzcCs/XI2lOUFX8y0q0Jc2NQVrzg/VE1/cqQOir8SRSrC51XWwd1otN31eXikNw0jaMEFjZHXyTFMuexVIgsMxOi93HyK3S4EeEvP4phGn02pbbh1jubr2t7Dh+09H4z2BRJ1jb1axuCCHvwXTHpuKubN0WtBc73A8j1q+jz0va9Yc2eHIBpNG5znrEZ0wfWJFlJgm0zSg8cvyob9IO/I04KBZUI + 
+Iw8VWd+ckeqlaUZEBJdHxutIgygOoQGCM0b2rUmBQHMAQCBkJoFQBdBKPQORcDJiYIXDKxdnSkEhMmAgj8r7oBfm/A0uAhAYJguEb+eJiRCHnj6R8CA4K6xEnVFIoCpC43xkTJ8p9fzsIRsTF8MpyaAWyg9XA+0jAAHF8AAEUajYBgA0DgzQYBqMmJBKAcwuDnzZugPoAxcSc2rBMM4CxEhDk2IkAETElgsQNGRCidiqITBolrc8Sw5gjgWPLDyB + +sbw8R+OiARQk9bcCGvECSKwmIHxVure8tDjZKQjhbYyVsIA21ts0XSDsDL8ktq7cg7tLILRsr7PyMcnJBxciHNyYdeC5PJI0pUzS44hVzEnH0EVU5RU9DFA0cUgw5xBmUFKaVpFZR9DlPK6BcCbDjtmUKaAVrQHMcWTo10VL12rIsZxQ1gkT0gB3Tg7wxKZPbp2Fs/dB51T+Gsa8QTxxTmnhNM+C9ZobgWjnZad0qaVAABowTqMo7+AArTQx0Ko9 + +DOhdCAV1gZYjBpvBuUNd5DSmAfB8Uia5LIRh+ck6N/k+mxswChIiSZk1KEcyma0ICkB5MQJYygYCHDMSi9AF94E+i5s4uIUsLy7G2G47YIsyhePFokRINwRxiSCSOeSBo2IcTqgCc4Q51gqroqk/iOthKUMNlk6xjoumUnyZpYppT9JO0qaZapHsvb1LstHXpgVWmR3cjqw+ho2k9ItH0yuAyQza2TpFMiXpYpZ2mYlPOqUC7pRRtlUuaZ1ibJKt + +swhtdjlby2FsVWw1e6d09BMOxlbeoD1/irZE7jNXLKnggGepNqVlBmmuIFW4U3r3BlvPFMNCXBuPpSsRU0CwcwkAGUuTB74UAoZUBduVMBLv/p/Zh1YrmSg/oA/QwCEnn3AZA6BsDhVlCbEg9wqCoGu0wVibBUQ8GkAIZm4ZJD/DkLnegddWAt20PoWwRhrAf7cFYRIo+eDuHmvePwrGQjL4MunTBmBJLC34FkaUVlD04X7XiIdTApAjr8v8kKmx + +WFgTcWIgCK4EwcLNQBFiLxqJuKbHuKca8YSg1LBmDReYF5jg8AvFcB5gkeGUJOEba1+JbWuutsUkp9tnXFSU9Ad1tTrI+29WaAOEb/XGkDZ5Tpxnuk+vDX6n0NoE4FpjcMlOadxkZymQlVeszIDzPTYskuG60zHDzdXbDNV0oOJ+LDUTdaOp/GDTcjgLy8RETVSrVEGdCDts7RjJci9+0r1DF5iA2LO3b2hnvcdqMp2z3EWep+EhMiaErI4QYYRP + +bYG1NaB+/6MD6Ca8QFryg2uiE68lD+X9IOcVbXMw9QD8AgLq1AB9l6EBwIlLe5B+BltPrgFgj+uC7QfozUQ79pCOB/vq+gRrzXkxDZCCNiUdCGFMMm6gaDxK7TwfiZ6JDBpaX0qwzlg0iNFm4dussh6xxsDrGaPQfQph1gIDqJlgAqnBVcxAaiSGwLtijyFULZOo+Rbe2gFiXi1k3ZVdFlXTYOKc5Iw55g/BVhifCknIDarM1xKJfFYnSerDzOYt + +wuOnBcS45YcrICKR1Ip+1IoVNOsdhpuXgrtOezqXpv2Bmmk2ZUm00z1ZbVhoCs5WzwV7ODM9OFZzYzdVuaTR5wrqaFmkv86s7EExgsFt2adVABzmVheiiOdJmxx4xfeCEmLSX3ha1D9eZYGWst/NqwCvt80B2r1BTde6kLoWwqMAipFeyBXYj+gDQ5mKh04shjvMdcMPsky/eSk+NWZ2CRQ4TQHc8wf4cqDtGQ2B9p1AhRSVmJeqMYXGEiCS2x/j + 
+zENdTxYtOqgC6SWPaYTdVis53nxsz/EJIXgPuPMTtxrzL7ibwy1Pppc2os3al28vHVqaV4ZTTbsPUa+Sg0qzJuWk+X1x0sGr5D/rHJGhbtGtbnGunImv6Mmp5s7r5q7lmgFvlORgaE7CFk3gICctWiiEsI4qJpLgwE8rcl5LsPFiQYln1O8IOHcPMClt8uNFSinj6L2sQEvMCoOqDBvKVqOhVvXsDhSsnm3mApdhABSEQJ/rZt1mIRIYQFITNjkB + +Nr/Pus0LNsevNqegaEKtthIFeutogptroYKs+gaK+gdvgsdkWpALSGdhdpQuIZIbpiBs9hBiwqQGwh9lwvzj9sRMhnSsIl3iwZIqIqSj3vIpUAALLrB1AwAcAfqRFj7+SWLS6T5YQ/BnD0TDgqwCaGqJBXhsYJIzDIiERBLMQ7x+K757q6iSp+KIg0RDjuJEEX6UKXgzAtzOJBJNyXDqxEE34KZ36aZFK2yK7lLOwmSq7mQ6bXrebf7a6+qm564B + +qAFG4gFGZm7xwhaOZlAjIuZ24wH7hwFO7JT5yfonarTZr5SJBe6W6oA+77LVQuQ4GoBTCXhKpXjL4JYJITKPI9RUENo0EHy8S3D7qZY/IdrCEYZsEcEZ6hhZ43TgoSDPTMCvTvSfRgq+6l5ooYqdBFYlYjq178FErA5YZYEwJCHMEiGCIBGoZBF/gB5yIQ6VD6CrgBgAAKkgcAHAhUeO7MT8aR5ExwB8eERE6IGqeRZOhRaAQSpOqsAIZ4TEqw7O + +EAnO7w/weETOsICwKsREfOCGaA2pcmJssuD+ymT+00ZSLqKuWmUx6uzhPotkWuDkoBd+Bu4cd+xuLpGxUaQyOxNu8aPxZQ7mMyCBZx1h5Qlxayo+lcWytxZJYQtUMsDwg44eaAomypCW0ebYtwfiSQKIjBvylJUJeW6eBWa83Bw6uKhJBKAhHCFJ6Gi2lQ12A2t2zAAAOhwHACSGYLlHSF1iuj1s2YNu2Z2d2SQMBg6eNrul5KoeoSemmYtsYRAG + +IDkBOTeoYfehejtntjgu+mGWSjYT+mQvgKug1n1jdq1h2V2WwD2WuVLqBuBtOW9h4Rhpwl9rwkkn4X9h3mhq3q+aSa+OEUyRIGwBMKQKuBODwFKEka7BzAKc4MiJsHhM4lLC2hKVKXVFMPqlrPgcJkqVLFUV5ERFRB8GPFLGJiEnqd9qgKalasaYMdacMXbBaepq/tae/tMV6k6YZrrmbMseEu6f/pHJ6esWUHZlsZAaMgGfbrAY7uWXMqcVYQeR + +GSgWsquDcduOcdgbVP4jsGiMSb8e1NwCiBQX8VmbwMEkxlsMcCmaNOCdlt2uyCWcvCGVXrwdWfvBOvWX+Y2RIIACjkgA8H+oB1AyipS4AdmBXBWIAcDrQBjOB6D6CuhIKaDBAdmoDpWoAAAUCA2gyg2gqABMH4KCAYAAlMuqeegJFSFfgGFRFUFXUNFbFfFQYElYQClQgGlRldlblflYVfeqVTZFOa9urNukevOagAIjoVuXoatjMcQaQHeigtNS + +YbjmYftnuUpeFEeedieT1lVaFfoOFRwFVY1XFQla1e1Z1eld1XlQVUVVtgNQpA+S9u4Z4SSZ9j4Xwl+TSj+XSf+aEdhkBatA9MiaiR9DBaiv9ETthO4toPcKrKHoiDWjsPmZ4twDRFRBsJ8KrFsH8EQaqZ6EkFRN3D8KsJcBsPcOfp9bcNxIiH8ORUKaJkiH0YTrfkJXkqaYUgrs/mMW/mrp6priJbxSGvxUGqsfMdZosZAOJQ5pJXsQmpMg7q5Q + 
+6YpX5sge7rgI9BpdwPcQKjwI8XXLVKsAJheCNQgn8WqRiFHtQZ6FxteKLgZZPPZZCbloCqWSCjdCtMipRnBWCmymoqSAANJPjKL0Asw4mlB4k8EEnlY1mO2wYA1kmTou0GhwBsDJhlnlSFDZ6mylBzDlReZgDZ03TYRE1iQMZk35Hjz1RfRgA02zAi4M1IhoiAgF2V7/6exQD7Q5S3Y63lQYD5YEIQDgSQTQTwTHQQD6BsC5SVC0iaBqAT1SibrE + +Bsnp3WSZ450ak2UqyuJjy8So2b2Ih5HniXAAjAgs5t2HJYjZDEDd38i907L90ZDLxD3RGxHxEBiJFfST3T3+Rz0L3f1L2Vir0Z1LSb0YiDj4ErArDOInDDi126gQODiOKrBohjyAjrCX0Mkd0LXrRl7gi4CbUGg314MXQEMKJl4ShBDLgUAp0+hT2MCREkAgNbiajqAwmUJA4/U0md4A1MpgAsoRESAB3B2kih0sy8mCq+0ipiw+JST4SLDERCZc + +YYU3BLCk7MSnBDhMa0bAiEWoBbAkVKpM0ibLC6lmrUXfVlD9EZyuRDHc0sUv4VLsX80KGzH6bOmiV8UmYrEelrHC0y23HbE2H+nQGK2yXK0KVpr7lu5ly4DKLa1IGG3pR0RiRQz4GmVGVoCXDW0AmE0n6Djb0FkQlFmu1p4uVcFlD4lVmx2eVVZ0MnQ9ZCBhB5D9kVXFYtODVKFPlm2TkAJzYLbaHnpoKVArkzh9nm2LVbbLWT0kDEDoRrW7mHZD + +2g1vTg1bV2G7ViHNPhCPbPVuFQYvleHvmUKfmxK/V8OOWYaJ2AXYPg7A2VCaAcCtA4xqKgSygwQQ0WIoRWILMyPpFwhXin1cbNwH3ypixiZw04RDQuInBTC1paqAFC4pDMQAjOJIg716N/afXiz4HWVXhjzMRBIs3ya2NtL2PmmsGWnK6c0cV2lzWOlC1S0i0+MCXmbs2WYS2/79LgG+khNQGuYHHZyVPeaq1JMXGqXYgIQxn5q3G61VRX1PFbyn + +A3j1QBKpl1SXCfGUHmX0QOL8zpnBpglMENmp7sH5Ye2dBe2IkbRbS7QHRoHZ7F7+SezYmAzt1VPR01P4p1PvWN5aXkkt5drBFggXOMrd53O94SAwC4Cyi4AUiSDMCB1fPQDSOjDjCEFQvTD8xL67CLAYXYT5ESyyp2IYhk5ngeI+gE11RaxUSFPdwohjzn1U36l+4CI2MmkTFc2Us9rUtsW0uuP2lzJzGePC2uRunstLHGhMt/5iXm4SWxpSVhM+ + +jBkiuShiuhbq1xM0yJObvJMR54tKr4REFfEGkXi5OvLqw7ACYbB/x2Ums+VmscOROQDVM161OVaCFBtcONNiEThHi4AchhDlU9b/tRBAdIDbrKF7qjUDNaGzqXxLn6HdRTNLkYKrU+jmEbVq2na/pbMOFgeAehCQcuFgYvWHNvUcJwbU2/bcMA6XMhvXP+syKRtCPoAExwRsnrBsko4TANApsT4GhcwIU4RUSOIfCXhMTIg5No00aoizBTDSw1r4 + +Qgn6NqNw0ZGikn5C7IgWMfm9PWOs0DEcv35dtMVaSjFWkDu2kC1f4eM8XMvju+Mmczs8sLtOYCv7HhOHFyVFY+YxNbtpjJsyuYEBsJkNxCl3A8bx3zVZN+75EXt4j6v1g4SglJ6lNPsWtrtvungeWft1nftXNUb+VBWREwCyhqIEwdmABJZEFRV0TDODV6V7KPV016gKvUSDyCaJV215EZwMoGwBOPtG14wo4K0+gTIQ4ZFWV61xwLV6gPV2oB1X + 
+N817N/Nx11AF12t6V/14N8Nyt6gKN5ll0zuq9jXWNv0xoYMwh0tjM8h5M0YTM+hzuW+ss0Q7h8ee09N+Vz1wd4t41wd5ES179+t2wJ14ENt6gH14MHtyN5WMd09a4U+e9u9d4a22c/4fR+G4xyDmEax8Bba9tHtIdCm661DfBThHEEiDWmsGk0owl7J8TuqQ0X4kEqrICGi2pz4sEjWWJKcPxLZTSp9eJFLDcIancNTgo7JnRTkgxZzeZ6po47zS + +4zZ245KCOw57O946HGy0AaGv48y4ExAYu/LYGZAKu/AScdE+9xKxrQTLu/K9WAbcWulLHjeHvSe5QQbMxIl5DMRDcOWsvsa4Waa6wc5ZwRvVaxieYqm/yX7SDWyTwJgByZgEYOiu67iVil6++z6/lyEcx8pcnRlz6GnRnZa6UMXZ0LnWAPnTdIXZX6UNhNz3YrDHzzeDZVcqUCL4asCCCZL/PscFg2AEVkSJ3XfYNn3Qic/QtEPSPVBLBNK/3VPT + +PRIMmA4CRwiUAyvWvZQg3zX+cILDhFsPkdqS2kew4gg8hVJCOFJLsNqZcIOEP0cpADfePw/XcU/YPQ9E8y828x8xPcvz/qkB56MxWYsvRYa787ouoI+ssEIix4GIWLTevhFEz0QcIGqYiB3yf7BxO6pDf6OQxw5lASG+DEIBQzRTX18ANDBppAAYYIAmG2/Mvmw0kAcNQ+7eHhr+WDb0kBGFMNjk9ET7J9JAqfATmm0gDCcxMyQEcIfi4jjwtYwS + +AtvRCQoC8aaPRMSOiHxodJzweoJVOJjKI3gdSenShP8CNKy8TOFLEYjzSs5ds6WtnB0hrx1yOcACuvcWqO0N7ztZaJvW3ArRXZK012/nG3sBEjLYgv66BYqKF3DLhdIYoeK8DhX3Sns/cwTWLs8htq8A1GTbZYGl2drF8e04fWEvJVfbZ9cuH7Wss3mqwcCMMxXKhBgkCAjlwYuAVAMmBA5iF5QtIcIB2RqF1DTEF3U7o2lg5Xd4Ov7W7iMwkBjM + +7y81VDjM2PQDY/mZQLDm90qCbQieDrCULYTw7tMmhVQ1oQB3aF7Mker2FHlRw+ro9aOrArHiwIToF8gaOeCQJEX0AwR6A5XCYAk0kZYlyeQnRDHYmv5L4midiFEMvnjRFt6IkDeYOiFVjnh9GcQPIkxA2CyRaIzRamosAkhRDAQWpLYLsHiEds5eZnBxlS1YrONrONSellxVc62oJ2evYSgby17spXBQTOWh4LN6ZwImPgjdmSRWRxM6gDvfur7n + +1qKt922TOxMOGPae8LanoE4AIkzJJCyK6ICLEH3S6nCIA0JLLpHzwzR9x8wgh5qv1JCSBZQcEY4CwHT4V5M+blGOrnyKFnCCBR8byqUKxCl8chWdKAQgwLpfQ9+FEIxpCLgbAipItdbCPCPyIfAkRN4FEXRCH4j8ogC1N/oMDNEYB+QYY5QBGNH64NiBhDCMUQLIYkDfoZA4hhQP+hUDqSJwx9vQxvK0DmGO/ZgIwOYF/kLhNrcoBqK1E6jxuBYG + +PoJ3+bcwkKHGFnGIORqYhGeHwaAcxHVh/AbK1wM8PozWCylHEjiEcCximD6Dqw7bIzmS0jimDmK2IpxuMQKRWC1ejLckaqAcFi0/GXLL0nO02JuCPOS7QVt52FaW8Va1vCMSyLTCNBd28ZZ4vzGmDaMxIGrIiL72rTdF+RdI4PiU1lHyj3a2XfIZ1Dy4mjA2JQn9qIQcLrRsAYgXMLszaY9ZYJ8EzplBx6YZw1Cl3capNWGaPohhC0EYRtk3KDD0 + 
+AEw+Zhv2mHrVZhVwm4XcNlAPClh21ewnMLgn2hEJpHR8rsKOao8TmiGKxqGzYF/UG8oOfHmqPQAUgaYyiHgIgApCe4nhjY9NjRj1QbA822wdSfxGmByCrwspITCjQcQ71K2zwJFuJFVj5EVgQLJxLRSF6ttDBMvGXBiIKQK9LONLSwYOwZa2CFiFIpzo4L3HOCKRRvPlhAF2K0iZKPnF9uu2vHit/BkrXAGyQfFhdni2pVEWsA9Hm04ulOT8RNSk + +ipcuM6Qh9paMy5ATLxnrSsjnzrwxci+so8oRACJhEhsgzAVAJwFQBp0FqE1YEBMA7INCHCdUmcO7CakcAWptIKAO1OHBdT0Jw1TCXOU0ILkhmiHO7rNQMILVHupE6AKYUw7UTLCEY5YZ9x6y9SGpA0oaW1KGhjSOh1+fZsjx4n7C0e1FDHt+SEkMcqSuPQGmJMuHoBCABMHaDjGODY5cAQguPk2JBGzB+IUsO2kNH+BjhGeTfbiERBWDXA6IqqQX + +sZN17wibg5k0eIagbbBoWiM4owQ5JMGMUsRvbHEauKqSq8h27jbinYO8k7izMpI6dluLALuc/SnnTwUGW8ElTRWUUvdrbziZqIEpYQ54jhCpy7AacGrVSVlOBAnAQRC+Ypg5UY6ASKmnM4rCBLKzGjKpFoqCbHx6mZY+pjU5qa1JGkoCMwSEsQvtP6kGzhp2U24Cd2g5kEehOExcgtLWwocVp+Elai9wsJHZtpzE/DpUHNn6zBphs62SbM4nkc0A + +ewkIjdI/JHDBJuYwqfsPOGvTKxyiOoJQFJCrgYIZ076CXhSKE4BSFFc4NqTPwrBUQKIeIWRFMnG1y5rccnDZTBFFsGaFwVVlsC0nYt0eY8TQXyJzJSwMQ7YeyWzSnZqRCZPbdkH21xFuTyZHk+ztTO3Gi06ZTgzXm52PEszTxXnLwQyOVm+CbxAQ3AJ8xC7e4ORDxbkS7wHAHwN8UwDOLENLlZSRcOEW9vlJD55ishbtJWccQrLV4Ch6sidABXDJ + +VTn5ccwIo9IQAVi2UqsUgBChpjNBHoiKBScIIgCioRxEnDEPVD8SiY0QGFWtneGvZDg0Q1EUPGp2CS8xy6fiEFvcAElSBPqV+QzqS07ZOSiZY8kmXzSnmEjGZrpZzkPKjj7ivGlIo8dSPcHSUhWRxXIZFJdw8yYpGtHGALOUrhCyClwZiLC0FFxcy2WU4cPgSIjz45Z2YxWRH3fmlTP5oEwoRrMK6McapNQWkOSF1GmyHC5i0gJYrrGKEuhMHToW + +NRmkTUnZq0+7r8TGGrTnuL6Tad7OinBTfZ7TWxfYu2FkcDmEcq6VHL4m+FzmD07Hk9N/k4Zk5bKZkPtAaCgQIUuACuNoQbHwLRUZ4PUPcn7HN0EZvwgcOoxcRAjFghqFYNMCMkc4OkRETjA4igZNR8KLbaitZJoX0UCZ8vBhXKPHmky3ULCwWmwpM4kjF5s8pmSvP5Zry2Z5vDmXoq5liLmRu8lHNIsDxth0mTEMFtci94GlpehlRIXkwMa7BS2Z + +4Ign+PllUkdFOQqOmVK/kVSvKJiqkjVOcCoAsAqUHwDlTgAwAOyXyn5YlWCC6BmAmAIFagG0AwruplQYFZgF+VgqAVUKkFX8vBWQqXA0K2FRNO6EuK4Os0m7kh0WmuySJ7staRhyolLMtpQSnaTtXaYIqkV/ywFVirRVgrsAEKqFTCu0ARKuJr1f6tHNOaxycxQCpJf9STlcDGS4kiABMBRw4xuwE4HIJSu9qwUAZSk8iCODwg3gtYw4T5EKTfFQ + 
+yA+EsD4N3BzJDgbgoSRFmyyIjJB/ROwIaNME1gfj25PS2cbQsckOozBSvCwWuPcmsLuFY7WmYbj8lLy5l/Ck8abzCkXjVloixAuIpUoa0IU2ypVuFhFySpz26UqtJqxiE6skheZSLM1BuUyiAFco7IWWSeUGK1Zry+ppkOgmVBIqj0EkDkFcD8g6qC3CgNEGUBMA2164Q6qNmkIDkxCDaptVABbXEA21soDtcoC7WkAe1+APtbbIwkOy3FuE+aZ4 + +pJUPcyV6CdaVSte40r41dKliSV1QCNqP4Y6idVOpnVzqF1iPSJZdMo6xKaOFC/7KKvQygKHoUAfAATEeibAYIl4f6XNSKVnA0mzEJIC+MNSVLlJMwZxBJgvCGoKaTSlUi0shZ2qz6jqoUs6psmuq8Zg87XqZ3oWjzhlTClXviOsHDsZ5Xkueay13EudJlh4n0lbgEXLt2Zm8mNdvKCW3j8o6JDYrGU0qCyt4SQBpWeCHGZrSCNFTsacvrSvIPgyN + +bItFnvZPyE5L88propEU5dDF38mtdVJ6zzdT1zam+hes7XdrjqQVXtbgH7ViVJulQXTSOvPUmb21Rm2dfZrM0WbvMQ1PFX0ygCuLru/Q4lS7M3VLVfFO6yADMP3VklD1fsiQDZrPUGb7Nk6xzdevM28rw5z5B9QnUFX8SEl8crWc9Myjvq10PAfAN2HiAo4VgAG6Gi4nOBM01FSIFxPbQLZIVzwQuGngJjRZC5lS1beiFTwiwXh8CpwRYPuhxnZM + +M46IgZZiMI16QVxzC0jRuM8mS0aZ884NbRoDUuC+FxvCNaFKEW+dQyfghNXEx2jJqeRdUaGB0W1ZCjeAKwSWYfmk7EQjWxapTU5VfmqaK17lIxW8sglFc9qQVCkJyHBCNShyrZNtT9o6zhBUAoENhCQg4kDqvu3237aDoB2Xl7NwOv7WDoh3yEHFbm7pmd2XxYSvNBK9xXNIGHkqvF65ZaVuu3L+LqVgSg9SEq+2oBkd8O88i2UR2RUGdjU8HUwH + +R3JaolqWgVXEq+pZbX1Ja3Lbc0lVKiCeEAdYOtE0CbAIUcESQMF3yUqi1VIg8YKfhKVC5T+GA9WBQrIifAD8ZOD4AqRzJgj4RdEeqICGU5Rcx404s8dfjnF0LPVS44mVNpI0f4KZ6vCjfNqo068aNnCokcELW1BSQpgi88cIr85MiA2nGtZLAsD28bYxzxU4FJB/E5rzttGDMrmvOX1Q6wSQD4lotrWlqntjyrPs8o03Vqv2H20xXTopD0hUARAT + +QOQFIAsr5u3YIQMog7QTr7sHWYgAYAOz06YIKOCcD2pyDuh+9g+ttZEWsDRBSyTUxAOQCQScAMd7KKzcepr1162qje5vUFVb3t6oAne9rJIB72HVkwY+ofc5pH2wBT9E+qfV2uXiz6mAR4FsEvtx12zMKy6nzdBL81zViJgW8lX4sWZ7rqd4W2nUOu+21769m+trjvo71xau9h+3vSfopAD6z9kVVcBfpgBX77Nk+87LfoWj3759T+7nfer51PrB + +dtJYBSJLx5i77mb0iAFChhTwpY99YkvGTyoAClu4yQNYKJmSm1a0sRBMiEoz1DAgVO/wE6SrDBGidmtKIY/FeCbjdLeEGgmQYiDRa99lglq+3e6rG0EavVy45XniPd3TyqZlG4kRwrw0B7vSvLRjRttD0bzwpjI7mRstinYB2RCJTkc720oNxs9rPche+MRCIaxR5yxxPgTJwZN89AEsteXxoM5yfaKu6VYHXoBCB9oMEBoDwGTYR1h+JeytXwTj + 
+o/ybmf8zWVc2tGZ0boe/avrXwNElG7oFESQ6grVS7w5DtdRQzmVnw6qARCwIMdgNDE91wxj9Kfl/zTBKJVEGiLRDoj0QGI/EygYxNnLKCADZ6wAgBv3S34QCwGN0GokxEVL4Q6IiNIUpfzFwbHrg2xy8FgOIZRjujMY3oz6Gn45Ah6UOGHHDgRxI5Uc6OTHNjmVXUDf6cxkAYvUIDgCd+KxqvnDTaJC57a2pZtLIKgGAnjdIJgEMHgmDHGcGS2BM + +btuTF4DUxkNNgxmMoEF6aBdAiASWLUBMD8ssol9eQbFX5aJA8RxI8kdSPlaC5GNEET6JQVNxBwkG3gA4gU5XBhNp/Hosvmrb8wqtxjAEZvm+G27eAbq/pZwsXEWdzBrk31eMrs5GHvdJh3yctv8nLzw1q8yNVtoinsb410e7EAFJCEFpHxRtE+msCPwat8Iks8eFXR3wKb/xJah5eWsyOvbNNFe7MTVJRxoSJug6hwl6ah2OLX9BnTHXjt6GErfN + +MzYYRM28VuzoE5EqYSFoCVD16D+eQvBsxWE9Z/TS+p7Heu4lpbrmGW+JZjyF0PamOok6g1G3elwBSQbAQhsoCWC0nXh6RLjMTQahKpzwDq/g7YiQrRCdSUkLQYhs62iY4a/EJEQEixlUVeE3cHDcZ0lMjydDLuvQ5PJm0e7NxK2hbdRoXkhrZlgehjXVBpE2GWNdhreZHvDL6ncAlE6WkabjKJTEy3GNnl8lE0dRYBkskFrcG7h3s20GQ8I0XudO + +GjvW5egrpXo+V07IiCYJBH8paljleyHZbBFKGUAiBH9tyaxfWtK7gXCAkF68reSc1wXIQiFhfdMYPRY7ksOO6aR/u1lLkozS0nxX/uC3Lkkzu2iLTDqh7oXML0F4zbhYQsEHkLYcnnZHPS3867pdHEs/wyBgZG6AuAOAHAHlCbxJ+XQcEFkEqCQJoQTwBgPIQoD7QRl02gwyMGXIiAvYAYGcPoHlALjGKLIcUKpewD6WZ+RlzS8Rvl7mXWQll6y9 + +caMuPQ5t3LZy+dBsuZATLm5nUHqC8sGWjLflzlmqaCs+WbhVIkMMpD0veXXLmQBqqzMDJxXgrmQR6GRb6GpXIrGV4i6cgisJX9AFCSi4ROjMhaXLUAQy75ZDGImUxiY8VtlcKtoHiAuAigPgPRO6WrL8Vyq0Zdas4wY+TsTqxVaqv6BG1aaGCNSDxBN5DQ2AEkDKCTXjIeYVwZrbe3qhNRVLzAWa9SHwDtA0AE4ouTJqT3TA0BqlowGwAMByXrkB + +ANhAFZohZF/cRyRqz1cyATWq4BaXhfyF0s8gSAQZ2K99eIDygEAu2cM5nBIB9dcoaBwDsEEyGg3RlH/H0PtGpAPQOUHITKnRFCTJCMb6N6gPiD1BlUsQjCBCzVQKQo3cAaN7YDjZrCU2KbuN44GVTBxPXQrSOOAARb41kpY1jCFMCQh6Pw3CB52dqulH4vLkiAwN3nViHOyKXol+Z4QJ+rwQUcMMh1OxUwG7Bpp5bWIRW5YshsC35bDNuwHCh+bM + +BZQ52OAODYQBa3obLA7EP0EICMAcY51/AJdedYxwMg1tni5h2aYYIir5iJOgUcY5RgDAN8YIK7Y6g5bQgS2a27bftvlnHrY3KG3alXKXxIi2QIQG+rF2e7dmOyIGCWCAA=== +``` +%% \ No newline at end of file diff --git a/docs/ProjectOutline/ProjectOutlineDiagram.excalidraw.svg b/docs/ProjectOutline/ProjectOutlineDiagram.excalidraw.svg new file mode 100644 index 0000000..5eb1d48 --- /dev/null +++ b/docs/ProjectOutline/ProjectOutlineDiagram.excalidraw.svg @@ -0,0 +1,2 @@ 
+ServerIndexerembeddingsearchClientembeddingsprovider✔️ Ollama✔️ OpenAI-compatible (e.g. LocalAI)usesDatabase✔️ MySQL⚒️ SQLite⚒️ MSSQL⚒️ PostgreSQL⚒️ MongoDB⚒️ RedisStoresdata inAccessesListens on port 5146Listens on port 5210Workers- example.py- example.csx- ...✔️ Front-end✔️ Swagger✔️ Elmah⚒️ Front-end✔️ Swagger✔️ Elmah✔️ Caches embeddings✔️ Caches queries✔️ C# library⚒️ NuGet✔️ Searchdomain CRUD✔️ Entity CRUD✔️ Management operationsUses✔️ Multiple providerconfiguration \ No newline at end of file diff --git a/docs/Server.md b/docs/Server.md index 91bdfe8..ae69aa6 100644 --- a/docs/Server.md +++ b/docs/Server.md @@ -1,21 +1,21 @@ # Overview The server by default - runs on port 5146 -- Uses Swagger UI in development mode (`/swagger/index.html`) -- Ignores API keys when in development mode +- Uses Swagger UI (`/swagger/index.html`) - Uses Elmah error logging (endpoint: `/elmah`, local files: `~/logs`) - Uses serilog logging (local files: `~/logs`) - Uses HealthChecks (endpoint: `/healthz`) ## Docker installation -(On Linux you might need root privileges, thus use `sudo` where necessary) -1. Navigate to the `src/server` directory -2. Build the docker container: `docker build -t embeddingsearch-server -f /Dockerfile .` -3. Run the docker container: `docker run --net=host -t embeddingsearch-server` (the `-t` is optional, but you get more meaningful output. Or use `-d` to run it in the background) +(On Linux you might need root privileges. Use `sudo` where necessary) +1. [Set up the configuration](#setup) +2. Navigate to the `src` directory +3. Build the docker container: `docker build -t embeddingsearch-server -f Server/Dockerfile .` +4. Run the docker container: `docker run --net=host -t embeddingsearch-server` (the `-t` is optional, but you get more meaningful output. Or use `-d` to run it in the background) # Installing the dependencies ## Ubuntu 24.04 -1. Install the .NET SDK: `sudo apt update && sudo apt install dotnet-sdk-8.0 -y` +1. 
Install the .NET SDK: `sudo apt update && sudo apt install dotnet-sdk-10.0 -y` ## Windows -Download the [.NET SDK](https://dotnet.microsoft.com/en-us/download) or follow these steps to use WSL: +Download and install the [.NET SDK](https://dotnet.microsoft.com/en-us/download) or follow these steps to use WSL: 1. Install Ubuntu in WSL (`wsl --install` and `wsl --install -d Ubuntu`) 2. Enter your WSL environment `wsl.exe` and configure it 3. Update via `sudo apt update && sudo apt upgrade -y && sudo snap refresh` @@ -30,6 +30,9 @@ Download the [.NET SDK](https://dotnet.microsoft.com/en-us/download) or follow t `CREATE DATABASE embeddingsearch; use embeddingsearch;` 4. Create the user (replace "somepassword! with a secure password): `CREATE USER 'embeddingsearch'@'%' identified by "somepassword!"; GRANT ALL ON embeddingsearch.* TO embeddingsearch; FLUSH PRIVILEGES;` + - Caution: The host part `'%'` in the command is a wildcard. It means that this user can connect from any host, not just from the local machine. + - If that is a concern, replace `'%'` with `'localhost'` or with the IP of your embeddingsearch server machine, and name the same account in the `GRANT` statement (e.g. `TO 'embeddingsearch'@'localhost'`). +5. Exit mysql: `exit` # Configuration ## Environments @@ -43,34 +46,39 @@ If you just installed the server and want to configure it: 3. Check the "AiProviders" section. If your Ollama/LocalAI/etc. instance does not run locally, update the "baseURL" to point to the correct URL. 4. If you plan on using the server in production: 1. Set the environment variable `DOTNET_ENVIRONMENT` to something that is not "Development". (e.g. "Prod") - 2. Rename the `appsettings.Development.json` - replace "Development" with whatever you chose. (e.g. "Prod") + 2. Rename the `appsettings.Development.json` - replace "Development" with what you chose for `DOTNET_ENVIRONMENT` 3. 
Set API keys in the "ApiKeys" section (generate keys using the `uuid` command on Linux) ## Structure ```json "Embeddingsearch": { "ConnectionStrings": { - "SQL": "server=localhost;database=embeddingsearch;uid=embeddingsearch;pwd=somepassword!;" + "SQL": "server=localhost;database=embeddingsearch;uid=embeddingsearch;pwd=somepassword!;", + "Cache": "Data Source=embeddings.db;Mode=ReadWriteCreate;Cache=Shared" // Name of the sqlite cache file }, "Elmah": { - "AllowedHosts": [ // Specify which IP addresses can access /elmah - "127.0.0.1", - "::1", - "172.17.0.1" - ] + "LogPath": "~/logs" // Where the logs are stored }, "AiProviders": { - "ollama": { // Name of the provider. Used when defining models for a datapoint, e.g. "ollama:mxbai-embed-large" + "ollama": { // Name for the provider. Used when defining models for a datapoint, e.g. "ollama:mxbai-embed-large" "handler": "ollama", // The type of API located at baseURL - "baseURL": "http://localhost:11434" // Location of the API + "baseURL": "http://localhost:11434", // Location of the API + "Allowlist": [".*"], // Allow- and Denylist. Filter out non-embeddings models using regular expressions + "Denylist": ["qwen3-coder:latest", "qwen3:0.6b", "deepseek-v3.1:671b-cloud", "qwen3-vl", "deepseek-ocr"] }, - "localAI": { + "localAI": { // e.g. model name: "localAI:bert-embeddings" "handler": "openai", "baseURL": "http://localhost:8080", - "ApiKey": "Some API key here" + "ApiKey": "Some API key here", + "Allowlist": [".*"], + "Denylist": ["cross-encoder", "..."] } }, - "ApiKeys": ["Some UUID here", "Another UUID here"], // Restrict access in non-development environments to the server's API using your own generated API keys - "UseHttpsRedirection": true // tbh I don't even know why this is still here. // TODO implement HttpsRedirection or remove this line + "ApiKeys": ["Some UUID here", "Another UUID here"], // (optional) Restrict access using API keys + "Cache": { + "CacheTopN": 10000, // Only cache this number of queries. 
(Eviction policy: LRU) + "StoreEmbeddingCache": true, // If set to true, the SQLite database will be used to store the embeddings + "StoreTopN": 10000 // Only write the top n number of queries to the SQLite database + } } ``` ## AiProviders @@ -91,9 +99,9 @@ One can even specify multiple Ollama instances and name them however one pleases ``` ### handler Currently two handlers are implemented for embeddings generation: -- ollama +- `ollama` - requests embeddings from `/api/embed` -- localai +- `openai` - requests embeddings from `/v1/embeddings` ### baseURL Specified by `scheme://host:port`. E.g.: `"baseUrl": "http://localhost:11434"` @@ -105,7 +113,7 @@ Any specified absolute path will be disregarded. (e.g. "http://x.x.x.x/any/subro # API ## Accessing the api -Once started, the server's API can be comfortably be viewed and manipulated via swagger. +Once started, the server's API can be viewed and manipulated via swagger. By default it is accessible under: `http://localhost:5146/swagger/index.html` @@ -114,7 +122,7 @@ To make an API request from within swagger: 2. Click the "Try it out" button. The input fields (if there are any for your action) should now be editable. 3. Fill in the necessary information 4. Click "Execute" -## Restricting access -API keys do **not** get checked in Development environment! +## Authorization +A logged-in session takes priority over the API key requirement (which only applies if API keys are configured). -Set up a non-development environment as described in [Configuration>Setup](#setup) to enable API key authentication. +A logged-in user is therefore authorized to use the endpoints without supplying an API key. 
\ No newline at end of file diff --git a/src/Indexer/Dockerfile b/src/Indexer/Dockerfile index 66d934d..9d6b973 100644 --- a/src/Indexer/Dockerfile +++ b/src/Indexer/Dockerfile @@ -1,7 +1,7 @@ -FROM ubuntu:24.04 AS ubuntu +FROM ubuntu:25.10 AS ubuntu WORKDIR /app RUN apt-get update -RUN apt-get install -y python3.12 python3.12-venv python3.12-dev dotnet-sdk-8.0 +RUN apt-get install -y python3.13 python3.13-venv python3.13-dev dotnet-sdk-10.0 RUN apt-get clean COPY . /src/ ENV ASPNETCORE_ENVIRONMENT Docker diff --git a/src/Indexer/Program.cs b/src/Indexer/Program.cs index fbef06d..ad8a797 100644 --- a/src/Indexer/Program.cs +++ b/src/Indexer/Program.cs @@ -80,8 +80,6 @@ else app.UseMiddleware(); } -// app.UseHttpsRedirection(); - app.MapControllers(); app.Run(); diff --git a/src/Indexer/appsettings.Development.json b/src/Indexer/appsettings.Development.json index e753c45..90fb86d 100644 --- a/src/Indexer/appsettings.Development.json +++ b/src/Indexer/appsettings.Development.json @@ -21,7 +21,8 @@ "ApiKeys": ["xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"], "Server": { "BaseUri": "http://localhost:5146", - "ApiKey": "yyyyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy" - } + "ApiKey": "APIKeyForTheServer" + }, + "PythonRuntime": "libpython3.13.so" } } diff --git a/src/Indexer/appsettings.Docker.json b/src/Indexer/appsettings.Docker.json index d62f8a7..b01a905 100644 --- a/src/Indexer/appsettings.Docker.json +++ b/src/Indexer/appsettings.Docker.json @@ -5,26 +5,8 @@ "Microsoft.AspNetCore": "Warning" } }, - "Kestrel":{ - "Endpoints": { - "http":{ - "Url": "http://0.0.0.0:5120" - } - } - }, - "Embeddingsearch": { - "BaseUri": "http://172.17.0.1:5146", - "ApiKeys": ["b54ea868-496e-11f0-9cc7-f79f06b160e5", "bbdeedf0-496e-11f0-9744-97e28c221f67"] - }, - "EmbeddingsearchIndexer": { - "Elmah": { - "AllowedHosts": [ - "127.0.0.1", - "::1", - "172.17.0.1" - ] - }, - "Worker": + "Indexer": { + "Workers": [ { "Name": "pythonExample", @@ -36,6 +18,12 @@ } ] } - ] + ], + "ApiKeys": 
["APIKeyOfYourChoice", "AnotherOneIfYouLike"], + "Server": { + "BaseUri": "http://172.17.0.1:5146", + "ApiKey": "APIKeyForTheServer" + }, + "PythonRuntime": "libpython3.13.so" } } diff --git a/src/Server/Dockerfile b/src/Server/Dockerfile index fe1699f..287de2c 100644 --- a/src/Server/Dockerfile +++ b/src/Server/Dockerfile @@ -1,10 +1,10 @@ -FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build +FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build WORKDIR /build COPY . . -RUN dotnet restore ./Server.csproj -RUN dotnet publish ./Server.csproj -c Release -o /output +RUN dotnet restore Server/Server.csproj +RUN dotnet publish Server/Server.csproj -c Release -o /output -FROM mcr.microsoft.com/dotnet/aspnet:8.0 AS final +FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS final WORKDIR /app COPY --from=build /output . ENV ASPNETCORE_ENVIRONMENT Docker diff --git a/src/Server/appsettings.Docker.json b/src/Server/appsettings.Docker.json index f569c4e..f16f796 100644 --- a/src/Server/appsettings.Docker.json +++ b/src/Server/appsettings.Docker.json @@ -15,27 +15,41 @@ "UseSwagger": true, "Embeddingsearch": { "ConnectionStrings": { - "SQL": "server=localhost;database=embeddingsearch;uid=embeddingsearch;pwd=somepassword!;" + "SQL": "server=localhost;database=embeddingsearch;uid=embeddingsearch;pwd=somepassword!;", + "Cache": "Data Source=embeddings.db;Mode=ReadWriteCreate;Cache=Shared" }, "Elmah": { - "AllowedHosts": [ - "127.0.0.1", - "::1", - "172.17.0.1" - ] + "LogPath": "~/logs" }, "AiProviders": { "ollama": { "handler": "ollama", - "baseURL": "http://localhost:11434" + "baseURL": "http://localhost:11434", + "Allowlist": [".*"], + "Denylist": ["qwen3-coder:latest", "qwen3:0.6b", "qwen3-vl", "deepseek-ocr"] }, "localAI": { "handler": "openai", "baseURL": "http://localhost:8080", - "ApiKey": "Some API key here" + "ApiKey": "Some API key here", + "Allowlist": [".*"], + "Denylist": ["cross-encoder", "jina-reranker-v1-tiny-en", "whisper-small"] } }, - "ApiKeys": ["Some UUID here", "Another UUID 
here"], - "UseHttpsRedirection": true + "SimpleAuth": { + "Users": [ + { + "Username": "admin", + "Password": "UnsafePractice.67", + "Roles": ["Admin"] + } + ] + }, + "ApiKeys": ["APIKeyOfYourChoice", "AnotherOneIfYouLike"], + "Cache": { + "CacheTopN": 10000, + "StoreEmbeddingCache": true, + "StoreTopN": 10000 + } } }