118 Commits

Author SHA1 Message Date
41fd8a067e Added configurable request body size limit 2026-02-12 18:32:17 +01:00
LD50
047526dc3c Merge pull request #120 from LD-Reborn/117-add-bulk-datapoint-insert
Added datapoint bulk insert
2026-01-28 22:22:13 +01:00
329af1c103 Added datapoint bulk insert 2026-01-28 22:21:51 +01:00
LD50
5869eeabd6 Merge pull request #119 from LD-Reborn/116-add-bulk-attributes-insert
116 add bulk attributes insert
2026-01-25 16:35:26 +01:00
7fffd74f26 Changed list conversion for entityCache 2026-01-25 16:35:02 +01:00
a9dada01c0 Added missing BulkExecuteNonQuery 2026-01-25 16:34:18 +01:00
01b0934d6e Removed unused using statements, added early return for GetEmbeddings 2026-01-25 16:33:49 +01:00
c0189016e8 Improved logging in EntityController 2026-01-25 16:32:54 +01:00
7d16f90c71 Added bulk attributes insert 2026-01-25 16:32:37 +01:00
LD50
d7c248945d Merge pull request #115 from LD-Reborn/52-fix-documentation-to-show-that-we-have-a-front-end
52 fix documentation to show that we have a front end
2026-01-24 19:32:18 +01:00
LD50
059bf147dc Updated README logo layout 2026-01-24 19:31:59 +01:00
ffe15e211b Improved README introduction layout 2026-01-24 18:23:47 +01:00
255395b582 Moved logo to heading in README.md 2026-01-24 18:11:40 +01:00
6390dbc9ab Moved logo to docs, fixed logo not visible in light mode 2026-01-24 18:10:18 +01:00
7f2a14609f Updated documentation to reflect current status, fixed broken Dockerfile and Docker configuration, updated to python3.13 2026-01-24 17:48:06 +01:00
LD50
6d39540e8d Merge pull request #107 from LD-Reborn/86-add-embeddingsearch-logo-and-improve-navbar
Added logo to navbar, reworked navbar layout
2026-01-22 19:43:49 +01:00
328615be97 Added logo to navbar, reworked navbar layout 2026-01-22 19:43:28 +01:00
LD50
20cbbfd06c Merge pull request #106 from LD-Reborn/18-update-to-dotnet-10
18 update to dotnet 10
2026-01-22 16:48:22 +01:00
cfeefa385a Removed magic number from SearchdomainHelper float-bytes converter methods 2026-01-22 16:47:48 +01:00
49ecb06fb0 Updated packages 2026-01-22 16:47:11 +01:00
LD50
a15548ea77 Update issue templates 2026-01-22 14:35:52 +01:00
e2cfe56b49 Updated projects to dotnet 10 2026-01-22 00:46:56 +01:00
LD50
9c306a0917 Merge pull request #102 from LD-Reborn/91-add-persistent-embedding-cache
Added persistent embedding cache
2026-01-21 23:58:29 +01:00
5f05aac909 Added persistent embedding cache 2026-01-21 23:54:08 +01:00
LD50
76c9913485 Merge pull request #101 from LD-Reborn/78-query-results-edit-modal-missing-options-to-rename-and-add
Added renaming and adding query results , fixed missing localization
2026-01-19 13:15:36 +01:00
4f257a745b Added renaming and adding query results , fixed missing localization 2026-01-19 13:14:04 +01:00
LD50
59945cb523 Merge pull request #100 from LD-Reborn/87-migrations-currently-only-fire-once-searchdomainmanager-gets-injected-by-an-endpoint
Fixed migrations not running on startup
2026-01-19 03:52:17 +01:00
c13214c4e9 Fixed migrations not running on startup 2026-01-19 03:52:00 +01:00
LD50
6e9e795a16 Merge pull request #99 from LD-Reborn/85-add-database-size-to-stats
Added database size to stats, added total ram size to stats
2026-01-19 03:40:25 +01:00
337782661e Added database size to stats, added total ram size to stats 2026-01-19 03:40:03 +01:00
LD50
b6692770c1 Merge pull request #98 from LD-Reborn/95-add-parallel-embeddings-pre-fetching-setting
Added entity index embeddings prefetching, fixed zero-searchdomain fr…
2026-01-19 02:18:48 +01:00
141a567927 Added entity index embeddings prefetching, fixed zero-searchdomain front-end bug 2026-01-19 02:18:00 +01:00
LD50
ba41c1cd82 Merge pull request #97 from LD-Reborn/90-fix-migration-does-not-support-database-initial-creation
Fixed database initial creation missing
2026-01-16 14:02:28 +01:00
b6b812f458 Fixed database initial creation missing 2026-01-16 14:01:59 +01:00
LD50
9d5f53c5f4 Merge pull request #96 from LD-Reborn/94-implement-datapoint-embeddings-generation-reordering
Added embeddings prefetching for entities ingest
2026-01-16 12:52:37 +01:00
a9a5ee4cb6 Added embeddings prefetching for entities ingest 2026-01-16 12:52:15 +01:00
LD50
17cc8f41d5 Merge pull request #93 from LD-Reborn/92-datapointgenerateembeddings-does-not-feed-embedding-cache
Moved embeddingCache to EnumerableLruCache, fixed GenerateEmbeddings …
2026-01-16 10:36:10 +01:00
a01985d1b8 Moved embeddingCache to EnumerableLruCache, fixed GenerateEmbeddings not feeding embeddingCache 2026-01-16 10:35:46 +01:00
LD50
4c1f0305fc Merge pull request #89 from LD-Reborn/65-add-number-of-cached-queries-to-front-end
65 add number of cached queries to front end
2026-01-07 01:52:38 +01:00
e49a7c83ba Improved sql connection pool resiliency 2026-01-07 01:52:12 +01:00
e83ce61877 Added query cache entry count and capacity to front-end, Fixed ServerGetStatsResult field naming 2026-01-07 01:15:55 +01:00
LD50
c09514c657 Merge pull request #88 from LD-Reborn/66-add-query-cache-size-limit
66 add query cache size limit
2026-01-05 01:04:57 +01:00
3dfcaa19e6 Implemented query cache size limit in front-end and in logic, Reworked LRUCache for performance, Fixed updating entities from front-end not working 2026-01-05 01:04:26 +01:00
88d1b27394 Fixed LRUCache TryGetValue not updating the list 2026-01-03 18:22:30 +01:00
027a9244ad Added query cache size limiting, added custom enumerable LRUCache, renamed search to query in various places, fixed client GetEmbeddingsCacheSize endpoint 2026-01-03 17:57:18 +01:00
063c81e8dc Fixed front-end wrong endpoint name used 2026-01-03 14:39:20 +01:00
LD50
ad84efb611 Merge pull request #84 from LD-Reborn/83-warning-info-modals-text-and-close-button-must-be-dark
Fixed warning and info modal text light on dark mode
2026-01-02 23:20:33 +01:00
ecaa640ec0 Fixed warning and info modal text light on dark mode 2026-01-02 23:20:12 +01:00
LD50
37f1b285d8 Merge pull request #82 from LD-Reborn/81-add-dark-mode-support
Added dark mode, updated bootstrap
2026-01-02 23:11:27 +01:00
71b273f5d7 Added dark mode, updated bootstrap 2026-01-02 23:11:03 +01:00
LD50
1a823bb1e7 Merge pull request #80 from LD-Reborn/77-fix-long-loading-times-for-entity-count-and-query-cache-utilization
Replaced GetEmbeddingCacheSize with GetStats, fixed long loading time…
2026-01-02 02:05:24 +01:00
aa4fc03c3d Replaced GetEmbeddingCacheSize with GetStats, fixed long loading times for front-end stats retrieval 2026-01-02 02:04:19 +01:00
LD50
09832d1c0b Merge pull request #79 from LD-Reborn/74-fix-missing-front-end-localization
Fixed details button not visible
2026-01-01 20:46:54 +01:00
68630fdbef Fixed details button not visible 2026-01-01 19:43:54 +01:00
LD50
c9907da846 Merge pull request #76 from LD-Reborn/74-fix-missing-front-end-localization
74 fix missing front end localization
2026-01-01 19:31:33 +01:00
cddd305d26 Added logic to hint at the exit label in elmah 2026-01-01 19:29:51 +01:00
6f4ffbcaa6 Added more missing localization, added LocalizationChecker tool, moved CriticalCSSGenerator to tools folder 2026-01-01 19:03:57 +01:00
LD50
3e433c3cbe Merge pull request #75 from LD-Reborn/72-swagger-and-elmah-have-no-return-to-front-end-button
Added swagger and elmah return-to-front-end button
2026-01-01 17:39:08 +01:00
8cbc77eb1d Added swagger and elmah return-to-front-end button 2026-01-01 17:38:48 +01:00
LD50
977a8f1637 Merge pull request #73 from LD-Reborn/68-returnurl-does-not-work
Fixed ReturnUrl not working
2026-01-01 16:12:51 +01:00
65ed78462d Fixed ReturnUrl not working 2026-01-01 16:02:30 +01:00
LD50
4d2d2c9938 Merge pull request #71 from LD-Reborn/67-improve-fcp-by-defering-js-and-css
Added CriticalCSS, defered CSS and JS, fixed heading order, fixed fro…
2026-01-01 14:58:09 +01:00
b20102785a Added CriticalCSS, defered CSS and JS, fixed heading order, fixed front-end querycache url, added response compression and caching 2026-01-01 14:57:37 +01:00
LD50
3b96d7212b Merge pull request #70 from LD-Reborn/61-add-a-model-allow-denylist
Added missing configuration file changes
2025-12-31 04:07:59 +01:00
254c534b0b Added missing configuration file changes 2025-12-31 04:07:28 +01:00
LD50
eafc764f73 Merge pull request #69 from LD-Reborn/61-add-a-model-allow-denylist
61 add a model allow denylist
2025-12-31 03:58:47 +01:00
7dfe945a48 Added swagger authorization check 2025-12-31 03:58:18 +01:00
aa95308f61 Added allowlist and denylist, fixed patchy configuration with proper options models, fixed api middleware authorization issues 2025-12-31 03:47:40 +01:00
8d56883e7e Fixed multithreading mutation issue 2025-12-31 03:43:44 +01:00
bc293bf7ec Added proper server config model, added proper apikey authorization with swagger integration, added allowlist and denylist to config 2025-12-30 22:18:26 +01:00
LD50
b5db4bc1e4 Merge pull request #64 from LD-Reborn/62-add-an-embedding-cache-size-label-to-front-end
Added home page dashboard, added embedding cache size estimation and …
2025-12-30 02:55:23 +01:00
0f599a49d0 Added home page dashboard, added embedding cache size estimation and front-end label, added individual health check routes 2025-12-30 02:54:30 +01:00
LD50
4fe6b4a112 Merge pull request #63 from LD-Reborn/59-implement-missing-endpoints-in-client
59 implement missing endpoints in client
2025-12-29 19:51:35 +01:00
16efe447a2 Reorganized client methods to better match swagger sequence 2025-12-29 19:51:16 +01:00
6a7bdf585c Added missing endpoints to client 2025-12-29 19:44:55 +01:00
31c784f0ab Renamed SearchCache mentions to QueryCache for better clarity 2025-12-29 15:41:12 +01:00
625019f9f4 Added swagger decoration, reorganized controller elements for better clarity, renamed entity index endpoint for better clarity 2025-12-29 13:56:44 +01:00
c3dfe1a964 Fixed ridiculously low EmbeddingCacheMaxCount preset 2025-12-29 01:10:31 +01:00
LD50
d647bedb33 Merge pull request #60 from LD-Reborn/44-fix-controller-endpoint-naming-and-http-methods
Fixed endpoint naming and http methods
2025-12-28 17:36:17 +01:00
fe6bbfe9e5 Fixed endpoint naming and http methods 2025-12-28 17:36:01 +01:00
LD50
6f7afca195 Merge pull request #58 from LD-Reborn/56-bug-exception-when-update-indexing-entity
Fixed datapoint stale reference causing issues when updating datapoin…
2025-12-28 00:44:12 +01:00
3fa71a8d8b Fixed datapoint stale reference causing issues when updating datapoint text and probmethod or similaritymethod, fixe probmethod and similaritymethod not being applied in-memory 2025-12-28 00:43:55 +01:00
LD50
8921121078 Merge pull request #57 from LD-Reborn/54-properly-implement-embeddings-cache-size-limit-global
Implemented cache reconciliation
2025-12-28 00:22:15 +01:00
baf76685b7 Implemented cache reconciliation 2025-12-28 00:19:18 +01:00
LD50
4030e4a824 Merge pull request #55 from LD-Reborn/54-properly-implement-embeddings-cache-size-limit-global
Moved embeddingCache from Dictionary to LRUCache
2025-12-27 18:40:45 +01:00
7b4a3bd2c8 Moved embeddingCache from Dictionary to LRUCache 2025-12-27 18:40:03 +01:00
LD50
5eabb0d924 Merge pull request #53 from LD-Reborn/33-move-query-from-entity-to-searchdomain
33 move query from entity to searchdomain
2025-12-27 17:26:29 +01:00
40424053da Merge branch '33-move-query-from-entity-to-searchdomain' of https://github.com/LD-Reborn/embeddingsearch into 33-move-query-from-entity-to-searchdomain 2025-12-27 17:26:13 +01:00
f3a4665153 Moved query action from EntityController to SearchdomainController 2025-12-27 17:26:06 +01:00
a358eaea86 Moved query action from EntityController to SearchdomainController 2025-12-27 17:25:12 +01:00
665a392b5a Fixed redundant Searchdomain retrieval error messages 2025-12-25 15:25:23 +01:00
26d0561c3b Fixed wrong return model returned in EntityController methods 2025-12-25 14:55:58 +01:00
cc93a76546 Fixed DRY violations regarding result models 2025-12-25 14:55:30 +01:00
LD50
7298593341 Merge pull request #51 from LD-Reborn/35-implement-enums-for-probmethods-in-the-shared-models
Fixed embeddingCache not yet global
2025-12-25 14:19:41 +01:00
25723cb7a4 Fixed embeddingCache not yet global 2025-12-25 14:19:25 +01:00
LD50
84d83206cb Merge pull request #50 from LD-Reborn/35-implement-enums-for-probmethods-in-the-shared-models
Removed unused GenerateEmbeddings method
2025-12-25 14:18:41 +01:00
b6e01a3f66 Removed unused GenerateEmbeddings method 2025-12-25 14:13:28 +01:00
LD50
e4cfcb1030 Merge pull request #49 from LD-Reborn/35-implement-enums-for-probmethods-in-the-shared-models
Added enums to JSONEntity and JSONDatapoint
2025-12-25 13:20:44 +01:00
6d1cffe2db Added enums to JSONEntity and JSONDatapoint 2025-12-25 13:20:24 +01:00
LD50
dd0019b1c1 Merge pull request #48 from LD-Reborn/40-add-attributes-to-query-result
40 add attributes to query result
2025-12-25 12:39:18 +01:00
5877ebaff2 Added attributes to query results 2025-12-25 12:39:01 +01:00
040d4f916a Fixed views showing in swagger 2025-12-25 12:21:50 +01:00
LD50
57beddd70f Merge pull request #47 from LD-Reborn/42-create-a-front-end---localization
Added localization
2025-12-23 22:40:42 +01:00
8416d7f404 Added localization 2025-12-23 22:40:04 +01:00
16f08aa8a7 Removed privacy page 2025-12-23 21:37:10 +01:00
LD50
cce42d8ec3 Merge pull request #46 from LD-Reborn/41-create-a-front-end---toasts
41 create a front end   toasts
2025-12-23 14:55:39 +01:00
e64f4ca555 Added toasts, fixed redundant throbber functions 2025-12-23 14:50:34 +01:00
55a475704e Removed clear button that was not used 2025-12-23 14:49:32 +01:00
LD50
89f8aa6591 Merge pull request #45 from LD-Reborn/36-create-a-front-end---functioning-buttons-2
36 create a front end   functioning buttons 2
2025-12-22 16:53:42 +01:00
a3cc5115fc Added query update functionality in the front-end 2025-12-22 16:52:49 +01:00
8b36c65437 Added search cache updating 2025-12-22 16:52:06 +01:00
ee12986fef Added query deleting 2025-12-22 14:22:16 +01:00
fd76da265b Added entity updating to front-end 2025-12-22 12:58:40 +01:00
4aa3015954 Fixed probmethod and similaritymethod not updating with entity index 2025-12-22 12:57:52 +01:00
fb1766b2b5 Fixed embeddings not being created on index 2025-12-22 12:08:09 +01:00
6fe96d41a2 Improved entity updating 2025-12-22 00:18:13 +01:00
fc5e8ceeee Added missing model fix for last commit 2025-12-22 00:17:16 +01:00
33502640a2 Added model return for entity list endpoint 2025-12-22 00:16:30 +01:00
82 changed files with 6252 additions and 2210 deletions

38
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@@ -0,0 +1,38 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Desktop (please complete the following information):**
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version / Commit ID [e.g. 22]
**Smartphone (please complete the following information):**
- Device: [e.g. iPhone6]
- OS: [e.g. iOS8.1]
- Browser [e.g. stock browser, safari]
- Version [e.g. 22]
**Additional context**
Add any other context about the problem here.

View File

@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

3
.gitignore vendored
View File

@@ -18,3 +18,6 @@ src/Server/logs
src/Shared/bin
src/Shared/obj
src/Server/wwwroot/logs/*
src/Server/Tools/CriticalCSS/node_modules
src/Server/Tools/CriticalCSS/package*.json
*.db*

115
README.md
View File

@@ -1,92 +1,59 @@
# embeddingsearch
<img src="https://github.com/LD-Reborn/embeddingsearch/blob/main/logo.png" alt="Logo" width="100">
# embeddingsearch<img src="docs/logo.png" alt="Logo" width="50" align="left">
embeddingsearch is a self-hosted semantic search server built on vector embeddings.<br/>It lets you index and semantically search text using modern embedding models.
<br/><br/>
It's designed to be flexible, extensible, and easy to use.
embeddingsearch is a search server that uses Embedding Similarity Search (similarly to [Magna](https://github.com/yousef-rafat/Magna/tree/main)) to semantically compare a given input to a database of indexed entries.
# Project outline
<img src="docs/ProjectOutline/ProjectOutlineDiagram.excalidraw.svg" alt="Logo">
embeddingsearch offers:
- Privacy and flexibility through self-hosted solutions like:
- ollama
## What embeddingsearch offers:
- Privacy and flexibility by allowing one to self-host everything, including:
- Ollama
- OpenAI-compatible APIs (like LocalAI)
- Great flexibility through deep control over
- the amount of datapoints per entity (i.e. the thing you're trying to find)
- which models are used (multiple per datapoint possible to improve accuracy)
- which models are sourced from where (multiple Ollama/OpenAI-compatible sources possible)
- similarity calculation methods
- aggregation of results (when multiple models are used per datapoint)
- Astonishing accuracy when using multiple models for single indices
- Ease-of-use and ease-of-implementation
- The server offers a front-end for management and status information, as well as a decorated swagger back-end
- The indexer can also be self-hosted and serves as a host for executing indexing scripts
- The client library can be used to develop your own client software that posts queries or creates indices
- Caching & persistency
- Generating embeddings is expensive. So why not cache AND store them?
- Query results can also be cached.
- "Doesn't that eat a lot of precious RAM?" - My own testing showed: embeddings take up around 4200-5200 bytes each depending on the request string size. So around 4-5 GB per million cached embeddings.
This repository comes with a
- server (accessible via API calls & swagger)
- clientside library (C#)
- scripting based indexer service that supports the use of
This repository comes with a:
- Server
- Client library (C#)
- Scripting based indexer service that supports the use of
- Python
- CSharp (Roslyn)
- Golang (Planned)
- CSharp (Roslyn - at-runtime evaluation)
- CSharp (Reflection - compiled)
- Lua (Planned)
- Javascript (Planned)
# How to set up / use
# How to set up
## Server
(Docker now available! See [Docker installation](docs/Server.md#docker-installation))
1. Install [ollama](https://ollama.com/download)
2. Pull a few models using ollama (e.g. `paraphrase-multilingual`, `bge-m3`, `mxbai-embed-large`, `nomic-embed-text`)
3. [Install the dependencies](docs/Server.md#installing-the-dependencies)
4. [Set up a local mysql database](docs/Server.md#mysql-database-setup)
5. [Set up the configuration](docs/Server.md#setup)
6. In `src/server` execute `dotnet build && dotnet run` to start the server
7. (optional) [Create a searchdomain using the web interface](docs/Server.md#accessing-the-api)
## Client
1. Download the package and add it to your project (TODO: NuGet)
2. Create a new client by either:
1. By injecting IConfiguration (e.g. `services.AddSingleton<Client>();`)
2. By specifying the baseUri, apiKey, and searchdomain (e.g. `new Client.Client(baseUri, apiKey, searchdomain)`)
(Docker also available! See [Docker installation](docs/Server.md#docker-installation))
1. Install the inferencing tool of your choice (e.g. [ollama](https://ollama.com/download)) and pull a few models that support generating embeddings.
2. [Install the dependencies](docs/Server.md#installing-the-dependencies)
3. [Set up a mysql database](docs/Server.md#mysql-database-setup)
4. [Set up the configuration](docs/Server.md#configuration)
5. In `src/Server` execute `dotnet build && dotnet run` to start the server
6. (optional) Create a searchdomain using the web interface
## Indexer
(Docker now available! See [Docker installation](docs/Indexer.md#docker-installation))
1. [Install the dependencies](docs/Indexer.md#installing-the-dependencies)
2. [Set up the server](#server)
3. [Configure the indexer](docs/Indexer.md#configuration)
4. [Set up your indexing script(s)](docs/Indexer.md#scripting)
5. Run with `dotnet build && dotnet run` (Or `/usr/bin/dotnet build && /usr/bin/dotnet run`)
2. [Configure the indexer](docs/Indexer.md#configuration)
3. [Set up your indexing script(s)](docs/Indexer.md#scripting)
4. In `src/Indexer` execute `dotnet build && dotnet run` to start the indexer
# Known issues
| Issue | Solution |
| --- | --- |
| Unhandled exception. MySql.Data.MySqlClient.MySqlException (0x80004005): Invalid attempt to access a field before calling Read() | The searchdomain you entered does not exist |
| Unhandled exception. MySql.Data.MySqlClient.MySqlException (0x80004005): Authentication to host 'localhost' for user 'embeddingsearch' using method 'caching_sha2_password' failed with message: Access denied for user 'embeddingsearch'@'localhost' (using password: YES) | TBD |
| System.DllNotFoundException: Could not load libpython3.12.so with flags RTLD_NOW \| RTLD_GLOBAL: libpython3.12.so: cannot open shared object file: No such file or directory | Install python3.12-dev via apt. Also: try running the indexer using `/usr/bin/dotnet` instead of `dotnet` (make sure dotnet is installed via apt) |
# To-do
- (High priority) Add default indexer
- Library
- Processing:
- Text / Markdown documents: file name, full text, paragraphs
- Documents
- PDF: file name, full text, headline?, paragraphs, images?
- odt/docx: file name, full text, headline?, images?
- msg/eml: file name, title, recipients, cc, text
- Images: file name, OCR, image description?
- Videos?
- Presentations (Impress/Powerpoint): file name, full text, first slide title, titles, slide texts
- Tables (Calc / Excel): file name, tab/page names?, full text (per tab/page)
- Other? (TBD)
- Server
- ~~Scripting capability (Python; perhaps also lua)~~ (Done with the latest commits)
- ~~Intended sourcing possibilities:~~
- ~~Local/Remote files (CIFS, SMB, FTP)~~
- ~~Database contents (MySQL, MSSQL)~~
- ~~Web requests (E.g. manual crawling)~~
- ~~Script call management (interval based & event based)~~
- Implement [ReaderWriterLock](https://learn.microsoft.com/en-us/dotnet/api/system.threading.readerwriterlockslim?view=net-9.0&redirectedfrom=MSDN) for entityCache to allow for multithreaded read access while retaining single-threaded write access.
- NuGet packaging and corresponding README documentation
- Add option for query result detail levels. e.g.:
- Level 0: `{"Name": "...", "Value": 0.53}`
- Level 1: `{"Name": "...", "Value": 0.53, "Datapoints": [{"Name": "title", "Value": 0.65}, {...}]}`
- Level 2: `{"Name": "...", "Value": 0.53, "Datapoints": [{"Name": "title", "Value": 0.65, "Embeddings": [{"Model": "bge-m3", "Value": 0.87}, {...}]}, {...}]}`
- Add "Click-Through" result evaluation (For each entity: store a list of queries that led to the entity being chosen by the user. Then at query-time choose the best-fitting entry and maybe use it as another datapoint? Or use a separate weight function?)
- Reranker/Crossencoder/RAG (or anything else beyond initial retrieval) support
- Remove the `id` columns from the database tables where the table is actually identified by (and should be unique by) the name, which should become the new primary key.
- Improve performance & latency (Create ready-to-go processes where each contain an n'th share of the entity cache, ready to perform a query. Prepare it after creating the entity cache.)
- Implement dynamic invocation based database migrations
# Future features
- Support for other database types (MSSQL, SQLite)
| System.DllNotFoundException: Could not load libpython3.13.so with flags RTLD_NOW \| RTLD_GLOBAL: libpython3.13.so: cannot open shared object file: No such file or directory | Install python3.13-dev via apt. Also: try running the indexer using `/usr/bin/dotnet` instead of `dotnet` (to make sure dotnet is not running as a snap) |
# Planned features and support
- Document processor with automatic chunking (e.g.: .md, .pdf, .docx, .xlsx, .png, .mp4)
- Indexer front-end
- Support for other database types (MSSQL, SQLite, PostgreSQL, MongoDB, Redis)
# Community
<a href="https://discord.gg/MUKeZM3k"><img src="https://img.shields.io/badge/Join%20Discord-7289DA?style=flat&logo=discord&logoColor=white" alt="Discord"></a>

View File

@@ -8,15 +8,18 @@ The indexer by default
- Uses HealthChecks (endpoint: `/healthz`)
## Docker installation
(On Linux you might need root privileges, thus use `sudo` where necessary)
1. Navigate to the `src` directory
2. Build the docker container: `docker build -t embeddingsearch-indexer -f Indexer/Dockerfile .`
3. Run the docker container: `docker run --net=host -t embeddingsearch-indexer` (the `-t` is optional, but you get more meaningful output. Or use `-d` to run it in the background)
1. [Configure the indexer](#configuration)
2. [Set up your indexing script(s)](#scripting)
3. Navigate to the `src` directory
4. Build the docker container: `docker build -t embeddingsearch-indexer -f Indexer/Dockerfile .`
5. Run the docker container: `docker run --net=host -t embeddingsearch-indexer` (the `-t` is optional, but you get more meaningful output. Or use `-d` to run it in the background)
## Installing the dependencies
## Ubuntu 24.04
1. Install the .NET SDK: `sudo apt update && sudo apt install dotnet-sdk-8.0 -y`
2. Install the python SDK: `sudo apt install python3 python3.12 python3.12-dev`
1. Install the .NET SDK: `sudo apt update && sudo apt install dotnet-sdk-10.0 -y`
2. Install the python SDK: `sudo apt install python3 python3.13 python3.13-dev`
- Note: Python 3.14 is not supported yet
## Windows
Download the [.NET SDK](https://dotnet.microsoft.com/en-us/download) or follow these steps to use WSL:
Download and install the [.NET SDK](https://dotnet.microsoft.com/en-us/download) or follow these steps to use WSL:
1. Install Ubuntu in WSL (`wsl --install` and `wsl --install -d Ubuntu`)
2. Enter your WSL environment `wsl.exe` and configure it
3. Update via `sudo apt update && sudo apt upgrade -y && sudo snap refresh`
@@ -26,15 +29,15 @@ The configuration is located in `src/Indexer` and conforms to the [ASP.NET confi
If you plan to use multiple environments, create any `appsettings.{YourEnvironment}.json` (e.g. `Development`, `Staging`, `Prod`) and set the environment variable `DOTNET_ENVIRONMENT` accordingly on the target machine.
## Setup
If you just installed the server and want to configure it:
1. Open `src/Server/appsettings.Development.json`
If you just installed the indexer and want to configure it:
1. Open `src/Indexer/appsettings.Development.json`
2. If your search server is not on the same machine as the indexer, update "BaseUri" to reflect the URL to the server.
3. If your search server requires API keys, (i.e. it's operating outside of the "Development" environment) set `"ApiKey": "<your key here>"` beneath `"BaseUri"` in the `"Embeddingsearch"` section.
4. Create your own indexing script(s) in `src/Indexer/Scripts/` and configure their use as
3. If you configured API keys for the search server, set `"ApiKey": "<your key here>"` beneath `"BaseUri"` in the `"Server"` section.
4. Create your own indexing script(s) in `src/Indexer/Scripts/` and configure them as shown below
## Structure
```json
"EmbeddingsearchIndexer": {
"Worker":
"Indexer": {
"Workers":
[ // This is a list; you can have as many "workers" as you want
{
"Name": "example",
@@ -50,7 +53,12 @@ If you just installed the server and want to configure it:
"Name": "secondWorker",
/* ... */
}
]
],
"ApiKeys": ["YourApiKeysHereForTheIndexer"], // API keys, in case you want to protect the indexer's API
"Server": {
"BaseUri": "http://localhost:5000", // URL to the embeddingsearch server
"ApiKey": "ServerApiKeyHere" // API Key set in the server
}
}
```
## Call types
@@ -71,6 +79,13 @@ If you just installed the server and want to configure it:
- Parameters:
- Path (e.g. "Scripts/example_content")
# Scripting
Scripts should be put in `src/Indexer/Scripts/`. If you look there, by default you will find some example scripts that can be taken as reference when building your own.
For configuration of the scripts see: [Structure](#structure)
The next few sections explain some core concepts/patterns. If you want to skip to explicit code examples, look here:
- [Python](#python)
- [Roslyn](#c-roslyn)
## General
Scripts need to define the following functions:
- `init()`
@@ -186,7 +201,7 @@ from tools import * # Import all tools that are provided for ease of scripting
def init(toolset: Toolset): # defining an init() function with 1 parameter is required.
pass # Your code would go here.
# DO NOT put a main loop here! Why?
# Don't put a main loop here! Why?
# This function prevents the application from initializing and maintains exclusive control over the GIL
def update(toolset: Toolset): # defining an update() function with 1 parameter is required.
@@ -261,7 +276,7 @@ public class ExampleScript : Indexer.Models.IScript
// Required: return an instance of your IScript-extending class
return new ExampleScript();
```
## Golang
## Lua
TODO
## Javascript
TODO

View File

@@ -0,0 +1,190 @@
---
excalidraw-plugin: parsed
tags: [excalidraw]
---
==⚠ Switch to EXCALIDRAW VIEW in the MORE OPTIONS menu of this document. ⚠== You can decompress Drawing data with the command palette: 'Decompress current Excalidraw file'. For more info check in plugin settings under 'Saving'
# Excalidraw Data
## Text Elements
Server ^TJzgO4nS
Indexer ^rgrd8gyy
embeddingsearch ^jB1B7xr7
Client ^ZttcBOXC
embeddings
provider ^mEIPhpn1
✔️ Ollama
✔️ OpenAI-compatible
(e.g. LocalAI) ^o6rED2fi
uses ^QkKnkGvS
Database ^yaSaChsK
✔️ MySQL
⚒️ SQLite
⚒️ MSSQL
⚒️ PostgreSQL
⚒️ MongoDB
⚒️ Redis ^LHP4PU6V
Stores
data in ^FP2xPhxz
Listens on port 5146
^CJG2peC6
Listens on port 5210 ^iLZT5hca
Workers ^33rXJfFb
- example.py
- example.csx
- ... ^e1BVqXa2
✔️ Front-end
✔️ Swagger
✔️ Elmah ^6UTNDntp
⚒️ Front-end
✔️ Swagger
✔️ Elmah ^tlLF3R27
✔️ Caches embeddings
✔️ Caches queries ^I2lN1U82
✔️ C# library
⚒️ NuGet
✔️ Searchdomain CRUD
✔️ Entity CRUD
✔️ Management operations ^4Ab3XHhK
Uses ^KvuBRV2K
Accesses ^ikhSH5rs
✔️ Multiple provider
configuration ^ipkoadg8
%%
## Drawing
```compressed-json
N4KAkARALgngDgUwgLgAQQQDwMYEMA2AlgCYBOuA7hADTgQBuCpAzoQPYB2KqATLZMzYBXUtiRoIACyhQ4zZAHoFAc0JRJQgEYA6bGwC2CgF7N6hbEcK4OCtptbErHALRY8RMpWdx8Q1TdIEfARcZgRmBShcZQUebQBGAE5tHho6IIR9BA4oZm4AbXAwUDBSiBJuCBghAHUAeQA1QgAzAE000shYRErA7CiOZWCOssxuZwA2HgntCYAGAHYADh4l
hYAWAGY5ub4iyBhx9YBWY+1d9cSp+Pi5pdvj/jKKEnVueO2Z6eOrpeO544neLrJ6QSQIQjKaTcRI/WanCabY4TdY8TYLdGgiDWIbiVBzLHMKCkNgAawQAGE2Pg2KRKgBieIIJlMkaQTS4bCk5QkoQcYhUml0iSM5litkQZqEfD4ADKsGGEkEHglRJJ5Jqr0k3Dm2ke+wgarJCHlMEV6GVFSxvKhHHCeTQBINbDgnLUhzQtydnQgPOEcAAksQHah8
gBdLHNchZIPcDhCGVYwj8rCVXBzCW8/l25ghko+7p4zb7AC+hIQCGI3DROySgKWWMYLHYXE9sMbTFYnAAcpwxNwlkt1hN7lck8wACIZKCV7jNAhhLGaYT8gCiwSyORD8cTBqEcGIuBnVc9CwmiR4C3ixyvP0SWKIHFJcYT+AfbC5s7Q8/wYSKZaKfNIAqCQAEcGgWZRNlJAAlAAJCVC16BB+hxYYsTGNBnHiFZtE2RFVniHh9R9D1UGcb5tGHBYM
R4eINhOOZEk2LEXmIN40CRBY9T+ZiFhIspwUhaFOPWdZtCuHgtimQEuImLE0Lxb0yiNclBVpBlmi07SJQ5Lk/T5AVqQ0iRiWsZhXUCHIJSlGVTXNQ1qStA1VIQTV2O1NA9h9Vz7LxRyVWtYRbXtd4sRdN1YHeHYsQMwNgwKCMDSjXAYxPVAdzfA1k2IVMJFweJMxXYgczzfYungItS3LL9UHiRFYUWeqQQNJsu1bOrlg7Zsez7PEbxRY5gQbbLJ2
nWqf0XA1l0M9dMmyXICnKoCyhA9AAEFCEkCcAC0AwAIQWNkyiQ/LSBJKhyoAzpinK4D0ogAAxXJlCMGB6HaUETsqtNzrYS6bpLfYkp9fdD2Pd4zwvW96rRbyykfZ80Ey99P3SyaECxSRQgAFSwKAABlkyR1AMf/J4gPKB6cYAKSMZQ6nWDhZUQn7TPxiVMPI4cUh2dYaJ2QXaxa0jxjhvCMTmYd/iF3ZWK1AczmmOZNm2Hha1hTZ4ixiEoSgbgkQ
UwZzWUgRiWNdThXQeltJ0pdOW5LMjKFSozI4CzcCs/XI2lOUFX8y0q0Jc2NQVrzg/VE1/cqQOir8SRSrC51XWwd1otN31eXikNw0jaMEFjZHXyTFMuexVIgsMxOi93HyK3S4EeEvP4phGn02pbbh1jubr2t7Dh+09H4z2BRJ1jb1axuCCHvwXTHpuKubN0WtBc73A8j1q+jz0va9Yc2eHIBpNG5znrEZ0wfWJFlJgm0zSg8cvyob9IO/I04KBZUI
Iw8VWd+ckeqlaUZEBJdHxutIgygOoQGCM0b2rUmBQHMAQCBkJoFQBdBKPQORcDJiYIXDKxdnSkEhMmAgj8r7oBfm/A0uAhAYJguEb+eJiRCHnj6R8CA4K6xEnVFIoCpC43xkTJ8p9fzsIRsTF8MpyaAWyg9XA+0jAAHF8AAEUajYBgA0DgzQYBqMmJBKAcwuDnzZugPoAxcSc2rBMM4CxEhDk2IkAETElgsQNGRCidiqITBolrc8Sw5gjgWPLDyB
sbw8R+OiARQk9bcCGvECSKwmIHxVure8tDjZKQjhbYyVsIA21ts0XSDsDL8ktq7cg7tLILRsr7PyMcnJBxciHNyYdeC5PJI0pUzS44hVzEnH0EVU5RU9DFA0cUgw5xBmUFKaVpFZR9DlPK6BcCbDjtmUKaAVrQHMcWTo10VL12rIsZxQ1gkT0gB3Tg7wxKZPbp2Fs/dB51T+Gsa8QTxxTmnhNM+C9ZobgWjnZad0qaVAABowTqMo7+AArTQx0Ko9
DOhdCAV1gZYjBpvBuUNd5DSmAfB8Uia5LIRh+ck6N/k+mxswChIiSZk1KEcyma0ICkB5MQJYygYCHDMSi9AF94E+i5s4uIUsLy7G2G47YIsyhePFokRINwRxiSCSOeSBo2IcTqgCc4Q51gqroqk/iOthKUMNlk6xjoumUnyZpYppT9JO0qaZapHsvb1LstHXpgVWmR3cjqw+ho2k9ItH0yuAyQza2TpFMiXpYpZ2mYlPOqUC7pRRtlUuaZ1ibJKt
swhtdjlby2FsVWw1e6d09BMOxlbeoD1/irZE7jNXLKnggGepNqVlBmmuIFW4U3r3BlvPFMNCXBuPpSsRU0CwcwkAGUuTB74UAoZUBduVMBLv/p/Zh1YrmSg/oA/QwCEnn3AZA6BsDhVlCbEg9wqCoGu0wVibBUQ8GkAIZm4ZJD/DkLnegddWAt20PoWwRhrAf7cFYRIo+eDuHmvePwrGQjL4MunTBmBJLC34FkaUVlD04X7XiIdTApAjr8v8kKmx
WFgTcWIgCK4EwcLNQBFiLxqJuKbHuKca8YSg1LBmDReYF5jg8AvFcB5gkeGUJOEba1+JbWuutsUkp9tnXFSU9Ad1tTrI+29WaAOEb/XGkDZ5Tpxnuk+vDX6n0NoE4FpjcMlOadxkZymQlVeszIDzPTYskuG60zHDzdXbDNV0oOJ+LDUTdaOp/GDTcjgLy8RETVSrVEGdCDts7RjJci9+0r1DF5iA2LO3b2hnvcdqMp2z3EWep+EhMiaErI4QYYRP
bYG1NaB+/6MD6Ca8QFryg2uiE68lD+X9IOcVbXMw9QD8AgLq1AB9l6EBwIlLe5B+BltPrgFgj+uC7QfozUQ79pCOB/vq+gRrzXkxDZCCNiUdCGFMMm6gaDxK7TwfiZ6JDBpaX0qwzlg0iNFm4dussh6xxsDrGaPQfQph1gIDqJlgAqnBVcxAaiSGwLtijyFULZOo+Rbe2gFiXi1k3ZVdFlXTYOKc5Iw55g/BVhifCknIDarM1xKJfFYnSerDzOYt
wuOnBcS45YcrICKR1Ip+1IoVNOsdhpuXgrtOezqXpv2Bmmk2ZUm00z1ZbVhoCs5WzwV7ODM9OFZzYzdVuaTR5wrqaFmkv86s7EExgsFt2adVABzmVheiiOdJmxx4xfeCEmLSX3ha1D9eZYGWst/NqwCvt80B2r1BTde6kLoWwqMAipFeyBXYj+gDQ5mKh04shjvMdcMPsky/eSk+NWZ2CRQ4TQHc8wf4cqDtGQ2B9p1AhRSVmJeqMYXGEiCS2x/j
zENdTxYtOqgC6SWPaYTdVis53nxsz/EJIXgPuPMTtxrzL7ibwy1Pppc2os3al28vHVqaV4ZTTbsPUa+Sg0qzJuWk+X1x0sGr5D/rHJGhbtGtbnGunImv6Mmp5s7r5q7lmgFvlORgaE7CFk3gICctWiiEsI4qJpLgwE8rcl5LsPFiQYln1O8IOHcPMClt8uNFSinj6L2sQEvMCoOqDBvKVqOhVvXsDhSsnm3mApdhABSEQJ/rZt1mIRIYQFITNjkB
Nr/Pus0LNsevNqegaEKtthIFeutogptroYKs+gaK+gdvgsdkWpALSGdhdpQuIZIbpiBs9hBiwqQGwh9lwvzj9sRMhnSsIl3iwZIqIqSj3vIpUAALLrB1AwAcAfqRFj7+SWLS6T5YQ/BnD0TDgqwCaGqJBXhsYJIzDIiERBLMQ7x+K757q6iSp+KIg0RDjuJEEX6UKXgzAtzOJBJNyXDqxEE34KZ36aZFK2yK7lLOwmSq7mQ6bXrebf7a6+qm564B
qAFG4gFGZm7xwhaOZlAjIuZ24wH7hwFO7JT5yfonarTZr5SJBe6W6oA+77LVQuQ4GoBTCXhKpXjL4JYJITKPI9RUENo0EHy8S3D7qZY/IdrCEYZsEcEZ6hhZ43TgoSDPTMCvTvSfRgq+6l5ooYqdBFYlYjq178FErA5YZYEwJCHMEiGCIBGoZBF/gB5yIQ6VD6CrgBgAAKkgcAHAhUeO7MT8aR5ExwB8eERE6IGqeRZOhRaAQSpOqsAIZ4TEqw7O
EAnO7w/weETOsICwKsREfOCGaA2pcmJssuD+ymT+00ZSLqKuWmUx6uzhPotkWuDkoBd+Bu4cd+xuLpGxUaQyOxNu8aPxZQ7mMyCBZx1h5Qlxayo+lcWytxZJYQtUMsDwg44eaAomypCW0ebYtwfiSQKIjBvylJUJeW6eBWa83Bw6uKhJBKAhHCFJ6Gi2lQ12A2t2zAAAOhwHACSGYLlHSF1iuj1s2YNu2Z2d2SQMBg6eNrul5KoeoSemmYtsYRAG
IDkBOTeoYfehejtntjgu+mGWSjYT+mQvgKug1n1jdq1h2V2WwD2WuVLqBuBtOW9h4Rhpwl9rwkkn4X9h3mhq3q+aSa+OEUyRIGwBMKQKuBODwFKEka7BzAKc4MiJsHhM4lLC2hKVKXVFMPqlrPgcJkqVLFUV5ERFRB8GPFLGJiEnqd9qgKalasaYMdacMXbBaepq/tae/tMV6k6YZrrmbMseEu6f/pHJ6esWUHZlsZAaMgGfbrAY7uWXMqcVYQeR
GSgWsquDcduOcdgbVP4jsGiMSb8e1NwCiBQX8VmbwMEkxlsMcCmaNOCdlt2uyCWcvCGVXrwdWfvBOvWX+Y2RIIACjkgA8H+oB1AyipS4AdmBXBWIAcDrQBjOB6D6CuhIKaDBAdmoDpWoAAAUCA2gyg2gqABMH4KCAYAAlMuqeegJFSFfgGFRFUFXUNFbFfFQYElYQClQgGlRldlblflYVfeqVTZFOa9urNukevOagAIjoVuXoatjMcQaQHeigtNS
YbjmYftnuUpeFEeedieT1lVaFfoOFRwFVY1XFQla1e1Z1eld1XlQVUVVtgNQpA+S9u4Z4SSZ9j4Xwl+TSj+XSf+aEdhkBatA9MiaiR9DBaiv9ETthO4toPcKrKHoiDWjsPmZ4twDRFRBsJ8KrFsH8EQaqZ6EkFRN3D8KsJcBsPcOfp9bcNxIiH8ORUKaJkiH0YTrfkJXkqaYUgrs/mMW/mrp6priJbxSGvxUGqsfMdZosZAOJQ5pJXsQmpMg7q5Q
6YpX5sge7rgI9BpdwPcQKjwI8XXLVKsAJheCNQgn8WqRiFHtQZ6FxteKLgZZPPZZCbloCqWSCjdCtMipRnBWCmymoqSAANJPjKL0Asw4mlB4k8EEnlY1mO2wYA1kmTou0GhwBsDJhlnlSFDZ6mylBzDlReZgDZ03TYRE1iQMZk35Hjz1RfRgA02zAi4M1IhoiAgF2V7/6exQD7Q5S3Y63lQYD5YEIQDgSQTQTwTHQQD6BsC5SVC0iaBqAT1SibrE
Bsnp3WSZ450ak2UqyuJjy8So2b2Ih5HniXAAjAgs5t2HJYjZDEDd38i907L90ZDLxD3RGxHxEBiJFfST3T3+Rz0L3f1L2Vir0Z1LSb0YiDj4ErArDOInDDi126gQODiOKrBohjyAjrCX0Mkd0LXrRl7gi4CbUGg314MXQEMKJl4ShBDLgUAp0+hT2MCREkAgNbiajqAwmUJA4/U0md4A1MpgAsoRESAB3B2kih0sy8mCq+0ipiw+JST4SLDERCZc
YYU3BLCk7MSnBDhMa0bAiEWoBbAkVKpM0ibLC6lmrUXfVlD9EZyuRDHc0sUv4VLsX80KGzH6bOmiV8UmYrEelrHC0y23HbE2H+nQGK2yXK0KVpr7lu5ly4DKLa1IGG3pR0RiRQz4GmVGVoCXDW0AmE0n6Djb0FkQlFmu1p4uVcFlD4lVmx2eVVZ0MnQ9ZCBhB5D9kVXFYtODVKFPlm2TkAJzYLbaHnpoKVArkzh9nm2LVbbLWT0kDEDoRrW7mHZD
2g1vTg1bV2G7ViHNPhCPbPVuFQYvleHvmUKfmxK/V8OOWYaJ2AXYPg7A2VCaAcCtA4xqKgSygwQQ0WIoRWILMyPpFwhXin1cbNwH3ypixiZw04RDQuInBTC1paqAFC4pDMQAjOJIg716N/afXiz4HWVXhjzMRBIs3ya2NtL2PmmsGWnK6c0cV2lzWOlC1S0i0+MCXmbs2WYS2/79LgG+khNQGuYHHZyVPeaq1JMXGqXYgIQxn5q3G61VRX1PFbyn
A3j1QBKpl1SXCfGUHmX0QOL8zpnBpglMENmp7sH5Ye2dBe2IkbRbS7QHRoHZ7F7+SezYmAzt1VPR01P4p1PvWN5aXkkt5drBFggXOMrd53O94SAwC4Cyi4AUiSDMCB1fPQDSOjDjCEFQvTD8xL67CLAYXYT5ESyyp2IYhk5ngeI+gE11RaxUSFPdwohjzn1U36l+4CI2MmkTFc2Us9rUtsW0uuP2lzJzGePC2uRunstLHGhMt/5iXm4SWxpSVhM+
jBkiuShiuhbq1xM0yJObvJMR54tKr4REFfEGkXi5OvLqw7ACYbB/x2Ums+VmscOROQDVM161OVaCFBtcONNiEThHi4AchhDlU9b/tRBAdIDbrKF7qjUDNaGzqXxLn6HdRTNLkYKrU+jmEbVq2na/pbMOFgeAehCQcuFgYvWHNvUcJwbU2/bcMA6XMhvXP+syKRtCPoAExwRsnrBsko4TANApsT4GhcwIU4RUSOIfCXhMTIg5No00aoizBTDSw1r4
Qgn6NqNw0ZGikn5C7IgWMfm9PWOs0DEcv35dtMVaSjFWkDu2kC1f4eM8XMvju+Mmczs8sLtOYCv7HhOHFyVFY+YxNbtpjJsyuYEBsJkNxCl3A8bx3zVZN+75EXt4j6v1g4SglJ6lNPsWtrtvungeWft1nftXNUb+VBWREwCyhqIEwdmABJZEFRV0TDODV6V7KPV016gKvUSDyCaJV215EZwMoGwBOPtG14wo4K0+gTIQ4ZFWV61xwLV6gPV2oB1X
N817N/Nx11AF12t6V/14N8Nyt6gKN5ll0zuq9jXWNv0xoYMwh0tjM8h5M0YTM+hzuW+ss0Q7h8ee09N+Vz1wd4t41wd5ES179+t2wJ14ENt6gH14MHtyN5WMd09a4U+e9u9d4a22c/4fR+G4xyDmEax8Bba9tHtIdCm661DfBThHEEiDWmsGk0owl7J8TuqQ0X4kEqrICGi2pz4sEjWWJKcPxLZTSp9eJFLDcIancNTgo7JnRTkgxZzeZ6po47zS
4zZ245KCOw57O946HGy0AaGv48y4ExAYu/LYGZAKu/AScdE+9xKxrQTLu/K9WAbcWulLHjeHvSe5QQbMxIl5DMRDcOWsvsa4Waa6wc5ZwRvVaxieYqm/yX7SDWyTwJgByZgEYOiu67iVil6++z6/lyEcx8pcnRlz6GnRnZa6UMXZ0LnWAPnTdIXZX6UNhNz3YrDHzzeDZVcqUCL4asCCCZL/PscFg2AEVkSJ3XfYNn3Qic/QtEPSPVBLBNK/3VPT
PRIMmA4CRwiUAyvWvZQg3zX+cILDhFsPkdqS2kew4gg8hVJCOFJLsNqZcIOEP0cpADfePw/XcU/YPQ9E8y828x8xPcvz/qkB56MxWYsvRYa787ouoI+ssEIix4GIWLTevhFEz0QcIGqYiB3yf7BxO6pDf6OQxw5lASG+DEIBQzRTX18ANDBppAAYYIAmG2/Mvmw0kAcNQ+7eHhr+WDb0kBGFMNjk9ET7J9JAqfATmm0gDCcxMyQEcIfi4jjwtYwS
AtvRCQoC8aaPRMSOiHxodJzweoJVOJjKI3gdSenShP8CNKy8TOFLEYjzSs5ds6WtnB0hrx1yOcACuvcWqO0N7ztZaJvW3ArRXZK012/nG3sBEjLYgv66BYqKF3DLhdIYoeK8DhX3Sns/cwTWLs8htq8A1GTbZYGl2drF8e04fWEvJVfbZ9cuH7Wss3mqwcCMMxXKhBgkCAjlwYuAVAMmBA5iF5QtIcIB2RqF1DTEF3U7o2lg5Xd4Ov7W7iMwkBjM
7y81VDjM2PQDY/mZQLDm90qCbQieDrCULYTw7tMmhVQ1oQB3aF7Mker2FHlRw+ro9aOrArHiwIToF8gaOeCQJEX0AwR6A5XCYAk0kZYlyeQnRDHYmv5L4midiFEMvnjRFt6IkDeYOiFVjnh9GcQPIkxA2CyRaIzRamosAkhRDAQWpLYLsHiEds5eZnBxlS1YrONrONSellxVc62oJ2evYSgby17spXBQTOWh4LN6ZwImPgjdmSRWRxM6gDvfur7n
1qKt922TOxMOGPae8LanoE4AIkzJJCyK6ICLEH3S6nCIA0JLLpHzwzR9x8wgh5qv1JCSBZQcEY4CwHT4V5M+blGOrnyKFnCCBR8byqUKxCl8chWdKAQgwLpfQ9+FEIxpCLgbAipItdbCPCPyIfAkRN4FEXRCH4j8ogC1N/oMDNEYB+QYY5QBGNH64NiBhDCMUQLIYkDfoZA4hhQP+hUDqSJwx9vQxvK0DmGO/ZgIwOYF/kLhNrcoBqK1E6jxuBYG
PoJ3+bcwkKHGFnGIORqYhGeHwaAcxHVh/AbK1wM8PozWCylHEjiEcCximD6Dqw7bIzmS0jimDmK2IpxuMQKRWC1ejLckaqAcFi0/GXLL0nO02JuCPOS7QVt52FaW8Va1vCMSyLTCNBd28ZZ4vzGmDaMxIGrIiL72rTdF+RdI4PiU1lHyj3a2XfIZ1Dy4mjA2JQn9qIQcLrRsAYgXMLszaY9ZYJ8EzplBx6YZw1Cl3capNWGaPohhC0EYRtk3KDD0
AEw+Zhv2mHrVZhVwm4XcNlAPClh21ewnMLgn2hEJpHR8rsKOao8TmiGKxqGzYF/UG8oOfHmqPQAUgaYyiHgIgApCe4nhjY9NjRj1QbA822wdSfxGmByCrwspITCjQcQ71K2zwJFuJFVj5EVgQLJxLRSF6ttDBMvGXBiIKQK9LONLSwYOwZa2CFiFIpzo4L3HOCKRRvPlhAF2K0iZKPnF9uu2vHit/BkrXAGyQfFhdni2pVEWsA9Hm04ulOT8RNSk
ipcuM6Qh9paMy5ATLxnrSsjnzrwxci+so8oRACJhEhsgzAVAJwFQBp0FqE1YEBMA7INCHCdUmcO7CakcAWptIKAO1OHBdT0Jw1TCXOU0ILkhmiHO7rNQMILVHupE6AKYUw7UTLCEY5YZ9x6y9SGpA0oaW1KGhjSOh1+fZsjx4n7C0e1FDHt+SEkMcqSuPQGmJMuHoBCABMHaDjGODY5cAQguPk2JBGzB+IUsO2kNH+BjhGeTfbiERBWDXA6IqqQX
sZN17wibg5k0eIagbbBoWiM4owQ5JMGMUsRvbHEauKqSq8h27jbinYO8k7izMpI6dluLALuc/SnnTwUGW8ElTRWUUvdrbziZqIEpYQ54jhCpy7AacGrVSVlOBAnAQRC+Ypg5UY6ASKmnM4rCBLKzGjKpFoqCbHx6mZY+pjU5qa1JGkoCMwSEsQvtP6kGzhp2U24Cd2g5kEehOExcgtLWwocVp+Elai9wsJHZtpzE/DpUHNn6zBphs62SbM4nkc0A
ewkIjdI/JHDBJuYwqfsPOGvTKxyiOoJQFJCrgYIZ076CXhSKE4BSFFc4NqTPwrBUQKIeIWRFMnG1y5rccnDZTBFFsGaFwVVlsC0nYt0eY8TQXyJzJSwMQ7YeyWzSnZqRCZPbdkH21xFuTyZHk+ztTO3Gi06ZTgzXm52PEszTxXnLwQyOVm+CbxAQ3AJ8xC7e4ORDxbkS7wHAHwN8UwDOLENLlZSRcOEW9vlJD55ishbtJWccQrLV4Ch6sidABXDJ
VTn5ccwIo9IQAVi2UqsUgBChpjNBHoiKBScIIgCioRxEnDEPVD8SiY0QGFWtneGvZDg0Q1EUPGp2CS8xy6fiEFvcAElSBPqV+QzqS07ZOSiZY8kmXzSnmEjGZrpZzkPKjj7ivGlIo8dSPcHSUhWRxXIZFJdw8yYpGtHGALOUrhCyClwZiLC0FFxcy2WU4cPgSIjz45Z2YxWRH3fmlTP5oEwoRrMK6McapNQWkOSF1GmyHC5i0gJYrrGKEuhMHToW
NRmkTUnZq0+7r8TGGrTnuL6Tad7OinBTfZ7TWxfYu2FkcDmEcq6VHL4m+FzmD07Hk9N/k4Zk5bKZkPtAaCgQIUuACuNoQbHwLRUZ4PUPcn7HN0EZvwgcOoxcRAjFghqFYNMCMkc4OkRETjA4igZNR8KLbaitZJoX0UCZ8vBhXKPHmky3ULCwWmwpM4kjF5s8pmSvP5Zry2Z5vDmXoq5liLmRu8lHNIsDxth0mTEMFtci94GlpehlRIXkwMa7BS2Z
4Ign+PllUkdFOQqOmVK/kVSvKJiqkjVOcCoAsAqUHwDlTgAwAOyXyn5YlWCC6BmAmAIFagG0AwruplQYFZgF+VgqAVUKkFX8vBWQqXA0K2FRNO6EuK4Os0m7kh0WmuySJ7staRhyolLMtpQSnaTtXaYIqkV/ywFVirRVgrsAEKqFTCu0ARKuJr1f6tHNOaxycxQCpJf9STlcDGS4kiABMBRw4xuwE4HIJSu9qwUAZSk8iCODwg3gtYw4T5EKTfFQ
yA+EsD4N3BzJDgbgoSRFmyyIjJB/ROwIaNME1gfj25PS2cbQsckOozBSvCwWuPcmsLuFY7WmYbj8lLy5l/Ck8abzCkXjVloixAuIpUoa0IU2ypVuFhFySpz26UqtJqxiE6skheZSLM1BuUyiAFco7IWWSeUGK1Zry+ppkOgmVBIqj0EkDkFcD8g6qC3CgNEGUBMA2164Q6qNmkIDkxCDaptVABbXEA21soDtcoC7WkAe1+APtbbIwkOy3FuE+aZ4
pJUPcyV6CdaVSte40r41dKliSV1QCNqP4Y6idVOpnVzqF1iPSJZdMo6xKaOFC/7KKvQygKHoUAfAATEeibAYIl4f6XNSKVnA0mzEJIC+MNSVLlJMwZxBJgvCGoKaTSlUi0shZ2qz6jqoUs6psmuq8Zg87XqZ3oWjzhlTClXviOsHDsZ5Xkueay13EudJlh4n0lbgEXLt2Zm8mNdvKCW3j8o6JDYrGU0qCyt4SQBpWeCHGZrSCNFTsacvrSvIPgyN
bItFnvZPyE5L88propEU5dDF38mtdVJ6zzdT1zam+hes7XdrjqQVXtbgH7ViVJulQXTSOvPUmb21Rm2dfZrM0WbvMQ1PFX0ygCuLru/Q4lS7M3VLVfFO6yADMP3VklD1fsiQDZrPUGb7Nk6xzdevM28rw5z5B9QnUFX8SEl8crWc9Myjvq10PAfAN2HiAo4VgAG6Gi4nOBM01FSIFxPbQLZIVzwQuGngJjRZC5lS1beiFTwiwXh8CpwRYPuhxnZM
M46IgZZiMI16QVxzC0jRuM8mS0aZ884NbRoDUuC+FxvCNaFKEW+dQyfghNXEx2jJqeRdUaGB0W1ZCjeAKwSWYfmk7EQjWxapTU5VfmqaK17lIxW8sglFc9qQVCkJyHBCNShyrZNtT9o6zhBUAoENhCQg4kDqvu3237aDoB2Xl7NwOv7WDoh3yEHFbm7pmd2XxYSvNBK9xXNIGHkqvF65ZaVuu3L+LqVgSg9SEq+2oBkd8O88i2UR2RUGdjU8HUwH
R3JaolqWgVXEq+pZbX1Ja3Lbc0lVKiCeEAdYOtE0CbAIUcESQMF3yUqi1VIg8YKfhKVC5T+GA9WBQrIifAD8ZOD4AqRzJgj4RdEeqICGU5Rcx404s8dfjnF0LPVS44mVNpI0f4KZ6vCjfNqo068aNnCokcELW1BSQpgi88cIr85MiA2nGtZLAsD28bYxzxU4FJB/E5rzttGDMrmvOX1Q6wSQD4lotrWlqntjyrPs8o03Vqv2H20xXTopD0hUARAT
QOQFIAsr5u3YIQMog7QTr7sHWYgAYAOz06YIKOCcD2pyDuh+9g+ttZEWsDRBSyTUxAOQCQScAMd7KKzcepr1162qje5vUFVb3t6oAne9rJIB72HVkwY+ofc5pH2wBT9E+qfV2uXiz6mAR4FsEvtx12zMKy6nzdBL81zViJgW8lX4sWZ7rqd4W2nUOu+21769m+trjvo71xau9h+3vSfopAD6z9kVVcBfpgBX77Nk+87LfoWj3759T+7nfer51PrB
dtJYBSJLx5i77mb0iAFChhTwpY99YkvGTyoAClu4yQNYKJmSm1a0sRBMiEoz1DAgVO/wE6SrDBGidmtKIY/FeCbjdLeEGgmQYiDRa99lglq+3e6rG0EavVy45XniPd3TyqZlG4kRwrw0B7vSvLRjRttD0bzwpjI7mRstinYB2RCJTkc720oNxs9rPche+MRCIaxR5yxxPgTJwZN89AEsteXxoM5yfaKu6VYHXoBCB9oMEBoDwGTYR1h+JeytXwTj
o/ybmf8zWVc2tGZ0boe/avrXwNElG7oFESQ6grVS7w5DtdRQzmVnw6qARCwIMdgNDE91wxj9Kfl/zTBKJVEGiLRDoj0QGI/EygYxNnLKCADZ6wAgBv3S34QCwGN0GokxEVL4Q6IiNIUpfzFwbHrg2xy8FgOIZRjujMY3oz6Gn45Ah6UOGHHDgRxI5Uc6OTHNjmVXUDf6cxkAYvUIDgCd+KxqvnDTaJC57a2pZtLIKgGAnjdIJgEMHgmDHGcGS2BM
btuTF4DUxkNNgxmMoEF6aBdAiASWLUBMD8ssol9eQbFX5aJA8RxI8kdSPlaC5GNEET6JQVNxBwkG3gA4gU5XBhNp/Hosvmrb8wqtxjAEZvm+G27eAbq/pZwsXEWdzBrk31eMrs5GHvdJh3yctv8nLzw1q8yNVtoinsb410e7EAFJCEFpHxRtE+msCPwat8Iks8eFXR3wKb/xJah5eWsyOvbNNFe7MTVJRxoSJug6hwl6ah2OLX9BnTHXjt6GErfN
MzYYRM28VuzoE5EqYSFoCVD16D+eQvBsxWE9Z/TS+p7Heu4lpbrmGW+JZjyF0PamOok6g1G3elwBSQbAQhsoCWC0nXh6RLjMTQahKpzwDq/g7YiQrRCdSUkLQYhs62iY4a/EJEQEixlUVeE3cHDcZ0lMjydDLuvQ5PJm0e7NxK2hbdRoXkhrZlgehjXVBpE2GWNdhreZHvDL6ncAlE6WkabjKJTEy3GNnl8lE0dRYBkskFrcG7h3s20GQ8I0XudO
GjvW5egrpXo+V07IiCYJBH8paljleyHZbBFKGUAiBH9tyaxfWtK7gXCAkF68reSc1wXIQiFhfdMYPRY7ksOO6aR/u1lLkozS0nxX/uC3Lkkzu2iLTDqh7oXML0F4zbhYQsEHkLYcnnZHPS3867pdHEs/wyBgZG6AuAOAHAHlCbxJ+XQcEFkEqCQJoQTwBgPIQoD7QRl02gwyMGXIiAvYAYGcPoHlALjGKLIcUKpewD6WZ+RlzS8Rvl7mXWQll6y9
caMuPQ5t3LZy+dBsuZATLm5nUHqC8sGWjLflzlmqaCs+WbhVIkMMpD0veXXLmQBqqzMDJxXgrmQR6GRb6GpXIrGV4i6cgisJX9AFCSi4ROjMhaXLUAQy75ZDGImUxiY8VtlcKtoHiAuAigPgPRO6WrL8Vyq0Zdas4wY+TsTqxVaqv6BG1aaGCNSDxBN5DQ2AEkDKCTXjIeYVwZrbe3qhNRVLzAWa9SHwDtA0AE4ouTJqT3TA0BqlowGwAMByXrkB
ANhAFZohZF/cRyRqz1cyATWq4BaXhfyF0s8gSAQZ2K99eIDygEAu2cM5nBIB9dcoaBwDsEEyGg3RlH/H0PtGpAPQOUHITKnRFCTJCMb6N6gPiD1BlUsQjCBCzVQKQo3cAaN7YDjZrCU2KbuN44GVTBxPXQrSOOAARb41kpY1jCFMCQh6Pw3CB52dqulH4vLkiAwN3nViHOyKXol+Z4QJ+rwQUcMMh1OxUwG7Bpp5bWIRW5YshsC35bDNuwHCh+bM
BZQ52OAODYQBa3obLA7EP0EICMAcY51/AJdedYxwMg1tni5h2aYYIir5iJOgUcY5RgDAN8YIK7Y6g5bQgS2a27bftvlnHrY3KG3alXKXxIi2QIQG+rF2e7dmOyIGCWCAA===
```
%%

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 36 KiB

View File

@@ -1,21 +1,21 @@
# Overview
The server by default
- runs on port 5146
- Uses Swagger UI in development mode (`/swagger/index.html`)
- Ignores API keys when in development mode
- Uses Swagger UI (`/swagger/index.html`)
- Uses Elmah error logging (endpoint: `/elmah`, local files: `~/logs`)
- Uses serilog logging (local files: `~/logs`)
- Uses HealthChecks (endpoint: `/healthz`)
## Docker installation
(On Linux you might need root privileges, thus use `sudo` where necessary)
1. Navigate to the `src/server` directory
2. Build the docker container: `docker build -t embeddingsearch-server -f /Dockerfile .`
3. Run the docker container: `docker run --net=host -t embeddingsearch-server` (the `-t` is optional, but you get more meaningful output. Or use `-d` to run it in the background)
(On Linux you might need root privileges. Use `sudo` where necessary)
1. [Set up the configuration](docs/Server.md#setup)
2. Navigate to the `src` directory
3. Build the docker container: `docker build -t embeddingsearch-server -f Server/Dockerfile .`
4. Run the docker container: `docker run --net=host -t embeddingsearch-server` (the `-t` is optional, but you get more meaningful output. Or use `-d` to run it in the background)
# Installing the dependencies
## Ubuntu 24.04
1. Install the .NET SDK: `sudo apt update && sudo apt install dotnet-sdk-8.0 -y`
1. Install the .NET SDK: `sudo apt update && sudo apt install dotnet-sdk-10.0 -y`
## Windows
Download the [.NET SDK](https://dotnet.microsoft.com/en-us/download) or follow these steps to use WSL:
Download and install the [.NET SDK](https://dotnet.microsoft.com/en-us/download) or follow these steps to use WSL:
1. Install Ubuntu in WSL (`wsl --install` and `wsl --install -d Ubuntu`)
2. Enter your WSL environment `wsl.exe` and configure it
3. Update via `sudo apt update && sudo apt upgrade -y && sudo snap refresh`
@@ -30,6 +30,9 @@ Download the [.NET SDK](https://dotnet.microsoft.com/en-us/download) or follow t
`CREATE DATABASE embeddingsearch; use embeddingsearch;`
4. Create the user (replace "somepassword!" with a secure password):
`CREATE USER 'embeddingsearch'@'%' identified by "somepassword!"; GRANT ALL ON embeddingsearch.* TO embeddingsearch; FLUSH PRIVILEGES;`
- Caution: The host part `'%'` in the command means that this user can connect from any host, not only from the local machine.
- Replace `'%'` with `'localhost'` or with the IP of your embeddingsearch server machine if that is a concern.
5. Exit mysql: `exit`
# Configuration
## Environments
@@ -43,34 +46,39 @@ If you just installed the server and want to configure it:
3. Check the "AiProviders" section. If your Ollama/LocalAI/etc. instance does not run locally, update the "baseURL" to point to the correct URL.
4. If you plan on using the server in production:
1. Set the environment variable `DOTNET_ENVIRONMENT` to something that is not "Development". (e.g. "Prod")
2. Rename the `appsettings.Development.json` - replace "Development" with whatever you chose. (e.g. "Prod")
2. Rename the `appsettings.Development.json` - replace "Development" with what you chose for `DOTNET_ENVIRONMENT`
3. Set API keys in the "ApiKeys" section (generate keys using the `uuid` command on Linux)
## Structure
```json
"Embeddingsearch": {
"ConnectionStrings": {
"SQL": "server=localhost;database=embeddingsearch;uid=embeddingsearch;pwd=somepassword!;"
"SQL": "server=localhost;database=embeddingsearch;uid=embeddingsearch;pwd=somepassword!;",
"Cache": "Data Source=embeddings.db;Mode=ReadWriteCreate;Cache=Shared" // Name of the sqlite cache file
},
"Elmah": {
"AllowedHosts": [ // Specify which IP addresses can access /elmah
"127.0.0.1",
"::1",
"172.17.0.1"
]
"LogPath": "~/logs" // Where the logs are stored
},
"AiProviders": {
"ollama": { // Name of the provider. Used when defining models for a datapoint, e.g. "ollama:mxbai-embed-large"
"ollama": { // Name for the provider. Used when defining models for a datapoint, e.g. "ollama:mxbai-embed-large"
"handler": "ollama", // The type of API located at baseURL
"baseURL": "http://localhost:11434" // Location of the API
"baseURL": "http://localhost:11434", // Location of the API
"Allowlist": [".*"], // Allow- and Denylist. Filter out non-embeddings models using regular expressions
"Denylist": ["qwen3-coder:latest", "qwen3:0.6b", "deepseek-v3.1:671b-cloud", "qwen3-vl", "deepseek-ocr"]
},
"localAI": {
"localAI": { // e.g. model name: "localAI:bert-embeddings"
"handler": "openai",
"baseURL": "http://localhost:8080",
"ApiKey": "Some API key here"
"ApiKey": "Some API key here",
"Allowlist": [".*"],
"Denylist": ["cross-encoder", "..."]
}
},
"ApiKeys": ["Some UUID here", "Another UUID here"], // Restrict access in non-development environments to the server's API using your own generated API keys
"UseHttpsRedirection": true // tbh I don't even know why this is still here. // TODO implement HttpsRedirection or remove this line
"ApiKeys": ["Some UUID here", "Another UUID here"], // (optional) Restrict access using API keys
"Cache": {
"CacheTopN": 10000, // Only cache this number of queries. (Eviction policy: LRU)
"StoreEmbeddingCache": true, // If set to true, the SQLite database will be used to store the embeddings
"StoreTopN": 10000 // Only write the top n number of queries to the SQLite database
}
}
```
## AiProviders
@@ -91,9 +99,9 @@ One can even specify multiple Ollama instances and name them however one pleases
```
### handler
Currently two handlers are implemented for embeddings generation:
- ollama
- `ollama`
- requests embeddings from `/api/embed`
- localai
- `openai`
- requests embeddings from `/v1/embeddings`
### baseURL
Specified by `scheme://host:port`. E.g.: `"baseURL": "http://localhost:11434"`
@@ -105,7 +113,7 @@ Any specified absolute path will be disregarded. (e.g. "http://x.x.x.x/any/subro
# API
## Accessing the api
Once started, the server's API can be comfortably be viewed and manipulated via swagger.
Once started, the server's API can be viewed and manipulated via swagger.
By default it is accessible under: `http://localhost:5146/swagger/index.html`
@@ -114,7 +122,7 @@ To make an API request from within swagger:
2. Click the "Try it out" button. The input fields (if there are any for your action) should now be editable.
3. Fill in the necessary information
4. Click "Execute"
## Restricting access
API keys do **not** get checked in Development environment!
## Authorization
Being logged in takes priority over the API key requirement (if API keys are set).
Set up a non-development environment as described in [Configuration>Setup](#setup) to enable API key authentication.
So being logged in automatically authorizes endpoint usage.

BIN
docs/logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -8,6 +8,8 @@ using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Configuration;
using System.Reflection.Metadata.Ecma335;
using Shared.Models;
using System.Net;
using Microsoft.Extensions.Options;
namespace Client;
@@ -24,19 +26,65 @@ public class Client
this.searchdomain = searchdomain;
}
public Client(IConfiguration configuration)
public Client(IOptions<ServerOptions> configuration)
{
string? baseUri = configuration.GetSection("Embeddingsearch").GetValue<string>("BaseUri");
string? apiKey = configuration.GetSection("Embeddingsearch").GetValue<string>("ApiKey");
string? searchdomain = configuration.GetSection("Embeddingsearch").GetValue<string>("Searchdomain");
this.baseUri = baseUri ?? "";
string baseUri = configuration.Value.BaseUri;
string? apiKey = configuration.Value.ApiKey;
string? searchdomain = configuration.Value.Searchdomain;
this.baseUri = baseUri;
this.apiKey = apiKey ?? "";
this.searchdomain = searchdomain ?? "";
}
/// <summary>
/// Lists the entities of the client's configured searchdomain.
/// </summary>
/// <param name="returnEmbeddings">Forwarded unchanged to the searchdomain-specific overload.</param>
/// <returns>The result of the searchdomain-specific overload.</returns>
public async Task<EntityListResults> EntityListAsync(bool returnEmbeddings = false)
    => await EntityListAsync(searchdomain, returnEmbeddings);
/// <summary>
/// Lists the entities of the given searchdomain via a GET request to the Entities endpoint.
/// </summary>
/// <param name="searchdomain">Name of the searchdomain; URL-encoded into the query string.</param>
/// <param name="returnEmbeddings">Sent as the <c>returnEmbeddings</c> query parameter.</param>
/// <returns>The response processed into <see cref="EntityListResults"/>.</returns>
public async Task<EntityListResults> EntityListAsync(string searchdomain, bool returnEmbeddings = false)
{
    var encodedSearchdomain = HttpUtility.UrlEncode(searchdomain);
    var encodedReturnEmbeddings = HttpUtility.UrlEncode(returnEmbeddings.ToString());
    var requestUrl = $"{baseUri}/Entities?searchdomain={encodedSearchdomain}&returnEmbeddings={encodedReturnEmbeddings}";
    return await FetchUrlAndProcessJson<EntityListResults>(HttpMethod.Get, requestUrl);
}
/// <summary>
/// Serializes the given entities to JSON and indexes them through the string overload.
/// </summary>
/// <param name="jsonEntity">Entities to index.</param>
/// <returns>The result of the string overload.</returns>
public async Task<EntityIndexResult> EntityIndexAsync(List<JSONEntity> jsonEntity)
{
    string serializedEntities = JsonSerializer.Serialize(jsonEntity);
    return await EntityIndexAsync(serializedEntities);
}
/// <summary>
/// Sends the given JSON as the body of a PUT request to the Entities endpoint.
/// </summary>
/// <param name="jsonEntity">JSON payload, sent as UTF-8 with content type <c>application/json</c>.</param>
/// <returns>The response processed into <see cref="EntityIndexResult"/>.</returns>
public async Task<EntityIndexResult> EntityIndexAsync(string jsonEntity)
{
    // Build the body before the URL, as the original did.
    StringContent requestBody = new(jsonEntity, Encoding.UTF8, "application/json");
    string requestUrl = GetUrl($"{baseUri}", "Entities", []);
    return await FetchUrlAndProcessJson<EntityIndexResult>(HttpMethod.Put, requestUrl, requestBody);
}
/// <summary>
/// Deletes an entity from the client's configured searchdomain.
/// </summary>
/// <param name="entityName">Name of the entity to delete.</param>
/// <returns>The result of the searchdomain-specific overload.</returns>
public async Task<EntityDeleteResults> EntityDeleteAsync(string entityName)
    => await EntityDeleteAsync(searchdomain, entityName);
/// <summary>
/// Deletes an entity from the given searchdomain via a DELETE request to the Entity endpoint.
/// </summary>
/// <param name="searchdomain">Name of the searchdomain that holds the entity; URL-encoded into the query string.</param>
/// <param name="entityName">Name of the entity to delete; URL-encoded into the query string.</param>
/// <returns>The response processed into <see cref="EntityDeleteResults"/>.</returns>
public async Task<EntityDeleteResults> EntityDeleteAsync(string searchdomain, string entityName)
{
    // The API key is sent in the X-API-KEY header by FetchUrlAndProcessJson, so it is
    // kept out of the query string, where it could leak into logs and proxies.
    var url = $"{baseUri}/Entity?searchdomain={HttpUtility.UrlEncode(searchdomain)}&entity={HttpUtility.UrlEncode(entityName)}";
    return await FetchUrlAndProcessJson<EntityDeleteResults>(HttpMethod.Delete, url);
}
public async Task<SearchdomainListResults> SearchdomainListAsync()
{
return await GetUrlAndProcessJson<SearchdomainListResults>(GetUrl($"{baseUri}/Searchdomain", "List", apiKey, []));
return await FetchUrlAndProcessJson<SearchdomainListResults>(HttpMethod.Get, GetUrl($"{baseUri}", "Searchdomains", []));
}
/// <summary>
/// Creates the client's configured searchdomain.
/// </summary>
/// <returns>The result of the searchdomain-specific overload.</returns>
public async Task<SearchdomainCreateResults> SearchdomainCreateAsync()
    => await SearchdomainCreateAsync(searchdomain);
/// <summary>
/// Creates a searchdomain via a POST request to the Searchdomain endpoint.
/// </summary>
/// <param name="searchdomain">Name of the searchdomain, passed as the <c>searchdomain</c> URL parameter.</param>
/// <param name="searchdomainSettings">Settings serialized to JSON and sent as the request body.</param>
/// <returns>The response processed into <see cref="SearchdomainCreateResults"/>.</returns>
public async Task<SearchdomainCreateResults> SearchdomainCreateAsync(string searchdomain, SearchdomainSettings searchdomainSettings = new())
{
    Dictionary<string, string> parameters = new()
    {
        {"searchdomain", searchdomain}
    };
    // Build the URL before the body, as the original did.
    string requestUrl = GetUrl($"{baseUri}", "Searchdomain", parameters);
    StringContent requestBody = new(JsonSerializer.Serialize(searchdomainSettings), Encoding.UTF8, "application/json");
    return await FetchUrlAndProcessJson<SearchdomainCreateResults>(HttpMethod.Post, requestUrl, requestBody);
}
public async Task<SearchdomainDeleteResults> SearchdomainDeleteAsync()
@@ -46,20 +94,7 @@ public class Client
public async Task<SearchdomainDeleteResults> SearchdomainDeleteAsync(string searchdomain)
{
return await GetUrlAndProcessJson<SearchdomainDeleteResults>(GetUrl($"{baseUri}/Searchdomain", "Delete", apiKey, new Dictionary<string, string>()
{
{"searchdomain", searchdomain}
}));
}
public async Task<SearchdomainCreateResults> SearchdomainCreateAsync()
{
return await SearchdomainCreateAsync(searchdomain);
}
public async Task<SearchdomainCreateResults> SearchdomainCreateAsync(string searchdomain)
{
return await GetUrlAndProcessJson<SearchdomainCreateResults>(GetUrl($"{baseUri}/Searchdomain", "Create", apiKey, new Dictionary<string, string>()
return await FetchUrlAndProcessJson<SearchdomainDeleteResults>(HttpMethod.Delete, GetUrl($"{baseUri}", "Searchdomain", new Dictionary<string, string>()
{
{"searchdomain", searchdomain}
}));
@@ -72,87 +107,147 @@ public class Client
return updateResults;
}
/// <summary>
/// Updates a searchdomain by serializing the settings to JSON and delegating to the string overload.
/// </summary>
/// <param name="searchdomain">Current name of the searchdomain.</param>
/// <param name="newName">Forwarded unchanged to the string overload.</param>
/// <param name="settings">Settings to serialize to JSON.</param>
/// <returns>The result of the string overload.</returns>
public async Task<SearchdomainUpdateResults> SearchdomainUpdateAsync(string searchdomain, string newName, SearchdomainSettings settings = new())
{
    string serializedSettings = JsonSerializer.Serialize(settings);
    return await SearchdomainUpdateAsync(searchdomain, newName, serializedSettings);
}
public async Task<SearchdomainUpdateResults> SearchdomainUpdateAsync(string searchdomain, string newName, string settings = "{}")
{
return await GetUrlAndProcessJson<SearchdomainUpdateResults>(GetUrl($"{baseUri}/Searchdomain", "Update", apiKey, new Dictionary<string, string>()
return await FetchUrlAndProcessJson<SearchdomainUpdateResults>(HttpMethod.Put, GetUrl($"{baseUri}", "Searchdomain", new Dictionary<string, string>()
{
{"searchdomain", searchdomain},
{"newName", newName},
{"settings", settings}
}));
{"newName", newName}
}), new StringContent(settings, Encoding.UTF8, "application/json"));
}
public async Task<EntityQueryResults> EntityQueryAsync(string query)
public async Task<SearchdomainQueriesResults> SearchdomainGetQueriesAsync(string searchdomain)
{
return await EntityQueryAsync(searchdomain, query);
Dictionary<string, string> parameters = new()
{
{"searchdomain", searchdomain}
};
return await FetchUrlAndProcessJson<SearchdomainQueriesResults>(HttpMethod.Get, GetUrl($"{baseUri}/Searchdomain", "Queries", parameters));
}
public async Task<EntityQueryResults> EntityQueryAsync(string searchdomain, string query)
public async Task<EntityQueryResults> SearchdomainQueryAsync(string query)
{
return await GetUrlAndProcessJson<EntityQueryResults>(GetUrl($"{baseUri}/Entity", "Query", apiKey, new Dictionary<string, string>()
return await SearchdomainQueryAsync(searchdomain, query);
}
public async Task<EntityQueryResults> SearchdomainQueryAsync(string searchdomain, string query, int? topN = null, bool returnAttributes = false)
{
Dictionary<string, string> parameters = new()
{
{"searchdomain", searchdomain},
{"query", query}
}));
};
if (topN is not null) parameters.Add("topN", ((int)topN).ToString());
if (returnAttributes) parameters.Add("returnAttributes", returnAttributes.ToString());
return await FetchUrlAndProcessJson<EntityQueryResults>(HttpMethod.Post, GetUrl($"{baseUri}/Searchdomain", "Query", parameters), null);
}
public async Task<EntityIndexResult> EntityIndexAsync(List<JSONEntity> jsonEntity)
public async Task<SearchdomainDeleteSearchResult> SearchdomainDeleteQueryAsync(string searchdomain, string query)
{
return await EntityIndexAsync(JsonSerializer.Serialize(jsonEntity));
Dictionary<string, string> parameters = new()
{
{"searchdomain", searchdomain},
{"query", query}
};
return await FetchUrlAndProcessJson<SearchdomainDeleteSearchResult>(HttpMethod.Delete, GetUrl($"{baseUri}/Searchdomain", "Query", parameters));
}
public async Task<EntityIndexResult> EntityIndexAsync(string jsonEntity)
public async Task<SearchdomainUpdateSearchResult> SearchdomainUpdateQueryAsync(string searchdomain, string query, List<ResultItem> results)
{
var content = new StringContent(jsonEntity, Encoding.UTF8, "application/json");
return await PostUrlAndProcessJson<EntityIndexResult>(GetUrl($"{baseUri}/Entity", "Index", apiKey, []), content);//new FormUrlEncodedContent(values));
Dictionary<string, string> parameters = new()
{
{"searchdomain", searchdomain},
{"query", query}
};
return await FetchUrlAndProcessJson<SearchdomainUpdateSearchResult>(
HttpMethod.Patch,
GetUrl($"{baseUri}/Searchdomain", "Query", parameters),
new StringContent(JsonSerializer.Serialize(results), Encoding.UTF8, "application/json"));
}
public async Task<EntityListResults> EntityListAsync(bool returnEmbeddings = false)
public async Task<SearchdomainSettingsResults> SearchdomainGetSettingsAsync(string searchdomain)
{
return await EntityListAsync(searchdomain, returnEmbeddings);
Dictionary<string, string> parameters = new()
{
{"searchdomain", searchdomain}
};
return await FetchUrlAndProcessJson<SearchdomainSettingsResults>(HttpMethod.Get, GetUrl($"{baseUri}/Searchdomain", "Settings", parameters));
}
public async Task<EntityListResults> EntityListAsync(string searchdomain, bool returnEmbeddings = false)
public async Task<SearchdomainUpdateResults> SearchdomainUpdateSettingsAsync(string searchdomain, SearchdomainSettings searchdomainSettings)
{
var url = $"{baseUri}/Entity/List?apiKey={HttpUtility.UrlEncode(apiKey)}&searchdomain={HttpUtility.UrlEncode(searchdomain)}&returnEmbeddings={HttpUtility.UrlEncode(returnEmbeddings.ToString())}";
return await GetUrlAndProcessJson<EntityListResults>(url);
Dictionary<string, string> parameters = new()
{
{"searchdomain", searchdomain}
};
StringContent content = new(JsonSerializer.Serialize(searchdomainSettings), Encoding.UTF8, "application/json");
return await FetchUrlAndProcessJson<SearchdomainUpdateResults>(HttpMethod.Put, GetUrl($"{baseUri}/Searchdomain", "Settings", parameters), content);
}
public async Task<EntityDeleteResults> EntityDeleteAsync(string entityName)
public async Task<SearchdomainQueryCacheSizeResults> SearchdomainGetQueryCacheSizeAsync(string searchdomain)
{
return await EntityDeleteAsync(searchdomain, entityName);
Dictionary<string, string> parameters = new()
{
{"searchdomain", searchdomain}
};
return await FetchUrlAndProcessJson<SearchdomainQueryCacheSizeResults>(HttpMethod.Get, GetUrl($"{baseUri}/Searchdomain/QueryCache", "Size", parameters));
}
public async Task<EntityDeleteResults> EntityDeleteAsync(string searchdomain, string entityName)
public async Task<SearchdomainInvalidateCacheResults> SearchdomainClearQueryCache(string searchdomain)
{
var url = $"{baseUri}/Entity/Delete?apiKey={HttpUtility.UrlEncode(apiKey)}&searchdomain={HttpUtility.UrlEncode(searchdomain)}&entity={HttpUtility.UrlEncode(entityName)}";
return await GetUrlAndProcessJson<EntityDeleteResults>(url);
Dictionary<string, string> parameters = new()
{
{"searchdomain", searchdomain}
};
return await FetchUrlAndProcessJson<SearchdomainInvalidateCacheResults>(HttpMethod.Post, GetUrl($"{baseUri}/Searchdomain/QueryCache", "Clear", parameters), null);
}
private static async Task<T> GetUrlAndProcessJson<T>(string url)
/// <summary>
/// Requests the database size of a searchdomain from the server.
/// </summary>
/// <param name="searchdomain">Name of the searchdomain; sent as the <c>searchdomain</c> query parameter.</param>
/// <returns>The server response processed into a <see cref="SearchdomainGetDatabaseSizeResult"/>.</returns>
public async Task<SearchdomainGetDatabaseSizeResult> SearchdomainGetDatabaseSizeAsync(string searchdomain)
{
    Dictionary<string, string> queryParameters = new() { ["searchdomain"] = searchdomain };
    string requestUrl = GetUrl($"{baseUri}/Searchdomain/Database", "Size", queryParameters);
    return await FetchUrlAndProcessJson<SearchdomainGetDatabaseSizeResult>(HttpMethod.Get, requestUrl);
}
/// <summary>
/// Requests the server's <c>Server/Models</c> endpoint without any query parameters.
/// </summary>
/// <returns>The server response processed into a <see cref="ServerGetModelsResult"/>.</returns>
public async Task<ServerGetModelsResult> ServerGetModelsAsync()
{
    string requestUrl = GetUrl($"{baseUri}/Server", "Models", []);
    return await FetchUrlAndProcessJson<ServerGetModelsResult>(HttpMethod.Get, requestUrl);
}
/// <summary>
/// Requests the server's <c>Server/Stats/Size</c> endpoint without any query parameters.
/// </summary>
/// <returns>The server response processed into a <see cref="ServerGetStatsResult"/>.</returns>
public async Task<ServerGetStatsResult> ServerGetStatsAsync()
{
    string requestUrl = GetUrl($"{baseUri}/Server/Stats", "Size", []);
    return await FetchUrlAndProcessJson<ServerGetStatsResult>(HttpMethod.Get, requestUrl);
}
private async Task<T> FetchUrlAndProcessJson<T>(HttpMethod httpMethod, string url, HttpContent? content = null)
{
HttpRequestMessage requestMessage = new(httpMethod, url)
{
Content = content,
};
requestMessage.Headers.Add("X-API-KEY", apiKey);
using var client = new HttpClient();
var response = await client.GetAsync(url);
string responseContent = await response.Content.ReadAsStringAsync();
var result = JsonSerializer.Deserialize<T>(responseContent)
?? throw new Exception($"Failed to deserialize JSON to type {typeof(T).Name}");
return result;
}
private static async Task<T> PostUrlAndProcessJson<T>(string url, HttpContent content)
{
using var client = new HttpClient();
var response = await client.PostAsync(url, content);
var response = await client.SendAsync(requestMessage);
string responseContent = await response.Content.ReadAsStringAsync();
if (response.StatusCode == HttpStatusCode.Forbidden || response.StatusCode == HttpStatusCode.Unauthorized) throw new UnauthorizedAccessException(responseContent); // TODO implement distinct exceptions
if (response.StatusCode == HttpStatusCode.InternalServerError) throw new Exception($"Request was unsuccessful due to an internal server error: {responseContent}"); // TODO implement proper InternalServerErrorException
var result = JsonSerializer.Deserialize<T>(responseContent)
?? throw new Exception($"Failed to deserialize JSON to type {typeof(T).Name}");
return result;
}
public static string GetUrl(string baseUri, string endpoint, string apiKey, Dictionary<string, string> parameters)
public static string GetUrl(string baseUri, string endpoint, Dictionary<string, string> parameters)
{
var uriBuilder = new UriBuilder($"{baseUri}/{endpoint}");
var query = HttpUtility.ParseQueryString(uriBuilder.Query);
if (apiKey.Length > 0) query["apiKey"] = apiKey;
foreach (var param in parameters)
{
query[param.Key] = param.Value;

View File

@@ -5,7 +5,7 @@
</ItemGroup>
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

View File

@@ -1,7 +1,7 @@
FROM ubuntu:24.04 AS ubuntu
FROM ubuntu:25.10 AS ubuntu
WORKDIR /app
RUN apt-get update
RUN apt-get install -y python3.12 python3.12-venv python3.12-dev dotnet-sdk-8.0
RUN apt-get install -y python3.13 python3.13-venv python3.13-dev dotnet-sdk-10.0
RUN apt-get clean
COPY . /src/
ENV ASPNETCORE_ENVIRONMENT Docker

View File

@@ -1,21 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<TargetFramework>net10.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="ElmahCore" Version="2.1.2" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.14" />
<PackageReference Include="Microsoft.CodeAnalysis.CSharp.Scripting" Version="4.14.0" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="9.0.8" />
<PackageReference Include="Quartz.Extensions.Hosting" Version="3.15.0" />
<PackageReference Include="Serilog.AspNetCore" Version="9.0.0" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="10.0.2" />
<PackageReference Include="Microsoft.CodeAnalysis.CSharp.Scripting" Version="5.0.0" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="10.0.2" />
<PackageReference Include="Quartz.Extensions.Hosting" Version="3.15.1" />
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
<PackageReference Include="Serilog.Sinks.File" Version="7.0.0" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.6.2" />
<PackageReference Include="Python" Version="3.13.3" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="10.1.0" />
<PackageReference Include="System.Configuration.ConfigurationManager" Version="10.0.2" />
<PackageReference Include="Python" Version="3.14.2" />
<PackageReference Include="Pythonnet" Version="3.0.5" />
</ItemGroup>
<ItemGroup>

View File

@@ -0,0 +1,9 @@
using Shared.Models;
namespace Indexer.Models;
/// <summary>
/// Strongly typed options for the indexer; extends <see cref="ApiKeyOptions"/>.
/// </summary>
/// <remarks>
/// NOTE(review): PythonRuntime is both <c>required</c> and given a default value. Object initializers
/// must therefore always set it explicitly; whether the default is still applied when the type is
/// created by configuration binding should be confirmed.
/// </remarks>
public class IndexerOptions : ApiKeyOptions
{
// Configurations of the workers to set up.
public required WorkerConfig[] Workers { get; set; }
// Options describing the server (presumably the embeddingsearch server the indexer talks to - TODO confirm).
public required ServerOptions Server { get; set;}
// Name of the Python runtime library; defaults to "libpython3.13.so".
public required string PythonRuntime { get; set; } = "libpython3.13.so";
}

View File

@@ -15,11 +15,11 @@ public class ScriptToolSet
public Client.Client Client;
public LoggerWrapper Logger;
public ICallbackInfos? CallbackInfos;
public IConfiguration Configuration;
public IndexerOptions Configuration;
public CancellationToken CancellationToken;
public string Name;
public ScriptToolSet(string filePath, Client.Client client, ILogger<WorkerManager> logger, IConfiguration configuration, CancellationToken cancellationToken, string name)
public ScriptToolSet(string filePath, Client.Client client, ILogger<WorkerManager> logger, IndexerOptions configuration, CancellationToken cancellationToken, string name)
{
Configuration = configuration;
Name = name;

View File

@@ -6,6 +6,8 @@ using ElmahCore.Mvc;
using ElmahCore.Mvc.Logger;
using Serilog;
using Quartz;
using System.Configuration;
using Shared.Models;
var builder = WebApplication.CreateBuilder(args);
@@ -21,6 +23,12 @@ Log.Logger = new LoggerConfiguration()
builder.Logging.AddSerilog();
builder.Services.AddHttpContextAccessor();
builder.Services.AddSingleton<IConfigurationRoot>(builder.Configuration);
// Bind the "Indexer" configuration section to strongly typed options; abort startup if no options object can be produced.
IConfigurationSection configurationSection = builder.Configuration.GetSection("Indexer");
IndexerOptions configuration = configurationSection.Get<IndexerOptions>() ?? throw new ConfigurationErrorsException("Unable to start server due to an invalid configuration");
// Register the same section (and its "Server" subsection) with the options system so that
// IOptions<IndexerOptions>, IOptions<ServerOptions> and IOptions<ApiKeyOptions> can be injected.
builder.Services.Configure<IndexerOptions>(configurationSection);
builder.Services.Configure<ServerOptions>(configurationSection.GetSection("Server"));
builder.Services.Configure<ApiKeyOptions>(configurationSection);
builder.Services.AddSingleton<Client.Client>();
builder.Services.AddSingleton<WorkerManager>();
builder.Services.AddHostedService<IndexerService>();
@@ -72,8 +80,6 @@ else
app.UseMiddleware<Shared.ApiKeyMiddleware>();
}
// app.UseHttpsRedirection();
app.MapControllers();
app.Run();

View File

@@ -15,11 +15,8 @@ public class PythonScriptable : IScriptContainer
public ILogger _logger { get; set; }
public PythonScriptable(ScriptToolSet toolSet, ILogger logger)
{
string? runtime = toolSet.Configuration.GetValue<string>("EmbeddingsearchIndexer:PythonRuntime");
if (runtime is not null)
{
string runtime = toolSet.Configuration.PythonRuntime;
Runtime.PythonDLL ??= runtime;
}
_logger = logger;
SourceLoaded = false;
if (!PythonEngine.IsInitialized)

View File

@@ -1,21 +1,22 @@
using Indexer.Exceptions;
using Indexer.Models;
using Indexer.ScriptContainers;
using Microsoft.Extensions.Options;
public class WorkerManager
{
public Dictionary<string, Worker> Workers;
public List<Type> types;
private readonly ILogger<WorkerManager> _logger;
private readonly IConfiguration _configuration;
private readonly IndexerOptions _configuration;
private readonly Client.Client client;
public WorkerManager(ILogger<WorkerManager> logger, IConfiguration configuration, Client.Client client)
public WorkerManager(ILogger<WorkerManager> logger, IOptions<IndexerOptions> configuration, Client.Client client)
{
Workers = [];
types = [typeof(PythonScriptable), typeof(CSharpScriptable)];
_logger = logger;
_configuration = configuration;
_configuration = configuration.Value;
this.client = client;
}
@@ -23,28 +24,13 @@ public class WorkerManager
{
_logger.LogInformation("Initializing workers");
// Load and configure all workers
var sectionMain = _configuration.GetSection("EmbeddingsearchIndexer");
if (!sectionMain.Exists())
{
_logger.LogCritical("Unable to load section \"EmbeddingsearchIndexer\"");
throw new IndexerConfigurationException("Unable to load section \"EmbeddingsearchIndexer\"");
}
WorkerCollectionConfig? sectionWorker = (WorkerCollectionConfig?)sectionMain.Get(typeof(WorkerCollectionConfig)); //GetValue<WorkerCollectionConfig>("Worker");
if (sectionWorker is not null)
{
foreach (WorkerConfig workerConfig in sectionWorker.Worker)
foreach (WorkerConfig workerConfig in _configuration.Workers)
{
CancellationTokenSource cancellationTokenSource = new();
ScriptToolSet toolSet = new(workerConfig.Script, client, _logger, _configuration, cancellationTokenSource.Token, workerConfig.Name);
InitializeWorker(toolSet, workerConfig, cancellationTokenSource);
}
}
else
{
_logger.LogCritical("Unable to load section \"Worker\"");
throw new IndexerConfigurationException("Unable to load section \"Worker\"");
}
_logger.LogInformation("Initialized workers");
}

View File

@@ -5,46 +5,24 @@
"Microsoft.AspNetCore": "Warning"
}
},
"Embeddingsearch": {
"BaseUri": "http://localhost:5146"
},
"EmbeddingsearchIndexer": {
"Worker":
[
"Indexer": {
"Workers": [
{
"Name": "pythonExample",
"Script": "Scripts/example.py",
"Calls": [
{
"Name": "intervalCall",
"Type": "interval",
"Interval": 30000
}
]
},
{
"Name": "csharpExample",
"Script": "Scripts/example.csx",
"Calls": [
{
"Name": "runonceCall",
"Type": "runonce"
},
{
"Name": "scheduleCall",
"Type": "schedule",
"Schedule": "0 0/5 * * * ?"
},
{
"Name": "fileupdateCall",
"Type": "fileupdate",
"Path": "./Scripts/example_content",
"Events": ["Created", "Changed", "Deleted", "Renamed"],
"Filters": ["*.md", "*.txt"],
"IncludeSubdirectories": true
}
]
}
]
],
"ApiKeys": ["xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"],
"Server": {
"BaseUri": "http://localhost:5146",
"ApiKey": "APIKeyForTheServer"
},
"PythonRuntime": "libpython3.13.so"
}
}

View File

@@ -5,26 +5,8 @@
"Microsoft.AspNetCore": "Warning"
}
},
"Kestrel":{
"Endpoints": {
"http":{
"Url": "http://0.0.0.0:5120"
}
}
},
"Embeddingsearch": {
"BaseUri": "http://172.17.0.1:5146",
"ApiKeys": ["b54ea868-496e-11f0-9cc7-f79f06b160e5", "bbdeedf0-496e-11f0-9744-97e28c221f67"]
},
"EmbeddingsearchIndexer": {
"Elmah": {
"AllowedHosts": [
"127.0.0.1",
"::1",
"172.17.0.1"
]
},
"Worker":
"Indexer": {
"Workers":
[
{
"Name": "pythonExample",
@@ -36,6 +18,12 @@
}
]
}
]
],
"ApiKeys": ["APIKeyOfYourChoice", "AnotherOneIfYouLike"],
"Server": {
"BaseUri": "http://172.17.0.1:5146",
"ApiKey": "APIKeyForTheServer"
},
"PythonRuntime": "libpython3.13.so"
}
}

View File

@@ -24,7 +24,7 @@
],
"LogFolder": "./logs"
},
"PythonRuntime": "libpython3.12.so"
"PythonRuntime": "libpython3.13.so"
},
"AllowedHosts": "*"
}

View File

@@ -1,24 +1,25 @@
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Options;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using Server.Exceptions;
using Server.Models;
namespace Server;
public class AIProvider
{
private readonly ILogger<AIProvider> _logger;
private readonly IConfiguration _configuration;
public AIProvidersConfiguration aIProvidersConfiguration;
private readonly EmbeddingSearchOptions _configuration;
public Dictionary<string, AiProvider> aIProvidersConfiguration;
public AIProvider(ILogger<AIProvider> logger, IConfiguration configuration)
public AIProvider(ILogger<AIProvider> logger, IOptions<EmbeddingSearchOptions> configuration)
{
_logger = logger;
_configuration = configuration;
AIProvidersConfiguration? retrievedAiProvidersConfiguration = _configuration
.GetSection("Embeddingsearch")
.Get<AIProvidersConfiguration>();
_configuration = configuration.Value;
Dictionary<string, AiProvider>? retrievedAiProvidersConfiguration = _configuration.AiProviders;
if (retrievedAiProvidersConfiguration is null)
{
_logger.LogCritical("Unable to build AIProvidersConfiguration. Please check your configuration.");
@@ -30,13 +31,18 @@ public class AIProvider
}
}
public float[] GenerateEmbeddings(string modelUri, string[] input)
/// <summary>
/// Generates the embeddings for a single input string.
/// </summary>
/// <remarks>
/// Wraps <paramref name="input"/> into a one-element array, delegates to the array-based overload
/// and returns a copy of the first result.
/// </remarks>
/// <param name="modelUri">Model URI, passed through unchanged to the array-based overload.</param>
/// <param name="input">The single input to generate embeddings for.</param>
/// <returns>A new array containing the first embedding returned by the array-based overload.</returns>
public float[] GenerateEmbeddings(string modelUri, string input)
{
    string[] singleInput = [input];
    float[] firstEmbedding = GenerateEmbeddings(modelUri, singleInput).First();
    return [.. firstEmbedding];
}
public IEnumerable<float[]> GenerateEmbeddings(string modelUri, string[] input)
{
Uri uri = new(modelUri);
string provider = uri.Scheme;
string model = uri.AbsolutePath;
AIProviderConfiguration? aIProvider = aIProvidersConfiguration.AiProviders
.FirstOrDefault(x => String.Equals(x.Key.ToLower(), provider.ToLower()))
AiProvider? aIProvider = aIProvidersConfiguration
.FirstOrDefault(x => string.Equals(x.Key.ToLower(), provider.ToLower()))
.Value;
if (aIProvider is null)
{
@@ -102,13 +108,13 @@ public class AIProvider
try
{
JObject responseContentJson = JObject.Parse(responseContent);
JToken? responseContentTokens = responseContentJson.SelectToken(embeddingsJsonPath);
List<JToken>? responseContentTokens = [.. responseContentJson.SelectTokens(embeddingsJsonPath)];
if (responseContentTokens is null)
{
_logger.LogError("Unable to select tokens using JSONPath {embeddingsJsonPath} for string: {responseContent}.", [embeddingsJsonPath, responseContent]);
throw new JSONPathSelectionException(embeddingsJsonPath, responseContent);
}
return [.. responseContentTokens.Values<float>()];
return [.. responseContentTokens.Select(token => token.ToObject<float[]>() ?? throw new Exception("Unable to cast embeddings response to float[]"))];
}
catch (Exception ex)
{
@@ -119,12 +125,12 @@ public class AIProvider
public string[] GetModels()
{
var aIProviders = aIProvidersConfiguration.AiProviders;
var aIProviders = aIProvidersConfiguration;
List<string> results = [];
foreach (KeyValuePair<string, AIProviderConfiguration> aIProviderKV in aIProviders)
foreach (KeyValuePair<string, AiProvider> aIProviderKV in aIProviders)
{
string aIProviderName = aIProviderKV.Key;
AIProviderConfiguration aIProvider = aIProviderKV.Value;
AiProvider aIProvider = aIProviderKV.Value;
using var httpClient = new HttpClient();
@@ -178,9 +184,14 @@ public class AIProvider
foreach (string? result in aIProviderResult)
{
if (result is null) continue;
bool isInAllowList = ElementMatchesAnyRegexInList(result, aIProvider.Allowlist);
bool isInDenyList = ElementMatchesAnyRegexInList(result, aIProvider.Denylist);
if (isInAllowList && !isInDenyList)
{
results.Add(aIProviderName + ":" + result);
}
}
}
catch (Exception ex)
{
_logger.LogError("Unable to parse the response to valid models. {ex.Message}", [ex.Message]);
@@ -189,6 +200,11 @@ public class AIProvider
}
return [.. results];
}
/// <summary>
/// Checks whether <paramref name="element"/> matches at least one regular expression in <paramref name="list"/>.
/// </summary>
/// <param name="element">The text to test.</param>
/// <param name="list">Regular expression patterns; a null list and null entries are tolerated.</param>
/// <returns>
/// <c>true</c> as soon as one non-null pattern matches; <c>false</c> when the list is null,
/// empty, or contains no matching pattern.
/// </returns>
private static bool ElementMatchesAnyRegexInList(string element, string[] list)
{
    if (list is null)
    {
        return false;
    }

    foreach (string candidate in list)
    {
        // Null entries are skipped rather than handed to the regex engine.
        if (candidate is not null && Regex.IsMatch(element, candidate))
        {
            return true;
        }
    }

    return false;
}
}
public class AIProvidersConfiguration

View File

@@ -6,14 +6,15 @@ using Server.Models;
namespace Server.Controllers;
[ApiExplorerSettings(IgnoreApi = true)]
[Route("[Controller]")]
public class AccountController : Controller
{
private readonly SimpleAuthOptions _options;
public AccountController(IOptions<SimpleAuthOptions> options)
public AccountController(IOptions<EmbeddingSearchOptions> options)
{
_options = options.Value;
_options = options.Value.SimpleAuth;
}
[HttpGet("Login")]

View File

@@ -24,41 +24,78 @@ public class EntityController : ControllerBase
_databaseHelper = databaseHelper;
}
[HttpGet("Query")]
public ActionResult<EntityQueryResults> Query(string searchdomain, string query, int? topN)
/// <summary>
/// List the entities in a searchdomain
/// </summary>
/// <remarks>
/// With returnModels = false expect: "Datapoints": [..., "Embeddings": null]<br/>
/// With returnModels = true expect: "Datapoints": [..., "Embeddings": [{"Model": "...", "Embeddings": []}, ...]]<br/>
/// With returnEmbeddings = true expect: "Datapoints": [..., "Embeddings": [{"Model": "...", "Embeddings": [0.007384672,0.01309805,0.0012528514,...]}, ...]]
/// </remarks>
/// <param name="searchdomain">Name of the searchdomain</param>
/// <param name="returnModels">Include the models in the response</param>
/// <param name="returnEmbeddings">Include the embeddings in the response (requires returnModels)</param>
[HttpGet("/Entities")]
public ActionResult<EntityListResults> List(string searchdomain, bool returnModels = false, bool returnEmbeddings = false)
{
Searchdomain searchdomain_;
try
if (returnEmbeddings && !returnModels)
{
searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
} catch (SearchdomainNotFoundException)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - it likely does not exist yet", [searchdomain]);
return Ok(new EntityQueryResults() {Results = [], Success = false, Message = "Searchdomain not found" });
} catch (Exception ex)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
return Ok(new EntityQueryResults() {Results = [], Success = false, Message = "Unable to retrieve the searchdomain - it likely exists, but some other error happened." });
_logger.LogError("Invalid request for {searchdomain} - embeddings return requested but without models - not possible!", [searchdomain]);
return BadRequest(new EntityListResults() {Results = [], Success = false, Message = "Invalid request" });
}
List<(float, string)> results = searchdomain_.Search(query, topN);
List<EntityQueryResult> queryResults = [.. results.Select(r => new EntityQueryResult
(Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
if (searchdomain_ is null || httpStatusCode is not null) return StatusCode(httpStatusCode ?? 500, new SearchdomainUpdateResults(){Success = false, Message = message});
EntityListResults entityListResults = new() {Results = [], Success = true};
foreach (Entity entity in searchdomain_.entityCache)
{
Name = r.Item2,
Value = r.Item1
})];
return Ok(new EntityQueryResults(){Results = queryResults, Success = true });
List<AttributeResult> attributeResults = [];
foreach (KeyValuePair<string, string> attribute in entity.attributes)
{
attributeResults.Add(new AttributeResult() {Name = attribute.Key, Value = attribute.Value});
}
List<DatapointResult> datapointResults = [];
foreach (Datapoint datapoint in entity.datapoints)
{
if (returnModels)
{
List<EmbeddingResult> embeddingResults = [];
foreach ((string, float[]) embedding in datapoint.embeddings)
{
embeddingResults.Add(new EmbeddingResult() {Model = embedding.Item1, Embeddings = returnEmbeddings ? embedding.Item2 : []});
}
datapointResults.Add(new DatapointResult() {Name = datapoint.name, ProbMethod = datapoint.probMethod.name, SimilarityMethod = datapoint.similarityMethod.name, Embeddings = embeddingResults});
}
else
{
datapointResults.Add(new DatapointResult() {Name = datapoint.name, ProbMethod = datapoint.probMethod.name, SimilarityMethod = datapoint.similarityMethod.name, Embeddings = null});
}
}
EntityListResult entityListResult = new()
{
Name = entity.name,
ProbMethod = entity.probMethodName,
Attributes = attributeResults,
Datapoints = datapointResults
};
entityListResults.Results.Add(entityListResult);
}
return Ok(entityListResults);
}
[HttpPost("Index")]
/// <summary>
/// Index entities
/// </summary>
/// <remarks>
/// Behavior: Creates new entities, but overwrites existing entities that have the same name
/// </remarks>
/// <param name="jsonEntities">Entities to index</param>
[HttpPut("/Entities")]
public ActionResult<EntityIndexResult> Index([FromBody] List<JSONEntity>? jsonEntities)
{
try
{
List<Entity>? entities = _searchdomainHelper.EntitiesFromJSON(
[],
_domainManager.embeddingCache,
_domainManager.aIProvider,
_domainManager.helper,
_domainManager,
_logger,
JsonSerializer.Serialize(jsonEntities));
if (entities is not null && jsonEntities is not null)
@@ -72,7 +109,6 @@ public class EntityController : ControllerBase
&& !invalidatedSearchdomains.Contains(jsonEntitySearchdomainName))
{
invalidatedSearchdomains.Add(jsonEntitySearchdomainName);
_domainManager.InvalidateSearchdomainCache(jsonEntitySearchdomainName);
}
}
return Ok(new EntityIndexResult() { Success = true });
@@ -80,92 +116,42 @@ public class EntityController : ControllerBase
else
{
_logger.LogError("Unable to deserialize an entity");
ElmahCore.ElmahExtensions.RaiseError(new Exception("Unable to deserialize an entity"));
return Ok(new EntityIndexResult() { Success = false, Message = "Unable to deserialize an entity"});
}
} catch (Exception ex)
{
if (ex.InnerException is not null) ex = ex.InnerException;
_logger.LogError("Unable to index the provided entities. {ex.Message} - {ex.StackTrace}", [ex.Message, ex.StackTrace]);
ElmahCore.ElmahExtensions.RaiseError(ex);
return Ok(new EntityIndexResult() { Success = false, Message = ex.Message });
}
}
[HttpGet("List")]
public ActionResult<EntityListResults> List(string searchdomain, bool returnEmbeddings = false)
{
Searchdomain searchdomain_;
try
{
searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
} catch (SearchdomainNotFoundException)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - it likely does not exist yet", [searchdomain]);
return Ok(new EntityQueryResults() {Results = [], Success = false, Message = "Searchdomain not found" });
} catch (Exception ex)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
return Ok(new EntityQueryResults() {Results = [], Success = false, Message = "Unable to retrieve the searchdomain - it likely exists, but some other error happened." });
}
EntityListResults entityListResults = new() {Results = [], Success = true};
foreach (Entity entity in searchdomain_.entityCache)
{
List<AttributeResult> attributeResults = [];
foreach (KeyValuePair<string, string> attribute in entity.attributes)
{
attributeResults.Add(new AttributeResult() {Name = attribute.Key, Value = attribute.Value});
}
List<DatapointResult> datapointResults = [];
foreach (Datapoint datapoint in entity.datapoints)
{
if (returnEmbeddings)
{
List<EmbeddingResult> embeddingResults = [];
foreach ((string, float[]) embedding in datapoint.embeddings)
{
embeddingResults.Add(new EmbeddingResult() {Model = embedding.Item1, Embeddings = embedding.Item2});
}
datapointResults.Add(new DatapointResult() {Name = datapoint.name, ProbMethod = datapoint.probMethod.name, SimilarityMethod = datapoint.similarityMethod.name, Embeddings = embeddingResults});
}
else
{
datapointResults.Add(new DatapointResult() {Name = datapoint.name, ProbMethod = datapoint.probMethod.name, SimilarityMethod = datapoint.similarityMethod.name, Embeddings = null});
}
}
EntityListResult entityListResult = new()
{
Name = entity.name,
Attributes = attributeResults,
Datapoints = datapointResults
};
entityListResults.Results.Add(entityListResult);
}
return Ok(entityListResults);
}
[HttpGet("Delete")]
/// <summary>
/// Deletes an entity
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
/// <param name="entityName">Name of the entity</param>
[HttpDelete]
public ActionResult<EntityDeleteResults> Delete(string searchdomain, string entityName)
{
Searchdomain searchdomain_;
try
{
searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
} catch (SearchdomainNotFoundException)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - it likely does not exist yet", [searchdomain]);
return Ok(new EntityQueryResults() {Results = [], Success = false, Message = "Searchdomain not found" });
} catch (Exception ex)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
return Ok(new EntityQueryResults() {Results = [], Success = false, Message = "Unable to retrieve the searchdomain - it likely exists, but some other error happened." });
}
(Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
if (searchdomain_ is null || httpStatusCode is not null) return StatusCode(httpStatusCode ?? 500, new SearchdomainUpdateResults(){Success = false, Message = message});
Entity? entity_ = SearchdomainHelper.CacheGetEntity(searchdomain_.entityCache, entityName);
if (entity_ is null)
{
_logger.LogError("Unable to delete the entity {entityName} in {searchdomain} - it was not found under the specified name", [entityName, searchdomain]);
ElmahCore.ElmahExtensions.RaiseError(
new Exception(
$"Unable to delete the entity {entityName} in {searchdomain} - it was not found under the specified name"
)
);
return Ok(new EntityDeleteResults() {Success = false, Message = "Entity not found"});
}
searchdomain_.ReconciliateOrInvalidateCacheForDeletedEntity(entity_);
_databaseHelper.RemoveEntity([], _domainManager.helper, entityName, searchdomain);
searchdomain_.entityCache.RemoveAll(entity => entity.name == entityName);
return Ok(new EntityDeleteResults() {Success = true});

View File

@@ -7,8 +7,8 @@ using Server.Exceptions;
using Server.Models;
namespace Server.Controllers;
[ApiController]
[Route("/")]
[ApiExplorerSettings(IgnoreApi = true)]
[Route("[Controller]")]
public class HomeController : Controller
{
private readonly ILogger<EntityController> _logger;
@@ -20,9 +20,22 @@ public class HomeController : Controller
_domainManager = domainManager;
}
/// <summary>
/// Redirects requests for the site root to <c>/Home/Index</c>.
/// </summary>
[Authorize]
[HttpGet("/")]
public IActionResult Root() => Redirect("/Home/Index");
/// <summary>
/// Returns the view for this action.
/// </summary>
[Authorize]
[HttpGet("Index")]
public IActionResult Index() => View();
[Authorize]
[HttpGet("Searchdomains")]
public IActionResult Searchdomains()
{
HomeIndexViewModel viewModel = new()
{

View File

@@ -1,8 +1,11 @@
using System.ComponentModel.DataAnnotations;
using System.Text.Json;
using ElmahCore;
using Microsoft.AspNetCore.Http.HttpResults;
using Microsoft.AspNetCore.Mvc;
using Server.Exceptions;
using Server.Helper;
using Shared;
using Shared.Models;
namespace Server.Controllers;
@@ -22,7 +25,10 @@ public class SearchdomainController : ControllerBase
_domainManager = domainManager;
}
[HttpGet("List")]
/// <summary>
/// Lists all searchdomains
/// </summary>
[HttpGet("/Searchdomains")]
public ActionResult<SearchdomainListResults> List()
{
List<string> results;
@@ -39,11 +45,20 @@ public class SearchdomainController : ControllerBase
return Ok(searchdomainListResults);
}
[HttpGet("Create")]
public ActionResult<SearchdomainCreateResults> Create(string searchdomain, string settings = "{}")
/// <summary>
/// Creates a new searchdomain
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
/// <param name="settings">Optional initial settings</param>
[HttpPost]
public ActionResult<SearchdomainCreateResults> Create([Required]string searchdomain, [FromBody]SearchdomainSettings settings = new())
{
try
{
if (settings.QueryCacheSize <= 0)
{
settings.QueryCacheSize = 1_000_000; // TODO get rid of this magic number
}
int id = _domainManager.CreateSearchdomain(searchdomain, settings);
return Ok(new SearchdomainCreateResults(){Id = id, Success = true});
} catch (Exception)
@@ -53,8 +68,12 @@ public class SearchdomainController : ControllerBase
}
}
[HttpGet("Delete")]
public ActionResult<SearchdomainDeleteResults> Delete(string searchdomain)
/// <summary>
/// Deletes a searchdomain
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
[HttpDelete]
public ActionResult<SearchdomainDeleteResults> Delete([Required]string searchdomain)
{
bool success;
int deletedEntries;
@@ -83,12 +102,27 @@ public class SearchdomainController : ControllerBase
return Ok(new SearchdomainDeleteResults(){Success = success, DeletedEntities = deletedEntries, Message = message});
}
[HttpGet("Update")]
public ActionResult<SearchdomainUpdateResults> Update(string searchdomain, string newName, string settings = "{}")
/// <summary>
/// Updates name and settings of a searchdomain
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
/// <param name="newName">Updated name of the searchdomain</param>
/// <param name="settings">Updated settings of searchdomain</param>
[HttpPut]
public ActionResult<SearchdomainUpdateResults> Update([Required]string searchdomain, string newName, [FromBody]SearchdomainSettings? settings)
{
try
(Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
if (searchdomain_ is null || httpStatusCode is not null) return StatusCode(httpStatusCode ?? 500, new SearchdomainUpdateResults(){Success = false, Message = message});
if (settings is null)
{
Dictionary<string, dynamic> parameters = new()
{
{"name", newName},
{"id", searchdomain_.id}
};
searchdomain_.helper.ExecuteSQLNonQuery("UPDATE searchdomain set name = @name WHERE id = @id", parameters);
} else
{
Searchdomain searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
Dictionary<string, dynamic> parameters = new()
{
{"name", newName},
@@ -96,24 +130,111 @@ public class SearchdomainController : ControllerBase
{"id", searchdomain_.id}
};
searchdomain_.helper.ExecuteSQLNonQuery("UPDATE searchdomain set name = @name, settings = @settings WHERE id = @id", parameters);
} catch (SearchdomainNotFoundException)
{
_logger.LogError("Unable to update searchdomain {searchdomain} - not found", [searchdomain]);
return Ok(new SearchdomainUpdateResults() { Success = false, Message = $"Unable to update searchdomain {searchdomain} - not found" });
} catch (Exception ex)
{
_logger.LogError("Unable to update searchdomain {searchdomain} - Exception: {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
return Ok(new SearchdomainUpdateResults() { Success = false, Message = $"Unable to update searchdomain {searchdomain}" });
}
return Ok(new SearchdomainUpdateResults(){Success = true});
}
[HttpPost("UpdateSettings")]
public ActionResult<SearchdomainUpdateResults> UpdateSettings(string searchdomain, [FromBody] SearchdomainSettings request)
/// <summary>
/// Gets the query cache of a searchdomain
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
[HttpGet("Queries")]
public ActionResult<SearchdomainQueriesResults> GetQueries([Required]string searchdomain)
{
try
(Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
if (searchdomain_ is null || httpStatusCode is not null) return StatusCode(httpStatusCode ?? 500, new SearchdomainUpdateResults(){Success = false, Message = message});
Dictionary<string, DateTimedSearchResult> searchCache = searchdomain_.queryCache.AsDictionary();
return Ok(new SearchdomainQueriesResults() { Searches = searchCache, Success = true });
}
/// <summary>
/// Executes a query in the searchdomain
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
/// <param name="query">Query to execute</param>
/// <param name="topN">Return only the top N results</param>
/// <param name="returnAttributes">Return the attributes of the object</param>
/// <returns>
/// One result per hit with its name and value (plus attributes when requested), or an error
/// result with the status code reported by the searchdomain lookup (500 when it reports none).
/// </returns>
[HttpPost("Query")]
public ActionResult<EntityQueryResults> Query([Required]string searchdomain, [Required]string query, int? topN, bool returnAttributes = false)
{
    // Resolve the searchdomain exactly once, through the guarded lookup that yields a status code instead of throwing.
    (Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
    if (searchdomain_ is null || httpStatusCode is not null)
    {
        return StatusCode(httpStatusCode ?? 500, new SearchdomainUpdateResults(){Success = false, Message = message});
    }

    List<(float, string)> results = searchdomain_.Search(query, topN);
    List<EntityQueryResult> queryResults = [.. results.Select(r => new EntityQueryResult
    {
        Name = r.Item2,
        Value = r.Item1,
        // Attributes are looked up by entity name and stay null when not requested or when no entity matches.
        Attributes = returnAttributes ? searchdomain_.entityCache.FirstOrDefault(x => x.name == r.Item2)?.attributes : null
    })];
    return Ok(new EntityQueryResults(){Results = queryResults, Success = true });
}
/// <summary>
/// Deletes a query from the query cache
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
/// <param name="query">Query to delete</param>
/// <returns>
/// Success when the query was cached and has been removed; a non-success result with a message
/// when the query is not cached or the searchdomain lookup failed.
/// </returns>
[HttpDelete("Query")]
public ActionResult<SearchdomainDeleteSearchResult> DeleteQuery([Required]string searchdomain, [Required]string query)
{
    (Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
    if (searchdomain_ is null || httpStatusCode is not null)
    {
        // Use this action's declared result type so error payloads match the advertised contract.
        return StatusCode(httpStatusCode ?? 500, new SearchdomainDeleteSearchResult() { Success = false, Message = message });
    }

    EnumerableLruCache<string, DateTimedSearchResult> searchCache = searchdomain_.queryCache;
    if (!searchCache.ContainsKey(query))
    {
        return Ok(new SearchdomainDeleteSearchResult() {Success = false, Message = "Query not found in search cache"});
    }

    searchCache.Remove(query);
    return Ok(new SearchdomainDeleteSearchResult() {Success = true});
}
/// <summary>
/// Updates a query from the query cache
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
/// <param name="query">Query to update</param>
/// <param name="results">List of results to apply to the query</param>
/// <returns>
/// Success when the query was cached and its results have been replaced; a non-success result
/// with a message when the query is not cached or the searchdomain lookup failed.
/// </returns>
[HttpPatch("Query")]
public ActionResult<SearchdomainUpdateSearchResult> UpdateQuery([Required]string searchdomain, [Required]string query, [Required][FromBody]List<ResultItem> results)
{
    (Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
    if (searchdomain_ is null || httpStatusCode is not null)
    {
        // Use this action's declared result type so error payloads match the advertised contract.
        return StatusCode(httpStatusCode ?? 500, new SearchdomainUpdateSearchResult() { Success = false, Message = message });
    }

    EnumerableLruCache<string, DateTimedSearchResult> searchCache = searchdomain_.queryCache;
    if (!searchCache.ContainsKey(query))
    {
        return Ok(new SearchdomainUpdateSearchResult() {Success = false, Message = "Query not found in search cache"});
    }

    // Replace the results on the cached element and write it back under the same key.
    DateTimedSearchResult element = searchCache[query];
    element.Results = results;
    searchCache[query] = element;
    return Ok(new SearchdomainUpdateSearchResult() {Success = true});
}
/// <summary>
/// Get the settings of a searchdomain
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
/// <returns>
/// The current settings of the searchdomain, or an error result with the status code
/// reported by the searchdomain lookup (500 when the lookup reports none).
/// </returns>
[HttpGet("Settings")]
public ActionResult<SearchdomainSettingsResults> GetSettings([Required]string searchdomain)
{
    (Searchdomain? domain, int? errorStatus, string? errorMessage) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
    if (domain is null || errorStatus is not null)
    {
        return StatusCode(errorStatus ?? 500, new SearchdomainUpdateResults(){Success = false, Message = errorMessage});
    }

    return Ok(new SearchdomainSettingsResults() { Settings = domain.settings, Success = true });
}
/// <summary>
/// Update the settings of a searchdomain
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
[HttpPut("Settings")]
public ActionResult<SearchdomainUpdateResults> UpdateSettings([Required]string searchdomain, [Required][FromBody] SearchdomainSettings request)
{
(Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
if (searchdomain_ is null || httpStatusCode is not null) return StatusCode(httpStatusCode ?? 500, new SearchdomainUpdateResults(){Success = false, Message = message});
Dictionary<string, dynamic> parameters = new()
{
{"settings", JsonSerializer.Serialize(request)},
@@ -121,130 +242,51 @@ public class SearchdomainController : ControllerBase
};
searchdomain_.helper.ExecuteSQLNonQuery("UPDATE searchdomain set settings = @settings WHERE id = @id", parameters);
searchdomain_.settings = request;
} catch (SearchdomainNotFoundException)
{
_logger.LogError("Unable to update settings for searchdomain {searchdomain} - not found", [searchdomain]);
return Ok(new SearchdomainUpdateResults() { Success = false, Message = $"Unable to update settings for searchdomain {searchdomain} - not found" });
} catch (Exception ex)
{
_logger.LogError("Unable to update settings for searchdomain {searchdomain} - Exception: {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
return Ok(new SearchdomainUpdateResults() { Success = false, Message = $"Unable to update settings for searchdomain {searchdomain}" });
}
searchdomain_.queryCache.Capacity = request.QueryCacheSize;
return Ok(new SearchdomainUpdateResults(){Success = true});
}
[HttpGet("GetSearches")]
public ActionResult<SearchdomainSearchesResults> GetSearches(string searchdomain)
/// <summary>
/// Get the query cache size of a searchdomain
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
[HttpGet("QueryCache/Size")]
public ActionResult<SearchdomainQueryCacheSizeResults> GetQueryCacheSize([Required]string searchdomain)
{
Searchdomain searchdomain_;
try
if (!SearchdomainHelper.IsSearchdomainLoaded(_domainManager, searchdomain))
{
searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
return Ok(new SearchdomainQueryCacheSizeResults() { SizeBytes = 0, ElementCount = 0, ElementMaxCount = 0, Success = true });
}
catch (SearchdomainNotFoundException)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - it likely does not exist yet", [searchdomain]);
return Ok(new SearchdomainSearchesResults() { Searches = [], Success = false, Message = "Searchdomain not found" });
}
catch (Exception ex)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
return Ok(new SearchdomainSearchesResults() { Searches = [], Success = false, Message = ex.Message });
}
Dictionary<string, DateTimedSearchResult> searchCache = searchdomain_.searchCache;
return Ok(new SearchdomainSearchesResults() { Searches = searchCache, Success = true });
(Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
if (searchdomain_ is null || httpStatusCode is not null) return StatusCode(httpStatusCode ?? 500, new SearchdomainUpdateResults(){Success = false, Message = message});
int elementCount = searchdomain_.queryCache.Count;
int ElementMaxCount = searchdomain_.settings.QueryCacheSize;
return Ok(new SearchdomainQueryCacheSizeResults() { SizeBytes = searchdomain_.GetSearchCacheSize(), ElementCount = elementCount, ElementMaxCount = ElementMaxCount, Success = true });
}
[HttpGet("GetSettings")]
public ActionResult<SearchdomainSettingsResults> GetSettings(string searchdomain)
/// <summary>
/// Clear the query cache of a searchdomain
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
[HttpPost("QueryCache/Clear")]
public ActionResult<SearchdomainInvalidateCacheResults> InvalidateSearchCache([Required]string searchdomain)
{
Searchdomain searchdomain_;
try
{
searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
}
catch (SearchdomainNotFoundException)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - it likely does not exist yet", [searchdomain]);
return Ok(new SearchdomainSettingsResults() { Settings = null, Success = false, Message = "Searchdomain not found" });
}
catch (Exception ex)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
return Ok(new SearchdomainSettingsResults() { Settings = null, Success = false, Message = ex.Message });
}
SearchdomainSettings settings = searchdomain_.settings;
return Ok(new SearchdomainSettingsResults() { Settings = settings, Success = true });
}
[HttpGet("GetSearchCacheSize")]
public ActionResult<SearchdomainSearchCacheSizeResults> GetSearchCacheSize(string searchdomain)
{
Searchdomain searchdomain_;
try
{
searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
}
catch (SearchdomainNotFoundException)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - it likely does not exist yet", [searchdomain]);
return Ok(new SearchdomainSearchCacheSizeResults() { SearchCacheSizeBytes = null, Success = false, Message = "Searchdomain not found" });
}
catch (Exception ex)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
return Ok(new SearchdomainSearchCacheSizeResults() { SearchCacheSizeBytes = null, Success = false, Message = ex.Message });
}
Dictionary<string, DateTimedSearchResult> searchCache = searchdomain_.searchCache;
long sizeInBytes = 0;
foreach (var entry in searchCache)
{
sizeInBytes += sizeof(int); // string length prefix
sizeInBytes += entry.Key.Length * sizeof(char); // string characters
sizeInBytes += entry.Value.EstimateSize();
}
return Ok(new SearchdomainSearchCacheSizeResults() { SearchCacheSizeBytes = sizeInBytes, Success = true });
}
[HttpGet("ClearSearchCache")]
public ActionResult<SearchdomainInvalidateCacheResults> InvalidateSearchCache(string searchdomain)
{
try
{
Searchdomain searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
(Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
if (searchdomain_ is null || httpStatusCode is not null) return StatusCode(httpStatusCode ?? 500, new SearchdomainUpdateResults(){Success = false, Message = message});
searchdomain_.InvalidateSearchCache();
} catch (SearchdomainNotFoundException)
{
_logger.LogError("Unable to invalidate search cache for searchdomain {searchdomain} - not found", [searchdomain]);
return Ok(new SearchdomainInvalidateCacheResults() { Success = false, Message = $"Unable to invalidate search cache for searchdomain {searchdomain} - not found" });
} catch (Exception ex)
{
_logger.LogError("Unable to invalidate search cache for searchdomain {searchdomain} - Exception: {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
return Ok(new SearchdomainInvalidateCacheResults() { Success = false, Message = $"Unable to invalidate search cache for searchdomain {searchdomain}" });
}
return Ok(new SearchdomainInvalidateCacheResults(){Success = true});
}
[HttpGet("GetDatabaseSize")]
public ActionResult<SearchdomainGetDatabaseSizeResult> GetDatabaseSize(string searchdomain)
/// <summary>
/// Get the disk size of a searchdomain in bytes
/// </summary>
/// <param name="searchdomain">Name of the searchdomain</param>
[HttpGet("Database/Size")]
public ActionResult<SearchdomainGetDatabaseSizeResult> GetDatabaseSize([Required]string searchdomain)
{
Searchdomain searchdomain_;
try
{
searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
}
catch (SearchdomainNotFoundException)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - it likely does not exist yet", [searchdomain]);
return Ok(new SearchdomainGetDatabaseSizeResult() { SearchdomainDatabaseSizeBytes = null, Success = false, Message = "Searchdomain not found" });
}
catch (Exception ex)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
return Ok(new SearchdomainGetDatabaseSizeResult() { SearchdomainDatabaseSizeBytes = null, Success = false, Message = ex.Message });
}
long sizeInBytes = DatabaseHelper.GetSearchdomainDatabaseSize(searchdomain_.helper, searchdomain);
return Ok(new SearchdomainGetDatabaseSizeResult() { SearchdomainDatabaseSizeBytes = sizeInBytes, Success = true });
(Searchdomain? searchdomain_, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_domainManager, searchdomain, _logger);
if (searchdomain_ is null || httpStatusCode is not null) return StatusCode(httpStatusCode ?? 500, new SearchdomainUpdateResults(){Success = false, Message = message});
long EmbeddingCacheUtilization = DatabaseHelper.GetSearchdomainDatabaseSize(searchdomain_.helper, searchdomain);
return Ok(new SearchdomainGetDatabaseSizeResult() { SearchdomainDatabaseSizeBytes = EmbeddingCacheUtilization, Success = true });
}
}

View File

@@ -1,10 +1,11 @@
namespace Server.Controllers;
using System.Text.Json;
using ElmahCore;
using Microsoft.AspNetCore.Mvc;
using Server.Exceptions;
using Microsoft.Extensions.Options;
using Server.Helper;
using Server.Models;
using Shared;
using Shared.Models;
[ApiController]
@@ -14,15 +15,25 @@ public class ServerController : ControllerBase
private readonly ILogger<ServerController> _logger;
private readonly IConfiguration _config;
private AIProvider _aIProvider;
private readonly SearchdomainManager _searchdomainManager;
private readonly IOptions<EmbeddingSearchOptions> _options;
public ServerController(ILogger<ServerController> logger, IConfiguration config, AIProvider aIProvider)
/// <summary>
/// Creates the controller and stores the injected services in its fields.
/// </summary>
/// <param name="logger">Logger for this controller</param>
/// <param name="config">Application configuration</param>
/// <param name="aIProvider">AI provider instance</param>
/// <param name="searchdomainManager">Searchdomain manager; read by the Stats endpoint</param>
/// <param name="options">Server options; read by the Stats endpoint to obtain a SQL helper</param>
public ServerController(ILogger<ServerController> logger, IConfiguration config, AIProvider aIProvider, SearchdomainManager searchdomainManager, IOptions<EmbeddingSearchOptions> options)
{
    (_logger, _config, _aIProvider, _searchdomainManager, _options) =
        (logger, config, aIProvider, searchdomainManager, options);
}
[HttpGet("GetModels")]
/// <summary>
/// Lists the models available to the server
/// </summary>
/// <remarks>
/// Returns ALL models available to the server - not only the embedding models.
/// </remarks>
[HttpGet("Models")]
public ActionResult<ServerGetModelsResult> GetModels()
{
try
@@ -35,4 +46,94 @@ public class ServerController : ControllerBase
return new ServerGetModelsResult() { Success = false, Message = ex.Message};
}
}
/// <summary>
/// Gets numeric info regarding the searchdomains
/// </summary>
/// <returns>
/// Entity count, query cache and embedding cache figures, total database size and managed heap size.
/// Returns an error result when a loaded searchdomain cannot be retrieved, and a 500 result carrying
/// the exception message when anything else fails.
/// </returns>
[HttpGet("Stats")]
public async Task<ActionResult<ServerGetStatsResult>> Stats()
{
    try
    {
        // Embedding cache: estimated bytes, number of cached contents, number of stored vectors.
        long embeddingCacheBytes = 0;
        long embeddingCacheEntries = 0;
        long embeddingVectors = 0;
        foreach (KeyValuePair<string, Dictionary<string, float[]>> cached in _searchdomainManager.embeddingCache)
        {
            embeddingCacheBytes += EstimateEntrySize(cached.Key, cached.Value);
            embeddingCacheEntries++;
            embeddingVectors += cached.Value.Keys.Count;
        }

        // NOTE(review): GetSQLHelper opens a connection; nothing visible here releases it - confirm ownership.
        var sqlHelper = DatabaseHelper.GetSQLHelper(_options.Value);
        var databaseTotalSize = DatabaseHelper.GetTotalDatabaseSize(sqlHelper);
        // Started before the searchdomain loop and awaited after it.
        Task<long> pendingEntityCount = DatabaseHelper.CountEntities(sqlHelper);

        long queryCacheBytes = 0;
        long queryCacheEntries = 0;
        long queryCacheCapacityAll = 0;
        long queryCacheCapacityLoaded = 0;
        foreach (string name in _searchdomainManager.ListSearchdomains())
        {
            if (!SearchdomainHelper.IsSearchdomainLoaded(_searchdomainManager, name))
            {
                // Not loaded: only the configured cache size, read from the database, is counted.
                var storedSettings = DatabaseHelper.GetSearchdomainSettings(sqlHelper, name);
                queryCacheCapacityAll += storedSettings.QueryCacheSize;
                continue;
            }

            (Searchdomain? loaded, int? httpStatusCode, string? message) = SearchdomainHelper.TryGetSearchdomain(_searchdomainManager, name, _logger);
            if (loaded is null || httpStatusCode is not null)
            {
                return StatusCode(httpStatusCode ?? 500, new ServerGetStatsResult(){Success = false, Message = message});
            }

            queryCacheBytes += loaded.GetSearchCacheSize();
            queryCacheEntries += loaded.queryCache.Count;
            queryCacheCapacityAll += loaded.queryCache.Capacity;
            queryCacheCapacityLoaded += loaded.queryCache.Capacity;
        }

        long entityCount = await pendingEntityCount;

        // NOTE(review): a forced blocking collection on every call - presumably so the heap figure
        // excludes garbage awaiting collection; confirm this cost is acceptable for the endpoint.
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
        long managedHeapBytes = GC.GetTotalMemory(false);

        return new ServerGetStatsResult() {
            Success = true,
            EntityCount = entityCount,
            QueryCacheUtilization = queryCacheBytes,
            QueryCacheElementCount = queryCacheEntries,
            QueryCacheMaxElementCountAll = queryCacheCapacityAll,
            QueryCacheMaxElementCountLoadedSearchdomainsOnly = queryCacheCapacityLoaded,
            EmbeddingCacheUtilization = embeddingCacheBytes,
            EmbeddingCacheMaxElementCount = _searchdomainManager.EmbeddingCacheMaxCount,
            EmbeddingCacheElementCount = embeddingCacheEntries,
            EmbeddingsCount = embeddingVectors,
            DatabaseTotalSize = databaseTotalSize,
            RamTotalSize = managedHeapBytes
        };
    }
    catch (Exception ex)
    {
        ElmahExtensions.RaiseError(ex);
        return StatusCode(500, new ServerGetStatsResult(){Success = false, Message = ex.Message});
    }
}
/// <summary>
/// Estimates the size in bytes of one embedding cache entry.
/// </summary>
/// <remarks>
/// Counts a string header plus characters for every key, an array header plus payload for every
/// embedding, and a single object header for the dictionary; nothing else is added.
/// </remarks>
/// <param name="key">Cache key of the entry</param>
/// <param name="value">Embeddings of the entry, keyed by string</param>
/// <returns>The estimated size in bytes</returns>
private static long EstimateEntrySize(string key, Dictionary<string, float[]> value)
{
    // All arithmetic is done in long so that Length * sizeof(...) cannot wrap around in int.
    long stringOverhead = MemorySizes.Align(MemorySizes.ObjectHeader + sizeof(int));
    long arrayOverhead = MemorySizes.ArrayHeader;
    long dictionaryOverhead = MemorySizes.ObjectHeader;

    long size = stringOverhead + (long)key.Length * sizeof(char);
    size += dictionaryOverhead;
    foreach (var kv in value)
    {
        size += stringOverhead + (long)kv.Key.Length * sizeof(char);
        size += arrayOverhead + (long)kv.Value.Length * sizeof(float);
    }
    return size;
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,4 @@
using OllamaSharp;
using OllamaSharp.Models;
using Shared;
namespace Server;
@@ -25,79 +24,103 @@ public class Datapoint
return probMethod.method(probabilities);
}
public static Dictionary<string, float[]> GenerateEmbeddings(string content, List<string> models, AIProvider aIProvider)
/// <summary>
/// Returns the embeddings of <paramref name="content"/> for each requested model, taking them from
/// the embedding cache where present and generating the missing ones.
/// </summary>
/// <param name="content">Text to embed; also the key into the embedding cache</param>
/// <param name="models">Models to return an embedding for</param>
/// <param name="aIProvider">Provider passed on to embedding generation</param>
/// <param name="embeddingCache">Cache mapping content to its per-model embeddings</param>
/// <returns>One embedding per requested model, keyed by model</returns>
public static Dictionary<string, float[]> GetEmbeddings(string content, List<string> models, AIProvider aIProvider, EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache)
{
    Dictionary<string, float[]> embeddings = [];

    // A missing or null cache entry means every requested model has to be generated.
    Dictionary<string, float[]>? cachedForContent =
        embeddingCache.TryGetValue(content, out var cacheEntry) ? cacheEntry : null;

    foreach (string model in models)
    {
        if (cachedForContent is not null
            && cachedForContent.TryGetValue(model, out float[]? cachedEmbedding)
            && cachedEmbedding is not null)
        {
            embeddings[model] = cachedEmbedding;
        }
        else
        {
            embeddings[model] = GenerateEmbeddings(content, model, aIProvider, embeddingCache);
        }
    }
    return embeddings;
}
public static Dictionary<string, float[]> GenerateEmbeddings(List<string> contents, string model, OllamaApiClient ollama, Dictionary<string, Dictionary<string, float[]>> embeddingCache)
public static Dictionary<string, Dictionary<string, float[]>> GetEmbeddings(string[] content, List<string> models, AIProvider aIProvider, EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache)
{
Dictionary<string, float[]> retVal = [];
List<string> remainingContents = new List<string>(contents);
for (int i = contents.Count - 1; i >= 0; i--) // Compare against cache and remove accordingly
{
string content = contents[i];
if (embeddingCache.ContainsKey(model) && embeddingCache[model].ContainsKey(content))
{
retVal[content] = embeddingCache[model][content];
remainingContents.RemoveAt(i);
}
}
if (remainingContents.Count == 0)
{
return retVal;
}
EmbedRequest request = new()
{
Model = model,
Input = remainingContents
};
EmbedResponse response = ollama.EmbedAsync(request).Result;
for (int i = 0; i < response.Embeddings.Count; i++)
{
string content = remainingContents.ElementAt(i);
float[] embeddings = response.Embeddings.ElementAt(i);
retVal[content] = embeddings;
if (!embeddingCache.ContainsKey(model))
{
embeddingCache[model] = [];
}
if (!embeddingCache[model].ContainsKey(content))
{
embeddingCache[model][content] = embeddings;
}
}
return retVal;
}
public static Dictionary<string, float[]> GenerateEmbeddings(string content, List<string> models, AIProvider aIProvider, Dictionary<string, Dictionary<string, float[]>> embeddingCache)
{
Dictionary<string, float[]> retVal = [];
Dictionary<string, Dictionary<string, float[]>> embeddings = [];
foreach (string model in models)
{
if (embeddingCache.ContainsKey(model) && embeddingCache[model].ContainsKey(content))
List<string> toBeGenerated = [];
embeddings[model] = [];
foreach (string value in content)
{
bool generateThisEntry = true;
bool embeddingCacheHasContent = embeddingCache.TryGetValue(value, out var embeddingCacheForContent);
if (embeddingCacheHasContent && embeddingCacheForContent is not null)
{
bool embeddingCacheHasModel = embeddingCacheForContent.TryGetValue(model, out float[]? embedding);
if (embeddingCacheHasModel && embedding is not null)
{
embeddings[model][value] = embedding;
generateThisEntry = false;
}
}
if (generateThisEntry)
{
if (!toBeGenerated.Contains(value))
{
toBeGenerated.Add(value);
}
}
}
if (toBeGenerated.Count == 0)
{
retVal[model] = embeddingCache[model][content];
continue;
}
var response = aIProvider.GenerateEmbeddings(model, [content]);
if (response is not null)
IEnumerable<float[]> generatedEmbeddings = GenerateEmbeddings([.. toBeGenerated], model, aIProvider, embeddingCache);
if (generatedEmbeddings.Count() != toBeGenerated.Count)
{
retVal[model] = response;
if (!embeddingCache.ContainsKey(model))
throw new Exception("Requested embeddings count and generated embeddings count mismatched!");
}
for (int i = 0; i < toBeGenerated.Count; i++)
{
embeddingCache[model] = [];
embeddings[model][toBeGenerated.ElementAt(i)] = generatedEmbeddings.ElementAt(i);
}
if (!embeddingCache[model].ContainsKey(content))
}
return embeddings;
}
/// <summary>
/// Generates embeddings for several contents with one model and records each one in the embedding cache.
/// </summary>
/// <param name="content">Contents to embed</param>
/// <param name="model">Model to generate the embeddings with</param>
/// <param name="aIProvider">Provider that generates the embeddings</param>
/// <param name="embeddingCache">Cache mapping content to its per-model embeddings</param>
/// <returns>The generated embeddings, positionally matching <paramref name="content"/></returns>
/// <exception cref="Exception">The provider returned a different number of embeddings than requested</exception>
public static IEnumerable<float[]> GenerateEmbeddings(string[] content, string model, AIProvider aIProvider, EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache)
{
    // Materialize once: the provider's sequence would otherwise be re-enumerated by the count check
    // and by every positional lookup in the loop below.
    float[][] embeddings = aIProvider.GenerateEmbeddings(model, content).ToArray();
    if (embeddings.Length != content.Length)
    {
        throw new Exception("Resulting embeddings count does not match up with request count");
    }

    for (int i = 0; i < content.Length; i++)
    {
        if (!embeddingCache.TryGetValue(content[i], out Dictionary<string, float[]>? perModel) || perModel is null)
        {
            perModel = [];
            embeddingCache[content[i]] = perModel;
        }
        perModel[model] = embeddings[i];
    }
    return embeddings;
}
/// <summary>
/// Generates the embedding of a single content with one model and records it in the embedding cache.
/// </summary>
/// <param name="content">Content to embed; also the key into the embedding cache</param>
/// <param name="model">Model to generate the embedding with</param>
/// <param name="aIProvider">Provider that generates the embedding</param>
/// <param name="embeddingCache">Cache mapping content to its per-model embeddings</param>
/// <returns>The generated embedding</returns>
public static float[] GenerateEmbeddings(string content, string model, AIProvider aIProvider, EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache)
{
    float[] embeddings = aIProvider.GenerateEmbeddings(model, content);

    // Single lookup; a missing or null entry is replaced by a fresh per-model dictionary.
    if (!embeddingCache.TryGetValue(content, out Dictionary<string, float[]>? perModel) || perModel is null)
    {
        perModel = [];
        embeddingCache[content] = perModel;
    }
    perModel[model] = embeddings;
    return embeddings;
}
}

View File

@@ -1,10 +1,10 @@
FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
WORKDIR /build
COPY . .
RUN dotnet restore ./Server.csproj
RUN dotnet publish ./Server.csproj -c Release -o /output
RUN dotnet restore Server/Server.csproj
RUN dotnet publish Server/Server.csproj -c Release -o /output
FROM mcr.microsoft.com/dotnet/aspnet:8.0 AS final
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS final
WORKDIR /app
COPY --from=build /output .
ENV ASPNETCORE_ENVIRONMENT Docker

View File

@@ -1,9 +1,10 @@
namespace Server;
public class Entity(Dictionary<string, string> attributes, Probmethods.probMethodDelegate probMethod, List<Datapoint> datapoints, string name)
public class Entity(Dictionary<string, string> attributes, Probmethods.probMethodDelegate probMethod, string probMethodName, List<Datapoint> datapoints, string name)
{
public Dictionary<string, string> attributes = attributes;
public Probmethods.probMethodDelegate probMethod = probMethod;
public string probMethodName = probMethodName;
public List<Datapoint> datapoints = datapoints;
public int id;
public string name = name;

View File

@@ -1,7 +1,9 @@
using Shared.Models;
namespace Server.Exceptions;
public class ProbMethodNotFoundException(string probMethod) : Exception($"Unknown probMethod name {probMethod}") { }
public class ProbMethodNotFoundException(ProbMethodEnum probMethod) : Exception($"Unknown probMethod name {probMethod}") { }
public class SimilarityMethodNotFoundException(string similarityMethod) : Exception($"Unknown similarityMethod name \"{similarityMethod}\"") { }
public class SimilarityMethodNotFoundException(SimilarityMethodEnum similarityMethod) : Exception($"Unknown similarityMethod name \"{similarityMethod}\"") { }
public class JSONPathSelectionException(string path, string testedContent) : Exception($"Unable to select tokens using JSONPath {path} for string: {testedContent}.") { }

View File

@@ -0,0 +1,242 @@
using System.Configuration;
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Options;
using OllamaSharp.Models;
using Server.Models;
using Shared;
namespace Server.Helper;
public static class CacheHelper
{
/// <summary>
/// Reads all rows of the embedding_cache table, ordered by idx, into a new LRU cache.
/// </summary>
/// <param name="options">Options used to create the SQLite helper and to size the cache</param>
/// <returns>The loaded cache, with its capacity set to <c>Cache.CacheTopN</c></returns>
/// <exception cref="Exception">A row yielded a null cache key, model key or embedding</exception>
public static EnumerableLruCache<string, Dictionary<string, float[]>> GetEmbeddingStore(EmbeddingSearchOptions options)
{
    SQLiteHelper helper = new(options);

    // While loading, the cache is sized by StoreTopN when configured, otherwise by CacheTopN.
    int loadCapacity = (int)(options.Cache.StoreTopN ?? options.Cache.CacheTopN);
    EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache = new(loadCapacity);

    helper.ExecuteQuery(
        "SELECT cache_key, model_key, embedding, idx FROM embedding_cache ORDER BY idx ASC", [], r =>
        {
            // First GetBytes call (null buffer) yields the blob length, the second copies the blob.
            int blobOrdinal = r.GetOrdinal("embedding");
            int blobLength = (int)r.GetBytes(blobOrdinal, 0, null, 0, 0);
            byte[] blob = new byte[blobLength];
            r.GetBytes(blobOrdinal, 0, blob, 0, blobLength);

            var cacheKey = r.GetString(r.GetOrdinal("cache_key"));
            var modelKey = r.GetString(r.GetOrdinal("model_key"));
            var embedding = SearchdomainHelper.FloatArrayFromBytes(blob);
            // The idx column is read but its value is not used any further.
            _ = r.GetInt32(r.GetOrdinal("idx"));

            if (cacheKey is null || modelKey is null || embedding is null)
            {
                throw new Exception("Unable to get the embedding store due to a returned element being null");
            }

            if (!embeddingCache.TryGetValue(cacheKey, out Dictionary<string, float[]>? perModel) || perModel is null)
            {
                perModel = [];
                embeddingCache[cacheKey] = perModel;
            }
            perModel[modelKey] = embedding;
            return 0;
        }
    );

    embeddingCache.Capacity = (int)options.Cache.CacheTopN;
    return embeddingCache;
}
/// <summary>
/// Brings the persisted embedding store in line with the in-memory embedding cache.
/// </summary>
/// <remarks>
/// The store is loaded, compared against the cache by key and enumeration position, and the
/// differences are applied in this order: delete rows of vanished keys, move rows of keys whose
/// position changed, insert rows of new keys, then insert and delete individual models of keys
/// present on both sides. Rows are moved before new rows are inserted so that a freshly inserted
/// row can never share an idx with a row that still has to be moved.
/// </remarks>
/// <param name="embeddingCache">In-memory cache; its capacity is set to <c>Cache.StoreTopN</c> when that is configured</param>
/// <param name="options">Options used to create the SQLite helper and to load the store</param>
public static async Task UpdateEmbeddingStore(EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache, EmbeddingSearchOptions options)
{
    if (options.Cache.StoreTopN is not null)
    {
        embeddingCache.Capacity = (int)options.Cache.StoreTopN;
    }

    SQLiteHelper helper = new(options);
    EnumerableLruCache<string, Dictionary<string, float[]>> embeddingStore = GetEmbeddingStore(options);

    var (cacheByPosition, cachePositionByKey) = GetCacheMappings(embeddingCache);
    var (storeByPosition, storePositionByKey) = GetCacheMappings(embeddingStore);

    // Keys that exist in the store but no longer in the cache.
    List<int> deletedEntries = [];
    foreach (var (storeEntryIndex, storeEntry) in storeByPosition)
    {
        if (!cachePositionByKey.ContainsKey(storeEntry.Key))
        {
            deletedEntries.Add(storeEntryIndex);
        }
    }

    List<(int Index, KeyValuePair<string, Dictionary<string, float[]>> Entry)> createdEntries = [];
    List<(int Index, int NewIndex)> changedEntries = [];
    List<(int Index, string Model, string Key, float[] Embedding)> addedModels = [];
    List<(int Index, string Model)> removedModels = [];
    foreach (var (cacheEntryIndex, cacheEntry) in cacheByPosition)
    {
        string cacheEntryString = cacheEntry.Key;
        if (!storePositionByKey.TryGetValue(cacheEntryString, out int storeEntryIndex)) // Created
        {
            createdEntries.Add((
                Index: cacheEntryIndex,
                Entry: cacheEntry
            ));
            continue;
        }

        if (cacheEntryIndex != storeEntryIndex) // Changed
        {
            // The row currently sits at the store position and has to end up at the cache position.
            changedEntries.Add((
                Index: storeEntryIndex,
                NewIndex: cacheEntryIndex
            ));
        }

        // Check for new/removed models
        var storeModels = storeByPosition[storeEntryIndex].Value;
        var cacheModels = cacheEntry.Value;

        // Removed models: persisted in the store but gone from the cache
        foreach (var model in storeModels.Keys.Except(cacheModels.Keys))
        {
            removedModels.Add((
                Index: cacheEntryIndex,
                Model: model
            ));
        }

        // New models: present in the cache but not yet persisted
        foreach (var model in cacheModels.Keys.Except(storeModels.Keys))
        {
            addedModels.Add((
                Index: cacheEntryIndex,
                Model: model,
                Key: cacheEntryString,
                Embedding: cacheModels[model]
            ));
        }
    }

    // Applied strictly one after another; the model steps address rows by their cache position,
    // which is only valid once the rows have been moved.
    await RemoveEntriesFromStore(helper, deletedEntries);
    await UpdateEntryIndicesInStore(helper, changedEntries);
    await CreateEntriesInStore(helper, createdEntries);
    await AddModelsToIndices(helper, addedModels);
    await RemoveModelsFromIndices(helper, removedModels);
}
/// <summary>
/// Inserts one embedding_cache row per model of every created entry.
/// </summary>
/// <param name="helper">SQLite helper that executes the bulk statement</param>
/// <param name="createdEntries">Entries to insert, each with the idx its rows are stored under</param>
private static async Task CreateEntriesInStore(
    SQLiteHelper helper,
    List<(int Index, KeyValuePair<string, Dictionary<string, float[]>> Entry)> createdEntries)
{
    // One parameter set per (entry, model) pair; evaluated lazily by the bulk call.
    IEnumerable<object[]> parameterSets =
        from created in createdEntries
        from model in created.Entry.Value
        select new object[]
        {
            new SqliteParameter("@cache_key", created.Entry.Key),
            new SqliteParameter("@model_key", model.Key),
            new SqliteParameter("@embedding", SearchdomainHelper.BytesFromFloatArray(model.Value)),
            new SqliteParameter("@index", created.Index)
        };

    helper.BulkExecuteNonQuery(
        "INSERT INTO embedding_cache (cache_key, model_key, embedding, idx) VALUES (@cache_key, @model_key, @embedding, @index)",
        parameterSets
    );
}
/// <summary>
/// Moves embedding_cache rows from their current idx to a new idx in two passes.
/// </summary>
/// <remarks>
/// Pass one parks every affected row on a temporary negative idx, pass two moves it from there to
/// its target. The temporary value is <c>-(NewIndex + 1)</c> rather than <c>-NewIndex</c>, because
/// negating a target of 0 yields 0 again and would leave that row unprotected against collisions.
/// </remarks>
/// <param name="helper">SQLite helper that executes the bulk statements</param>
/// <param name="changedEntries">Pairs of the idx to match and the idx to assign</param>
private static async Task UpdateEntryIndicesInStore(
    SQLiteHelper helper,
    List<(int Index, int NewIndex)> changedEntries)
{
    // Strictly negative for every non-negative target, including 0.
    static int TemporaryIndex(int newIndex) => -(newIndex + 1);

    helper.BulkExecuteNonQuery(
        "UPDATE embedding_cache SET idx = @newIndex WHERE idx = @index",
        changedEntries.Select(element => new object[]
        {
            new SqliteParameter("@index", element.Index),
            new SqliteParameter("@newIndex", TemporaryIndex(element.NewIndex)) // Negative range prevents in-place update collisions
        })
    );
    helper.BulkExecuteNonQuery(
        "UPDATE embedding_cache SET idx = @newIndex WHERE idx = @index",
        changedEntries.Select(element => new object[]
        {
            new SqliteParameter("@index", TemporaryIndex(element.NewIndex)),
            new SqliteParameter("@newIndex", element.NewIndex) // Move from the temporary slot to the real target
        })
    );
}
/// <summary>
/// Deletes all embedding_cache rows stored under the given idx values.
/// </summary>
/// <param name="helper">SQLite helper that executes the bulk statement</param>
/// <param name="deletedEntries">The idx values whose rows are deleted</param>
private static async Task RemoveEntriesFromStore(
    SQLiteHelper helper,
    List<int> deletedEntries)
{
    // One single-parameter set per idx; evaluated lazily by the bulk call.
    IEnumerable<object[]> parameterSets =
        from index in deletedEntries
        select new object[] { new SqliteParameter("@index", index) };

    helper.BulkExecuteNonQuery("DELETE FROM embedding_cache WHERE idx = @index", parameterSets);
}
/// <summary>
/// Inserts one embedding_cache row for every added model.
/// </summary>
/// <param name="helper">SQLite helper that executes the bulk statement</param>
/// <param name="addedModels">Rows to insert: idx, model key, cache key and embedding</param>
private static async Task AddModelsToIndices(
    SQLiteHelper helper,
    List<(int Index, string Model, string Key, float[] Embedding)> addedModels)
{
    // Maps one added model to the parameter set of its INSERT.
    static object[] ToParameters((int Index, string Model, string Key, float[] Embedding) added) =>
    [
        new SqliteParameter("@cache_key", added.Key),
        new SqliteParameter("@model_key", added.Model),
        new SqliteParameter("@embedding", SearchdomainHelper.BytesFromFloatArray(added.Embedding)),
        new SqliteParameter("@index", added.Index)
    ];

    IEnumerable<object[]> parameterSets = addedModels.Select(ToParameters);
    helper.BulkExecuteNonQuery(
        "INSERT INTO embedding_cache (cache_key, model_key, embedding, idx) VALUES (@cache_key, @model_key, @embedding, @index)",
        parameterSets
    );
}
/// <summary>
/// Deletes the <c>embedding_cache</c> rows identified by an index / model key pair.
/// </summary>
/// <param name="helper">SQLite helper that executes the bulk statement.</param>
/// <param name="removedModels">Index and model key of every row to delete.</param>
private static async Task RemoveModelsFromIndices(
    SQLiteHelper helper,
    List<(int Index, string Model)> removedModels)
{
    const string deleteSql = "DELETE FROM embedding_cache WHERE idx = @index AND model_key = @model";
    var parameterSets = removedModels.Select(removed => new object[]
    {
        new SqliteParameter("@index", removed.Index),
        new SqliteParameter("@model", removed.Model)
    });
    helper.BulkExecuteNonQuery(deleteSql, parameterSets);
}
/// <summary>
/// Enumerates <paramref name="embeddingCache"/> once and builds two lookups:
/// enumeration position (starting at 0) to cache entry, and cache key to position.
/// </summary>
/// <param name="embeddingCache">Cache whose enumeration order defines the positions.</param>
/// <returns>The position-to-entry map and the key-to-position map.</returns>
private static (Dictionary<int, KeyValuePair<string, Dictionary<string, float[]>>> positionToEntry,
    Dictionary<string, int> entryToPosition)
    GetCacheMappings(EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache)
{
    Dictionary<int, KeyValuePair<string, Dictionary<string, float[]>>> byPosition = [];
    Dictionary<string, int> byKey = [];
    foreach (var cached in embeddingCache)
    {
        // Positions are assigned consecutively, so the next one is the number
        // of entries recorded so far.
        int slot = byPosition.Count;
        byPosition[slot] = cached;
        byKey[cached.Key] = slot;
    }
    return (byPosition, byKey);
}
}

View File

@@ -1,6 +1,9 @@
using System.Data.Common;
using System.Text;
using System.Text.Json;
using MySql.Data.MySqlClient;
using Server.Exceptions;
using Server.Models;
using Shared.Models;
namespace Server.Helper;
@@ -9,6 +12,14 @@ public class DatabaseHelper(ILogger<DatabaseHelper> logger)
{
private readonly ILogger<DatabaseHelper> _logger = logger;
/// <summary>
/// Opens a MySQL connection using the configured SQL connection string and
/// wraps it in a new <see cref="SQLHelper"/>.
/// </summary>
/// <param name="embeddingSearchOptions">Options providing <c>ConnectionStrings.SQL</c>.</param>
/// <returns>A helper holding the already opened connection.</returns>
public static SQLHelper GetSQLHelper(EmbeddingSearchOptions embeddingSearchOptions)
{
    string sqlConnectionString = embeddingSearchOptions.ConnectionStrings.SQL;
    var openedConnection = new MySqlConnection(sqlConnectionString);
    openedConnection.Open();
    return new SQLHelper(openedConnection, sqlConnectionString);
}
public static void DatabaseInsertEmbeddingBulk(SQLHelper helper, int id_datapoint, List<(string model, byte[] embedding)> data)
{
Dictionary<string, object> parameters = [];
@@ -28,6 +39,19 @@ public class DatabaseHelper(ILogger<DatabaseHelper> logger)
helper.ExecuteSQLNonQuery(query.ToString(), parameters);
}
/// <summary>
/// Bulk-inserts embeddings, resolving the owning datapoint by hash: for every
/// element one <c>INSERT ... SELECT</c> is executed that adds an embedding row
/// for each datapoint whose <c>hash</c> matches.
/// </summary>
/// <param name="helper">Helper that executes the bulk statement.</param>
/// <param name="data">Datapoint hash, model name and embedding bytes per row.</param>
/// <returns>The value returned by <c>BulkExecuteNonQuery</c> (summed affected rows).</returns>
public static int DatabaseInsertEmbeddingBulk(SQLHelper helper, List<(string hash, string model, byte[] embedding)> data)
{
    const string insertSql = "INSERT INTO embedding (id_datapoint, model, embedding) SELECT d.id, @model, @embedding FROM datapoint d WHERE d.hash = @hash";
    var parameterSets = data.Select(row => new object[]
    {
        new MySqlParameter("@model", row.model),
        new MySqlParameter("@embedding", row.embedding),
        new MySqlParameter("@hash", row.hash)
    });
    return helper.BulkExecuteNonQuery(insertSql, parameterSets);
}
public static int DatabaseInsertSearchdomain(SQLHelper helper, string name, SearchdomainSettings settings = new())
{
Dictionary<string, dynamic> parameters = new()
@@ -38,12 +62,12 @@ public class DatabaseHelper(ILogger<DatabaseHelper> logger)
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO searchdomain (name, settings) VALUES (@name, @settings)", parameters);
}
public static int DatabaseInsertEntity(SQLHelper helper, string name, string probmethod, int id_searchdomain)
public static int DatabaseInsertEntity(SQLHelper helper, string name, ProbMethodEnum probmethod, int id_searchdomain)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "probmethod", probmethod },
{ "probmethod", probmethod.ToString() },
{ "id_searchdomain", id_searchdomain }
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO entity (name, probmethod, id_searchdomain) VALUES (@name, @probmethod, @id_searchdomain)", parameters);
@@ -60,13 +84,39 @@ public class DatabaseHelper(ILogger<DatabaseHelper> logger)
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO attribute (attribute, value, id_entity) VALUES (@attribute, @value, @id_entity)", parameters);
}
public static int DatabaseInsertDatapoint(SQLHelper helper, string name, string probmethod_embedding, string similarityMethod, string hash, int id_entity)
/// <summary>
/// Bulk-inserts attribute rows (<c>attribute</c>, <c>value</c>, <c>id_entity</c>).
/// </summary>
/// <param name="helper">Helper that executes the bulk statement.</param>
/// <param name="values">Attribute name, attribute value and owning entity id per row.</param>
/// <returns>The value returned by <c>BulkExecuteNonQuery</c> (summed affected rows).</returns>
public static int DatabaseInsertAttributes(SQLHelper helper, List<(string attribute, string value, int id_entity)> values)
{
    const string insertSql = "INSERT INTO attribute (attribute, value, id_entity) VALUES (@attribute, @value, @id_entity)";
    var parameterSets = values.Select(row => new object[]
    {
        new MySqlParameter("@attribute", row.attribute),
        new MySqlParameter("@value", row.value),
        new MySqlParameter("@id_entity", row.id_entity)
    });
    return helper.BulkExecuteNonQuery(insertSql, parameterSets);
}
/// <summary>
/// Bulk-inserts datapoint rows that all belong to the entity <paramref name="id_entity"/>.
/// </summary>
/// <param name="helper">Helper that executes the bulk statement.</param>
/// <param name="values">Name, probability method, similarity method and hash per datapoint.</param>
/// <param name="id_entity">Id of the entity owning every inserted datapoint.</param>
/// <returns>The value returned by <c>BulkExecuteNonQuery</c> (summed affected rows).</returns>
public static int DatabaseInsertDatapoints(SQLHelper helper, List<(string name, ProbMethodEnum probmethod_embedding, SimilarityMethodEnum similarityMethod, string hash)> values, int id_entity)
{
    return helper.BulkExecuteNonQuery(
        "INSERT INTO datapoint (name, probmethod_embedding, similaritymethod, hash, id_entity) VALUES (@name, @probmethod_embedding, @similaritymethod, @hash, @id_entity)",
        values.Select(element => new object[] {
            new MySqlParameter("@name", element.name),
            // Bind the enum *names*, exactly like the single-row DatabaseInsertDatapoint
            // and DatabaseInsertEntity do, so both insert paths store the same
            // textual representation instead of a driver-chosen encoding of the enum.
            new MySqlParameter("@probmethod_embedding", element.probmethod_embedding.ToString()),
            new MySqlParameter("@similaritymethod", element.similarityMethod.ToString()),
            new MySqlParameter("@hash", element.hash),
            new MySqlParameter("@id_entity", id_entity)
        })
    );
}
public static int DatabaseInsertDatapoint(SQLHelper helper, string name, ProbMethodEnum probmethod_embedding, SimilarityMethodEnum similarityMethod, string hash, int id_entity)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "probmethod_embedding", probmethod_embedding },
{ "similaritymethod", similarityMethod },
{ "probmethod_embedding", probmethod_embedding.ToString() },
{ "similaritymethod", similarityMethod.ToString() },
{ "hash", hash },
{ "id_entity", id_entity }
};
@@ -132,7 +182,7 @@ public class DatabaseHelper(ILogger<DatabaseHelper> logger)
helper.ExecuteSQLNonQuery("DELETE embedding.* FROM embedding JOIN datapoint dp ON id_datapoint = dp.id JOIN entity ON id_entity = entity.id WHERE entity.id_searchdomain = @searchdomain", parameters);
helper.ExecuteSQLNonQuery("DELETE datapoint.* FROM datapoint JOIN entity ON id_entity = entity.id WHERE entity.id_searchdomain = @searchdomain", parameters);
helper.ExecuteSQLNonQuery("DELETE attribute.* FROM attribute JOIN entity ON id_entity = entity.id WHERE entity.id_searchdomain = @searchdomain", parameters);
helper.ExecuteSQLNonQuery("DELETE FROM attribute WHERE id_entity IN (SELECT entity.id FROM entity WHERE id_searchdomain = @searchdomain)", parameters);
return helper.ExecuteSQLNonQuery("DELETE FROM entity WHERE entity.id_searchdomain = @searchdomain", parameters);
}
@@ -211,4 +261,59 @@ public class DatabaseHelper(ILogger<DatabaseHelper> logger)
return result;
}
/// <summary>
/// Returns <c>SUM(Data_length)</c> over <c>information_schema.tables</c>,
/// or 0 when the query yields no row or a NULL sum.
/// </summary>
/// <param name="helper">Helper used to run the query.</param>
public static long GetTotalDatabaseSize(SQLHelper helper)
{
    Dictionary<string, dynamic> noParameters = [];
    DbDataReader sizeReader = helper.ExecuteSQLCommand("SELECT SUM(Data_length) FROM information_schema.tables", noParameters);
    try
    {
        if (!sizeReader.Read() || sizeReader.IsDBNull(0))
        {
            return 0;
        }
        return sizeReader.GetInt64(0);
    }
    finally
    {
        // The reader is closed on every path, including exceptions.
        sizeReader.Close();
    }
}
/// <summary>
/// Returns the number of rows in the <c>entity</c> table, or 0 when the query
/// yields no row or a NULL value.
/// </summary>
/// <param name="helper">Helper used to run the query.</param>
public static async Task<long> CountEntities(SQLHelper helper)
{
    DbDataReader searchdomainSumReader = helper.ExecuteSQLCommand("SELECT COUNT(*) FROM entity;", []);
    try
    {
        bool success = searchdomainSumReader.Read();
        return success && !searchdomainSumReader.IsDBNull(0) ? searchdomainSumReader.GetInt64(0) : 0;
    }
    finally
    {
        // Close the reader even when reading throws: SQLHelper waits for the
        // previous reader to be closed before it runs the next command.
        searchdomainSumReader.Close();
    }
}
/// <summary>
/// Returns the number of entities that belong to the searchdomain named
/// <paramref name="searchdomain"/>, or 0 when the query yields no row or a NULL value.
/// </summary>
/// <param name="helper">Helper used to run the query.</param>
/// <param name="searchdomain">Name of the searchdomain.</param>
public static long CountEntitiesForSearchdomain(SQLHelper helper, string searchdomain)
{
    Dictionary<string, dynamic> parameters = new()
    {
        { "searchdomain", searchdomain}
    };
    DbDataReader searchdomainSumReader = helper.ExecuteSQLCommand("SELECT COUNT(*) FROM entity e JOIN searchdomain s on e.id_searchdomain = s.id WHERE e.id_searchdomain = s.id AND s.name = @searchdomain;", parameters);
    try
    {
        bool success = searchdomainSumReader.Read();
        return success && !searchdomainSumReader.IsDBNull(0) ? searchdomainSumReader.GetInt64(0) : 0;
    }
    finally
    {
        // Close the reader even when reading throws: SQLHelper waits for the
        // previous reader to be closed before it runs the next command.
        searchdomainSumReader.Close();
    }
}
/// <summary>
/// Loads the <c>settings</c> column of the searchdomain named
/// <paramref name="searchdomain"/> and deserializes it from JSON.
/// </summary>
/// <param name="helper">Helper used to run the query.</param>
/// <param name="searchdomain">Name of the searchdomain.</param>
/// <returns>The deserialized settings.</returns>
public static SearchdomainSettings GetSearchdomainSettings(SQLHelper helper, string searchdomain)
{
    Dictionary<string, dynamic> queryParameters = new()
    {
        { "name", searchdomain }
    };
    DbDataReader settingsReader = helper.ExecuteSQLCommand("SELECT settings from searchdomain WHERE name = @name", queryParameters);
    try
    {
        // NOTE(review): the result of Read() is not checked, so a missing
        // searchdomain surfaces as whatever GetString throws without a current row.
        settingsReader.Read();
        string settingsJson = settingsReader.GetString(0);
        return JsonSerializer.Deserialize<SearchdomainSettings>(settingsJson);
    }
    finally
    {
        settingsReader.Close();
    }
}
}

View File

@@ -1,3 +1,4 @@
using System.Data;
using System.Data.Common;
using MySql.Data.MySqlClient;
@@ -6,6 +7,7 @@ namespace Server.Helper;
public class SQLHelper:IDisposable
{
public MySqlConnection connection;
public DbDataReader? dbDataReader;
public string connectionString;
public SQLHelper(MySqlConnection connection, string connectionString)
{
@@ -30,13 +32,15 @@ public class SQLHelper:IDisposable
lock (connection)
{
EnsureConnected();
EnsureDbReaderIsClosed();
using MySqlCommand command = connection.CreateCommand();
command.CommandText = query;
foreach (KeyValuePair<string, dynamic> parameter in parameters)
{
command.Parameters.AddWithValue($"@{parameter.Key}", parameter.Value);
}
return command.ExecuteReader();
dbDataReader = command.ExecuteReader();
return dbDataReader;
}
}
@@ -45,6 +49,7 @@ public class SQLHelper:IDisposable
lock (connection)
{
EnsureConnected();
EnsureDbReaderIsClosed();
using MySqlCommand command = connection.CreateCommand();
command.CommandText = query;
@@ -61,6 +66,7 @@ public class SQLHelper:IDisposable
lock (connection)
{
EnsureConnected();
EnsureDbReaderIsClosed();
using MySqlCommand command = connection.CreateCommand();
command.CommandText = query;
@@ -74,6 +80,33 @@ public class SQLHelper:IDisposable
}
}
/// <summary>
/// Executes <paramref name="sql"/> once per parameter set inside a single
/// transaction and commits after the last set.
/// </summary>
/// <param name="sql">Statement to execute for every parameter set.</param>
/// <param name="parameterSets">One array of provider parameters per execution.</param>
/// <returns>The sum of the affected-row counts of all executions.</returns>
public int BulkExecuteNonQuery(string sql, IEnumerable<object[]> parameterSets)
{
    lock (connection)
    {
        EnsureConnected();
        EnsureDbReaderIsClosed();

        using var tx = connection.BeginTransaction();
        using var batchCommand = connection.CreateCommand();
        batchCommand.Transaction = tx;
        batchCommand.CommandText = sql;

        int total = 0;
        foreach (object[] parameterSet in parameterSets)
        {
            // The same command is reused; only its parameters are swapped.
            batchCommand.Parameters.Clear();
            batchCommand.Parameters.AddRange(parameterSet);
            total += batchCommand.ExecuteNonQuery();
        }

        tx.Commit();
        return total;
    }
}
public bool EnsureConnected()
{
if (connection.State != System.Data.ConnectionState.Open)
@@ -83,11 +116,29 @@ public class SQLHelper:IDisposable
connection.Close();
connection.Open();
}
catch (Exception)
catch (Exception ex)
{
throw; // TODO add logging here
ElmahCore.ElmahExtensions.RaiseError(ex);
throw;
}
}
return true;
}
/// <summary>
/// Blocks until the reader stored in <c>dbDataReader</c> is closed (or no reader
/// is stored), polling every 10 ms.
/// </summary>
/// <exception cref="TimeoutException">
/// The reader is still open after roughly 5 seconds of polling. The exception is
/// also reported through Elmah before it is thrown.
/// </exception>
public void EnsureDbReaderIsClosed()
{
    const int sleepTime = 10;
    const int timeout = 5000;
    int counter = 0;
    while (!(dbDataReader?.IsClosed ?? true))
    {
        if (counter > timeout / sleepTime)
        {
            TimeoutException ex = new("Unable to ensure dbDataReader is closed");
            ElmahCore.ElmahExtensions.RaiseError(ex);
            throw ex;
        }
        Thread.Sleep(sleepTime);
        // Count the polls; without this the timeout check above could never
        // trigger and a reader that is never closed would block forever.
        counter++;
    }
}
}

View File

@@ -0,0 +1,76 @@
using System.Data;
using System.Data.Common;
using Microsoft.Data.Sqlite;
using Server.Models;
using MySql.Data.MySqlClient;
using System.Configuration;
namespace Server.Helper;
/// <summary>
/// <see cref="SqlHelper"/> specialization for SQLite connections
/// (used with the <c>ConnectionStrings.Cache</c> connection string).
/// </summary>
public class SQLiteHelper : SqlHelper, IDisposable
{
    /// <summary>Wraps an existing connection.</summary>
    /// <param name="connection">Connection the helper operates on.</param>
    /// <param name="connectionString">Connection string used when duplicating the connection.</param>
    public SQLiteHelper(DbConnection connection, string connectionString) : base(connection, connectionString)
    {
        Connection = connection;
        ConnectionString = connectionString;
    }

    /// <summary>
    /// Creates a helper with a new <see cref="SqliteConnection"/> for the configured
    /// cache connection string.
    /// </summary>
    /// <param name="options">Options providing <c>ConnectionStrings.Cache</c>.</param>
    /// <exception cref="ConfigurationErrorsException">The cache connection string is null.</exception>
    /// <remarks>
    /// The connection string is validated before any connection object is created,
    /// and exactly one <see cref="SqliteConnection"/> is allocated.
    /// </remarks>
    public SQLiteHelper(EmbeddingSearchOptions options)
        : this(new SqliteConnection(GetRequiredCacheConnectionString(options)), GetRequiredCacheConnectionString(options))
    {
    }

    /// <summary>Returns the cache connection string or throws when it is not configured.</summary>
    private static string GetRequiredCacheConnectionString(EmbeddingSearchOptions options)
    {
        return options.ConnectionStrings.Cache
            ?? throw new ConfigurationErrorsException("Cache options must not be null when instantiating SQLiteHelper");
    }

    /// <summary>
    /// Creates a second helper with its own (not yet opened) connection built from
    /// the same connection string.
    /// </summary>
    public override SQLiteHelper DuplicateConnection()
    {
        SqliteConnection newConnection = new(ConnectionString);
        return new SQLiteHelper(newConnection, ConnectionString);
    }

    /// <summary>
    /// Executes <paramref name="query"/> and returns SQLite's
    /// <c>last_insert_rowid()</c> for this connection.
    /// </summary>
    /// <param name="query">Insert statement to execute.</param>
    /// <param name="parameters">Provider parameters for the statement.</param>
    /// <returns>The last inserted row id converted to <see cref="int"/>.</returns>
    public override int ExecuteSQLCommandGetInsertedID(string query, object[] parameters)
    {
        lock (Connection)
        {
            EnsureConnected();
            EnsureDbReaderIsClosed();
            using DbCommand command = Connection.CreateCommand();
            command.CommandText = query;
            command.Parameters.AddRange(parameters);
            command.ExecuteNonQuery();
            // Same command and connection, so the row id belongs to the insert above.
            command.CommandText = "SELECT last_insert_rowid();";
            return Convert.ToInt32(command.ExecuteScalar());
        }
    }

    /// <summary>
    /// Executes <paramref name="sql"/> once per parameter set inside a single
    /// transaction and commits after the last set.
    /// </summary>
    /// <param name="sql">Statement to execute for every parameter set.</param>
    /// <param name="parameterSets">One array of provider parameters per execution.</param>
    /// <returns>The sum of the affected-row counts of all executions.</returns>
    public int BulkExecuteNonQuery(string sql, IEnumerable<object[]> parameterSets)
    {
        lock (Connection)
        {
            EnsureConnected();
            EnsureDbReaderIsClosed();

            using var transaction = Connection.BeginTransaction();
            using var command = Connection.CreateCommand();

            command.CommandText = sql;
            command.Transaction = transaction;

            int affectedRows = 0;
            foreach (var parameters in parameterSets)
            {
                // The command is reused; only its parameters change per execution.
                command.Parameters.Clear();
                command.Parameters.AddRange(parameters);
                affectedRows += command.ExecuteNonQuery();
            }

            transaction.Commit();
            return affectedRows;
        }
    }
}

View File

@@ -2,7 +2,9 @@ using System.Collections.Concurrent;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using AdaptiveExpressions;
using Server.Exceptions;
using Shared;
using Shared.Models;
namespace Server.Helper;
@@ -14,7 +16,7 @@ public class SearchdomainHelper(ILogger<SearchdomainHelper> logger, DatabaseHelp
public static byte[] BytesFromFloatArray(float[] floats)
{
var byteArray = new byte[floats.Length * 4];
var byteArray = new byte[floats.Length * sizeof(float)];
var floatArray = floats.ToArray();
Buffer.BlockCopy(floatArray, 0, byteArray, 0, byteArray.Length);
return byteArray;
@@ -22,7 +24,7 @@ public class SearchdomainHelper(ILogger<SearchdomainHelper> logger, DatabaseHelp
// Reinterprets a byte array as floats: allocates one float per 4 input bytes and
// block-copies all input bytes into it.
// NOTE(review): two declarations of floatArray follow. They look like the removed
// ("/ 4") and added ("/ sizeof(float)") lines of a diff rendered without +/- markers.
// NOTE(review): bytes.Length is copied in full while the target holds
// bytes.Length / 4 floats; presumably callers always pass a multiple of 4 - confirm.
public static float[] FloatArrayFromBytes(byte[] bytes)
{
var floatArray = new float[bytes.Length / 4];
var floatArray = new float[bytes.Length / sizeof(float)];
Buffer.BlockCopy(bytes, 0, floatArray, 0, bytes.Length);
return floatArray;
}
@@ -44,36 +46,59 @@ public class SearchdomainHelper(ILogger<SearchdomainHelper> logger, DatabaseHelp
return null;
}
public List<Entity>? EntitiesFromJSON(List<Entity> entityCache, Dictionary<string, Dictionary<string, float[]>> embeddingCache, AIProvider aIProvider, SQLHelper helper, ILogger logger, string json)
public List<Entity>? EntitiesFromJSON(SearchdomainManager searchdomainManager, ILogger logger, string json)
{
EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache = searchdomainManager.embeddingCache;
AIProvider aIProvider = searchdomainManager.aIProvider;
SQLHelper helper = searchdomainManager.helper;
List<JSONEntity>? jsonEntities = JsonSerializer.Deserialize<List<JSONEntity>>(json);
if (jsonEntities is null)
{
return null;
}
// toBeCached: model -> [datapoint.text * n]
// Prefetch embeddings
Dictionary<string, List<string>> toBeCached = [];
Dictionary<string, List<string>> toBeCachedParallel = [];
foreach (JSONEntity jSONEntity in jsonEntities)
{
Dictionary<string, List<string>> targetDictionary = toBeCached;
if (searchdomainManager.GetSearchdomain(jSONEntity.Searchdomain).settings.ParallelEmbeddingsPrefetch)
{
targetDictionary = toBeCachedParallel;
}
foreach (JSONDatapoint datapoint in jSONEntity.Datapoints)
{
foreach (string model in datapoint.Model)
{
if (!toBeCached.ContainsKey(model))
if (!targetDictionary.ContainsKey(model))
{
toBeCached[model] = [];
targetDictionary[model] = [];
}
toBeCached[model].Add(datapoint.Text);
targetDictionary[model].Add(datapoint.Text);
}
}
}
foreach (var toBeCachedKV in toBeCached)
{
string model = toBeCachedKV.Key;
List<string> uniqueStrings = [.. toBeCachedKV.Value.Distinct()];
Datapoint.GetEmbeddings([.. uniqueStrings], [model], aIProvider, embeddingCache);
}
Parallel.ForEach(toBeCachedParallel, toBeCachedParallelKV =>
{
string model = toBeCachedParallelKV.Key;
List<string> uniqueStrings = [.. toBeCachedParallelKV.Value.Distinct()];
Datapoint.GetEmbeddings([.. uniqueStrings], [model], aIProvider, embeddingCache);
});
// Index/parse the entities
ConcurrentQueue<Entity> retVal = [];
ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = 16 }; // <-- This is needed! Otherwise if we try to index 100+ entities at once, it spawns 100 threads, exploding the SQL pool
Parallel.ForEach(jsonEntities, parallelOptions, jSONEntity =>
{
using var tempHelper = helper.DuplicateConnection();
var entity = EntityFromJSON(entityCache, embeddingCache, aIProvider, tempHelper, logger, jSONEntity);
var entity = EntityFromJSON(searchdomainManager, logger, jSONEntity);
if (entity is not null)
{
retVal.Enqueue(entity);
@@ -82,71 +107,220 @@ public class SearchdomainHelper(ILogger<SearchdomainHelper> logger, DatabaseHelp
return [.. retVal];
}
public Entity? EntityFromJSON(List<Entity> entityCache, Dictionary<string, Dictionary<string, float[]>> embeddingCache, AIProvider aIProvider, SQLHelper helper, ILogger logger, JSONEntity jsonEntity) //string json)
public Entity? EntityFromJSON(SearchdomainManager searchdomainManager, ILogger logger, JSONEntity jsonEntity) //string json)
{
using SQLHelper helper = searchdomainManager.helper.DuplicateConnection();
Searchdomain searchdomain = searchdomainManager.GetSearchdomain(jsonEntity.Searchdomain);
List<Entity> entityCache = searchdomain.entityCache;
AIProvider aIProvider = searchdomain.aIProvider;
EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache = searchdomain.embeddingCache;
Entity? preexistingEntity = entityCache.FirstOrDefault(entity => entity.name == jsonEntity.Name);
bool invalidateSearchCache = false;
if (preexistingEntity is not null)
{
Dictionary<string, Dictionary<string, float[]>> embeddingsLUT = []; // embeddingsLUT: hash -> model -> [embeddingValues * n]
int? preexistingEntityID = _databaseHelper.GetEntityID(helper, jsonEntity.Name, jsonEntity.Searchdomain);
if (preexistingEntityID is not null)
if (preexistingEntityID is null)
{
lock (helper.connection) // TODO change this to helper and do A/B tests (i.e. before/after)
_logger.LogCritical("Unable to index entity {jsonEntity.Name} because it already exists in the searchdomain but not in the database.", [jsonEntity.Name]);
throw new Exception($"Unable to index entity {jsonEntity.Name} because it already exists in the searchdomain but not in the database.");
}
Dictionary<string, string> attributes = jsonEntity.Attributes;
// Attribute
foreach (KeyValuePair<string, string> attributesKV in preexistingEntity.attributes.ToList())
{
string oldAttributeKey = attributesKV.Key;
string oldAttribute = attributesKV.Value;
bool newHasAttribute = jsonEntity.Attributes.TryGetValue(oldAttributeKey, out string? newAttribute);
if (newHasAttribute && newAttribute is not null && newAttribute != oldAttribute)
{
// Attribute - Updated
Dictionary<string, dynamic> parameters = new()
{
{ "id", preexistingEntityID }
{ "newValue", newAttribute },
{ "entityId", preexistingEntityID },
{ "attribute", oldAttributeKey}
};
System.Data.Common.DbDataReader reader = helper.ExecuteSQLCommand("SELECT e.model, e.embedding, d.hash FROM datapoint d JOIN embedding e ON d.id = e.id_datapoint WHERE d.id_entity = @id", parameters);
while (reader.Read())
helper.ExecuteSQLNonQuery("UPDATE attribute SET value=@newValue WHERE id_entity=@entityId AND attribute=@attribute", parameters);
preexistingEntity.attributes[oldAttributeKey] = newAttribute;
} else if (!newHasAttribute)
{
string model = reader.GetString(0);
long length = reader.GetBytes(1, 0, null, 0, 0);
byte[] embeddingBytes = new byte[length];
reader.GetBytes(1, 0, embeddingBytes, 0, (int)length);
float[] embeddingValues = FloatArrayFromBytes(embeddingBytes);
string hash = reader.GetString(2);
if (!embeddingsLUT.ContainsKey(hash))
// Attribute - Deleted
Dictionary<string, dynamic> parameters = new()
{
embeddingsLUT[hash] = [];
{ "entityId", preexistingEntityID },
{ "attribute", oldAttributeKey}
};
helper.ExecuteSQLNonQuery("DELETE FROM attribute WHERE id_entity=@entityId AND attribute=@attribute", parameters);
preexistingEntity.attributes.Remove(oldAttributeKey);
}
embeddingsLUT[hash].TryAdd(model, embeddingValues);
}
reader.Close();
}
_databaseHelper.RemoveEntity(entityCache, helper, jsonEntity.Name, jsonEntity.Searchdomain); // TODO only remove entity if there is actually a change somewhere. Perhaps create 3 datapoint lists to operate with: 1. delete, 2. update, 3. create
}
int id_entity = DatabaseHelper.DatabaseInsertEntity(helper, jsonEntity.Name, jsonEntity.Probmethod, _databaseHelper.GetSearchdomainID(helper, jsonEntity.Searchdomain));
foreach (KeyValuePair<string, string> attribute in jsonEntity.Attributes)
foreach (var attributesKV in jsonEntity.Attributes)
{
DatabaseHelper.DatabaseInsertAttribute(helper, attribute.Key, attribute.Value, id_entity); // TODO implement bulk insert to reduce number of queries
string newAttributeKey = attributesKV.Key;
string newAttribute = attributesKV.Value;
bool preexistingHasAttribute = preexistingEntity.attributes.TryGetValue(newAttributeKey, out string? preexistingAttribute);
if (!preexistingHasAttribute)
{
// Attribute - New
DatabaseHelper.DatabaseInsertAttribute(helper, newAttributeKey, newAttribute, (int)preexistingEntityID);
preexistingEntity.attributes.Add(newAttributeKey, newAttribute);
}
}
// Datapoint
foreach (Datapoint datapoint_ in preexistingEntity.datapoints.ToList())
{
Datapoint datapoint = datapoint_; // To enable replacing the datapoint reference as foreach iterators cannot be overwritten
bool newEntityHasDatapoint = jsonEntity.Datapoints.Any(x => x.Name == datapoint.name);
if (!newEntityHasDatapoint)
{
// Datapoint - Deleted
Dictionary<string, dynamic> parameters = new()
{
{ "datapointName", datapoint.name },
{ "entityId", preexistingEntityID}
};
helper.ExecuteSQLNonQuery("DELETE e FROM embedding e JOIN datapoint d ON e.id_datapoint=d.id WHERE d.name=@datapointName AND d.id_entity=@entityId", parameters);
helper.ExecuteSQLNonQuery("DELETE FROM datapoint WHERE id_entity=@entityId AND name=@datapointName", parameters);
preexistingEntity.datapoints.Remove(datapoint);
invalidateSearchCache = true;
} else
{
JSONDatapoint? newEntityDatapoint = jsonEntity.Datapoints.FirstOrDefault(x => x.Name == datapoint.name);
if (newEntityDatapoint is not null && newEntityDatapoint.Text is not null)
{
// Datapoint - Updated (text)
Dictionary<string, dynamic> parameters = new()
{
{ "datapointName", datapoint.name },
{ "entityId", preexistingEntityID}
};
helper.ExecuteSQLNonQuery("DELETE e FROM embedding e JOIN datapoint d ON e.id_datapoint=d.id WHERE d.name=@datapointName AND d.id_entity=@entityId", parameters);
helper.ExecuteSQLNonQuery("DELETE FROM datapoint WHERE id_entity=@entityId AND name=@datapointName", parameters);
preexistingEntity.datapoints.Remove(datapoint);
Datapoint newDatapoint = DatabaseInsertDatapointWithEmbeddings(helper, searchdomain, newEntityDatapoint, (int)preexistingEntityID);
preexistingEntity.datapoints.Add(newDatapoint);
datapoint = newDatapoint;
invalidateSearchCache = true;
}
if (newEntityDatapoint is not null && (newEntityDatapoint.Probmethod_embedding != datapoint.probMethod.probMethodEnum || newEntityDatapoint.SimilarityMethod != datapoint.similarityMethod.similarityMethodEnum))
{
// Datapoint - Updated (probmethod or similaritymethod)
Dictionary<string, dynamic> parameters = new()
{
{ "probmethod", newEntityDatapoint.Probmethod_embedding.ToString() },
{ "similaritymethod", newEntityDatapoint.SimilarityMethod.ToString() },
{ "datapointName", datapoint.name },
{ "entityId", preexistingEntityID}
};
helper.ExecuteSQLNonQuery("UPDATE datapoint SET probmethod_embedding=@probmethod, similaritymethod=@similaritymethod WHERE id_entity=@entityId AND name=@datapointName", parameters);
Datapoint preexistingDatapoint = preexistingEntity.datapoints.First(x => x == datapoint); // The for loop is a copy. This retrieves the original such that it can be updated.
preexistingDatapoint.probMethod = new(newEntityDatapoint.Probmethod_embedding, _logger);
preexistingDatapoint.similarityMethod = new(newEntityDatapoint.SimilarityMethod, _logger);
invalidateSearchCache = true;
}
}
}
foreach (JSONDatapoint jsonDatapoint in jsonEntity.Datapoints)
{
bool oldEntityHasDatapoint = preexistingEntity.datapoints.Any(x => x.name == jsonDatapoint.Name);
if (!oldEntityHasDatapoint)
{
// Datapoint - New
Datapoint datapoint = DatabaseInsertDatapointWithEmbeddings(helper, searchdomain, jsonDatapoint, (int)preexistingEntityID);
preexistingEntity.datapoints.Add(datapoint);
invalidateSearchCache = true;
}
}
if (invalidateSearchCache)
{
searchdomain.ReconciliateOrInvalidateCacheForNewOrUpdatedEntity(preexistingEntity);
}
searchdomain.UpdateModelsInUse();
return preexistingEntity;
}
else
{
int id_entity = DatabaseHelper.DatabaseInsertEntity(helper, jsonEntity.Name, jsonEntity.Probmethod, _databaseHelper.GetSearchdomainID(helper, jsonEntity.Searchdomain));
List<(string attribute, string value, int id_entity)> toBeInsertedAttributes = [];
foreach (KeyValuePair<string, string> attribute in jsonEntity.Attributes)
{
toBeInsertedAttributes.Add(new() {
attribute = attribute.Key,
value = attribute.Value,
id_entity = id_entity
});
}
DatabaseHelper.DatabaseInsertAttributes(helper, toBeInsertedAttributes);
List<Datapoint> datapoints = [];
List<(JSONDatapoint datapoint, string hash)> toBeInsertedDatapoints = [];
foreach (JSONDatapoint jsonDatapoint in jsonEntity.Datapoints)
{
string hash = Convert.ToBase64String(SHA256.HashData(Encoding.UTF8.GetBytes(jsonDatapoint.Text)));
Dictionary<string, float[]> embeddings = [];
if (embeddingsLUT.ContainsKey(hash))
toBeInsertedDatapoints.Add(new()
{
Dictionary<string, float[]> hashLUT = embeddingsLUT[hash];
foreach (string model in jsonDatapoint.Model)
{
if (hashLUT.ContainsKey(model))
{
embeddings.Add(model, hashLUT[model]);
datapoint = jsonDatapoint,
hash = hash
});
}
else
List<Datapoint> datapoint = DatabaseInsertDatapointsWithEmbeddings(helper, searchdomain, toBeInsertedDatapoints, id_entity);
var probMethod = Probmethods.GetMethod(jsonEntity.Probmethod) ?? throw new ProbMethodNotFoundException(jsonEntity.Probmethod);
Entity entity = new(jsonEntity.Attributes, probMethod, jsonEntity.Probmethod.ToString(), datapoints, jsonEntity.Name)
{
var additionalEmbeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.Text, [model], aIProvider, embeddingCache);
embeddings.Add(model, additionalEmbeddings.First().Value);
id = id_entity
};
entityCache.Add(entity);
searchdomain.ReconciliateOrInvalidateCacheForNewOrUpdatedEntity(entity);
searchdomain.UpdateModelsInUse();
return entity;
}
}
}
else
// Builds a Datapoint for every (JSON datapoint, hash) pair, collects the datapoint
// rows and their embedding rows, then writes both collections with one bulk insert
// each and returns the built datapoints.
// NOTE(review): several lines below reference names that are not declared in this
// method (embeddings, jsonDatapoint, aIProvider, hash) and redeclare `datapoint`;
// they look like removed diff lines rendered without +/- markers.
// NOTE(review): BuildDatapointFromJsonDatapoint itself calls
// DatabaseHelper.DatabaseInsertDatapoint, and DatabaseInsertDatapoints below inserts
// the same datapoints again - confirm that rows are not written twice.
public List<Datapoint> DatabaseInsertDatapointsWithEmbeddings(SQLHelper helper, Searchdomain searchdomain, List<(JSONDatapoint datapoint, string hash)> values, int id_entity)
{
embeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.Text, [.. jsonDatapoint.Model], aIProvider, embeddingCache);
List<Datapoint> result = [];
List<(string name, ProbMethodEnum probmethod_embedding, SimilarityMethodEnum similarityMethod, string hash)> toBeInsertedDatapoints = [];
List<(string hash, string model, byte[] embedding)> toBeInsertedEmbeddings = [];
foreach ((JSONDatapoint datapoint, string hash) value in values)
{
Datapoint datapoint = BuildDatapointFromJsonDatapoint(value.datapoint, id_entity, searchdomain, value.hash);
// Row for the datapoint table.
toBeInsertedDatapoints.Add(new()
{
name = datapoint.name,
probmethod_embedding = datapoint.probMethod.probMethodEnum,
similarityMethod = datapoint.similarityMethod.similarityMethodEnum,
hash = value.hash
});
// One embedding row per (model, vector) pair, keyed by the datapoint hash.
foreach ((string, float[]) embedding in datapoint.embeddings)
{
toBeInsertedEmbeddings.Add(new()
{
hash = value.hash,
model = embedding.Item1,
embedding = BytesFromFloatArray(embedding.Item2)
});
}
var probMethod_embedding = new ProbMethod(jsonDatapoint.Probmethod_embedding, logger) ?? throw new ProbMethodNotFoundException(jsonDatapoint.Probmethod_embedding);
var similarityMethod = new SimilarityMethod(jsonDatapoint.SimilarityMethod, logger) ?? throw new SimilarityMethodNotFoundException(jsonDatapoint.SimilarityMethod);
Datapoint datapoint = new(jsonDatapoint.Name, probMethod_embedding, similarityMethod, hash, [.. embeddings.Select(kv => (kv.Key, kv.Value))]);
result.Add(datapoint);
}
// Datapoints are written first: the embedding insert looks the datapoint id up by hash.
DatabaseHelper.DatabaseInsertDatapoints(helper, toBeInsertedDatapoints, id_entity);
DatabaseHelper.DatabaseInsertEmbeddingBulk(helper, toBeInsertedEmbeddings);
return result;
}
public Datapoint DatabaseInsertDatapointWithEmbeddings(SQLHelper helper, Searchdomain searchdomain, JSONDatapoint jsonDatapoint, int id_entity, string? hash = null)
{
if (jsonDatapoint.Text is null)
{
throw new Exception("jsonDatapoint.Text must not be null at this point");
}
hash ??= Convert.ToBase64String(SHA256.HashData(Encoding.UTF8.GetBytes(jsonDatapoint.Text)));
Datapoint datapoint = BuildDatapointFromJsonDatapoint(jsonDatapoint, id_entity, searchdomain, hash);
int id_datapoint = DatabaseHelper.DatabaseInsertDatapoint(helper, jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, jsonDatapoint.SimilarityMethod, hash, id_entity); // TODO make this a bulk add action to reduce number of queries
List<(string model, byte[] embedding)> data = [];
foreach ((string, float[]) embedding in datapoint.embeddings)
@@ -154,15 +328,44 @@ public class SearchdomainHelper(ILogger<SearchdomainHelper> logger, DatabaseHelp
data.Add((embedding.Item1, BytesFromFloatArray(embedding.Item2)));
}
DatabaseHelper.DatabaseInsertEmbeddingBulk(helper, id_datapoint, data);
datapoints.Add(datapoint);
return datapoint;
}
var probMethod = Probmethods.GetMethod(jsonEntity.Probmethod) ?? throw new ProbMethodNotFoundException(jsonEntity.Probmethod);
Entity entity = new(jsonEntity.Attributes, probMethod, datapoints, jsonEntity.Name)
// Creates a Datapoint from a JSON datapoint: computes the Base64 SHA-256 hash of
// the text when no hash is supplied, obtains the embeddings for all of the
// datapoint's models and wraps everything in a new Datapoint.
// Throws Exception when jsonDatapoint.Text is null.
// NOTE(review): the four lines directly after the opening brace (id = id_entity ...
// return entity;) reference names not declared here; they look like removed diff
// lines rendered without +/- markers.
public Datapoint BuildDatapointFromJsonDatapoint(JSONDatapoint jsonDatapoint, int entityId, Searchdomain searchdomain, string? hash = null)
{
id = id_entity
};
entityCache.Add(entity);
return entity;
if (jsonDatapoint.Text is null)
{
throw new Exception("jsonDatapoint.Text must not be null at this point");
}
// Uses its own duplicated connection, disposed when the method returns.
using SQLHelper helper = searchdomain.helper.DuplicateConnection();
EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache = searchdomain.embeddingCache;
hash ??= Convert.ToBase64String(SHA256.HashData(Encoding.UTF8.GetBytes(jsonDatapoint.Text)));
// NOTE(review): despite the "Build" name this writes a datapoint row; the callers
// DatabaseInsertDatapointWithEmbeddings / DatabaseInsertDatapointsWithEmbeddings
// insert the datapoint as well - confirm this does not create duplicate rows.
DatabaseHelper.DatabaseInsertDatapoint(helper, jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, jsonDatapoint.SimilarityMethod, hash, entityId);
Dictionary<string, float[]> embeddings = Datapoint.GetEmbeddings(jsonDatapoint.Text, [.. jsonDatapoint.Model], searchdomain.aIProvider, embeddingCache);
// NOTE(review): a `new` expression is never null, so the `?? throw` fallbacks below cannot trigger.
var probMethod_embedding = new ProbMethod(jsonDatapoint.Probmethod_embedding, logger) ?? throw new ProbMethodNotFoundException(jsonDatapoint.Probmethod_embedding);
var similarityMethod = new SimilarityMethod(jsonDatapoint.SimilarityMethod, logger) ?? throw new SimilarityMethodNotFoundException(jsonDatapoint.SimilarityMethod);
return new Datapoint(jsonDatapoint.Name, probMethod_embedding, similarityMethod, hash, [.. embeddings.Select(kv => (kv.Key, kv.Value))]);
}
/// <summary>
/// Looks up a searchdomain without letting exceptions escape.
/// </summary>
/// <param name="searchdomainManager">Manager used for the lookup.</param>
/// <param name="searchdomain">Name of the searchdomain.</param>
/// <param name="logger">Logger that receives the failure message.</param>
/// <returns>
/// On success <c>(searchdomain, null, null)</c>. On failure <c>(null, status, message)</c>,
/// where status is 404 when the searchdomain does not exist and 500 for any other exception.
/// </returns>
public static (Searchdomain?, int?, string?) TryGetSearchdomain(SearchdomainManager searchdomainManager, string searchdomain, ILogger logger)
{
    try
    {
        Searchdomain searchdomain_ = searchdomainManager.GetSearchdomain(searchdomain);
        return (searchdomain_, null, null);
    } catch (SearchdomainNotFoundException)
    {
        logger.LogError("Unable to update searchdomain {searchdomain} - not found", [searchdomain]);
        // A missing searchdomain is the "not found" case, hence 404 (was 500).
        return (null, 404, $"Unable to update searchdomain {searchdomain} - not found");
    } catch (Exception ex)
    {
        logger.LogError("Unable to update searchdomain {searchdomain} - Exception: {ex.Message} - {ex.StackTrace}", [searchdomain, ex.Message, ex.StackTrace]);
        // Anything unexpected is a server-side failure, hence 500 (was 404).
        return (null, 500, $"Unable to update searchdomain {searchdomain}");
    }
}
/// <summary>
/// Forwards to <c>SearchdomainManager.IsSearchdomainLoaded</c> for the given name.
/// </summary>
/// <param name="searchdomainManager">Manager that is queried.</param>
/// <param name="name">Name of the searchdomain.</param>
public static bool IsSearchdomainLoaded(SearchdomainManager searchdomainManager, string name)
    => searchdomainManager.IsSearchdomainLoaded(name);
}

View File

@@ -12,6 +12,11 @@ public static class DatabaseMigrations
int initialDatabaseVersion = DatabaseGetVersion(helper);
int databaseVersion = initialDatabaseVersion;
if (databaseVersion == 0)
{
databaseVersion = Create(helper);
}
var updateMethods = typeof(DatabaseMigrations)
.GetMethods(BindingFlags.Public | BindingFlags.Static)
.Where(m => m.Name.StartsWith("UpdateFrom") && m.ReturnType == typeof(int))
@@ -91,4 +96,10 @@ public static class DatabaseMigrations
helper.ExecuteSQLNonQuery("ALTER TABLE datapoint ADD COLUMN similaritymethod VARCHAR(512) NULL DEFAULT 'Cosine' AFTER probmethod_embedding", []);
return 4;
}
/// <summary>
/// Migration step starting at database version 4: writes a <c>QueryCacheSize</c> of 1000000
/// into the JSON <c>settings</c> of every searchdomain row that does not have one yet.
/// </summary>
/// <param name="helper">SQL helper used to run the statement.</param>
/// <returns>5 (presumably the database version after this step; confirm against <c>Migrate</c>).</returns>
public static int UpdateFrom4(SQLHelper helper)
{
    // Only rows whose settings lack the key are touched, so existing values are preserved.
    const string backfillQueryCacheSize = "UPDATE searchdomain SET settings = JSON_SET(settings, '$.QueryCacheSize', 1000000) WHERE JSON_EXTRACT(settings, '$.QueryCacheSize') is NULL;";
    helper.ExecuteSQLNonQuery(backfillQueryCacheSize, []);
    return 5;
}
}

View File

@@ -0,0 +1,65 @@
using System.Data.Common;
/// <summary>
/// Schema migrations for the SQLite embedding cache database.
/// The schema version is tracked in SQLite's <c>PRAGMA user_version</c>.
/// </summary>
public static class SQLiteMigrations
{
    /// <summary>
    /// Switches the database to WAL journaling and brings the schema up to the latest version.
    /// </summary>
    /// <param name="conn">Connection to migrate. This class never opens it, so the caller must pass an open connection.</param>
    public static void Migrate(DbConnection conn)
    {
        EnableWal(conn);

        using var cmd = conn.CreateCommand();
        cmd.CommandText = "PRAGMA user_version;";
        var version = Convert.ToInt32(cmd.ExecuteScalar());

        if (version == 0)
        {
            CreateV1(conn);
            SetVersion(conn, 1);
            version = 1;
        }

        if (version == 1)
        {
            // future migration
            // UpdateFrom1To2(conn);
            // SetVersion(conn, 2);
        }
    }

    /// <summary>Sets <c>journal_mode</c> to WAL on the given connection.</summary>
    private static void EnableWal(DbConnection conn)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = "PRAGMA journal_mode = WAL;";
        cmd.ExecuteNonQuery();
    }

    /// <summary>
    /// Creates the version 1 schema: the <c>embedding_cache</c> table and its index on <c>idx</c>.
    /// </summary>
    private static void CreateV1(DbConnection conn)
    {
        using var tx = conn.BeginTransaction();
        using var cmd = conn.CreateCommand();
        // Enlist explicitly: not every ADO.NET provider attaches new commands to the pending transaction.
        cmd.Transaction = tx;
        // IF NOT EXISTS keeps this step repeatable: if the process dies after the commit but
        // before user_version is bumped, the next start-up re-runs it instead of failing.
        cmd.CommandText = """
            CREATE TABLE IF NOT EXISTS embedding_cache (
            cache_key TEXT NOT NULL,
            model_key TEXT NOT NULL,
            embedding BLOB NOT NULL,
            idx INTEGER NOT NULL,
            PRIMARY KEY (cache_key, model_key)
            );
            CREATE INDEX IF NOT EXISTS idx_index
            ON embedding_cache(idx);
            """;
        cmd.ExecuteNonQuery();
        tx.Commit();
    }

    /// <summary>Stores <paramref name="version"/> in <c>PRAGMA user_version</c>.</summary>
    private static void SetVersion(DbConnection conn, int version)
    {
        using var cmd = conn.CreateCommand();
        // PRAGMA values cannot be bound as parameters; the value is an int, so interpolation is safe.
        cmd.CommandText = $"PRAGMA user_version = {version};";
        cmd.ExecuteNonQuery();
    }
}

View File

@@ -1,13 +0,0 @@
namespace Server.Models;
/// <summary>Options holding the list of users known to SimpleAuth.</summary>
public class SimpleAuthOptions
{
    /// <summary>Configured users; an empty list when nothing is configured.</summary>
    public List<SimpleUser> Users { get; set; } = [];
}
/// <summary>A single SimpleAuth user entry.</summary>
public class SimpleUser
{
    /// <summary>Login name; defaults to the empty string.</summary>
    public string Username { get; set; } = string.Empty;

    /// <summary>Password value as configured; defaults to the empty string.</summary>
    public string Password { get; set; } = string.Empty;

    /// <summary>Role names assigned to the user; defaults to an empty array.</summary>
    public string[] Roles { get; set; } = [];
}

View File

@@ -0,0 +1,49 @@
using System.Configuration;
using ElmahCore;
using Shared.Models;
namespace Server.Models;
/// <summary>
/// Strongly typed server configuration. Extends <see cref="ApiKeyOptions"/> with the
/// connection, Elmah, AI provider, SimpleAuth, cache and HTTPS-redirection settings below.
/// </summary>
public class EmbeddingSearchOptions : ApiKeyOptions
{
/// <summary>Database connection strings (required).</summary>
public required ConnectionStringsOptions ConnectionStrings { get; set; }
/// <summary>Optional Elmah settings; may be absent.</summary>
public ElmahOptions? Elmah { get; set; }
/// <summary>AI provider definitions keyed by a string (presumably the provider name; confirm against the consumer).</summary>
public required Dictionary<string, AiProvider> AiProviders { get; set; }
/// <summary>SimpleAuth user configuration (required).</summary>
public required SimpleAuthOptions SimpleAuth { get; set; }
/// <summary>Cache settings (required).</summary>
public required CacheOptions Cache { get; set; }
/// <summary>Flag for HTTPS redirection (required); how it is applied is not visible here.</summary>
public required bool UseHttpsRedirection { get; set; }
}
/// <summary>Configuration of a single AI provider entry.</summary>
public class AiProvider
{
/// <summary>Handler identifier (required). NOTE(review): presumably selects the provider implementation; confirm.</summary>
public required string Handler { get; set; }
/// <summary>Base URL of the provider (required).</summary>
public required string BaseURL { get; set; }
/// <summary>Optional API key; null when none is configured.</summary>
public string? ApiKey { get; set; }
/// <summary>Allowlist entries (required). NOTE(review): presumably model names/patterns; confirm.</summary>
public required string[] Allowlist { get; set; }
/// <summary>Denylist entries (required). NOTE(review): presumably model names/patterns; confirm.</summary>
public required string[] Denylist { get; set; }
}
/// <summary>Options holding the list of users known to SimpleAuth.</summary>
public class SimpleAuthOptions
{
    /// <summary>Configured users; an empty list when nothing is configured.</summary>
    public List<SimpleUser> Users { get; set; } = new List<SimpleUser>();
}
/// <summary>A single SimpleAuth user entry.</summary>
public class SimpleUser
{
    /// <summary>Login name; defaults to the empty string.</summary>
    public string Username { get; set; } = string.Empty;

    /// <summary>Password value as configured; defaults to the empty string.</summary>
    public string Password { get; set; } = string.Empty;

    /// <summary>Role names assigned to the user; defaults to an empty array.</summary>
    public string[] Roles { get; set; } = Array.Empty<string>();
}
/// <summary>Connection strings used by the server.</summary>
public class ConnectionStringsOptions
{
/// <summary>Connection string of the main SQL database (required).</summary>
public required string SQL { get; set; }
/// <summary>Optional connection string of the cache database; null when no cache database is configured.</summary>
public string? Cache { get; set; }
}
/// <summary>Cache-related settings.</summary>
public class CacheOptions
{
    /// <summary>Required "top N" setting for the cache. NOTE(review): exact meaning is defined by the consumer; confirm.</summary>
    public required long CacheTopN { get; set; }

    /// <summary>Whether the embedding cache is stored; off unless configured.</summary>
    public bool StoreEmbeddingCache { get; set; }

    /// <summary>Optional "top N" setting for storing; null when not configured.</summary>
    public int? StoreTopN { get; set; }
}

View File

@@ -0,0 +1,109 @@
namespace Server.Models;
using System.Data.Common;
/// <summary>
/// Provider-independent base class for running SQL over a single <see cref="DbConnection"/>.
/// Every execute method takes a lock on <see cref="Connection"/>, reconnects if necessary and
/// waits for the reader stored in <see cref="DbDataReader"/> to be closed before issuing a command.
/// </summary>
public abstract partial class SqlHelper : IDisposable
{
    /// <summary>The connection all commands are executed on.</summary>
    public DbConnection Connection { get; set; }

    /// <summary>Reader returned by the most recent <see cref="ExecuteSQLCommand"/> call, if any.</summary>
    public DbDataReader? DbDataReader { get; set; }

    /// <summary>Connection string associated with <see cref="Connection"/>.</summary>
    public string ConnectionString { get; set; }

    /// <summary>Stores the given connection and connection string; does not open the connection.</summary>
    public SqlHelper(DbConnection connection, string connectionString)
    {
        Connection = connection;
        ConnectionString = connectionString;
    }

    /// <summary>Implemented by subclasses; the name suggests it returns a helper with its own connection.</summary>
    public abstract SqlHelper DuplicateConnection();

    /// <summary>Closes the connection.</summary>
    public void Dispose()
    {
        Connection.Close();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Executes <paramref name="query"/> and returns the open reader, which is also stored in
    /// <see cref="DbDataReader"/>. The caller has to close the reader; later commands wait for it.
    /// </summary>
    /// <param name="query">SQL text.</param>
    /// <param name="parameters">Provider parameter objects added to the command.</param>
    public DbDataReader ExecuteSQLCommand(string query, object[] parameters)
    {
        lock (Connection)
        {
            EnsureConnected();
            EnsureDbReaderIsClosed();
            using DbCommand command = Connection.CreateCommand();
            command.CommandText = query;
            command.Parameters.AddRange(parameters);
            DbDataReader = command.ExecuteReader();
            return DbDataReader;
        }
    }

    /// <summary>
    /// Executes <paramref name="query"/> and invokes <paramref name="map"/> once per result row.
    /// The value returned by <paramref name="map"/> is discarded; the reader is closed before returning.
    /// </summary>
    /// <param name="query">SQL text.</param>
    /// <param name="parameters">Provider parameter objects added to the command.</param>
    /// <param name="map">Callback invoked with the reader positioned on each row.</param>
    public void ExecuteQuery<T>(string query, object[] parameters, Func<DbDataReader, T> map)
    {
        lock (Connection)
        {
            EnsureConnected();
            EnsureDbReaderIsClosed();
            using var command = Connection.CreateCommand();
            command.CommandText = query;
            command.Parameters.AddRange(parameters);
            using var reader = command.ExecuteReader();
            while (reader.Read())
            {
                map(reader);
            }
            return;
        }
    }

    /// <summary>Executes a statement that returns no rows.</summary>
    /// <param name="query">SQL text.</param>
    /// <param name="parameters">Provider parameter objects added to the command.</param>
    /// <returns>The value returned by <see cref="DbCommand.ExecuteNonQuery"/>.</returns>
    public int ExecuteSQLNonQuery(string query, object[] parameters)
    {
        lock (Connection)
        {
            EnsureConnected();
            EnsureDbReaderIsClosed();
            using DbCommand command = Connection.CreateCommand();
            command.CommandText = query;
            command.Parameters.AddRange(parameters);
            return command.ExecuteNonQuery();
        }
    }

    /// <summary>Implemented by subclasses; the name suggests it executes an insert and returns the generated ID.</summary>
    public abstract int ExecuteSQLCommandGetInsertedID(string query, object[] parameters);

    /// <summary>
    /// Re-opens the connection when it is not in the <c>Open</c> state.
    /// Failures are reported to Elmah and rethrown.
    /// </summary>
    /// <returns>Always <c>true</c> when it returns.</returns>
    public bool EnsureConnected()
    {
        if (Connection.State != System.Data.ConnectionState.Open)
        {
            try
            {
                // Close first so a broken/half-open connection is reset before re-opening.
                Connection.Close();
                Connection.Open();
            }
            catch (Exception ex)
            {
                ElmahCore.ElmahExtensions.RaiseError(ex);
                throw;
            }
        }
        return true;
    }

    /// <summary>
    /// Waits, polling every 10 ms, until the reader in <see cref="DbDataReader"/> is closed (or absent).
    /// </summary>
    /// <exception cref="TimeoutException">The reader is still open after roughly 5 seconds.</exception>
    public void EnsureDbReaderIsClosed()
    {
        int counter = 0;
        int sleepTime = 10;
        int timeout = 5000;
        while (!(DbDataReader?.IsClosed ?? true))
        {
            if (counter > timeout / sleepTime)
            {
                TimeoutException ex = new("Unable to ensure dbDataReader is closed");
                ElmahCore.ElmahExtensions.RaiseError(ex);
                throw ex;
            }
            Thread.Sleep(sleepTime);
            // Count the elapsed polls; without this the timeout above can never trigger.
            counter++;
        }
    }
}

View File

@@ -1,37 +1,29 @@
using System.Text.Json;
using Server.Exceptions;
using Shared.Models;
namespace Server;
public class ProbMethod
{
public Probmethods.probMethodDelegate method;
public ProbMethodEnum probMethodEnum;
public string name;
public ProbMethod(string name, ILogger logger)
public ProbMethod(ProbMethodEnum probMethodEnum, ILogger logger)
{
this.name = name;
this.probMethodEnum = probMethodEnum;
this.name = probMethodEnum.ToString();
Probmethods.probMethodDelegate? probMethod = Probmethods.GetMethod(name);
if (probMethod is null)
{
logger.LogError("Unable to retrieve probMethod {name}", [name]);
throw new ProbMethodNotFoundException(name);
throw new ProbMethodNotFoundException(probMethodEnum);
}
method = probMethod;
}
}
/// <summary>
/// Identifiers of the available probability methods. <c>ProbMethod</c> resolves the
/// implementation through <c>Probmethods.GetMethod</c> using the member's <c>ToString()</c> name.
/// </summary>
public enum ProbMethodEnum
{
Mean,
HarmonicMean,
QuadraticMean,
GeometricMean,
// NOTE(review): the three *VEWAvg abbreviations are not explained in this file; confirm their meaning.
EVEWAvg,
HVEWAvg,
LVEWAvg,
DictionaryWeightedAverage
}
public static class Probmethods
{
@@ -54,6 +46,11 @@ public static class Probmethods
};
}
/// <summary>
/// Enum overload: resolves the delegate by passing the enum member's name to the string overload.
/// </summary>
/// <param name="probMethodEnum">Method identifier.</param>
/// <returns>Whatever the string overload returns for the member's name (possibly null).</returns>
public static probMethodDelegate? GetMethod(ProbMethodEnum probMethodEnum)
    => GetMethod(probMethodEnum.ToString());
public static probMethodDelegate? GetMethod(string name)
{
string methodName = name;

View File

@@ -8,12 +8,51 @@ using Server.HealthChecks;
using Server.Helper;
using Server.Models;
using Server.Services;
using System.Text.Json.Serialization;
using System.Configuration;
using Microsoft.OpenApi;
using Shared.Models;
using Microsoft.AspNetCore.ResponseCompression;
using System.Net;
using System.Text;
using Server.Migrations;
using Microsoft.Data.Sqlite;
var builder = WebApplication.CreateBuilder(args);
// Add services to the container.
// Add Controllers with views & string conversion for enums
builder.Services.AddControllersWithViews()
.AddJsonOptions(options =>
{
options.JsonSerializerOptions.Converters.Add(
new JsonStringEnumConverter()
);
});
builder.Services.AddControllersWithViews();
// Add Configuration
// The "Embeddingsearch" section is bound once for start-up use and also registered for options consumers.
IConfigurationSection configurationSection = builder.Configuration.GetSection("Embeddingsearch");
EmbeddingSearchOptions configuration = configurationSection.Get<EmbeddingSearchOptions>()
    ?? throw new ConfigurationErrorsException("Unable to start server due to an invalid configuration");
builder.Services.Configure<EmbeddingSearchOptions>(configurationSection);
builder.Services.Configure<ApiKeyOptions>(configurationSection);
// Configure Kestrel
builder.WebHost.ConfigureKestrel(options =>
{
options.Limits.MaxRequestBodySize = configuration.MaxRequestBodySize ?? 50 * 1024 * 1024;
});
// Migrate database
var helper = new SQLHelper(new MySql.Data.MySqlClient.MySqlConnection(configuration.ConnectionStrings.SQL), configuration.ConnectionStrings.SQL);
DatabaseMigrations.Migrate(helper);
// Migrate SQLite cache
if (configuration.ConnectionStrings.Cache is not null)
{
    // The connection is only needed for the migration, so dispose it when this block ends.
    using var sqliteConnection = new SqliteConnection(configuration.ConnectionStrings.Cache);
    sqliteConnection.Open();
    SQLiteMigrations.Migrate(sqliteConnection);
}
// Add Localization
builder.Services.AddLocalization(options => options.ResourcesPath = "Resources");
@@ -30,7 +69,38 @@ builder.Services.AddScoped<LocalizationService>();
// Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle
builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerGen();
builder.Services.AddOpenApi(options =>
{
options.AddDocumentTransformer((document, context, _) =>
{
if (configuration.ApiKeys is null)
return Task.CompletedTask;
document.Components ??= new();
document.Components.SecuritySchemes ??= new Dictionary<string, IOpenApiSecurityScheme>();
document.Components.SecuritySchemes["ApiKey"] =
new OpenApiSecurityScheme
{
Type = SecuritySchemeType.ApiKey,
Name = "X-API-KEY",
In = ParameterLocation.Header,
Description = "ApiKey must appear in header"
};
document.Security ??= [];
// Apply globally
document.Security?.Add(
new OpenApiSecurityRequirement
{
[new OpenApiSecuritySchemeReference("ApiKey", document)] = []
}
);
return Task.CompletedTask;
});
});
Log.Logger = new LoggerConfiguration()
.ReadFrom.Configuration(builder.Configuration)
.CreateLogger();
@@ -40,12 +110,17 @@ builder.Services.AddSingleton<SearchdomainHelper>();
builder.Services.AddSingleton<SearchdomainManager>();
builder.Services.AddSingleton<AIProvider>();
builder.Services.AddHealthChecks()
.AddCheck<DatabaseHealthCheck>("DatabaseHealthCheck")
.AddCheck<AIProviderHealthCheck>("AIProviderHealthCheck");
.AddCheck<DatabaseHealthCheck>("DatabaseHealthCheck", tags: ["Database"])
.AddCheck<AIProviderHealthCheck>("AIProviderHealthCheck", tags: ["AIProvider"]);
builder.Services.AddElmah<XmlFileErrorLog>(Options =>
{
Options.LogPath = builder.Configuration.GetValue<string>("Embeddingsearch:Elmah:LogFolder") ?? "~/logs";
Options.OnPermissionCheck = context =>
context.User.Claims.Any(claim =>
claim.Value.Equals("Admin", StringComparison.OrdinalIgnoreCase)
|| claim.Value.Equals("Elmah", StringComparison.OrdinalIgnoreCase)
);
Options.LogPath = configuration.Elmah?.LogPath ?? "~/logs";
});
builder.Services
@@ -63,29 +138,109 @@ builder.Services.AddAuthorization(options =>
policy => policy.RequireRole("Admin"));
});
IConfigurationSection simpleAuthSection = builder.Configuration.GetSection("Embeddingsearch:SimpleAuth");
if (simpleAuthSection.Exists()) builder.Services.Configure<SimpleAuthOptions>(simpleAuthSection);
builder.Services.AddResponseCompression(options =>
{
options.EnableForHttps = true;
options.Providers.Add<GzipCompressionProvider>();
options.Providers.Add<BrotliCompressionProvider>();
options.MimeTypes =
[
"text/plain",
"text/css",
"application/javascript",
"text/javascript",
"text/html",
"application/xml",
"text/xml",
"application/json",
"image/svg+xml"
];
});
var app = builder.Build();
List<string>? allowedIps = builder.Configuration.GetSection("Embeddingsearch:Elmah:AllowedHosts")
.Get<List<string>>();
app.UseAuthentication();
app.UseAuthorization();
// Configure Elmah
app.Use(async (context, next) =>
{
if (context.Request.Path.StartsWithSegments("/elmah"))
{
context.Response.OnStarting(() =>
{
context.Response.Headers.Append(
"Content-Security-Policy",
"default-src 'self' 'unsafe-inline' 'unsafe-eval'"
);
return Task.CompletedTask;
});
}
await next();
});
// Buffers responses under /elmah and injects the custom stylesheet and script into HTML pages.
app.Use(async (context, next) =>
{
    if (!context.Request.Path.StartsWithSegments("/elmah"))
    {
        await next();
        return;
    }

    var originalBody = context.Response.Body;
    using var memStream = new MemoryStream();
    context.Response.Body = memStream;
    try
    {
        await next();
    }
    finally
    {
        // Always restore the real stream so later components never write to the disposed buffer,
        // even when a downstream component throws.
        context.Response.Body = originalBody;
    }

    byte[] bytes;
    if (context.Response.ContentType?.Contains("text/html") == true)
    {
        memStream.Position = 0;
        using var reader = new StreamReader(memStream);
        var html = await reader.ReadToEndAsync();
        html = html.Replace(
            "</head>",
            """
            <link rel="stylesheet" href="/elmah-ui/custom.css" />
            <script src="/elmah-ui/custom.js"></script>
            </head>
            """
        );
        bytes = Encoding.UTF8.GetBytes(html);
    }
    else
    {
        // Forward non-HTML payloads byte-for-byte; a UTF-8 decode/encode round trip would corrupt binary content.
        bytes = memStream.ToArray();
    }

    context.Response.ContentLength = bytes.Length;
    await originalBody.WriteAsync(bytes);
});
app.UseElmah();
app.MapHealthChecks("/healthz");
app.MapHealthChecks("/healthz/Database", new Microsoft.AspNetCore.Diagnostics.HealthChecks.HealthCheckOptions
{
Predicate = c => c.Name.Contains("Database")
});
app.MapHealthChecks("/healthz/AIProvider", new Microsoft.AspNetCore.Diagnostics.HealthChecks.HealthCheckOptions
{
Predicate = c => c.Name.Contains("AIProvider")
});
bool IsDevelopment = app.Environment.IsDevelopment();
app.Use(async (context, next) =>
{
bool requestIsElmah = context.Request.Path.StartsWithSegments("/elmah");
bool requestIsSwagger = context.Request.Path.StartsWithSegments("/swagger");
if (context.Request.Path.StartsWithSegments("/swagger"))
{
if (!context.User.Identity?.IsAuthenticated ?? true)
{
context.Response.Redirect($"/Account/Login?ReturnUrl={WebUtility.UrlEncode("/swagger")}");
return;
}
if (requestIsElmah || requestIsSwagger)
if (!context.User.IsInRole("Admin"))
{
var remoteIp = context.Connection.RemoteIpAddress?.ToString();
bool blockRequest = allowedIps is null
|| remoteIp is null
|| !allowedIps.Contains(remoteIp);
if (blockRequest)
{
context.Response.StatusCode = 403;
await context.Response.WriteAsync("Forbidden");
context.Response.StatusCode = StatusCodes.Status403Forbidden;
return;
}
}
@@ -93,26 +248,37 @@ app.Use(async (context, next) =>
await next();
});
app.UseElmah();
app.MapHealthChecks("/healthz");
bool IsDevelopment = app.Environment.IsDevelopment();
bool useSwagger = app.Configuration.GetValue<bool>("UseSwagger");
bool? UseMiddleware = app.Configuration.GetValue<bool?>("UseMiddleware");
// Configure the HTTP request pipeline.
if (IsDevelopment || useSwagger)
app.UseSwaggerUI(options =>
{
app.UseSwagger();
app.UseSwaggerUI();
//app.UseElmahExceptionPage(); // Messes with JSON response for API calls. Leaving this here so I don't accidentally put this in again later on.
}
if (UseMiddleware == true && !IsDevelopment)
options.SwaggerEndpoint("/openapi/v1.json", "API v1");
options.RoutePrefix = "swagger";
options.EnablePersistAuthorization();
options.InjectStylesheet("/swagger-ui/custom.css");
options.InjectJavascript("/swagger-ui/custom.js");
});
app.MapOpenApi("/openapi/v1.json");
//app.UseElmahExceptionPage(); // Messes with JSON response for API calls. Leaving this here so I don't accidentally put this in again later on.
if (configuration.ApiKeys is not null)
{
app.UseMiddleware<Shared.ApiKeyMiddleware>();
app.UseWhen(context =>
{
RouteData routeData = context.GetRouteData();
string controllerName = routeData.Values["controller"]?.ToString() ?? "StaticFile";
if (controllerName == "Account" || controllerName == "Home" || controllerName == "StaticFile")
{
return false;
}
return true;
}, appBuilder =>
{
appBuilder.UseMiddleware<Shared.ApiKeyMiddleware>();
});
}
app.UseResponseCompression();
// Add localization
var supportedCultures = new[] { "de", "de-DE", "en-US" };
var localizationOptions = new RequestLocalizationOptions()
@@ -121,10 +287,23 @@ var localizationOptions = new RequestLocalizationOptions()
.AddSupportedUICultures(supportedCultures);
app.UseRequestLocalization(localizationOptions);
app.UseAuthentication();
app.UseAuthorization();
app.MapControllers();
app.UseStaticFiles();
// Serve static files; assets with the listed extensions get a public Cache-Control max-age of 365 days.
app.UseStaticFiles(new StaticFileOptions
{
    OnPrepareResponse = ctx =>
    {
        string requestPath = ctx.Context.Request.Path.ToString();
        string[] cachedSuffixes = [".css", ".js", ".png", ".ico", ".woff2"];
        // File extensions are identifiers, not linguistic text: compare ordinally instead of by current culture.
        if (cachedSuffixes.Any(suffix => requestPath.EndsWith(suffix, StringComparison.Ordinal)))
        {
            ctx.Context.Response.GetTypedHeaders().CacheControl =
                new Microsoft.Net.Http.Headers.CacheControlHeaderValue()
                {
                    Public = true,
                    MaxAge = TimeSpan.FromDays(365)
                };
        }
    }
});
app.Run();

View File

@@ -24,4 +24,316 @@
<data name="IrreversibleActionWarning" xml:space="preserve">
<value>Diese Aktion kann nicht rückgängig gemacht werden.</value>
</data>
<data name="Searchdomain selection" xml:space="preserve">
<value>Searchdomain Auswahl</value>
</data>
<data name="Create" xml:space="preserve">
<value>Erstellen</value>
</data>
<data name="Searchdomain information and settings" xml:space="preserve">
<value>Searchdomain Informationen und Einstellungen</value>
</data>
<data name="Actions" xml:space="preserve">
<value>Aktionen</value>
</data>
<data name="Rename" xml:space="preserve">
<value>Umbenennen</value>
</data>
<data name="Delete" xml:space="preserve">
<value>Löschen</value>
</data>
<data name="Settings" xml:space="preserve">
<value>Einstellungen</value>
</data>
<data name="Cache reconciliation" xml:space="preserve">
<value>Cache Abgleich</value>
</data>
<data name="Update" xml:space="preserve">
<value>Anpassen</value>
</data>
<data name="Search cache" xml:space="preserve">
<value>Such-Cache</value>
</data>
<data name="Search cache utilization" xml:space="preserve">
<value>Such-Cache-Speicherauslastung</value>
</data>
<data name="Clear" xml:space="preserve">
<value>Leeren</value>
</data>
<data name="Database size" xml:space="preserve">
<value>Größe in der Datenbank</value>
</data>
<data name="Add new entity" xml:space="preserve">
<value>Neue Entity erstellen</value>
</data>
<data name="Entity Details" xml:space="preserve">
<value>Entity Details</value>
</data>
<data name="Attributes" xml:space="preserve">
<value>Attribute</value>
</data>
<data name="Key" xml:space="preserve">
<value>Schlüssel</value>
</data>
<data name="Value" xml:space="preserve">
<value>Wert</value>
</data>
<data name="Datapoints" xml:space="preserve">
<value>Datapoints</value>
</data>
<data name="Name" xml:space="preserve">
<value>Name</value>
</data>
<data name="ProbMethod" xml:space="preserve">
<value>ProbMethod</value>
</data>
<data name="SimilarityMethod" xml:space="preserve">
<value>SimilarityMethod</value>
</data>
<data name="Close" xml:space="preserve">
<value>Schließen</value>
</data>
<data name="Query Details" xml:space="preserve">
<value>Suchanfrage Details</value>
</data>
<data name="Access times" xml:space="preserve">
<value>Zugriffszeiten</value>
</data>
<data name="Results" xml:space="preserve">
<value>Ergebnisse</value>
</data>
<data name="Score" xml:space="preserve">
<value>Bewertung</value>
</data>
<data name="Query Update" xml:space="preserve">
<value>Suchanfrage anpassen</value>
</data>
<data name="Rename searchdomain" xml:space="preserve">
<value>Searchdomain umbenennen</value>
</data>
<data name="Delete searchdomain" xml:space="preserve">
<value>Searchdomain löschen</value>
</data>
<data name="Create searchdomain" xml:space="preserve">
<value>Searchdomain anlegen</value>
</data>
<data name="Searchdomain name" xml:space="preserve">
<value>Searchdomain Name</value>
</data>
<data name="Enable cache reconciliation" xml:space="preserve">
<value>Cache-Abgleich verwenden</value>
</data>
<data name="Create entity" xml:space="preserve">
<value>Entity erstellen</value>
</data>
<data name="Entity name" xml:space="preserve">
<value>Entity Name</value>
</data>
<data name="Probmethod" xml:space="preserve">
<value>Probmethod</value>
</data>
<data name="Add attribute" xml:space="preserve">
<value>Attribut hinzufügen</value>
</data>
<data name="Probmethod_embedding" xml:space="preserve">
<value>Probmethod_embedding</value>
</data>
<data name="Similarity method" xml:space="preserve">
<value>Similarity method</value>
</data>
<data name="Model" xml:space="preserve">
<value>Modell</value>
</data>
<data name="Add datapoint" xml:space="preserve">
<value>Datapoint hinzufügen</value>
</data>
<data name="Delete entity" xml:space="preserve">
<value>Entity löschen</value>
</data>
<data name="Update entity" xml:space="preserve">
<value>Entity anpassen</value>
</data>
<data name="Action" xml:space="preserve">
<value>Aktion</value>
</data>
<data name="Delete query" xml:space="preserve">
<value>Suchanfrage löschen</value>
</data>
<data name="Creating entity" xml:space="preserve">
<value>Erstelle Entity</value>
</data>
<data name="Entity was created successfully" xml:space="preserve">
<value>Entity wurde erfolgreich erstellt</value>
</data>
<data name="Failed to create entity" xml:space="preserve">
<value>Entity konnte nicht erstellt werden</value>
</data>
<data name="Searchdomain was created successfully" xml:space="preserve">
<value>Searchdomain wurde erfolgreich erstellt</value>
</data>
<data name="Failed to create searchdomain" xml:space="preserve">
<value>Searchdomain konnte nicht erstellt werden</value>
</data>
<data name="Searchdomain cache was cleared successfully" xml:space="preserve">
<value>Searchdomain-Cache wurde erfolgreich geleert</value>
</data>
<data name="Failed to clear searchdomain cache" xml:space="preserve">
<value>Searchdomain-Cache konnte nicht geleert werden</value>
</data>
<data name="Entity was deleted successfully" xml:space="preserve">
<value>Entity wurde erfolgreich gelöscht</value>
</data>
<data name="Failed to delete entity" xml:space="preserve">
<value>Entity konnte nicht gelöscht werden</value>
</data>
<data name="Updating entity" xml:space="preserve">
<value>Entity wird angepasst</value>
</data>
<data name="Entity was updated successfully" xml:space="preserve">
<value>Entity wurde erfolgreich angepasst</value>
</data>
<data name="Failed to update entity" xml:space="preserve">
<value>Entity konnte nicht angepasst werden</value>
</data>
<data name="Search query was deleted successfully" xml:space="preserve">
<value>Suchanfrage wurde erfolgreich gelöscht</value>
</data>
<data name="Failed to delete search query" xml:space="preserve">
<value>Suchanfrage konnte nicht gelöscht werden</value>
</data>
<data name="Searchdomain was created successfully" xml:space="preserve">
<value>Searchdomain wurde erfolgreich erstellt</value>
</data>
<data name="Updating search query failed" xml:space="preserve">
<value>Suchanfrage konnte nicht angepasst werden</value>
</data>
<data name="Searchdomain was deleted successfully" xml:space="preserve">
<value>Searchdomain wurde erfolgreich gelöscht</value>
</data>
<data name="Failed to delete searchdomain" xml:space="preserve">
<value>Konnte Searchdomain nicht löschen</value>
</data>
<data name="Searchdomain was renamed successfully" xml:space="preserve">
<value>Searchdomain wurde erfolgreich umbenannt</value>
</data>
<data name="Failed to rename searchdomain" xml:space="preserve">
<value>Searchdomain konnte nicht umbenannt werden</value>
</data>
<data name="Searchdomain settings were updated successfully" xml:space="preserve">
<value>Searchdomain Einstellungen wurden erfolgreich angepasst</value>
</data>
<data name="Updating searchdomain settings failed" xml:space="preserve">
<value>Searchdomain Einstellungen konnten nicht angepasst werden</value>
</data>
<data name="Unable to fetch searchdomain config" xml:space="preserve">
<value>Searchdomain Einstellungen konnten nicht abgerufen werden</value>
</data>
<data name="Unable to fetch searchdomain cache utilization" xml:space="preserve">
<value>Searchdomain-Cache-Auslastung konnte nicht abgerufen werden</value>
</data>
<data name="Details" xml:space="preserve">
<value>Details</value>
</data>
<data name="Remove attribute" xml:space="preserve">
<value>Attribut entfernen</value>
</data>
<data name="Remove" xml:space="preserve">
<value>Entfernen</value>
</data>
<data name="Close alert" xml:space="preserve">
<value>Benachrichtigung schließen</value>
</data>
<data name="Recent queries" xml:space="preserve">
<value>Letzte Queries</value>
</data>
<data name="Home" xml:space="preserve">
<value>Dashboard</value>
</data>
<data name="Searchdomains" xml:space="preserve">
<value>Searchdomains</value>
</data>
<data name="Swagger" xml:space="preserve">
<value>Swagger</value>
</data>
<data name="Elmah" xml:space="preserve">
<value>Elmah</value>
</data>
<data name="Hi!" xml:space="preserve">
<value>Hallo!</value>
</data>
<data name="Hi, {0}!" xml:space="preserve">
<value>Hallo {0}!</value>
</data>
<data name="Embedding Cache" xml:space="preserve">
<value>Embedding-Cache</value>
</data>
<data name="Size" xml:space="preserve">
<value>Größe</value>
</data>
<data name="Strings" xml:space="preserve">
<value>Zeichenketten</value>
</data>
<data name="stringsCountInfo" xml:space="preserve">
<value>Die Anzahl der Zeichenketten, für die Embeddings vorliegen. D.h. wenn zwei Modelle verwendet werden, ist die Zahl der Embeddings zweimal so hoch.</value>
</data>
<data name="Embeddings" xml:space="preserve">
<value>Embeddings</value>
</data>
<data name="Health Checks" xml:space="preserve">
<value>Health Checks</value>
</data>
<data name="Server" xml:space="preserve">
<value>Server</value>
</data>
<data name="AI Providers" xml:space="preserve">
<value>AI Providers</value>
</data>
<data name="Count" xml:space="preserve">
<value>Anzahl</value>
</data>
<data name="Total Entities" xml:space="preserve">
<value>Entities insgesamt</value>
</data>
<data name="Total query cache utilization" xml:space="preserve">
<value>Query-Cache-Verwendung insgesamt</value>
</data>
<data name="Unable to fetch searchdomain database utilization" xml:space="preserve">
<value>Searchdomain Datenbank-Auslastung konnte nicht abgerufen werden</value>
</data>
<data name="Query cache entry count" xml:space="preserve">
<value>Query-Cache Einträge</value>
</data>
<data name="Query cache capacity (all)" xml:space="preserve">
<value>Query-Cache Kapazität (alle)</value>
</data>
<data name="queryCacheEntryCountAllInfo" xml:space="preserve">
<value>Anzahl der Einträge, die insgesamt in den Query-Cache passen. Ungeladene Searchdomains werden berücksichtigt.</value>
</data>
<data name="Query cache capacity (loaded)" xml:space="preserve">
<value>Query-Cache Kapazität (geladen)</value>
</data>
<data name="queryCacheEntryCountLoadedInfo" xml:space="preserve">
<value>Anzahl der Einträge, die insgesamt in den Query-Cache der geladenen Searchdomains passen.</value>
</data>
<data name="Query cache size" xml:space="preserve">
<value>Query-Cache Größe</value>
</data>
<data name="Embeddings parallel prefetching" xml:space="preserve">
<value>Embeddings parallel prefetchen</value>
</data>
<data name="parallelEmbeddingsPrefetchInfo" xml:space="preserve">
<value>Wenn diese Einstellung aktiv ist, wird das Abrufen von Embeddings beim Indizieren von Entities parallelisiert. Deaktiviere diese Einstellung, falls Model-unloading ein Problem ist.</value>
</data>
<data name="Add result" xml:space="preserve">
<value>Ergebnis hinzufügen</value>
</data>
<data name="Search query was updated successfully" xml:space="preserve">
<value>Suchanfrage wurde erfolgreich angepasst</value>
</data>
<data name="Total RAM usage" xml:space="preserve">
<value>RAM Verwendung insgesamt</value>
</data>
<data name="Total Database size" xml:space="preserve">
<value>Datenbankgröße insgesamt</value>
</data>
</root>

View File

@@ -24,4 +24,316 @@
<data name="IrreversibleActionWarning" xml:space="preserve">
<value>This action cannot be undone.</value>
</data>
<data name="Searchdomain selection" xml:space="preserve">
<value>Searchdomain selection</value>
</data>
<data name="Create" xml:space="preserve">
<value>Create</value>
</data>
<data name="Searchdomain information and settings" xml:space="preserve">
<value>Searchdomain information and settings</value>
</data>
<data name="Actions" xml:space="preserve">
<value>Actions</value>
</data>
<data name="Rename" xml:space="preserve">
<value>Rename</value>
</data>
<data name="Delete" xml:space="preserve">
<value>Delete</value>
</data>
<data name="Settings" xml:space="preserve">
<value>Settings</value>
</data>
<data name="Cache reconciliation" xml:space="preserve">
<value>Cache reconciliation</value>
</data>
<data name="Update" xml:space="preserve">
<value>Update</value>
</data>
<data name="Search cache" xml:space="preserve">
<value>Search cache</value>
</data>
<data name="Search cache utilization" xml:space="preserve">
<value>Search cache utilization</value>
</data>
<data name="Clear" xml:space="preserve">
<value>Clear</value>
</data>
<data name="Database size" xml:space="preserve">
<value>Database size</value>
</data>
<data name="Add new entity" xml:space="preserve">
<value>Add new entity</value>
</data>
<data name="Entity Details" xml:space="preserve">
<value>Entity Details</value>
</data>
<data name="Attributes" xml:space="preserve">
<value>Attributes</value>
</data>
<data name="Key" xml:space="preserve">
<value>Key</value>
</data>
<data name="Value" xml:space="preserve">
<value>Value</value>
</data>
<data name="Datapoints" xml:space="preserve">
<value>Datapoints</value>
</data>
<data name="Name" xml:space="preserve">
<value>Name</value>
</data>
<data name="ProbMethod" xml:space="preserve">
<value>ProbMethod</value>
</data>
<data name="SimilarityMethod" xml:space="preserve">
<value>SimilarityMethod</value>
</data>
<data name="Close" xml:space="preserve">
<value>Close</value>
</data>
<data name="Query Details" xml:space="preserve">
<value>Query Details</value>
</data>
<data name="Access times" xml:space="preserve">
<value>Access times</value>
</data>
<data name="Results" xml:space="preserve">
<value>Results</value>
</data>
<data name="Score" xml:space="preserve">
<value>Score</value>
</data>
<data name="Query Update" xml:space="preserve">
<value>Query Update</value>
</data>
<data name="Rename searchdomain" xml:space="preserve">
<value>Rename searchdomain</value>
</data>
<data name="Delete searchdomain" xml:space="preserve">
<value>Delete searchdomain</value>
</data>
<data name="Create searchdomain" xml:space="preserve">
<value>Create searchdomain</value>
</data>
<data name="Searchdomain name" xml:space="preserve">
<value>Searchdomain name</value>
</data>
<data name="Enable cache reconciliation" xml:space="preserve">
<value>Enable cache reconciliation</value>
</data>
<data name="Create entity" xml:space="preserve">
<value>Create entity</value>
</data>
<data name="Entity name" xml:space="preserve">
<value>Entity name</value>
</data>
<data name="Probmethod" xml:space="preserve">
<value>Probmethod</value>
</data>
<data name="Add attribute" xml:space="preserve">
<value>Add attribute</value>
</data>
<data name="Probmethod_embedding" xml:space="preserve">
<value>Probmethod_embedding</value>
</data>
<data name="Similarity method" xml:space="preserve">
<value>Similarity method</value>
</data>
<data name="Model" xml:space="preserve">
<value>Model</value>
</data>
<data name="Add datapoint" xml:space="preserve">
<value>Add datapoint</value>
</data>
<data name="Delete entity" xml:space="preserve">
<value>Delete entity</value>
</data>
<data name="Update entity" xml:space="preserve">
<value>Update entity</value>
</data>
<data name="Action" xml:space="preserve">
<value>Action</value>
</data>
<data name="Delete query" xml:space="preserve">
<value>Delete query</value>
</data>
<data name="Creating entity" xml:space="preserve">
<value>Creating entity</value>
</data>
<data name="Entity was created successfully" xml:space="preserve">
<value>Entity was created successfully</value>
</data>
<data name="Failed to create entity" xml:space="preserve">
<value>Failed to create entity</value>
</data>
<data name="Searchdomain was created successfully" xml:space="preserve">
<value>Searchdomain was created successfully</value>
</data>
<data name="Failed to create searchdomain" xml:space="preserve">
<value>Failed to create searchdomain</value>
</data>
<data name="Searchdomain cache was cleared successfully" xml:space="preserve">
<value>Searchdomain cache was cleared successfully</value>
</data>
<data name="Failed to clear searchdomain cache" xml:space="preserve">
<value>Failed to clear searchdomain cache</value>
</data>
<data name="Entity was deleted successfully" xml:space="preserve">
<value>Entity was deleted successfully</value>
</data>
<data name="Failed to delete entity" xml:space="preserve">
<value>Failed to delete entity</value>
</data>
<data name="Updating entity" xml:space="preserve">
<value>Updating entity</value>
</data>
<data name="Entity was updated successfully" xml:space="preserve">
<value>Entity was updated successfully</value>
</data>
<data name="Failed to update entity" xml:space="preserve">
<value>Failed to update entity</value>
</data>
<data name="Search query was deleted successfully" xml:space="preserve">
<value>Search query was deleted successfully</value>
</data>
<data name="Failed to delete search query" xml:space="preserve">
<value>Failed to delete search query</value>
</data>
<data name="Searchdomain was created successfully" xml:space="preserve">
<value>Searchdomain was created successfully</value>
</data>
<data name="Updating search query failed" xml:space="preserve">
<value>Updating search query failed</value>
</data>
<data name="Searchdomain was deleted successfully" xml:space="preserve">
<value>Searchdomain was deleted successfully</value>
</data>
<data name="Failed to delete searchdomain" xml:space="preserve">
<value>Failed to delete searchdomain</value>
</data>
<data name="Searchdomain was renamed successfully" xml:space="preserve">
<value>Searchdomain was renamed successfully</value>
</data>
<data name="Failed to rename searchdomain" xml:space="preserve">
<value>Failed to rename searchdomain</value>
</data>
<data name="Searchdomain settings were updated successfully" xml:space="preserve">
<value>Searchdomain settings were updated successfully</value>
</data>
<data name="Updating searchdomain settings failed" xml:space="preserve">
<value>Updating searchdomain settings failed</value>
</data>
<data name="Unable to fetch searchdomain config" xml:space="preserve">
<value>Unable to fetch searchdomain config</value>
</data>
<data name="Unable to fetch searchdomain cache utilization" xml:space="preserve">
<value>Unable to fetch searchdomain cache utilization</value>
</data>
<data name="Details" xml:space="preserve">
<value>Details</value>
</data>
<data name="Remove attribute" xml:space="preserve">
<value>Remove attribute</value>
</data>
<data name="Remove" xml:space="preserve">
<value>Remove</value>
</data>
<data name="Close alert" xml:space="preserve">
<value>Close alert</value>
</data>
<data name="Recent queries" xml:space="preserve">
<value>Recent queries</value>
</data>
<data name="Home" xml:space="preserve">
<value>Dashboard</value>
</data>
<data name="Searchdomains" xml:space="preserve">
<value>Searchdomains</value>
</data>
<data name="Swagger" xml:space="preserve">
<value>Swagger</value>
</data>
<data name="Elmah" xml:space="preserve">
<value>Elmah</value>
</data>
<data name="Hi!" xml:space="preserve">
<value>Hi!</value>
</data>
<data name="Hi, {0}!" xml:space="preserve">
<value>Hi, {0}!</value>
</data>
<data name="Embedding Cache" xml:space="preserve">
<value>Embedding Cache</value>
</data>
<data name="Size" xml:space="preserve">
<value>Size</value>
</data>
<data name="Strings" xml:space="preserve">
<value>Strings</value>
</data>
<data name="stringsCountInfo" xml:space="preserve">
<value>The number of strings for which there are embeddings. I.e. If you use two models, the amount of embeddings will be twice this number.</value>
</data>
<data name="Embeddings" xml:space="preserve">
<value>Embeddings</value>
</data>
<data name="Health Checks" xml:space="preserve">
<value>Health Checks</value>
</data>
<data name="Server" xml:space="preserve">
<value>Server</value>
</data>
<data name="AI Providers" xml:space="preserve">
<value>AI Providers</value>
</data>
<data name="Count" xml:space="preserve">
<value>Count</value>
</data>
<data name="Total Entities" xml:space="preserve">
<value>Total Entities</value>
</data>
<data name="Total query cache utilization" xml:space="preserve">
<value>Total query cache utilization</value>
</data>
<data name="Unable to fetch searchdomain database utilization" xml:space="preserve">
<value>Unable to fetch searchdomain database utilization</value>
</data>
<data name="Query cache entry count" xml:space="preserve">
<value>Query cache entry count</value>
</data>
<data name="Query cache capacity (all)" xml:space="preserve">
<value>Query cache capacity (all)</value>
</data>
<data name="queryCacheEntryCountAllInfo" xml:space="preserve">
<value>Number of query cache entries that can be stored in the query cache, including searchdomains that are currently not loaded.</value>
</data>
<data name="Query cache capacity (loaded)" xml:space="preserve">
<value>Query cache capacity (loaded)</value>
</data>
<data name="queryCacheEntryCountLoadedInfo" xml:space="preserve">
<value>Number of query cache entries that can be stored in the query cache of all loaded searchdomains.</value>
</data>
<data name="Query cache size" xml:space="preserve">
<value>Query cache size</value>
</data>
<data name="Embeddings parallel prefetching" xml:space="preserve">
<value>Embeddings parallel prefetching</value>
</data>
<data name="parallelEmbeddingsPrefetchInfo" xml:space="preserve">
<value>With this setting activated the embeddings retrieval will be parallelized when indexing entities. Disable this setting if model unloading is an issue.</value>
</data>
<data name="Add result" xml:space="preserve">
<value>Add result</value>
</data>
<data name="Search query was updated successfully" xml:space="preserve">
<value>Search query was updated successfully</value>
</data>
<data name="Total RAM usage" xml:space="preserve">
<value>Total RAM usage</value>
</data>
<data name="Total Database size" xml:space="preserve">
<value>Total Database size</value>
</data>
</root>

View File

@@ -4,7 +4,9 @@ using System.Text.Json;
using ElmahCore.Mvc.Logger;
using MySql.Data.MySqlClient;
using Server.Helper;
using Shared;
using Shared.Models;
using AdaptiveExpressions;
namespace Server;
@@ -16,16 +18,15 @@ public class Searchdomain
public string searchdomain;
public int id;
public SearchdomainSettings settings;
public Dictionary<string, DateTimedSearchResult> searchCache; // Key: query, Value: Search results for that query (with timestamp)
public EnumerableLruCache<string, DateTimedSearchResult> queryCache; // Key: query, Value: Search results for that query (with timestamp)
public List<Entity> entityCache;
public List<string> modelsInUse;
public Dictionary<string, Dictionary<string, float[]>> embeddingCache;
public int embeddingCacheMaxSize = 10000000;
public EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache;
private readonly MySqlConnection connection;
public SQLHelper helper;
private readonly ILogger _logger;
public Searchdomain(string searchdomain, string connectionString, AIProvider aIProvider, Dictionary<string, Dictionary<string, float[]>> embeddingCache, ILogger logger, string provider = "sqlserver", bool runEmpty = false)
public Searchdomain(string searchdomain, string connectionString, AIProvider aIProvider, EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache, ILogger logger, string provider = "sqlserver", bool runEmpty = false)
{
_connectionString = connectionString;
_provider = provider.ToLower();
@@ -33,12 +34,12 @@ public class Searchdomain
this.aIProvider = aIProvider;
this.embeddingCache = embeddingCache;
this._logger = logger;
searchCache = [];
entityCache = [];
connection = new MySqlConnection(connectionString);
connection.Open();
helper = new SQLHelper(connection, connectionString);
settings = GetSettings();
queryCache = new(settings.QueryCacheSize);
modelsInUse = []; // To make the compiler shut up - it is set in UpdateSearchDomain() don't worry // yeah, about that...
if (!runEmpty)
{
@@ -96,8 +97,16 @@ public class Searchdomain
string probmethodString = datapointReader.GetString(3);
string similarityMethodString = datapointReader.GetString(4);
string hash = datapointReader.GetString(5);
ProbMethod probmethod = new(probmethodString, _logger);
SimilarityMethod similarityMethod = new(similarityMethodString, _logger);
ProbMethodEnum probmethodEnum = (ProbMethodEnum)Enum.Parse(
typeof(ProbMethodEnum),
probmethodString
);
SimilarityMethodEnum similairtyMethodEnum = (SimilarityMethodEnum)Enum.Parse(
typeof(SimilarityMethodEnum),
similarityMethodString
);
ProbMethod probmethod = new(probmethodEnum, _logger);
SimilarityMethod similarityMethod = new(similairtyMethodEnum, _logger);
if (embedding_unassigned.TryGetValue(id, out Dictionary<string, float[]>? embeddings) && probmethod is not null)
{
embedding_unassigned.Remove(id);
@@ -142,7 +151,7 @@ public class Searchdomain
Probmethods.probMethodDelegate? probmethod = Probmethods.GetMethod(probmethodString);
if (datapoint_unassigned.TryGetValue(id, out List<Datapoint>? datapoints) && probmethod is not null)
{
Entity entity = new(attributes, probmethod, datapoints, name)
Entity entity = new(attributes, probmethod, probmethodString, datapoints, name)
{
id = id
};
@@ -151,29 +160,69 @@ public class Searchdomain
}
entityReader.Close();
modelsInUse = GetModels(entityCache);
embeddingCache = []; // TODO remove this and implement proper remediation to improve performance
}
public List<(float, string)> Search(string query, int? topN = null)
{
if (searchCache.TryGetValue(query, out DateTimedSearchResult cachedResult))
if (queryCache.TryGetValue(query, out DateTimedSearchResult cachedResult))
{
cachedResult.AccessDateTimes.Add(DateTime.Now);
return [.. cachedResult.Results.Select(r => (r.Score, r.Name))];
}
if (!embeddingCache.TryGetValue(query, out Dictionary<string, float[]>? queryEmbeddings))
{
queryEmbeddings = Datapoint.GenerateEmbeddings(query, modelsInUse, aIProvider);
if (embeddingCache.Count < embeddingCacheMaxSize) // TODO add better way of managing cache limit hits
{ // Idea: Add access count to each entry. On limit hit, sort the entries by access count and remove the bottom 10% of entries
embeddingCache.Add(query, queryEmbeddings);
}
} // TODO implement proper cache remediation for embeddingCache here
Dictionary<string, float[]> queryEmbeddings = GetQueryEmbeddings(query);
List<(float, string)> result = [];
foreach (Entity entity in entityCache)
{
result.Add((EvaluateEntityAgainstQueryEmbeddings(entity, queryEmbeddings), entity.name));
}
IEnumerable<(float, string)> sortedResults = result.OrderByDescending(s => s.Item1);
if (topN is not null)
{
sortedResults = sortedResults.Take(topN ?? 0);
}
List<(float, string)> results = [.. sortedResults];
List<ResultItem> searchResult = new(
[.. sortedResults.Select(r =>
new ResultItem(r.Item1, r.Item2 ))]
);
queryCache.Set(query, new DateTimedSearchResult(DateTime.Now, searchResult));
return results;
}
/// <summary>
/// Returns the model-to-embedding map for <paramref name="query"/>.
/// The cached entry is returned as-is when it already holds an embedding for every model in
/// <c>modelsInUse</c>; otherwise the embeddings are obtained through
/// <c>Datapoint.GetEmbeddings</c> and written back into <c>embeddingCache</c>.
/// </summary>
/// <param name="query">The query text whose embeddings are requested.</param>
/// <returns>Dictionary keyed by model name, holding the embedding vector for each model.</returns>
public Dictionary<string, float[]> GetQueryEmbeddings(string query)
{
    bool hasQuery = embeddingCache.TryGetValue(query, out Dictionary<string, float[]>? queryEmbeddings);
    bool allModelsInQuery = queryEmbeddings is not null && modelsInUse.All(model => queryEmbeddings.ContainsKey(model));
    if (hasQuery && allModelsInQuery && queryEmbeddings is not null)
    {
        return queryEmbeddings;
    }

    queryEmbeddings = Datapoint.GetEmbeddings(query, modelsInUse, aIProvider, embeddingCache);
    if (!embeddingCache.TryGetValue(query, out var embeddingCacheForCurrentQuery))
    {
        embeddingCache.Set(query, queryEmbeddings);
    }
    else // embeddingCache already has an entry for this query, so the missing model-embedding pairs have to be filled in
    {
        foreach (KeyValuePair<string, float[]> kvp in queryEmbeddings) // kvp.Key = model, kvp.Value = embedding
        {
            // The lookup must target the per-query dictionary (keyed by model),
            // not the outer cache (keyed by query text).
            if (!embeddingCacheForCurrentQuery.ContainsKey(kvp.Key))
            {
                embeddingCacheForCurrentQuery[kvp.Key] = kvp.Value;
            }
        }
    }
    return queryEmbeddings;
}
/// <summary>
/// Recomputes <c>modelsInUse</c> by passing a list copy of <c>entityCache</c> to <c>GetModels</c>.
/// </summary>
public void UpdateModelsInUse()
{
    List<Entity> entitySnapshot = entityCache.ToList();
    modelsInUse = GetModels(entitySnapshot);
}
private static float EvaluateEntityAgainstQueryEmbeddings(Entity entity, Dictionary<string, float[]> queryEmbeddings)
{
List<(string, float)> datapointProbs = [];
foreach (Datapoint datapoint in entity.datapoints)
@@ -188,25 +237,14 @@ public class Searchdomain
}
datapointProbs.Add((datapoint.name, datapoint.probMethod.method(list)));
}
result.Add((entity.probMethod(datapointProbs), entity.name));
}
IEnumerable<(float, string)> sortedResults = result.OrderByDescending(s => s.Item1);
if (topN is not null)
{
sortedResults = sortedResults.Take(topN ?? 0);
}
List<(float, string)> results = [.. sortedResults];
List<ResultItem> searchResult = new(
[.. sortedResults.Select(r =>
new ResultItem(r.Item1, r.Item2 ))]
);
searchCache[query] = new DateTimedSearchResult(DateTime.Now, searchResult);
return results;
return entity.probMethod(datapointProbs);
}
public static List<string> GetModels(List<Entity> entities)
{
List<string> result = [];
lock (entities)
{
foreach (Entity entity in entities)
{
foreach (Datapoint datapoint in entity.datapoints)
@@ -221,6 +259,7 @@ public class Searchdomain
}
}
}
}
return result;
}
@@ -239,19 +278,70 @@ public class Searchdomain
public SearchdomainSettings GetSettings()
{
Dictionary<string, dynamic> parameters = new()
return DatabaseHelper.GetSearchdomainSettings(helper, searchdomain);
}
public void ReconciliateOrInvalidateCacheForNewOrUpdatedEntity(Entity entity)
{
["name"] = searchdomain
};
DbDataReader reader = helper.ExecuteSQLCommand("SELECT settings from searchdomain WHERE name = @name", parameters);
reader.Read();
string settingsString = reader.GetString(0);
reader.Close();
return JsonSerializer.Deserialize<SearchdomainSettings>(settingsString);
if (settings.CacheReconciliation)
{
foreach (var element in queryCache)
{
string query = element.Key;
DateTimedSearchResult searchResult = element.Value;
Dictionary<string, float[]> queryEmbeddings = GetQueryEmbeddings(query);
float evaluationResult = EvaluateEntityAgainstQueryEmbeddings(entity, queryEmbeddings);
searchResult.Results.RemoveAll(x => x.Name == entity.name); // If entity already exists in that results list: remove it.
ResultItem newItem = new(evaluationResult, entity.name);
int index = searchResult.Results.BinarySearch(
newItem,
Comparer<ResultItem>.Create((a, b) => b.Score.CompareTo(a.Score)) // Invert searching order
);
if (index < 0) // If not found, BinarySearch gives the bitwise complement
index = ~index;
searchResult.Results.Insert(index, newItem);
}
}
else
{
InvalidateSearchCache();
}
}
/// <summary>
/// Updates the query cache after <paramref name="entity"/> has been deleted.
/// With <c>settings.CacheReconciliation</c> enabled, the entity is removed from every cached
/// result list; otherwise the whole query cache is discarded via <c>InvalidateSearchCache</c>.
/// </summary>
/// <param name="entity">The deleted entity; cached results are matched against its <c>name</c>.</param>
public void ReconciliateOrInvalidateCacheForDeletedEntity(Entity entity)
{
    if (!settings.CacheReconciliation)
    {
        InvalidateSearchCache();
        return;
    }

    foreach (KeyValuePair<string, DateTimedSearchResult> element in queryCache)
    {
        // Only the result lists are mutated; the cache's own key set is left untouched.
        element.Value.Results.RemoveAll(x => x.Name == entity.name);
    }
}
public void InvalidateSearchCache()
{
searchCache = [];
queryCache = new(settings.QueryCacheSize);
}
/// <summary>
/// Estimates the size of the query cache by summing, per entry, an <c>int</c>-sized length
/// prefix, the key's characters and the value's own <c>EstimateSize()</c>.
/// </summary>
/// <returns>The accumulated size estimate.</returns>
public long GetSearchCacheSize()
{
    long queryCacheBytes = 0;
    foreach (KeyValuePair<string, DateTimedSearchResult> cached in queryCache)
    {
        string cachedQuery = cached.Key;
        queryCacheBytes += sizeof(int); // string length prefix
        queryCacheBytes += cachedQuery.Length * sizeof(char); // string characters
        queryCacheBytes += cached.Value.EstimateSize();
    }
    return queryCacheBytes;
}
}

View File

@@ -3,41 +3,51 @@ using System.Data.Common;
using Server.Migrations;
using Server.Helper;
using Server.Exceptions;
using AdaptiveExpressions;
using Shared.Models;
using System.Text.Json;
using Microsoft.Extensions.Options;
using Server.Models;
using Shared;
using System.Diagnostics;
namespace Server;
public class SearchdomainManager
public class SearchdomainManager : IDisposable
{
private Dictionary<string, Searchdomain> searchdomains = [];
private readonly ILogger<SearchdomainManager> _logger;
private readonly IConfiguration _config;
private readonly EmbeddingSearchOptions _options;
public readonly AIProvider aIProvider;
private readonly DatabaseHelper _databaseHelper;
private readonly string connectionString;
private MySqlConnection connection;
public SQLHelper helper;
public Dictionary<string, Dictionary<string, float[]>> embeddingCache;
public EnumerableLruCache<string, Dictionary<string, float[]>> embeddingCache;
public long EmbeddingCacheMaxCount;
private bool disposed = false;
public SearchdomainManager(ILogger<SearchdomainManager> logger, IConfiguration config, AIProvider aIProvider, DatabaseHelper databaseHelper)
public SearchdomainManager(ILogger<SearchdomainManager> logger, IOptions<EmbeddingSearchOptions> options, AIProvider aIProvider, DatabaseHelper databaseHelper)
{
_logger = logger;
_config = config;
_options = options.Value;
this.aIProvider = aIProvider;
_databaseHelper = databaseHelper;
embeddingCache = [];
connectionString = _config.GetSection("Embeddingsearch").GetConnectionString("SQL") ?? "";
EmbeddingCacheMaxCount = _options.Cache.CacheTopN;
if (options.Value.Cache.StoreEmbeddingCache)
{
var stopwatch = Stopwatch.StartNew();
embeddingCache = CacheHelper.GetEmbeddingStore(options.Value);
stopwatch.Stop();
_logger.LogInformation("GetEmbeddingStore completed in {ElapsedMilliseconds} ms", stopwatch.ElapsedMilliseconds);
} else
{
embeddingCache = new((int)EmbeddingCacheMaxCount);
}
connectionString = _options.ConnectionStrings.SQL;
connection = new MySqlConnection(connectionString);
connection.Open();
helper = new SQLHelper(connection, connectionString);
try
{
DatabaseMigrations.Migrate(helper);
}
catch (Exception ex)
{
_logger.LogCritical("Unable to migrate the database due to the exception: {ex}", [ex.Message]);
throw;
}
}
public Searchdomain GetSearchdomain(string searchdomain)
@@ -66,7 +76,7 @@ public class SearchdomainManager
{
var searchdomain = GetSearchdomain(searchdomainName);
searchdomain.UpdateEntityCache();
searchdomain.InvalidateSearchCache(); // TODO implement cache remediation (Suggestion: searchdomain-wide setting for cache remediation / invalidation - )
searchdomain.InvalidateSearchCache();
}
public List<string> ListSearchdomains()
@@ -75,15 +85,25 @@ public class SearchdomainManager
{
DbDataReader reader = helper.ExecuteSQLCommand("SELECT name FROM searchdomain", []);
List<string> results = [];
try
{
while (reader.Read())
{
results.Add(reader.GetString(0));
}
reader.Close();
return results;
}
finally
{
reader.Close();
}
}
}
/// <summary>
/// Creates a searchdomain from a typed settings object by serializing it to JSON and
/// delegating to the string-based <c>CreateSearchdomain</c> overload.
/// </summary>
/// <param name="searchdomain">Name of the searchdomain to create.</param>
/// <param name="settings">Settings that are serialized with <c>JsonSerializer</c>.</param>
/// <returns>The value returned by the string-based overload.</returns>
public int CreateSearchdomain(string searchdomain, SearchdomainSettings settings)
{
    string serializedSettings = JsonSerializer.Serialize(settings);
    return CreateSearchdomain(searchdomain, serializedSettings);
}
public int CreateSearchdomain(string searchdomain, string settings = "{}")
{
if (searchdomains.TryGetValue(searchdomain, out Searchdomain? value))
@@ -113,4 +133,44 @@ public class SearchdomainManager
searchdomains[name] = searchdomain;
return searchdomain;
}
/// <summary>
/// Reports whether a searchdomain with the given name is present in the in-memory
/// <c>searchdomains</c> dictionary.
/// </summary>
/// <param name="name">Searchdomain name to look up.</param>
/// <returns><c>true</c> if the dictionary contains <paramref name="name"/>; otherwise <c>false</c>.</returns>
public bool IsSearchdomainLoaded(string name) => searchdomains.ContainsKey(name);
/// <summary>
/// Cleanup procedure: when <c>Cache.StoreEmbeddingCache</c> is enabled, writes the embedding
/// cache through <c>CacheHelper.UpdateEmbeddingStore</c> and logs how long that took.
/// Any exception is logged and swallowed.
/// </summary>
private async Task Cleanup()
{
    try
    {
        bool persistEmbeddingCache = _options.Cache.StoreEmbeddingCache;
        if (persistEmbeddingCache)
        {
            Stopwatch timer = Stopwatch.StartNew();
            await CacheHelper.UpdateEmbeddingStore(embeddingCache, _options);
            timer.Stop();
            long elapsedMilliseconds = timer.ElapsedMilliseconds;
            _logger.LogInformation("UpdateEmbeddingStore completed in {ElapsedMilliseconds} ms", elapsedMilliseconds);
        }
        _logger.LogInformation("SearchdomainManager cleanup completed");
    }
    catch (Exception cleanupError)
    {
        _logger.LogError(cleanupError, "Error during SearchdomainManager cleanup");
    }
}
/// <summary>
/// Disposes the manager synchronously: starts <c>Dispose(true)</c>, blocks the calling thread
/// on the returned task with <c>Wait()</c>, then suppresses finalization for this instance.
/// </summary>
public void Dispose()
{
// NOTE(review): this blocks on async work; confirm no caller invokes it from a context where blocking is a problem.
Dispose(true).Wait();
GC.SuppressFinalize(this);
}
/// <summary>
/// Runs <c>Cleanup</c> once. Does nothing when the instance is already marked disposed or when
/// <paramref name="disposing"/> is <c>false</c>.
/// </summary>
/// <param name="disposing"><c>true</c> when called from <c>Dispose()</c>.</param>
protected virtual async Task Dispose(bool disposing)
{
    if (disposed || !disposing)
    {
        return;
    }

    await Cleanup();
    disposed = true; // set only after the cleanup has finished
}
}

View File

@@ -1,26 +1,33 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<TargetFramework>net10.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>
<PropertyGroup>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<NoWarn>$(NoWarn);1591</NoWarn>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AdaptiveExpressions" Version="4.23.1" />
<PackageReference Include="ElmahCore" Version="2.1.2" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="Serilog.AspNetCore" Version="9.0.0" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="10.0.2" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.4" />
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
<PackageReference Include="Serilog.Sinks.File" Version="7.0.0" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.6.2" />
<PackageReference Include="Microsoft.Data.SqlClient" Version="6.0.1" />
<PackageReference Include="Microsoft.Data.Sqlite" Version="9.0.3" />
<PackageReference Include="MySql.Data" Version="9.2.0" />
<PackageReference Include="Npgsql" Version="9.0.3" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="10.1.0" />
<PackageReference Include="Microsoft.Data.SqlClient" Version="6.1.4" />
<PackageReference Include="Microsoft.Data.Sqlite" Version="10.0.2" />
<PackageReference Include="MySql.Data" Version="9.6.0" />
<PackageReference Include="Npgsql" Version="10.0.1" />
<PackageReference Include="OllamaSharp" Version="5.2.2" />
<PackageReference Include="System.Configuration.ConfigurationManager" Version="9.0.3" />
<PackageReference Include="System.Configuration.ConfigurationManager" Version="10.0.2" />
<PackageReference Include="System.Data.SqlClient" Version="4.9.0" />
<PackageReference Include="System.Data.Sqlite" Version="1.0.119" />
<PackageReference Include="System.Numerics.Tensors" Version="9.0.3" />
<PackageReference Include="System.Data.Sqlite" Version="2.0.2" />
<PackageReference Include="System.Numerics.Tensors" Version="10.0.2" />
</ItemGroup>
<ItemGroup>

View File

@@ -1,16 +1,18 @@
using System.Numerics.Tensors;
using System.Text.Json;
using Shared.Models;
namespace Server;
public class SimilarityMethod
{
public SimilarityMethods.similarityMethodDelegate method;
public SimilarityMethodEnum similarityMethodEnum;
public string name;
public SimilarityMethod(string name, ILogger logger)
public SimilarityMethod(SimilarityMethodEnum similarityMethodEnum, ILogger logger)
{
this.name = name;
this.similarityMethodEnum = similarityMethodEnum;
this.name = similarityMethodEnum.ToString();
SimilarityMethods.similarityMethodDelegate? probMethod = SimilarityMethods.GetMethod(name);
if (probMethod is null)
{
@@ -21,14 +23,6 @@ public class SimilarityMethod
}
}
public enum SimilarityMethodEnum
{
Cosine,
Euclidian,
Manhattan,
Pearson
}
public static class SimilarityMethods
{
public delegate float similarityMethodProtoDelegate(float[] vector1, float[] vector2);

View File

@@ -0,0 +1,127 @@
import { generate } from 'critical';
import fs from 'fs';
import path from 'path';
import puppeteer from 'puppeteer';
// Log in once with a headless browser and keep the session cookies so the
// critical-CSS generator can request authenticated pages.
// The credentials default to the local development account and can be
// overridden with CRITICALCSS_USERNAME / CRITICALCSS_PASSWORD.
const browser = await puppeteer.launch();
let cookies;
try {
    const page = await browser.newPage();
    // Login
    await page.goto('http://localhost:5146/Account/Login');
    await page.type('#username', process.env.CRITICALCSS_USERNAME ?? 'admin');
    await page.type('#password', process.env.CRITICALCSS_PASSWORD ?? 'UnsafePractice.67');
    // Start waiting before clicking so a fast navigation cannot be missed.
    await Promise.all([
        page.waitForNavigation(),
        page.click('button[type=submit]')
    ]);
    // Extract cookies
    cookies = await page.cookies();
} finally {
    // Always close the browser; otherwise a failed login leaves the process hanging.
    await browser.close();
}
/**
 * Generates one critical-CSS file per Razor view.
 *
 * Walks `../../Views` for `.cshtml` files (skipping any path that contains a
 * segment starting with `_`), derives the URL path of each view, requests it
 * from the local server with `?noCriticalCSS`, and writes the result to
 * `../../CriticalCSS/<Controller>.<Action>.css`.
 * Errors for a single view are logged and do not stop the remaining views.
 */
async function generateCriticalCSSForViews() {
    const viewsDir = '../../Views';

    // Helper function to get all .cshtml files recursively
    function getAllCshtmlFiles(dir) {
        let results = [];
        for (const file of fs.readdirSync(dir)) {
            const filePath = path.join(dir, file);
            const stat = fs.statSync(filePath);
            if (stat && stat.isDirectory()) {
                // Recursively get files from subdirectories
                results = results.concat(getAllCshtmlFiles(filePath));
            } else if (file.endsWith('.cshtml') && !/[\/\\]_/.test(filePath)) {
                // Paths with a segment starting with "_" (either separator) are skipped.
                results.push(filePath);
            }
        }
        return results;
    }

    // Helper function to convert file path to URL path:
    // strips a leading "Views/" and the .cshtml extension, and uses "/" as separator.
    function filePathToUrlPath(filePath) {
        return '/' + filePath
            .replace(/^Views[\/\\]/, '')
            .replace(/\.cshtml$/, '')
            .replace(/\\/g, '/');
    }

    // Get all .cshtml files
    const cshtmlFiles = getAllCshtmlFiles(viewsDir);
    const criticalCssDir = '.';
    // if (!fs.existsSync(criticalCssDir)) {
    //     fs.mkdirSync(criticalCssDir, { recursive: true });
    // }

    // The cookie header is the same for every request, so build it once.
    const cookieHeader = cookies.map(c => `${c.name}=${c.value}`).join('; ');

    // Process each file
    for (const file of cshtmlFiles) {
        try {
            // Drop the "../../Views" prefix that comes from viewsDir.
            const urlPath = filePathToUrlPath(file).replace("../", "").replace("../", "").replace("/Views", "");
            // Generate critical CSS
            await generate({
                src: `http://localhost:5146${urlPath}?noCriticalCSS`,
                inline: false,
                width: 1920,
                height: 1080,
                penthouse: {
                    customHeaders: {
                        cookie: cookieHeader
                    },
                    forceExclude: ['.btn'], // Otherwise buttons end up colorless and .btn overrides other classes like .btn-warning, etc. - so it has to be force-excluded here and re-added later
                    forceInclude: [
                        '[data-bs-theme="dark"]', '[data-bs-theme="dark"] body', '[data-bs-theme="dark"] .navbar', '[data-bs-theme="dark"] .card', '[data-bs-theme="dark"] .btn',
                        '.col-md-4',
                        '.navbar', '.ms-auto', '.dropdown', '.dropdown-menu',
                        '.visually-hidden', // visually hidden headings
                        '.bi-info-circle-fill', '.text-info', // info icon
                        '.container', '.col-md-6', '.row', '.g-4', '.row>*',
                        'p', '.fs-3', '.py-4', // title
                        '.mb-4',
                        '.card', '.card-body', '.p-2', // card
                        'h2', '.card-title', '.fs-5', // card - title
                        '.d-flex', '.justify-content-between', '.mt-2', // card - content
                        '.progress', '.mt-3', // card - progress bar
                        '.list-group', '.list-group-flush', '.list-group-item', '.list-group-flush>.list-group-item', '.list-group-flush>.list-group-item:last-child', '.badge', '.bg-warning', '.bg-success', '.h-100', // card - health check list
                        '.btn-primary', '.btn-warning', '.btn-danger', '.btn-info', // Searchdomains buttons
                        '.col-md-8', '.sidebar',
                        '.mb-0', '.mb-2', '.align-items-center',
                        'h3', '.col-md-3', '.col-md-2', '.text-nowrap', '.overflow-auto'
                    ]
                },
                target: {
                    css: path.join(criticalCssDir, "../../CriticalCSS/" + urlPath.replace(/\//g, '.').replace(/^\./, '').replace("...", "") + '.css')
                }
            });
            console.log(`Critical CSS generated for: ${urlPath}`);
        } catch (err) {
            console.error(`Error processing ${file}:`, err);
        }
    }
    console.log('All critical CSS files generated!');
}
// Run the function
generateCriticalCSSForViews().catch(console.error);

View File

@@ -0,0 +1,10 @@
# How to use CriticalCSS
1. Install the dependencies (run these commands in this directory):
```bash
npm i -D critical
npm install puppeteer
```
2. Start the server so that it is reachable at `http://localhost:5146`, then run the CSS generator:
```bash
node CriticalCSSGenerator.js
```

View File

@@ -0,0 +1,78 @@
import re
import sys
def extract_translations_from_View(view_path):
    """Collect every ``T["..."]`` localization key used in a view file.

    Args:
        view_path: Path of the view file to scan.

    Returns:
        dict mapping each key to the 1-based line number of its first
        occurrence in the file.

    Exits the process with status 1 if the file is missing or cannot be read.
    """
    translations = {}
    try:
        with open(view_path, 'r', encoding='utf-8') as view_file:
            for line_num, line in enumerate(view_file, 1):
                # Match T["..."] patterns
                for key in re.findall(r'T\["([^"]*)"\]', line):
                    # Keep the first occurrence so the reported line is where the key first appears.
                    if key not in translations:
                        translations[key] = line_num
    except FileNotFoundError:
        print(f"Error: File {view_path} not found")
        sys.exit(1)
    except Exception as e:
        print(f"Error reading file {view_path}: {e}")
        sys.exit(1)
    return translations
def extract_localizations_from_resource_file(file_b_path):
    """Collect the names of all ``<data name="...">`` entries in a .resx file.

    Args:
        file_b_path: Path of the resource (.resx) file to scan.

    Returns:
        set of resource names, with XML character entities (``&amp;``,
        ``&lt;``, ...) decoded so they compare equal to the keys used in views.

    Exits the process with status 1 if the file is missing or cannot be read.
    """
    from html import unescape  # local import: decodes the XML entities used in attribute values

    translations = set()
    try:
        with open(file_b_path, 'r', encoding='utf-8') as resource_file:
            for line in resource_file:
                # Match every <data name="..."> on the line
                for name in re.findall(r'<data name="([^"]*)"', line):
                    translations.add(unescape(name))
    except FileNotFoundError:
        print(f"Error: File {file_b_path} not found")
        sys.exit(1)
    except Exception as e:
        print(f"Error reading file {file_b_path}: {e}")
        sys.exit(1)
    return translations
def find_missing_translations(view, resource):
    """Return the localization keys used in ``view`` that ``resource`` does not define.

    Args:
        view: Path of the view file to scan for ``T["..."]`` keys.
        resource: Path of the .resx file whose entry names are checked.

    Returns:
        list of ``(key, line_number)`` tuples, in the order the keys were
        collected from the view.
    """
    used_keys = extract_translations_from_View(view)
    defined_names = extract_localizations_from_resource_file(resource)
    return [
        (key, line_number)
        for key, line_number in used_keys.items()
        if key not in defined_names
    ]
def main():
    """Check each listed view against each listed resource file and print the result.

    Paths are resolved relative to the current working directory
    (``../../Views/`` and ``../../Resources/``).
    """
    views = ["Shared/_Layout.cshtml", "Home/Index.cshtml", "Home/Searchdomains.cshtml"]
    resources = ["SharedResources.en.resx", "SharedResources.de.resx"]
    print("Checking for missing translations...")
    print("=" * 50)
    for view in views:
        for resource in resources:
            missing = find_missing_translations("../../Views/" + view, "../../Resources/" + resource)
            if missing:
                # Name the resource as well: every view is checked against several resource files.
                print(f"Found {len(missing)} missing translations in {view} for {resource}:")
                print("-" * 50)
                for translation_text, line_number in missing:
                    print(f"Line {line_number}: T[\"{translation_text}\"]")
            else:
                print(f"All localizations in {view} have a matching resource in {resource}!")


if __name__ == "__main__":
    main()

View File

@@ -1,3 +1,4 @@
@using Microsoft.Extensions.Primitives
@using Server.Services
@inject LocalizationService T
@{
@@ -9,6 +10,10 @@
<h1>Login</h1>
<form asp-action="Login" method="post" class="mt-4" style="max-width: 400px; margin: auto;">
<div class="form-group mb-3">
@if (Context.Request.Query.TryGetValue("ReturnUrl", out StringValues returnUrl))
{
<input type="hidden" name="ReturnUrl" value="@(returnUrl)" />
}
<label for="username" class="form-label">@T["Username"]</label>
<input autofocus type="text" class="form-control" id="username" name="username" autocomplete="username" required>
</div>

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +0,0 @@
@{
ViewData["Title"] = "Privacy Policy";
}
<h1>@ViewData["Title"]</h1>
<p>Use this page to detail your site's privacy policy.</p>

File diff suppressed because it is too large Load Diff

View File

@@ -1,17 +1,60 @@
<!DOCTYPE html>
<html lang="en">
@using System.Globalization
@using Server.Services
@using System.Net
@inject LocalizationService T
@{
    // The current path is embedded as the ReturnUrl query-string value of the Tools links,
    // so it has to be URL-encoded. Razor HTML-encodes @(...) output on its own, which means
    // HTML-encoding here as well would encode the value twice.
    var currentUrl = WebUtility.UrlEncode(Context.Request.Path);
}
<!DOCTYPE html>
<html lang="@CultureInfo.CurrentUICulture.TwoLetterISOLanguageName">
<head>
<meta charset="utf-8" />
<meta name="description" content="Embeddingsearch server" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>@ViewData["Title"] - embeddingsearch</title>
<link rel="stylesheet" href="~/lib/bootstrap/dist/css/bootstrap.min.css" />
<link rel="stylesheet" href="~/css/site.css" asp-append-version="true" />
<link rel="preload" href="~/fonts/bootstrap-icons.woff2" as="font" type="font/woff2" crossorigin="anonymous"/>
@if (!Context.Request.Query.ContainsKey("renderRaw") && !Context.Request.Query.ContainsKey("noCriticalCSS"))
{
<link rel="preload" href="~/lib/bootstrap/dist/css/bootstrap.min.css" as="style"/>
<link rel="stylesheet" fetchpriority="high"
href="~/lib/bootstrap/dist/css/bootstrap.min.css"
media="print"
onload="this.media='all'">
} else if (Context.Request.Query.ContainsKey("noCriticalCSS"))
{
<link rel="preload" href="~/lib/bootstrap/dist/css/bootstrap.min.css" as="style"/>
<link rel="stylesheet" fetchpriority="high"
href="~/lib/bootstrap/dist/css/bootstrap.min.css">
}
<style>
@Html.Raw(File.ReadAllText(System.IO.Path.Combine(Directory.GetCurrentDirectory(), "wwwroot", "css", "site.css")))
</style>
@if (!Context.Request.Query.ContainsKey("noCriticalCSS"))
{
<style>
@if (Context.Request.Path.Value is not null)
{
string path = System.IO.Path.Combine("CriticalCSS", Context.Request.Path.Value.Trim('/').Replace("/", ".") + ".css");
if (File.Exists(path))
{
@Html.Raw(File.ReadAllText(path));
}
}
</style>
}
<script>
window.appTranslations = {
closeAlert: '@T["Close alert"]'
};
</script>
</head>
<body>
<body data-bs-theme="dark">
<header>
<nav class="navbar navbar-expand-sm navbar-toggleable-sm navbar-light bg-white border-bottom box-shadow mb-3">
<nav class="navbar navbar-expand-sm navbar-toggleable-sm navbar-light border-bottom box-shadow mb-3">
<div class="container-fluid">
<a class="navbar-brand" asp-area="" asp-controller="Home" asp-action="Index">embeddingsearch</a>
<a class="navbar-brand" asp-area="" asp-controller="Home" asp-action="Index">
<img fetchpriority="high" alt="Logo" src="/logo.png" width="40" height="40" style="width: 40px; height: 40px;">
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target=".navbar-collapse" aria-controls="navbarSupportedContent"
aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
@@ -21,16 +64,34 @@
@if (User.Identity?.IsAuthenticated == true)
{
<li class="nav-item">
<a class="nav-link text-dark" asp-area="" asp-controller="Home" asp-action="Index">Home</a>
<a class="nav-link text" asp-area="" asp-controller="Home" asp-action="Index">@T["Home"]</a>
</li>
<li class="nav-item">
<a class="nav-link text-dark" asp-area="" asp-controller="Account" asp-action="Logout">Logout</a>
<a class="nav-link text" asp-area="" asp-controller="Home" asp-action="Searchdomains">@T["Searchdomains"]</a>
</li>
@if (User.IsInRole("Admin") || User.IsInRole("Swagger"))
{
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdownMenuLink" data-bs-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
@T["Tools"]
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownMenuLink">
<a class="dropdown-item" href="/swagger/index.html?ReturnUrl=@(currentUrl)">@T["Swagger"]</a>
@if (User.IsInRole("Admin"))
{
<a class="dropdown-item" href="/elmah?ReturnUrl=@(currentUrl)">@T["Elmah"]</a>
}
</div>
</li>
}
<li class="nav-item ms-auto">
<a class="nav-link text" asp-area="" asp-controller="Account" asp-action="Logout">@T["Logout"]</a>
</li>
}
else
{
<li class="nav-item">
<a class="nav-link text-dark" asp-area="" asp-controller="Account" asp-action="Login">Login</a>
<a class="nav-link text" asp-area="" asp-controller="Account" asp-action="Login">@T["Login"]</a>
</li>
}
</ul>
@@ -46,12 +107,25 @@
<footer class="border-top footer text-muted">
<div class="container">
&copy; 2025 - embeddingsearch - <a asp-area="" asp-controller="Home" asp-action="Privacy">Privacy</a>
&copy; 2025 - embeddingsearch
</div>
</footer>
<script src="~/lib/jquery/dist/jquery.min.js"></script>
<script src="~/lib/bootstrap/dist/js/bootstrap.bundle.min.js"></script>
<script src="~/js/site.js" asp-append-version="true"></script>
<script src="~/lib/jquery/dist/jquery.min.js" defer></script>
<script src="~/lib/bootstrap/dist/js/bootstrap.bundle.min.js" defer></script>
<script src="~/js/site.js" asp-append-version="true" defer></script>
@await RenderSectionAsync("Scripts", required: false)
</body>
</html>
<script>
// Keep the Bootstrap colour theme in sync with the colour scheme preferred by the browser/OS.
const mediaQuery = window.matchMedia('(prefers-color-scheme: dark)');

// Writes "dark" or "light" to the body's data-bs-theme attribute.
// Accepts anything exposing a boolean "matches" (the MediaQueryList itself or its change event).
function applyTheme(e) {
    const theme = e.matches ? 'dark' : 'light';
    document.body.dataset.bsTheme = theme;
}

// Apply once right away, then again whenever the preference changes.
applyTheme(mediaQuery);
mediaQuery.addEventListener('change', applyTheme);
</script>

View File

@@ -15,27 +15,42 @@
"Embeddingsearch": {
"ConnectionStrings": {
"SQL": "server=localhost;database=embeddingsearch;uid=embeddingsearch;pwd=somepassword!;"
"SQL": "server=localhost;database=embeddingsearch;uid=embeddingsearch;pwd=somepassword!;",
"Cache": "Data Source=embeddings.db;Mode=ReadWriteCreate;Cache=Shared"
},
"Elmah": {
"AllowedHosts": [
"127.0.0.1",
"::1",
"172.17.0.1"
]
"LogPath": "~/logs"
},
"AiProviders": {
"ollama": {
"handler": "ollama",
"baseURL": "http://localhost:11434"
"baseURL": "http://localhost:11434",
"Allowlist": [".*"],
"Denylist": ["qwen3-coder:latest", "qwen3:0.6b", "deepseek-v3.1:671b-cloud", "qwen3-vl", "deepseek-ocr"]
},
"localAI": {
"handler": "openai",
"baseURL": "http://localhost:8080",
"ApiKey": "Some API key here"
"ApiKey": "Some API key here",
"Allowlist": [".*"],
"Denylist": ["cross-encoder", "kitten-tts", "jina-reranker-v1-tiny-en", "whisper-small", "qwen3-vl-2b-instruct"]
}
},
"SimpleAuth": {
"Users": [
{
"Username": "admin",
"Password": "UnsafePractice.67",
"Roles": ["Admin"]
}
]
},
"ApiKeys": ["Some UUID here", "Another UUID here"],
"UseHttpsRedirection": true
"UseHttpsRedirection": true,
"Cache": {
"CacheTopN": 100000,
"StoreEmbeddingCache": true,
"StoreTopN": 20000
}
}
}

View File

@@ -15,27 +15,41 @@
"UseSwagger": true,
"Embeddingsearch": {
"ConnectionStrings": {
"SQL": "server=localhost;database=embeddingsearch;uid=embeddingsearch;pwd=somepassword!;"
"SQL": "server=localhost;database=embeddingsearch;uid=embeddingsearch;pwd=somepassword!;",
"Cache": "Data Source=embeddings.db;Mode=ReadWriteCreate;Cache=Shared"
},
"Elmah": {
"AllowedHosts": [
"127.0.0.1",
"::1",
"172.17.0.1"
]
"LogPath": "~/logs"
},
"AiProviders": {
"ollama": {
"handler": "ollama",
"baseURL": "http://localhost:11434"
"baseURL": "http://localhost:11434",
"Allowlist": [".*"],
"Denylist": ["qwen3-coder:latest", "qwen3:0.6b", "qwen3-vl", "deepseek-ocr"]
},
"localAI": {
"handler": "openai",
"baseURL": "http://localhost:8080",
"ApiKey": "Some API key here"
"ApiKey": "Some API key here",
"Allowlist": [".*"],
"Denylist": ["cross-encoder", "jina-reranker-v1-tiny-en", "whisper-small"]
}
},
"ApiKeys": ["Some UUID here", "Another UUID here"],
"UseHttpsRedirection": true
"SimpleAuth": {
"Users": [
{
"Username": "admin",
"Password": "UnsafePractice.67",
"Roles": ["Admin"]
}
]
},
"ApiKeys": ["APIKeyOfYourChoice", "AnotherOneIfYouLike"],
"Cache": {
"CacheTopN": 10000,
"StoreEmbeddingCache": true,
"StoreTopN": 10000
}
}
}

View File

@@ -16,14 +16,8 @@
"Application": "Embeddingsearch.Server"
}
},
"EmbeddingsearchIndexer": {
"Elmah": {
"AllowedHosts": [
"127.0.0.1",
"::1"
],
"LogFolder": "./logs"
}
"Embeddingsearch": {
"MaxRequestBodySize": 524288000
},
"AllowedHosts": "*"
}

View File

@@ -49,3 +49,38 @@ body {
.modal-title {
font-size: 1.25rem;
}
/* Bootstrap icons */
@font-face {
font-display: block;
font-family: "bootstrap-icons";
src: url("/fonts/bootstrap-icons.woff2") format("woff2"),
url("/fonts/bootstrap-icons.woff") format("woff");
}
.bi::before,
[class^="bi-"]::before,
[class*=" bi-"]::before {
display: inline-block;
font-family: bootstrap-icons !important;
font-style: normal;
font-weight: normal !important;
font-variant: normal;
text-transform: none;
line-height: 1;
vertical-align: -.125em;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
.bi-info-circle-fill::before { content: "\f430"; }
td.btn-group {
display: revert;
min-width: 15rem;
}
[data-bs-theme="light"] img[alt="Logo"] {
filter: invert(100%);
}

View File

@@ -0,0 +1,56 @@
/*
 * Floating pill-shaped "return" link. The companion script creates an
 * <a class="elmah-return-btn"> and appends it to <body>; it also adds the
 * "show-label" class for a short time so the label is revealed once without hovering.
 */
.elmah-return-btn {
    /* Pinned to the top-right corner, above the page content. */
    position: fixed;
    top: 6px;
    right: 24px;
    z-index: 9999;

    /* Centre the icon (and the label, once expanded) inside the pill. */
    display: flex;
    align-items: center;
    justify-content: center;
    height: 44px;
    min-width: 44px;
    padding: 0 14px;

    background: #85ea2d;
    color: black;
    border-radius: 999px;
    font-weight: 600;
    /* Declared once; !important keeps host-page link styles from re-adding an underline. */
    text-decoration: none !important;
    box-shadow: 0 4px 12px rgba(0,0,0,0.2);

    /* Keep the collapsed label clipped and on a single line. */
    overflow: hidden;
    white-space: nowrap;

    transition:
        top 0.25s ease,
        background-color 0.2s ease;
}

/* Label text: collapsed (zero width, transparent) by default. */
.elmah-return-btn::before {
    content: "Return to Front-end";
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    max-width: 0;
    opacity: 0;
    transition:
        max-width 0.3s ease,
        opacity 0.2s ease;
}

/* Expand the label on hover, or while the script has set "show-label". */
.elmah-return-btn.show-label::before,
.elmah-return-btn:hover::before {
    max-width: 220px;
    padding: 0.5rem;
    opacity: 1;
}

/* Colours used in the expanded/hover state. */
.elmah-return-btn.show-label,
.elmah-return-btn:hover {
    background: #0b5ed7;
    color: white;
}

View File

@@ -0,0 +1,17 @@
// Adds a floating "return to front-end" link to the page once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', () => {
    // ReturnUrl is read from the query string and is therefore attacker-controllable.
    // Only same-origin targets are accepted; anything else (absolute URLs to other hosts,
    // protocol-relative "//host" values, "javascript:" URLs, unparseable input) falls back to "/".
    const resolveReturnUrl = () => {
        const requested = new URL(window.location.href).searchParams.get('ReturnUrl');
        if (!requested) {
            return "/";
        }
        try {
            const target = new URL(requested, window.location.origin);
            if (target.origin === window.location.origin) {
                // Re-serialise as a path so the link can never leave this origin.
                return target.pathname + target.search + target.hash;
            }
        } catch {
            // Not a valid URL: use the default below.
        }
        return "/";
    };

    const btn = document.createElement("a");
    btn.href = resolveReturnUrl();
    btn.innerText = "⎋";
    btn.setAttribute("aria-label", "Return to Front-End");
    btn.className = "elmah-return-btn";
    document.body.appendChild(btn);

    // Reveal the text label for two seconds so the button is discoverable without hovering.
    const showLabelBriefly = () => {
        btn.classList.add("show-label");
        setTimeout(() => btn.classList.remove("show-label"), 2000);
    };

    // Start the reveal one second after the DOM is ready.
    setTimeout(showLabelBriefly, 1000);
});

Binary file not shown.

View File

@@ -2,3 +2,60 @@
// for details on configuring this project to bundle and minify static web assets.
// Write your JavaScript code.
/**
 * Creates the host element for toasts, together with a visually hidden live region
 * nested inside it, appends the host to <body> and returns it.
 *
 * @returns {HTMLDivElement} The newly created element with id "toastContainer".
 */
function createToastContainer() {
    // Both elements are <div>s carrying an id, a class list and aria-live/aria-atomic attributes.
    const makeLiveDiv = (id, className, politeness) => {
        const el = document.createElement('div');
        el.id = id;
        el.className = className;
        el.setAttribute('aria-live', politeness);
        el.setAttribute('aria-atomic', 'true');
        return el;
    };

    const host = makeLiveDiv('toastContainer', 'toast-container position-fixed bottom-0 end-0 p-3', 'polite');
    const announcer = makeLiveDiv('toastLiveRegion', 'visually-hidden', 'assertive');

    host.appendChild(announcer);
    document.body.appendChild(host);
    return host;
}
// Simple toast helper
/**
 * Shows a Bootstrap toast in the toast container (created on first use) and mirrors the
 * message into the hidden live region when that region exists.
 *
 * @param {string} message Content of the toast body.
 * @param {string} type    Suffix for the "bg-*" class (e.g. "warning"); "warning" switches
 *                         the toast to dark text and a dark close button.
 */
function showToast(message, type) {
    const toastContainer = document.getElementById('toastContainer') || createToastContainer();
    const useDarkElements = type === "warning";

    const toast = document.createElement('div');
    toast.className = `toast align-items-center ${useDarkElements ? "text-dark" : "text-white"} bg-${type} border-0`;
    toast.role = 'alert';

    const body = document.createElement('div');
    body.className = 'toast-body';
    // NOTE(review): the message is still inserted as HTML, exactly as before, so callers that
    // pass markup keep working. If any caller forwards user- or server-supplied text, switch
    // this to textContent to rule out script injection.
    body.innerHTML = `${message}`;

    // The close button is built through the DOM so the translated label can never break out
    // of the attribute, and a missing window.appTranslations no longer throws.
    const closeButton = document.createElement('button');
    closeButton.type = 'button';
    closeButton.className = `btn-close${useDarkElements ? "" : " btn-close-white"} me-2 m-auto`;
    if (useDarkElements) {
        closeButton.setAttribute('style', 'filter: unset;');
    }
    closeButton.setAttribute('data-bs-dismiss', 'toast');
    closeButton.setAttribute('aria-label', window.appTranslations?.closeAlert ?? 'Close');

    const row = document.createElement('div');
    row.className = 'd-flex';
    row.append(body, closeButton);
    toast.appendChild(row);
    toastContainer.appendChild(toast);

    // Clear the live region first and fill it after a short delay, so the text changes
    // even when the same message is shown twice in a row.
    const liveRegion = document.getElementById('toastLiveRegion');
    if (liveRegion) {
        liveRegion.textContent = '';
        setTimeout(() => liveRegion.textContent = message, 500);
    }

    const bsToast = new bootstrap.Toast(toast, { delay: 10000 });
    bsToast.show();

    // Remove the element from the DOM once Bootstrap has finished hiding it.
    toast.addEventListener('hidden.bs.toast', () => toast.remove());
}
// Once the DOM has been parsed, attach a Bootstrap tooltip to every element that opts in
// via data-bs-toggle="tooltip".
document.addEventListener('DOMContentLoaded', async () => {
    for (const trigger of document.querySelectorAll('[data-bs-toggle="tooltip"]')) {
        new bootstrap.Tooltip(trigger);
        // Set after the tooltip has been created, as before.
        trigger.role = "tooltip";
    }
});

File diff suppressed because one or more lines are too long

View File

Before

Width:  |  Height:  |  Size: 9.1 KiB

After

Width:  |  Height:  |  Size: 9.1 KiB

View File

@@ -0,0 +1,58 @@
/*
 * Floating pill-shaped "return" link. The companion script creates an
 * <a class="swagger-return-btn">, appends it to <body> and toggles the
 * "scrolled" class depending on whether the window is scrolled.
 */
.swagger-return-btn {
position: fixed;
top: 6px;
left: 24px;
z-index: 9999;
display: flex;
align-items: center;
height: 44px;
min-width: 44px;
padding: 0 14px;
background: #85ea2d;
color: black;
border-radius: 999px;
font-weight: 600;
text-decoration: none;
box-shadow: 0 4px 12px rgba(0,0,0,0.2);
/* keep the collapsed label clipped and on a single line */
overflow: hidden;
white-space: nowrap;
justify-content: center;
/* "top" is animated so the move into the scrolled state is smooth */
transition:
top 0.25s ease,
background-color 0.2s ease;
}
/* label text: collapsed (zero width, transparent) by default */
.swagger-return-btn::after {
content: "Return to Front-end";
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
max-width: 0;
opacity: 0;
transition:
max-width 0.3s ease,
opacity 0.2s ease;
}
/* expand the label on hover */
.swagger-return-btn:hover::after {
max-width: 220px;
padding: 0.5rem;
opacity: 1;
}
/* colours used while hovered */
.swagger-return-btn:hover {
background: #0b5ed7;
color: white;
}
/* scrolled state: class added by the script while window.scrollY > 0 */
.swagger-return-btn.scrolled {
top: 24px;
}

View File

@@ -0,0 +1,24 @@
// Adds a floating "return to front-end" link to the page once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', () => {
    // ReturnUrl is read from the query string and is therefore attacker-controllable.
    // Only same-origin targets are accepted; anything else (absolute URLs to other hosts,
    // protocol-relative "//host" values, "javascript:" URLs, unparseable input) falls back to "/".
    const resolveReturnUrl = () => {
        const requested = new URL(window.location.href).searchParams.get('ReturnUrl');
        if (!requested) {
            return "/";
        }
        try {
            const target = new URL(requested, window.location.origin);
            if (target.origin === window.location.origin) {
                // Re-serialise as a path so the link can never leave this origin.
                return target.pathname + target.search + target.hash;
            }
        } catch {
            // Not a valid URL: use the default below.
        }
        return "/";
    };

    const btn = document.createElement("a");
    btn.href = resolveReturnUrl();
    btn.innerText = "⎋";
    btn.setAttribute("aria-label", "Return to Front-End");
    btn.className = "swagger-return-btn";
    document.body.appendChild(btn);

    // Mirror the scroll position into the "scrolled" class, which the stylesheet uses
    // to move the button further down.
    const togglePosition = () => {
        btn.classList.toggle("scrolled", window.scrollY > 0);
    };

    // Initial state
    togglePosition();

    // On scroll (passive: the handler never calls preventDefault)
    window.addEventListener("scroll", togglePosition, { passive: true });
});

View File

@@ -1,21 +1,25 @@
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Options;
using Microsoft.Extensions.Primitives;
using Shared.Models;
namespace Shared;
public class ApiKeyMiddleware
{
private readonly RequestDelegate _next;
private readonly IConfiguration _configuration;
private readonly ApiKeyOptions _configuration;
public ApiKeyMiddleware(RequestDelegate next, IConfiguration configuration)
public ApiKeyMiddleware(RequestDelegate next, IOptions<ApiKeyOptions> configuration)
{
_next = next;
_configuration = configuration;
_configuration = configuration.Value;
}
public async Task InvokeAsync(HttpContext context)
{
if (!(context.User.Identity?.IsAuthenticated ?? false))
{
if (!context.Request.Headers.TryGetValue("X-API-KEY", out StringValues extractedApiKey))
{
@@ -24,15 +28,14 @@ public class ApiKeyMiddleware
return;
}
var validApiKeys = _configuration.GetSection("Embeddingsearch").GetSection("ApiKeys").Get<List<string>>();
#pragma warning disable CS8604
if (validApiKeys == null || !validApiKeys.Contains(extractedApiKey)) // CS8604 extractedApiKey is not null here, but the compiler still thinks that it might be.
string[]? validApiKeys = _configuration.ApiKeys;
if (validApiKeys == null || !validApiKeys.ToList().Contains(extractedApiKey))
{
context.Response.StatusCode = 403;
await context.Response.WriteAsync("Invalid API Key.");
return;
}
#pragma warning restore CS8604
}
await _next(context);
}

240
src/Shared/LRUCache.cs Normal file
View File

@@ -0,0 +1,240 @@
namespace Shared;
/// <summary>
/// A least-recently-used cache guarded by a <see cref="ReaderWriterLockSlim"/>.
/// Entries are kept in a linked list ordered from most to least recently used; whenever the
/// number of entries exceeds <see cref="Capacity"/>, entries are evicted from the tail.
/// </summary>
/// <typeparam name="TKey">Key type.</typeparam>
/// <typeparam name="TValue">Value type.</typeparam>
public sealed class EnumerableLruCache<TKey, TValue> where TKey : notnull
{
    private sealed record CacheItem(TKey Key, TValue Value);

    // Key -> list node, for O(1) lookup; the list itself carries the recency order.
    private readonly Dictionary<TKey, LinkedListNode<CacheItem>> _map;
    private readonly LinkedList<CacheItem> _lruList;
    private readonly ReaderWriterLockSlim _lock = new();
    private int _capacity;

    /// <summary>Creates an empty cache.</summary>
    /// <param name="capacity">Maximum number of entries; must be greater than zero.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="capacity"/> is zero or negative.</exception>
    public EnumerableLruCache(int capacity)
    {
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(capacity);

        _capacity = capacity;
        _map = new Dictionary<TKey, LinkedListNode<CacheItem>>(capacity);
        _lruList = new LinkedList<CacheItem>();
    }

    /// <summary>
    /// Maximum number of entries. Lowering it evicts the least recently used entries
    /// until the cache fits.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The assigned value is zero or negative.</exception>
    public int Capacity
    {
        get
        {
            _lock.EnterReadLock();
            try
            {
                return _capacity;
            }
            finally
            {
                _lock.ExitReadLock();
            }
        }
        set
        {
            ArgumentOutOfRangeException.ThrowIfNegativeOrZero(value);

            _lock.EnterWriteLock();
            try
            {
                _capacity = value;
                TrimIfNeeded();
            }
            finally
            {
                _lock.ExitWriteLock();
            }
        }
    }

    /// <summary>Number of entries currently stored.</summary>
    public int Count
    {
        get
        {
            _lock.EnterReadLock();
            try
            {
                return _map.Count;
            }
            finally
            {
                _lock.ExitReadLock();
            }
        }
    }

    /// <summary>
    /// Gets or sets the value stored under <paramref name="key"/>. Both reading and writing
    /// mark the entry as most recently used.
    /// </summary>
    /// <exception cref="KeyNotFoundException">The getter is used with a key that is not present.</exception>
    public TValue this[TKey key]
    {
        get
        {
            if (!TryGetValue(key, out var value))
                throw new KeyNotFoundException();

            return value!;
        }
        set => Set(key, value);
    }

    /// <summary>
    /// Looks up <paramref name="key"/>; a hit moves the entry to the most-recently-used position.
    /// </summary>
    /// <param name="key">Key to look up.</param>
    /// <param name="value">The stored value on a hit, otherwise <c>default</c>.</param>
    /// <returns><c>true</c> when the key is present.</returns>
    public bool TryGetValue(TKey key, out TValue? value)
    {
        _lock.EnterUpgradeableReadLock();
        try
        {
            if (!_map.TryGetValue(key, out var node))
            {
                value = default;
                return false;
            }

            value = node.Value.Value;

            // Refresh the LRU position. Nothing to do (and no write lock needed) when the
            // entry already is the most recently used one.
            if (!ReferenceEquals(_lruList.First, node))
            {
                _lock.EnterWriteLock();
                try
                {
                    _lruList.Remove(node);
                    _lruList.AddFirst(node);
                }
                finally
                {
                    _lock.ExitWriteLock();
                }
            }

            return true;
        }
        finally
        {
            _lock.ExitUpgradeableReadLock();
        }
    }

    /// <summary>
    /// Adds or replaces the value for <paramref name="key"/> and marks it as most recently used.
    /// Adding a new key may evict the least recently used entry.
    /// </summary>
    public void Set(TKey key, TValue value)
    {
        _lock.EnterWriteLock();
        try
        {
            if (_map.TryGetValue(key, out var existing))
            {
                // Update in place and move to the front.
                existing.Value = existing.Value with { Value = value };
                _lruList.Remove(existing);
                _lruList.AddFirst(existing);
                return;
            }

            var node = new LinkedListNode<CacheItem>(new CacheItem(key, value));
            _lruList.AddFirst(node);
            _map[key] = node;

            TrimIfNeeded();
        }
        finally
        {
            _lock.ExitWriteLock();
        }
    }

    /// <summary>Removes the entry for <paramref name="key"/>.</summary>
    /// <returns><c>true</c> when an entry was removed, <c>false</c> when the key was not present.</returns>
    public bool Remove(TKey key)
    {
        _lock.EnterWriteLock();
        try
        {
            if (!_map.TryGetValue(key, out var node))
                return false;

            _lruList.Remove(node);
            _map.Remove(key);
            return true;
        }
        finally
        {
            _lock.ExitWriteLock();
        }
    }

    /// <summary>Checks for <paramref name="key"/> without changing the recency order.</summary>
    public bool ContainsKey(TKey key)
    {
        _lock.EnterReadLock();
        try
        {
            return _map.ContainsKey(key);
        }
        finally
        {
            _lock.ExitReadLock();
        }
    }

    /// <summary>Copies the current content into a new, independent dictionary.</summary>
    public Dictionary<TKey, TValue> AsDictionary()
    {
        _lock.EnterReadLock();
        try
        {
            return _map.Values.ToDictionary(
                n => n.Value.Key,
                n => n.Value.Value
            );
        }
        finally
        {
            _lock.ExitReadLock();
        }
    }

    /// <summary>
    /// Returns the entries ordered from most to least recently used.
    /// </summary>
    /// <remarks>
    /// The result is a snapshot taken at call time. The lock is not held while the caller
    /// enumerates, so the caller may freely call back into the cache during enumeration and
    /// an abandoned enumeration cannot leave the lock held.
    /// </remarks>
    public IEnumerable<KeyValuePair<TKey, TValue>> Items() => Snapshot();

    /// <summary>
    /// Enumerates a snapshot of the entries, ordered from most to least recently used.
    /// </summary>
    public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator() => Snapshot().GetEnumerator();

    /// <summary>Copies all entries, in recency order, into a list while holding the read lock.</summary>
    private List<KeyValuePair<TKey, TValue>> Snapshot()
    {
        _lock.EnterReadLock();
        try
        {
            var snapshot = new List<KeyValuePair<TKey, TValue>>(_map.Count);
            foreach (var item in _lruList)
            {
                snapshot.Add(new KeyValuePair<TKey, TValue>(item.Key, item.Value));
            }

            return snapshot;
        }
        finally
        {
            _lock.ExitReadLock();
        }
    }

    /// <summary>Evicts from the tail until the entry count fits the capacity. Caller must hold the write lock.</summary>
    private void TrimIfNeeded()
    {
        while (_map.Count > _capacity)
        {
            var lruNode = _lruList.Last!;
            _lruList.RemoveLast();
            _map.Remove(lruNode.Value.Key);
        }
    }
}

View File

@@ -0,0 +1,12 @@
using System.Text.Json.Serialization;
namespace Shared.Models;
/// <summary>
/// Base type for result models that report an outcome flag and an optional message.
/// </summary>
public class SuccesMessageBaseModel
{
    /// <summary>Outcome flag; written to JSON as <c>Success</c>. Must be set on construction.</summary>
    [JsonPropertyName("Success")]
    public required bool Success { get; set; }

    /// <summary>
    /// Optional message; written to JSON as <c>Message</c> and left out entirely when it is null.
    /// </summary>
    [JsonPropertyName("Message"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public string? Message { get; set; }
}

View File

@@ -3,14 +3,10 @@ using System.Text.Json.Serialization;
namespace Shared.Models;
public class EntityQueryResults
public class EntityQueryResults : SuccesMessageBaseModel
{
[JsonPropertyName("Results")]
public required List<EntityQueryResult> Results { get; set; }
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
}
public class EntityQueryResult
@@ -19,20 +15,19 @@ public class EntityQueryResult
public required string Name { get; set; }
[JsonPropertyName("Value")]
public float Value { get; set; }
[JsonPropertyName("Attributes")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public Dictionary<string, string>? Attributes { get; set; }
}
public class EntityIndexResult
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
}
public class EntityIndexResult : SuccesMessageBaseModel {}
public class EntityListResults
{
[JsonPropertyName("Results")]
public required List<EntityListResult> Results { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
[JsonPropertyName("Success")]
public required bool Success { get; set; }
}
@@ -41,6 +36,8 @@ public class EntityListResult
{
[JsonPropertyName("Name")]
public required string Name { get; set; }
[JsonPropertyName("ProbMethod")]
public required string ProbMethod { get; set; }
[JsonPropertyName("Attributes")]
public required List<AttributeResult> Attributes { get; set; }
[JsonPropertyName("Datapoints")]
@@ -75,11 +72,5 @@ public class EmbeddingResult
public required float[] Embeddings { get; set; }
}
public class EntityDeleteResults
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
}
public class EntityDeleteResults : SuccesMessageBaseModel {}

View File

@@ -3,7 +3,7 @@ namespace Shared.Models;
public class JSONEntity
{
public required string Name { get; set; }
public required string Probmethod { get; set; }
public required ProbMethodEnum Probmethod { get; set; }
public required string Searchdomain { get; set; }
public required Dictionary<string, string> Attributes { get; set; }
public required JSONDatapoint[] Datapoints { get; set; }
@@ -12,8 +12,28 @@ public class JSONEntity
public class JSONDatapoint
{
public required string Name { get; set; }
public required string Text { get; set; }
public required string Probmethod_embedding { get; set; }
public required string SimilarityMethod { get; set; }
public required string? Text { get; set; }
public required ProbMethodEnum Probmethod_embedding { get; set; }
public required SimilarityMethodEnum SimilarityMethod { get; set; }
public required string[] Model { get; set; }
}
/// <summary>
/// Selects the method used by <c>JSONEntity.Probmethod</c> and
/// <c>JSONDatapoint.Probmethod_embedding</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the member names suggest different averaging strategies; the implementations
/// are not part of this file — confirm before documenting individual members.
/// Members have no explicit values, so their numeric values follow declaration order;
/// presumably reordering would affect any numerically persisted/serialized values — verify.
/// </remarks>
public enum ProbMethodEnum
{
Mean,
HarmonicMean,
QuadraticMean,
GeometricMean,
EVEWAvg,
HVEWAvg,
LVEWAvg,
DictionaryWeightedAverage
}
/// <summary>
/// Selects the method used by <c>JSONDatapoint.SimilarityMethod</c>.
/// </summary>
/// <remarks>
/// NOTE(review): <c>Euclidian</c> looks like a misspelling of "Euclidean". Renaming it would
/// change the public member name (and presumably any value serialized by name), so it is
/// left untouched here — confirm before changing.
/// </remarks>
public enum SimilarityMethodEnum
{
Cosine,
Euclidian,
Manhattan,
Pearson
}

View File

@@ -0,0 +1,13 @@
namespace Shared.Models;
/// <summary>
/// Options object holding the accepted API keys. <c>ApiKeyMiddleware</c> receives it through
/// <c>IOptions&lt;ApiKeyOptions&gt;</c> and checks the <c>X-API-KEY</c> request header against
/// <see cref="ApiKeys"/>.
/// </summary>
public class ApiKeyOptions
{
// Null when no keys are configured; the middleware then rejects every key with 403.
public string[]? ApiKeys { get; set; }
}
/// <summary>
/// Options describing a server endpoint.
/// </summary>
/// <remarks>
/// NOTE(review): presumably consumed by a client of the Embeddingsearch server; the consumer
/// is not visible in this file — confirm.
/// </remarks>
public class ServerOptions
{
// Required; must be set when the object is initialized.
public required string BaseUri { get; set; }
// Optional. NOTE(review): presumably the key sent as X-API-KEY — confirm against the consumer.
public string? ApiKey { get; set; }
// Optional. NOTE(review): presumably a default searchdomain name — confirm against the consumer.
public string? Searchdomain { get; set; }
}

View File

@@ -2,12 +2,19 @@
using System.Text.Json.Serialization;
namespace Shared.Models;
public readonly struct ResultItem(float score, string name)
public readonly struct ResultItem
{
[JsonPropertyName("Score")]
public readonly float Score { get; } = score;
public readonly float Score { get; }
[JsonPropertyName("Name")]
public readonly string Name { get; } = name;
public readonly string Name { get; }
[JsonConstructor]
public ResultItem(float score, string name)
{
Score = score;
Name = name;
}
public static long EstimateSize(ResultItem item)
{
@@ -88,13 +95,17 @@ public struct DateTimedSearchResult(DateTime dateTime, List<ResultItem> results)
}
}
public struct SearchdomainSettings(bool cacheReconciliation = false)
public struct SearchdomainSettings(bool cacheReconciliation = false, int queryCacheSize = 1_000_000, bool parallelEmbeddingsPrefetch = false)
{
[JsonPropertyName("CacheReconciliation")]
public bool CacheReconciliation { get; set; } = cacheReconciliation;
[JsonPropertyName("QueryCacheSize")]
public int QueryCacheSize { get; set; } = queryCacheSize;
[JsonPropertyName("ParallelEmbeddingsPrefetch")]
public bool ParallelEmbeddingsPrefetch { get; set; } = parallelEmbeddingsPrefetch;
}
internal static class MemorySizes
public static class MemorySizes
{
public static readonly int PointerSize = IntPtr.Size;
public static readonly int ObjectHeader = PointerSize * 2;

View File

@@ -1,4 +1,5 @@
using System.Text.Json.Serialization;
using Shared;
namespace Shared.Models;
@@ -11,91 +12,50 @@ public class SearchdomainListResults
public string? Message { get; set; }
}
public class SearchdomainCreateResults
public class SearchdomainCreateResults : SuccesMessageBaseModel
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
[JsonPropertyName("Id")]
public int? Id { get; set; }
}
public class SearchdomainUpdateResults
public class SearchdomainUpdateResults : SuccesMessageBaseModel {}
public class SearchdomainDeleteResults : SuccesMessageBaseModel
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
}
public class SearchdomainDeleteResults
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
[JsonPropertyName("DeletedEntities")]
public required int DeletedEntities { get; set; }
}
public class SearchdomainSearchesResults
public class SearchdomainQueriesResults : SuccesMessageBaseModel
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
[JsonPropertyName("Searches")]
public required Dictionary<string, DateTimedSearchResult> Searches { get; set; }
}
public class SearchdomainSettingsResults
public class SearchdomainDeleteSearchResult : SuccesMessageBaseModel {}
public class SearchdomainUpdateSearchResult : SuccesMessageBaseModel {}
public class SearchdomainSettingsResults : SuccesMessageBaseModel
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
[JsonPropertyName("Settings")]
public required SearchdomainSettings? Settings { get; set; }
}
public class SearchdomainSearchCacheSizeResults
public class SearchdomainQueryCacheSizeResults : SuccesMessageBaseModel
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
[JsonPropertyName("SearchCacheSizeBytes")]
public required long? SearchCacheSizeBytes { get; set; }
[JsonPropertyName("ElementCount")]
public required int? ElementCount { get; set; }
[JsonPropertyName("ElementMaxCount")]
public required int? ElementMaxCount { get; set; }
[JsonPropertyName("SizeBytes")]
public required long? SizeBytes { get; set; }
}
public class SearchdomainInvalidateCacheResults
public class SearchdomainInvalidateCacheResults : SuccesMessageBaseModel {}
public class SearchdomainGetDatabaseSizeResult : SuccesMessageBaseModel
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
}
public class SearchdomainGetDatabaseSizeResult
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
[JsonPropertyName("SearchdomainDatabaseSizeBytes")]
public required long? SearchdomainDatabaseSizeBytes { get; set; }
}

View File

@@ -2,14 +2,34 @@ using System.Text.Json.Serialization;
namespace Shared.Models;
public class ServerGetModelsResult
public class ServerGetModelsResult : SuccesMessageBaseModel
{
[JsonPropertyName("Success")]
public required bool Success { get; set; }
[JsonPropertyName("Message")]
public string? Message { get; set; }
[JsonPropertyName("Models")]
public string[]? Models { get; set; }
}
/// <summary>
/// Result model carrying server-wide statistics in addition to the inherited
/// <c>Success</c>/<c>Message</c> fields. Every statistic is nullable.
/// </summary>
/// <remarks>
/// NOTE(review): units (bytes vs. element counts) are not established by this file;
/// confirm against the code that fills the model.
/// </remarks>
public class ServerGetStatsResult : SuccesMessageBaseModel
{
[JsonPropertyName("EmbeddingCacheUtilization")]
public long? EmbeddingCacheUtilization { get; set; }
[JsonPropertyName("EmbeddingCacheMaxElementCount")]
public long? EmbeddingCacheMaxElementCount { get; set; }
// NOTE(review): the JSON name is "ElementCount" although the property is
// EmbeddingCacheElementCount; every other property here uses its own name as JSON name.
// Looks like an oversight, but changing it alters the wire format — confirm with API consumers.
[JsonPropertyName("ElementCount")]
public long? EmbeddingCacheElementCount { get; set; }
[JsonPropertyName("EmbeddingsCount")]
public long? EmbeddingsCount { get; set; }
[JsonPropertyName("EntityCount")]
public long? EntityCount { get; set; }
[JsonPropertyName("QueryCacheElementCount")]
public long? QueryCacheElementCount { get; set; }
[JsonPropertyName("QueryCacheMaxElementCountAll")]
public long? QueryCacheMaxElementCountAll { get; set; }
[JsonPropertyName("QueryCacheMaxElementCountLoadedSearchdomainsOnly")]
public long? QueryCacheMaxElementCountLoadedSearchdomainsOnly { get; set; }
[JsonPropertyName("QueryCacheUtilization")]
public long? QueryCacheUtilization { get; set; }
[JsonPropertyName("DatabaseTotalSize")]
public long? DatabaseTotalSize { get; set; }
[JsonPropertyName("RamTotalSize")]
public long? RamTotalSize { get; set; }
}

View File

@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>