Implemented indexer, cleanup

This commit is contained in:
EzFeDezy
2025-05-26 15:53:18 +02:00
parent 85141c879f
commit f6a4a75e4f
22 changed files with 791 additions and 50 deletions

5
.gitignore vendored
View File

@@ -9,4 +9,7 @@ src/server/obj
src/Client/bin
src/Client/obj
src/Models/bin
src/Models/obj
src/Models/obj
src/Indexer/bin
src/Indexer/obj
src/Indexer/Scripts/__pycache__

6
.vscode/tasks.json vendored
View File

@@ -7,7 +7,7 @@
"type": "process",
"args": [
"build",
"${workspaceFolder}/src/testapp/testapp.sln",
"${workspaceFolder}/embeddingsearch.sln",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary;ForceNoAlign"
],
@@ -19,7 +19,7 @@
"type": "process",
"args": [
"publish",
"${workspaceFolder}/src/testapp/testapp.sln",
"${workspaceFolder}/embeddingsearch.sln",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary;ForceNoAlign"
],
@@ -33,7 +33,7 @@
"watch",
"run",
"--project",
"${workspaceFolder}/src/testapp/testapp.sln"
"${workspaceFolder}/embeddingsearch.sln"
],
"problemMatcher": "$msCompile"
}

View File

@@ -17,6 +17,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Client", "src\Client\Client
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Models", "src\Models\Models.csproj", "{4468F2B1-425E-441C-B288-C3403BB771CB}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Indexer", "src\Indexer\Indexer.csproj", "{5361FD10-E85C-496C-9BEF-9232F767F904}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -46,6 +48,10 @@ Global
{4468F2B1-425E-441C-B288-C3403BB771CB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4468F2B1-425E-441C-B288-C3403BB771CB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4468F2B1-425E-441C-B288-C3403BB771CB}.Release|Any CPU.Build.0 = Release|Any CPU
{5361FD10-E85C-496C-9BEF-9232F767F904}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5361FD10-E85C-496C-9BEF-9232F767F904}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5361FD10-E85C-496C-9BEF-9232F767F904}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5361FD10-E85C-496C-9BEF-9232F767F904}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{67AA89C0-3630-4994-B4EE-FC86CFF407DB} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF}
@@ -54,5 +60,6 @@ Global
{D61A2C50-B46C-42BA-B75D-E84D8FA28C29} = {BC4F3063-B921-4C4A-A7CE-11FAF5B73D50}
{A8EBB748-5BBA-47EB-840D-E398365C52A2} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF}
{4468F2B1-425E-441C-B288-C3403BB771CB} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF}
{5361FD10-E85C-496C-9BEF-9232F767F904} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF}
EndGlobalSection
EndGlobal

View File

@@ -104,8 +104,9 @@ public class Client
public async Task<EntityIndexResult> EntityIndexAsync(string searchdomain, string jsonEntity)
{
var url = $"{baseUri}/Entity/Index?apiKey={HttpUtility.UrlEncode(apiKey)}&searchdomain={HttpUtility.UrlEncode(searchdomain)}&jsonEntity={HttpUtility.UrlEncode(jsonEntity)}";
return await GetUrlAndProcessJson<EntityIndexResult>(url);
var url = $"{baseUri}/Entity/Index?apiKey={HttpUtility.UrlEncode(apiKey)}&searchdomain={HttpUtility.UrlEncode(searchdomain)}";
var content = new StringContent(jsonEntity, Encoding.UTF8, "application/json");
return await PostUrlAndProcessJson<EntityIndexResult>(url, content);//new FormUrlEncodedContent(values));
}
public async Task<EntityListResults> EntityListAsync(bool returnEmbeddings = false)
@@ -134,8 +135,19 @@ public class Client
{
using var client = new HttpClient();
var response = await client.GetAsync(url);
string content = await response.Content.ReadAsStringAsync();
var result = JsonSerializer.Deserialize<T>(content)
string responseContent = await response.Content.ReadAsStringAsync();
var result = JsonSerializer.Deserialize<T>(responseContent)
?? throw new Exception($"Failed to deserialize JSON to type {typeof(T).Name}");
return result;
}
/// <summary>
/// Sends <paramref name="content"/> to <paramref name="url"/> with an HTTP POST and
/// deserializes the JSON response body into <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type the response body is deserialized into.</typeparam>
/// <param name="url">Request URL, including any query string.</param>
/// <param name="content">Request body.</param>
/// <returns>The deserialized response object.</returns>
/// <exception cref="Exception">The response body deserialized to <c>null</c>.</exception>
private static async Task<T> PostUrlAndProcessJson<T>(string url, HttpContent content)
{
    using var client = new HttpClient();
    // Dispose the response so its connection/content buffers are released promptly.
    using var response = await client.PostAsync(url, content);
    string responseContent = await response.Content.ReadAsStringAsync();
    // The status code is intentionally not inspected here, mirroring the GET helper:
    // the body is handed to the deserializer as-is.
    return JsonSerializer.Deserialize<T>(responseContent)
        ?? throw new Exception($"Failed to deserialize JSON to type {typeof(T).Name}");
}

View File

@@ -0,0 +1,14 @@
namespace Indexer.Exceptions;
/// <summary>
/// Thrown when the indexer configuration is missing required values or is otherwise invalid.
/// </summary>
public class IndexerConfigurationException : Exception
{
    /// <summary>Creates the exception with a generic "incomplete or incorrect" message.</summary>
    public IndexerConfigurationException()
        : base("Configuration is incomplete or was set up incorrectly")
    {
    }

    /// <summary>Creates the exception with a caller-supplied message.</summary>
    /// <param name="message">Description of the configuration problem.</param>
    public IndexerConfigurationException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    /// <param name="message">Description of the configuration problem.</param>
    /// <param name="innerException">Underlying cause, preserved for diagnostics.</param>
    public IndexerConfigurationException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}

View File

@@ -0,0 +1,18 @@
namespace Indexer.Exceptions;
/// <summary>
/// Thrown when no registered script handler recognises a script file.
/// </summary>
/// <remarks>
/// NOTE(review): a call with a single string argument binds to the <c>fileName</c> overload,
/// not the <c>message</c> overload, because the former needs no default substituted.
/// </remarks>
public class UnknownScriptLanguageException : Exception
{
    /// <summary>Path or name of the script that could not be classified, if known.</summary>
    public string? FileName { get; }

    /// <summary>Creates the exception with the default message.</summary>
    /// <param name="fileName">Script file that could not be classified.</param>
    public UnknownScriptLanguageException(string? fileName = null)
        : this("Unable to determine script language", fileName)
    {
    }

    /// <summary>Creates the exception with a custom message.</summary>
    /// <param name="message">Error description.</param>
    /// <param name="fileName">Script file that could not be classified.</param>
    public UnknownScriptLanguageException(string message, string? fileName = null)
        : base(message)
        => FileName = fileName;
}

View File

@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.14" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.6.2" />
<PackageReference Include="Python" Version="3.13.3" />
<PackageReference Include="Pythonnet" Version="3.0.5" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\server\server.csproj" />
<ProjectReference Include="..\Client\Client.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,12 @@
namespace Indexer.Models;
/// <summary>
/// Contract for a script host that the indexer can initialise once and then call
/// repeatedly from configured triggers.
/// </summary>
public interface IScriptable
{
    /// <summary>Tool set (script path, client, latest callback info) made available to the script.</summary>
    ScriptToolSet ToolSet { get; set; }

    /// <summary>Runs the script's one-time initialisation.</summary>
    void Init();

    /// <summary>Runs the script's update step.</summary>
    /// <param name="callbackInfos">Information about the trigger that caused this update.</param>
    void Update(ICallbackInfos callbackInfos);

    /// <summary>Reports whether this implementation handles the given script file.</summary>
    /// <param name="filePath">Path or name of the script file.</param>
    bool IsScript(string filePath);
}
/// <summary>Marker interface for the trigger information passed to <c>IScriptable.Update</c>; declares no members.</summary>
public interface ICallbackInfos { }

View File

@@ -0,0 +1,94 @@
using System.Text.Json;
using System.Timers;
using embeddingsearch;
using Python.Runtime;
namespace Indexer.Models;
/// <summary>
/// <see cref="IScriptable"/> implementation that runs a Python script through pythonnet.
/// The script source is executed once in a dedicated scope; its <c>init(toolset)</c> and
/// <c>update(toolset)</c> functions are then invoked from <see cref="Init"/> and <see cref="Update"/>.
/// </summary>
public class PythonScriptable : IScriptable
{
    /// <summary>Tool set exposed to the script as the Python variable <c>toolset</c>.</summary>
    public ScriptToolSet ToolSet { get; set; }

    /// <summary>Python wrapper of <see cref="ToolSet"/> from the most recent Init/Update call.</summary>
    public PyObject? pyToolSet;

    /// <summary>Python scope holding the script's globals; it lives as long as this instance.</summary>
    public PyModule scope;

    /// <summary>The imported Python <c>sys</c> module.</summary>
    public dynamic sys;

    /// <summary>Full text of the script file.</summary>
    public string source;

    /// <summary>
    /// Starts the Python engine if necessary, loads the script, adds the script's directory
    /// to <c>sys.path</c>, and runs <see cref="Init"/>.
    /// </summary>
    /// <param name="toolSet">Tool set whose <c>filePath</c> names the script to load.</param>
    public PythonScriptable(ScriptToolSet toolSet)
    {
        if (!PythonEngine.IsInitialized)
        {
            // The DLL location has to be configured before the runtime starts, so it is only
            // assigned on first use; later instances (one per worker) reuse the running engine.
            Runtime.PythonDLL = @"libpython3.12.so";
            PythonEngine.Initialize();
            // Release the GIL held by the initialising thread so other threads
            // (e.g. timer callbacks) can acquire it through Py.GIL().
            PythonEngine.BeginAllowThreads();
        }

        ToolSet = toolSet;
        source = File.ReadAllText(ToolSet.filePath);
        string fullPath = Path.GetFullPath(ToolSet.filePath);
        string? scriptDir = Path.GetDirectoryName(fullPath);

        using (Py.GIL())
        {
            scope = Py.CreateScope();
            sys = Py.Import("sys");
            if (scriptDir is not null)
            {
                // Lets the script import modules that sit next to it.
                sys.path.append(scriptDir);
            }
        }

        Init();
    }

    /// <summary>
    /// Executes the script source in <see cref="scope"/> and then calls its <c>init(toolset)</c>.
    /// </summary>
    public void Init()
    {
        using (Py.GIL())
        {
            pyToolSet = ToolSet.ToPython();
            scope.Set("toolset", pyToolSet);
            scope.Exec(source);
            scope.Exec("init(toolset)");
        }
    }

    /// <summary>
    /// Stores <paramref name="callbackInfos"/> on the tool set and calls the script's <c>update(toolset)</c>.
    /// </summary>
    /// <param name="callbackInfos">Information about the trigger that caused this update.</param>
    public void Update(ICallbackInfos callbackInfos)
    {
        // The engine is started once in the constructor and must stay alive between updates:
        // shutting it down here would invalidate `scope` (and the script's globals) and break
        // every later call as well as every other PythonScriptable sharing the engine.
        using (Py.GIL())
        {
            ToolSet.callbackInfos = callbackInfos;
            pyToolSet = ToolSet.ToPython();
            scope.Set("toolset", pyToolSet);
            scope.Exec("update(toolset)");
        }
    }

    /// <summary>Reports whether <paramref name="fileName"/> ends with <c>.py</c>.</summary>
    /// <param name="fileName">Path or name of the script file.</param>
    public bool IsScript(string fileName)
    {
        // Ordinal comparison: file extensions are identifiers, not linguistic text.
        return fileName.EndsWith(".py", StringComparison.Ordinal);
    }
}
/*
TODO Add the following languages
- Javascript
- Golang (reconsider)
*/
/// <summary>
/// Bundle of objects handed to a script: the script's path, the API client, and the
/// information about the most recent callback.
/// </summary>
public class ScriptToolSet
{
    /// <summary>Path of the script file this tool set belongs to.</summary>
    public string filePath;

    /// <summary>Client instance the script uses to talk to the server.</summary>
    public Client.Client client;

    /// <summary>Information about the latest trigger; stays <c>null</c> until something assigns it.</summary>
    public ICallbackInfos? callbackInfos;

    // IConfiguration - Access to connection strings, ollama, etc. maybe?

    /// <summary>Creates a tool set for one script.</summary>
    /// <param name="filePath">Path of the script file.</param>
    /// <param name="client">Client shared with the script.</param>
    public ScriptToolSet(string filePath, Client.Client client)
        => (this.filePath, this.client) = (filePath, client);
}
/// <summary>
/// Callback information for timer-driven updates: carries the arguments of a
/// timer <c>Elapsed</c> event.
/// </summary>
public class IntervalCallbackInfos : ICallbackInfos
{
    /// <summary>Event sender as reported by the timer; may be <c>null</c>.</summary>
    public object? sender;

    /// <summary>Event data of the elapsed timer tick.</summary>
    public required ElapsedEventArgs e;
}

View File

@@ -0,0 +1,45 @@
namespace Indexer.Models;
/// <summary>
/// Holds the configured workers together with the list of <see cref="IScriptable"/>
/// implementation types that may be used to run a worker's script.
/// </summary>
public class WorkerCollection
{
    /// <summary>Workers that have been created so far; starts out empty.</summary>
    public List<Worker> Workers = [];

    /// <summary>Script handler types that are tried for each script file.</summary>
    public List<Type> types = [typeof(PythonScriptable)];
}
/// <summary>Pairs a worker's configuration with the script host that executes its script.</summary>
public class Worker
{
    /// <summary>Configuration this worker was created from.</summary>
    public WorkerConfig Config { get; set; }

    /// <summary>Script host that runs the worker's script.</summary>
    public IScriptable Scriptable { get; set; }

    /// <summary>Creates a worker.</summary>
    /// <param name="workerConfig">Configuration of the worker.</param>
    /// <param name="scriptable">Script host for the worker's script.</param>
    public Worker(WorkerConfig workerConfig, IScriptable scriptable)
        => (Config, Scriptable) = (workerConfig, scriptable);
}
/// <summary>Configuration root that lists all workers.</summary>
public class WorkerCollectionConfig
{
    /// <summary>One entry per worker.</summary>
    public required List<WorkerConfig> Worker { get; set; }
}
/// <summary>Configuration of a single worker.</summary>
public class WorkerConfig
{
    /// <summary>Name of the worker; used in configuration error messages.</summary>
    public required string Name { get; set; }

    /// <summary>Names of the searchdomains associated with this worker.</summary>
    public required List<string> Searchdomains { get; set; }

    /// <summary>Path of the script file the worker runs.</summary>
    public required string Script { get; set; }

    /// <summary>Triggers that cause the script's update step to run.</summary>
    public required List<Call> Calls { get; set; }
}
/// <summary>Describes one trigger of a worker.</summary>
public class Call
{
    /// <summary>Kind of trigger, e.g. <c>"interval"</c>.</summary>
    public required string Type { get; set; }

    /// <summary>Timer interval for interval-type calls; it is passed to a timer constructor, so presumably milliseconds.</summary>
    public long? Interval { get; set; } // For Type: Interval

    /// <summary>Path to watch for file-watching calls.</summary>
    public string? Path { get; set; } // For Type: FileSystemWatcher
}

31
src/Indexer/Program.cs Normal file
View File

@@ -0,0 +1,31 @@
using Indexer.Models;
using Indexer.Services;
using server;
// Entry point of the Indexer web host: registers services, starts the hosted
// IndexerService, and runs the application.
var builder = WebApplication.CreateBuilder(args);
// Add services to the container.
builder.Services.AddControllers();
// Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle
builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerGen();
// Single shared client and worker registry; both are injected into IndexerService.
builder.Services.AddSingleton<Client.Client>();
builder.Services.AddSingleton<WorkerCollection>();
// IndexerService is constructed by the host at startup.
builder.Services.AddHostedService<IndexerService>();
var app = builder.Build();
// Configure the HTTP request pipeline.
if (app.Environment.IsDevelopment())
{
    // Swagger UI is only exposed in the Development environment.
    app.UseSwagger();
    app.UseSwaggerUI();
}
else
{
    // Outside Development every request passes through ApiKeyMiddleware.
    app.UseMiddleware<ApiKeyMiddleware>();
}
// app.UseHttpsRedirection();
app.Run();

View File

@@ -0,0 +1,41 @@
{
"$schema": "http://json.schemastore.org/launchsettings.json",
"iisSettings": {
"windowsAuthentication": false,
"anonymousAuthentication": true,
"iisExpress": {
"applicationUrl": "http://localhost:36687",
"sslPort": 44337
}
},
"profiles": {
"http": {
"commandName": "Project",
"dotnetRunMessages": true,
"launchBrowser": true,
"launchUrl": "swagger",
"applicationUrl": "http://localhost:5210",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
}
},
"https": {
"commandName": "Project",
"dotnetRunMessages": true,
"launchBrowser": true,
"launchUrl": "swagger",
"applicationUrl": "https://localhost:7282;http://localhost:5210",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
}
},
"IIS Express": {
"commandName": "IISExpress",
"launchBrowser": true,
"launchUrl": "swagger",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
}
}
}
}

View File

@@ -0,0 +1,50 @@
import os
from tools import *
import json
from dataclasses import asdict
# Directory (relative to the process working directory) whose files get indexed.
example_content = "./Scripts/example_content"
# Searchdomain name written into every entity built by index_files().
example_searchdomain = "example"
# Number of update() calls seen so far; module-level so it survives between calls.
example_counter = 0
# Embedding model names attached to every datapoint.
models = ["bge-m3", "mxbai-embed-large"]
def init(toolset: Toolset):
    """One-time setup hook of the example script.

    Prints some diagnostics, lists the searchdomains reported by the client and
    then indexes the example files once.

    toolset: object exposing the client as ``toolset.client``.
    """
    global example_counter
    banner = (
        "Py-DEBUG@init",
        "This is the init function from the python example script",
        f"example_counter: {example_counter}",
    )
    for line in banner:
        print(line)

    # The client method hands back a task-like object; .Result blocks until it is done.
    listing: SearchdomainListResults = toolset.client.SearchdomainListAsync().Result
    print("Currently these searchdomains exist")
    for name in listing.Searchdomains:
        print(f" - {name}")

    index_files(toolset)
def update(toolset: Toolset):
    """Update hook of the example script.

    Reports whether the call came from an interval trigger, bumps the module-level
    call counter and re-indexes the example files.

    toolset: object exposing ``client`` and ``callbackInfos``.
    """
    global example_counter
    print("Py-DEBUG@update")
    print("This is the update function from the python example script")
    callbackInfos: ICallbackInfos = toolset.callbackInfos
    # `x is SomeClass` only holds when x *is the class object itself*, so the original
    # check could never succeed for an instance. Compare by type instead.
    # NOTE(review): when the value arrives from .NET it may be wrapped as its interface
    # type; `__implementation__` (pythonnet) unwraps it - confirm against the runtime in use.
    concrete = getattr(callbackInfos, "__implementation__", callbackInfos)
    if (isinstance(concrete, IntervalCallbackInfos)
            or type(concrete).__name__ == "IntervalCallbackInfos"):
        print("It was called via an interval callback")
    example_counter += 1
    print(f"example_counter: {example_counter}")
    index_files(toolset)
def index_files(toolset: Toolset):
    """Build one entity per file in ``example_content`` and send them to the server.

    Each file contributes three datapoints: its path, its first line (the title)
    and the remaining text. All entities are serialised into one JSON array and
    submitted through ``toolset.client.EntityIndexAsync``.

    toolset: object exposing the client as ``toolset.client``.
    """
    jsonEntities: list = []
    for filename in os.listdir(example_content):
        # Keep "/" as separator so entity names stay identical across platforms.
        qualified_filepath = example_content + "/" + filename
        if not os.path.isfile(qualified_filepath):
            # Sub-directories (e.g. __pycache__) cannot be opened as text files.
            continue
        # Explicit encoding: the platform default is not guaranteed to be UTF-8.
        with open(qualified_filepath, "r", encoding="utf-8", errors="ignore") as file:
            # Drop the line terminator so the title datapoint holds only the title.
            title = file.readline().rstrip("\n")
            text = file.read()
        datapoints: list = [
            JSONDatapoint("filename", qualified_filepath, "wavg", models),
            JSONDatapoint("title", title, "wavg", models),
            JSONDatapoint("text", text, "wavg", models)
        ]
        jsonEntity: dict = asdict(JSONEntity(qualified_filepath, "wavg", example_searchdomain, {}, datapoints))
        jsonEntities.append(jsonEntity)
    jsonstring = json.dumps(jsonEntities)
    # .Result blocks until the client call has completed.
    result: EntityIndexResult = toolset.client.EntityIndexAsync(jsonstring).Result
    print(f"Update was successful: {result.Success}")

View File

@@ -0,0 +1,122 @@
#!/usr/bin/env python3
"""
Generate ten brief-overview files for a given topic using an Ollama model.

- The directory ./example_content (next to this script) is used as a mini knowledge-base.
- Two Python functions are exposed to the model as *tools*:
    - list_files():  return [{name, title}, ...] for every file in ./example_content
    - create_file(): create/overwrite a file and write the content supplied
- The model is instructed to call create_file() ten times (one per sub-topic)
  and to put the title on the first line of each file.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import List, Dict, Any
import ollama # pip install -U ollama
# ---------- constant configuration ------------------------------------------------
# Target directory for generated files: "example_content" next to this script.
FILES_DIR = Path(__file__).parent / "example_content"
# Created at import time so the tool functions can rely on it existing.
FILES_DIR.mkdir(exist_ok=True)
# ---------- tool functions ---------------------------------------------------------
def list_files() -> List[Dict[str, str]]:
    """
    Describe every regular file in ./example_content by name and title.

    The title is the file's first line without its trailing newline.

    Returns
    -------
    list[dict]
        One {"name": "<filename>", "title": "<first line or ''>"} per file.
    """
    entries: List[Dict[str, str]] = []
    for entry in FILES_DIR.iterdir():
        if not entry.is_file():
            continue
        # Undecodable bytes are skipped rather than aborting the listing.
        with entry.open("r", encoding="utf-8", errors="ignore") as handle:
            first_line = handle.readline().rstrip("\n")
        entries.append({"name": entry.name, "title": first_line})
    return entries
def create_file(filename: str, content: str) -> str:
    """
    Create (or overwrite) a file inside ./example_content.

    Parameters
    ----------
    filename : str
        A simple name like "quantum_entanglement.md". Any path components
        beyond the basename are stripped for safety.
    content : str
        The full text to write; the first line *must* be the title.

    Returns
    -------
    str
        Absolute path of the file that was written.

    Raises
    ------
    ValueError
        If the basename is empty or is a directory reference ("." or "..").
    """
    safe_name = os.path.basename(filename)
    if not safe_name:
        raise ValueError("filename cannot be empty")
    # basename("a/..") is "..": that names a directory, never a writable file.
    if safe_name in (os.curdir, os.pardir):
        raise ValueError(f"filename must name a file, got {safe_name!r}")
    path = FILES_DIR / safe_name
    path.write_text(content, encoding="utf-8")
    return str(path.resolve())
# ---------- main driver ------------------------------------------------------------
def run(topic: str, *, model: str = "qwen3:latest", temperature: float = 0.2,
        max_rounds: int = 20) -> None:
    """Ask the model to create ten overview files about *topic*.

    The conversation is continued until the model stops requesting tools (or
    ``max_rounds`` chat calls were made): every tool result is sent back to the
    model so it can go on creating files.

    Parameters
    ----------
    topic : str
        Main subject the files should cover.
    model : str
        Name of the Ollama model to use.
    temperature : float
        Sampling temperature passed to the model.
    max_rounds : int
        Upper bound on chat calls, guarding against a model that never stops
        calling tools.
    """
    system_prompt = (
        "You are a file-writing assistant. For each of ten distinct sub-topics "
        "related to the given topic you will call the `create_file` tool to "
        "write a Markdown file that contains at least 5 sentences. Use a short, "
        "snake_case filename ending with '.md'. The very first line of the "
        "file **must** be the title (capitalized). After you have created all "
        "ten files, reply with only the single word DONE."
    )
    messages: List[Any] = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Topic: {topic}"},
    ]
    tools = [list_files, create_file]
    available = {f.__name__: f for f in tools}

    for _ in range(max_rounds):
        response = ollama.chat(model=model,
                               messages=messages,
                               tools=tools,
                               options={"temperature": temperature})
        tool_calls = response.message.tool_calls or []
        if not tool_calls:
            # Plain answer (ideally "DONE"): nothing left to execute.
            break

        # The assistant turn must precede its tool results in the history.
        messages.append(response.message)
        for call in tool_calls:
            fn_name = call.function.name
            fn_args = call.function.arguments
            handler = available.get(fn_name)
            if handler is None:
                # Report a hallucinated tool name to the model instead of crashing.
                result: Any = {"error": f"unknown tool: {fn_name}"}
            else:
                try:
                    result = handler(**fn_args)  # Run tool calls
                except (TypeError, ValueError) as exc:
                    # Bad arguments from the model: let it correct itself.
                    result = {"error": str(exc)}
            messages.append({"role": "tool",
                             "name": fn_name,
                             "content": json.dumps(result, ensure_ascii=False)})
# ---------- CLI entry-point --------------------------------------------------------
if __name__ == "__main__":
    # Command-line front end: one positional topic plus an optional model name.
    import argparse

    cli = argparse.ArgumentParser(
        description="Generate ten overview files for a topic using Ollama + tool calling")
    cli.add_argument("topic", help="Main subject area, e.g. 'quantum computing'")
    cli.add_argument("--model", default="qwen3:latest",
                     help="Local Ollama model to use (default: qwen3:latest)")
    options = cli.parse_args()
    run(options.topic, model=options.model)

View File

@@ -0,0 +1,130 @@
from dataclasses import dataclass
import array
from typing import Optional
@dataclass
class JSONDatapoint:
    """One named piece of text of an entity, plus how and with which models to embed it."""
    Name: str
    Text: str
    Probmethod_embedding: str
    # Names of the embedding models to use for this datapoint.
    Model: list[str]


@dataclass
class JSONEntity:
    """Entity payload as serialised (via dataclasses.asdict) for the index call."""
    Name: str
    Probmethod: str
    Searchdomain: str
    Attributes: dict
    # A plain list: array.array only stores primitive numbers and is not
    # subscriptable on older Python versions.
    Datapoints: list[JSONDatapoint]
#Model - Searchdomain
# Result shapes of the searchdomain-related client calls.
@dataclass
class SearchdomainListResults:
    # Names of the existing searchdomains.
    Searchdomains:list[str]


@dataclass
class SearchdomainCreateResults:
    Success:bool
    # presumably the id of the created searchdomain; None otherwise - TODO confirm
    id:int|None


@dataclass
class SearchdomainUpdateResults:
    Success:bool


@dataclass
class SearchdomainDeleteResults:
    Success:bool
    # presumably the number of entities removed along with the searchdomain - TODO confirm
    DeletedEntities:int
#Model - Entity
@dataclass
class EntityQueryResult:
name:str
ValueError:float
@dataclass
class EntityQueryResults:
Results:list[EntityQueryResult]
@dataclass
class EntityIndexResult:
Success:bool
@dataclass
class AttributeResult:
Name:str
Value:str
@dataclass
class EmbeddingResult:
Model:str
Embeddings:array.array[float]
@dataclass
class DatapointResult:
Name:str
ProbMethod:str
Embeddings:list[EmbeddingResult]|None
@dataclass
class EntityListResults:
Name:str
Attributes:list[AttributeResult]
Datapoints:list[DatapointResult]
@dataclass
class EntityDeleteResults:
Success:bool
# Model - Client
@dataclass
class Client:
    """Type-hint stand-in describing the client object that scripts receive as ``toolset.client``.

    The method bodies are empty placeholders. Several names are defined more than
    once to sketch overloads; at runtime only the last definition of each name
    remains on this class.
    NOTE(review): the methods take no ``self`` - fine for a hint-only stub, but they
    cannot be called on an instance of this Python class.
    """
    baseUri: str
    apiKey: str
    searchdomain: str

    # --- searchdomain operations ---
    async def SearchdomainListAsync() -> SearchdomainListResults: ...

    async def SearchdomainDeleteAsync() -> SearchdomainDeleteResults: ...

    async def SearchdomainCreateAsync() -> SearchdomainCreateResults: ...

    async def SearchdomainCreateAsync(searchdomain:str) -> SearchdomainCreateResults: ...

    async def SearchdomainUpdateAsync(newName:str, settings:str) -> SearchdomainUpdateResults: ...

    async def SearchdomainUpdateAsync(searchdomain:str, newName:str, settings:str) -> SearchdomainUpdateResults: ...

    # --- entity operations ---
    async def EntityQueryAsync(query:str) -> EntityQueryResults: ...

    async def EntityQueryAsync(searchdomain:str, query:str) -> EntityQueryResults: ...

    #async def EntityIndexAsync(jsonEntity): # -> EntityIndexResult:#:NetList[JSONEntity]) -> EntityIndexResult: #TODO fix clr issues, i.e. make this work
    #    pass
    #async def EntityIndexAsync(searchdomain:str, jsonEntity:list[JSONEntity]) -> EntityIndexResult:
    #    pass
    async def EntityIndexAsync(jsonEntity:str) -> EntityIndexResult: ...

    async def EntityIndexAsync(searchdomain:str, jsonEntity:str) -> EntityIndexResult: ...

    async def EntityListAsync(returnEmbeddings:bool = False) -> EntityListResults: ...

    async def EntityListAsync(searchdomain:str, returnEmbeddings:bool = False) -> EntityListResults: ...

    async def EntityDeleteAsync(searchdomain:str, entityName:str) -> EntityDeleteResults: ...
# Base type for the information a script receives about the trigger that invoked it.
class ICallbackInfos:
    pass


# Callback information for interval (timer) triggers.
@dataclass
class IntervalCallbackInfos(ICallbackInfos):
    # Object that raised the event, if any.
    sender: Optional[object]
    # Event data of the trigger.
    e: object


# Shape of the `toolset` argument passed to a script's init()/update().
@dataclass
class Toolset:
    # Path of the script file.
    filePath:str
    # Client used to talk to the server.
    client:Client
    # Information about the latest trigger; None until one is set.
    callbackInfos: Optional[ICallbackInfos] = None

View File

@@ -0,0 +1,102 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Indexer.Exceptions;
using Indexer.Models;
using System.Timers;
using Microsoft.AspNetCore.Http.HttpResults;
using embeddingsearch;
using Python.Runtime;
namespace Indexer.Services;
/// <summary>
/// Hosted service that builds one <see cref="Worker"/> per configured entry and wires up
/// the triggers ("calls") that run each worker's script update.
/// </summary>
public class IndexerService : IHostedService
{
    private readonly WorkerCollection workerCollection;
    private readonly IConfiguration _config;
    private readonly Client.Client client;

    // Timers created for "interval" calls; tracked so StopAsync can shut them down.
    private readonly List<System.Timers.Timer> timers = [];

    /// <summary>
    /// Reads the <c>EmbeddingsearchIndexer</c> configuration section, creates all workers
    /// (which also runs each script's initialisation) and registers their calls.
    /// </summary>
    /// <param name="workerCollection">Registry the created workers are added to.</param>
    /// <param name="configuration">Application configuration.</param>
    /// <param name="client">Client handed to every script.</param>
    /// <exception cref="IndexerConfigurationException">The section is missing or a call is configured incorrectly.</exception>
    /// <exception cref="NotImplementedException">A call uses a type that is recognised but not implemented yet.</exception>
    /// <exception cref="UnknownScriptLanguageException">No script handler accepts a worker's script file.</exception>
    public IndexerService(WorkerCollection workerCollection, IConfiguration configuration, Client.Client client)
    {
        this._config = configuration;
        this.client = client;
        this.workerCollection = workerCollection;

        // Load and configure all workers
        WorkerCollectionConfig sectionWorker =
            _config.GetSection("EmbeddingsearchIndexer").Get<WorkerCollectionConfig>()
            ?? throw new IndexerConfigurationException("Unable to find section \"EmbeddingsearchIndexer\"");

        foreach (WorkerConfig workerConfig in sectionWorker.Worker)
        {
            // The first searchdomain encountered becomes the client's default if none is set.
            if (client.searchdomain == "" && workerConfig.Searchdomains.Count >= 1)
            {
                client.searchdomain = workerConfig.Searchdomains.First();
            }

            ScriptToolSet toolSet = new(workerConfig.Script, client);
            Worker worker = new(workerConfig, GetScriptable(toolSet));
            workerCollection.Workers.Add(worker);

            foreach (Call call in workerConfig.Calls)
            {
                RegisterCall(worker, call);
            }
        }
    }

    /// <summary>Sets up the trigger described by <paramref name="call"/> for <paramref name="worker"/>.</summary>
    private void RegisterCall(Worker worker, Call call)
    {
        string workerName = worker.Config.Name;
        switch (call.Type)
        {
            case "interval":
                if (call.Interval is null)
                {
                    throw new IndexerConfigurationException($"Interval not set for a Call in Worker \"{workerName}\"");
                }
                var timer = new System.Timers.Timer((double)call.Interval.Value);
                timer.Elapsed += (sender, e) => worker.Scriptable.Update(new IntervalCallbackInfos() { sender = sender, e = e });
                timer.AutoReset = true;
                timer.Enabled = true;
                timers.Add(timer);
                break;
            case "schedule": // TODO implement scheduled tasks using Quartz
                throw new NotImplementedException("schedule not implemented yet");
            case "fileupdate":
                if (call.Path is null)
                {
                    throw new IndexerConfigurationException($"Path not set for a Call in Worker \"{workerName}\"");
                }
                throw new NotImplementedException("fileupdate not implemented yet");
            default:
                throw new IndexerConfigurationException($"Unknown Type specified for a Call in Worker \"{workerName}\"");
        }
    }

    /// <summary>
    /// Returns the first registered script handler whose <c>IsScript</c> accepts the tool set's file.
    /// </summary>
    /// <param name="toolSet">Tool set naming the script file.</param>
    /// <exception cref="UnknownScriptLanguageException">No handler accepts the file.</exception>
    /// <remarks>
    /// NOTE(review): each candidate type is instantiated before <c>IsScript</c> is consulted,
    /// so a handler's constructor runs even for files it does not handle.
    /// </remarks>
    public IScriptable GetScriptable(ScriptToolSet toolSet)
    {
        string fileName = toolSet.filePath;
        foreach (Type type in workerCollection.types)
        {
            if (Activator.CreateInstance(type, toolSet) is IScriptable instance && instance.IsScript(fileName))
            {
                return instance;
            }
        }
        throw new UnknownScriptLanguageException(fileName);
    }

    /// <summary>Nothing to start: workers and timers are already set up by the constructor.</summary>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        return Task.CompletedTask;
    }

    /// <summary>Stops and disposes all interval timers so no further updates are triggered.</summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        foreach (System.Timers.Timer timer in timers)
        {
            timer.Stop();
            timer.Dispose();
        }
        timers.Clear();
        return Task.CompletedTask;
    }
}

View File

@@ -0,0 +1,29 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft.AspNetCore": "Warning"
}
},
"Embeddingsearch": {
"BaseUri": "http://localhost:5146"
},
"EmbeddingsearchIndexer": {
"Worker":
[
{
"Name": "example",
"Searchdomains": [
"example"
],
"Script": "Scripts/example.py",
"Calls": [
{
"Type": "interval",
"Interval": 10000
}
]
}
]
}
}

View File

@@ -0,0 +1,9 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft.AspNetCore": "Warning"
}
},
"AllowedHosts": "*"
}

View File

@@ -2,17 +2,17 @@ namespace embeddingsearch;
public class JSONEntity
{
public required string name { get; set; }
public required string probmethod { get; set; }
public required string searchdomain { get; set; }
public required Dictionary<string, string> attributes { get; set; }
public required JSONDatapoint[] datapoints { get; set; }
public required string Name { get; set; }
public required string Probmethod { get; set; }
public required string Searchdomain { get; set; }
public required Dictionary<string, string> Attributes { get; set; }
public required JSONDatapoint[] Datapoints { get; set; }
}
public class JSONDatapoint
{
public required string name { get; set; }
public required string text { get; set; }
public required string probmethod_embedding { get; set; }
public required string[] model { get; set; }
public required string Name { get; set; }
public required string Text { get; set; }
public required string Probmethod_embedding { get; set; }
public required string[] Model { get; set; }
}

View File

@@ -268,25 +268,25 @@ public class Searchdomain
{
return null;
}
if (HasEntity(jsonEntity.name))
if (HasEntity(jsonEntity.Name))
{
RemoveEntity(jsonEntity.name);
RemoveEntity(jsonEntity.Name);
}
int id_entity = DatabaseInsertEntity(jsonEntity.name, jsonEntity.probmethod, id);
foreach (KeyValuePair<string, string> attribute in jsonEntity.attributes)
int id_entity = DatabaseInsertEntity(jsonEntity.Name, jsonEntity.Probmethod, id);
foreach (KeyValuePair<string, string> attribute in jsonEntity.Attributes)
{
DatabaseInsertAttribute(attribute.Key, attribute.Value, id_entity);
}
List<Datapoint> datapoints = [];
foreach (JSONDatapoint jsonDatapoint in jsonEntity.datapoints)
foreach (JSONDatapoint jsonDatapoint in jsonEntity.Datapoints)
{
Dictionary<string, float[]> embeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.text, [.. jsonDatapoint.model], ollama, embeddingCache);
var probMethod_embedding = probmethods.GetMethod(jsonDatapoint.probmethod_embedding) ?? throw new Exception($"Unknown probmethod name {jsonDatapoint.probmethod_embedding}");
Datapoint datapoint = new(jsonDatapoint.name, probMethod_embedding, [.. embeddings.Select(kv => (kv.Key, kv.Value))]);
int id_datapoint = DatabaseInsertDatapoint(jsonDatapoint.name, jsonDatapoint.probmethod_embedding, id_entity);
Dictionary<string, float[]> embeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.Text, [.. jsonDatapoint.Model], ollama, embeddingCache);
var probMethod_embedding = probmethods.GetMethod(jsonDatapoint.Probmethod_embedding) ?? throw new Exception($"Unknown probmethod name {jsonDatapoint.Probmethod_embedding}");
Datapoint datapoint = new(jsonDatapoint.Name, probMethod_embedding, [.. embeddings.Select(kv => (kv.Key, kv.Value))]);
int id_datapoint = DatabaseInsertDatapoint(jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, id_entity);
foreach ((string, float[]) embedding in datapoint.embeddings)
{
DatabaseInsertEmbedding(id_datapoint, embedding.Item1, BytesFromFloatArray(embedding.Item2));
@@ -294,8 +294,8 @@ public class Searchdomain
datapoints.Add(datapoint);
}
var probMethod = probmethods.GetMethod(jsonEntity.probmethod) ?? throw new Exception($"Unknown probmethod name {jsonEntity.probmethod}");
Entity entity = new(jsonEntity.attributes, probMethod, datapoints, jsonEntity.name)
var probMethod = probmethods.GetMethod(jsonEntity.Probmethod) ?? throw new Exception($"Unknown probmethod name {jsonEntity.Probmethod}");
Entity entity = new(jsonEntity.Attributes, probMethod, datapoints, jsonEntity.Name)
{
id = id_entity
};
@@ -314,15 +314,15 @@ public class Searchdomain
Dictionary<string, List<string>> toBeCached = [];
foreach (JSONEntity jSONEntity in jsonEntities)
{
foreach (JSONDatapoint datapoint in jSONEntity.datapoints)
foreach (JSONDatapoint datapoint in jSONEntity.Datapoints)
{
foreach (string model in datapoint.model)
foreach (string model in datapoint.Model)
{
if (!toBeCached.ContainsKey(model))
{
toBeCached[model] = [];
}
toBeCached[model].Add(datapoint.text);
toBeCached[model].Add(datapoint.Text);
}
}
}

View File

@@ -2,6 +2,7 @@ using Microsoft.AspNetCore.Mvc;
using embeddingsearch;
using System.Text.Json;
using Models;
using System.Text.Json.Nodes;
namespace server.Controllers;
[ApiController]
@@ -39,33 +40,30 @@ public class EntityController : ControllerBase
return Ok(new EntityQueryResults(){Results = queryResults});
}
[HttpGet("Index")]
public ActionResult<EntityIndexResult> Index(string searchdomain, string jsonEntity)
[HttpPost("Index")]
public ActionResult<EntityIndexResult> Index(string searchdomain, [FromBody] List<JSONEntity>? jsonEntity)
{
Searchdomain searchdomain_;
try
{
searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
} catch (Exception)
{
return Ok(new EntityIndexResult() {Success = false});
}
List<JSONEntity>? jsonEntities = JsonSerializer.Deserialize<List<JSONEntity>?>(jsonEntity);
if (jsonEntities is not null)
catch (Exception)
{
List<Entity>? entities = searchdomain_.EntitiesFromJSON(jsonEntity);
if (entities is not null)
{
return Ok(new EntityIndexResult() {Success = true});
}
else
{
_logger.LogDebug("Unable to deserialize an entity");
}
return Ok(new EntityIndexResult() { Success = false });
}
return Ok(new EntityIndexResult() {Success = false});
List<Entity>? entities = searchdomain_.EntitiesFromJSON(JsonSerializer.Serialize(jsonEntity));
if (entities is not null)
{
_domainManager.InvalidateSearchdomainCache(searchdomain);
return Ok(new EntityIndexResult() { Success = true });
}
else
{
_logger.LogDebug("Unable to deserialize an entity");
}
return Ok(new EntityIndexResult() { Success = false });
}
[HttpGet("List")]

View File

@@ -42,6 +42,11 @@ public class SearchomainManager
}
}
/// <summary>
/// Removes the entry for <paramref name="searchdomain"/> from <c>searchdomains</c>.
/// </summary>
/// <param name="searchdomain">Name of the searchdomain whose entry is dropped.</param>
public void InvalidateSearchdomainCache(string searchdomain)
    => searchdomains.Remove(searchdomain);
public List<string> ListSearchdomains()
{
DbDataReader reader = ExecuteSQLCommand("SELECT name FROM searchdomain", []);