From f6a4a75e4fee0feb37b9f1df70fecc3fd8b5acd0 Mon Sep 17 00:00:00 2001 From: EzFeDezy Date: Mon, 26 May 2025 15:53:18 +0200 Subject: [PATCH] Implemented indexer, cleanup --- .gitignore | 5 +- .vscode/tasks.json | 6 +- embeddingsearch.sln | 7 + src/Client/Client.cs | 20 ++- src/Indexer/Exceptions/IndexerExceptions.cs | 14 ++ src/Indexer/Exceptions/WorkerExceptions.cs | 18 +++ src/Indexer/Indexer.csproj | 19 +++ src/Indexer/Models/Interfaces.cs | 12 ++ src/Indexer/Models/Script.cs | 94 +++++++++++++ src/Indexer/Models/Worker.cs | 45 ++++++ src/Indexer/Program.cs | 31 +++++ src/Indexer/Properties/launchSettings.json | 41 ++++++ src/Indexer/Scripts/example.py | 50 +++++++ .../Scripts/generate_example_content.py | 122 ++++++++++++++++ src/Indexer/Scripts/tools.py | 130 ++++++++++++++++++ src/Indexer/Services/IndexerService.cs | 102 ++++++++++++++ src/Indexer/appsettings.Development.json | 29 ++++ src/Indexer/appsettings.json | 9 ++ src/embeddingsearch/JSONModels.cs | 18 +-- src/embeddingsearch/Searchdomain.cs | 28 ++-- src/server/Controllers/EntityController.cs | 36 +++-- src/server/SearchdomainManager.cs | 5 + 22 files changed, 791 insertions(+), 50 deletions(-) create mode 100644 src/Indexer/Exceptions/IndexerExceptions.cs create mode 100644 src/Indexer/Exceptions/WorkerExceptions.cs create mode 100644 src/Indexer/Indexer.csproj create mode 100644 src/Indexer/Models/Interfaces.cs create mode 100644 src/Indexer/Models/Script.cs create mode 100644 src/Indexer/Models/Worker.cs create mode 100644 src/Indexer/Program.cs create mode 100644 src/Indexer/Properties/launchSettings.json create mode 100644 src/Indexer/Scripts/example.py create mode 100644 src/Indexer/Scripts/generate_example_content.py create mode 100644 src/Indexer/Scripts/tools.py create mode 100644 src/Indexer/Services/IndexerService.cs create mode 100644 src/Indexer/appsettings.Development.json create mode 100644 src/Indexer/appsettings.json diff --git a/.gitignore b/.gitignore index e208d6e..c5839a4 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,7 @@ src/server/obj src/Client/bin src/Client/obj src/Models/bin -src/Models/obj \ No newline at end of file +src/Models/obj +src/Indexer/bin +src/Indexer/obj +src/Indexer/Scripts/__pycache__ \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 94b7716..6981bbd 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -7,7 +7,7 @@ "type": "process", "args": [ "build", - "${workspaceFolder}/src/testapp/testapp.sln", + "${workspaceFolder}/embeddingsearch.sln", "/property:GenerateFullPaths=true", "/consoleloggerparameters:NoSummary;ForceNoAlign" ], @@ -19,7 +19,7 @@ "type": "process", "args": [ "publish", - "${workspaceFolder}/src/testapp/testapp.sln", + "${workspaceFolder}/embeddingsearch.sln", "/property:GenerateFullPaths=true", "/consoleloggerparameters:NoSummary;ForceNoAlign" ], @@ -33,7 +33,7 @@ "watch", "run", "--project", - "${workspaceFolder}/src/testapp/testapp.sln" + "${workspaceFolder}/embeddingsearch.sln" ], "problemMatcher": "$msCompile" } diff --git a/embeddingsearch.sln b/embeddingsearch.sln index 03e1d81..1b932b5 100644 --- a/embeddingsearch.sln +++ b/embeddingsearch.sln @@ -17,6 +17,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Client", "src\Client\Client EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Models", "src\Models\Models.csproj", "{4468F2B1-425E-441C-B288-C3403BB771CB}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Indexer", "src\Indexer\Indexer.csproj", "{5361FD10-E85C-496C-9BEF-9232F767F904}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -46,6 +48,10 @@ Global {4468F2B1-425E-441C-B288-C3403BB771CB}.Debug|Any CPU.Build.0 = Debug|Any CPU {4468F2B1-425E-441C-B288-C3403BB771CB}.Release|Any CPU.ActiveCfg = Release|Any CPU {4468F2B1-425E-441C-B288-C3403BB771CB}.Release|Any CPU.Build.0 = Release|Any CPU + {5361FD10-E85C-496C-9BEF-9232F767F904}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5361FD10-E85C-496C-9BEF-9232F767F904}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5361FD10-E85C-496C-9BEF-9232F767F904}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5361FD10-E85C-496C-9BEF-9232F767F904}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(NestedProjects) = preSolution {67AA89C0-3630-4994-B4EE-FC86CFF407DB} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF} @@ -54,5 +60,6 @@ Global {D61A2C50-B46C-42BA-B75D-E84D8FA28C29} = {BC4F3063-B921-4C4A-A7CE-11FAF5B73D50} {A8EBB748-5BBA-47EB-840D-E398365C52A2} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF} {4468F2B1-425E-441C-B288-C3403BB771CB} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF} + {5361FD10-E85C-496C-9BEF-9232F767F904} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF} EndGlobalSection EndGlobal diff --git a/src/Client/Client.cs b/src/Client/Client.cs index 37af4b8..2f1b624 100644 --- a/src/Client/Client.cs +++ b/src/Client/Client.cs @@ -104,8 +104,9 @@ public class Client public async Task EntityIndexAsync(string searchdomain, string jsonEntity) { - var url = $"{baseUri}/Entity/Index?apiKey={HttpUtility.UrlEncode(apiKey)}&searchdomain={HttpUtility.UrlEncode(searchdomain)}&jsonEntity={HttpUtility.UrlEncode(jsonEntity)}"; - return await GetUrlAndProcessJson(url); + var url = $"{baseUri}/Entity/Index?apiKey={HttpUtility.UrlEncode(apiKey)}&searchdomain={HttpUtility.UrlEncode(searchdomain)}"; + var content = new StringContent(jsonEntity, Encoding.UTF8, "application/json"); + return await PostUrlAndProcessJson(url, content);//new FormUrlEncodedContent(values)); } public async Task EntityListAsync(bool returnEmbeddings = false) @@ -134,8 +135,19 @@ public class Client { using var client = new HttpClient(); var response = await client.GetAsync(url); - string content = await response.Content.ReadAsStringAsync(); - var result = JsonSerializer.Deserialize(content) + string responseContent = await response.Content.ReadAsStringAsync(); + var result = JsonSerializer.Deserialize(responseContent) + ?? throw new Exception($"Failed to deserialize JSON to type {typeof(T).Name}"); + return result; + } + private static async Task PostUrlAndProcessJson(string url, HttpContent content) + { + using var client = new HttpClient(); + var response = await client.PostAsync(url, content); + string responseContent = await response.Content.ReadAsStringAsync(); + Console.WriteLine("DEBUG@GetUrlAndProcessJson"); + Console.WriteLine(responseContent); + var result = JsonSerializer.Deserialize(responseContent) ?? throw new Exception($"Failed to deserialize JSON to type {typeof(T).Name}"); return result; } diff --git a/src/Indexer/Exceptions/IndexerExceptions.cs b/src/Indexer/Exceptions/IndexerExceptions.cs new file mode 100644 index 0000000..f2e12ac --- /dev/null +++ b/src/Indexer/Exceptions/IndexerExceptions.cs @@ -0,0 +1,14 @@ +namespace Indexer.Exceptions; + +public class IndexerConfigurationException : Exception +{ + public IndexerConfigurationException() + : base("Configuration is incomplete or was set up incorrectly") + { + } + + public IndexerConfigurationException(string message) + : base(message) + { + } +} \ No newline at end of file diff --git a/src/Indexer/Exceptions/WorkerExceptions.cs b/src/Indexer/Exceptions/WorkerExceptions.cs new file mode 100644 index 0000000..4f30e19 --- /dev/null +++ b/src/Indexer/Exceptions/WorkerExceptions.cs @@ -0,0 +1,18 @@ +namespace Indexer.Exceptions; + +public class UnknownScriptLanguageException : Exception +{ + public string? FileName { get; } + + public UnknownScriptLanguageException(string? fileName = null) + : base("Unable to determine script language") + { + FileName = fileName; + } + + public UnknownScriptLanguageException(string message, string? fileName = null) + : base(message) + { + FileName = fileName; + } +} \ No newline at end of file diff --git a/src/Indexer/Indexer.csproj b/src/Indexer/Indexer.csproj new file mode 100644 index 0000000..07dfbdc --- /dev/null +++ b/src/Indexer/Indexer.csproj @@ -0,0 +1,19 @@ + + + + net8.0 + enable + enable + + + + + + + + + + + + + diff --git a/src/Indexer/Models/Interfaces.cs b/src/Indexer/Models/Interfaces.cs new file mode 100644 index 0000000..0613a38 --- /dev/null +++ b/src/Indexer/Models/Interfaces.cs @@ -0,0 +1,12 @@ +namespace Indexer.Models; + +public interface IScriptable +{ + ScriptToolSet ToolSet { get; set; } + void Init(); + void Update(ICallbackInfos callbackInfos); + bool IsScript(string filePath); +} + +public interface ICallbackInfos { } + diff --git a/src/Indexer/Models/Script.cs b/src/Indexer/Models/Script.cs new file mode 100644 index 0000000..851c216 --- /dev/null +++ b/src/Indexer/Models/Script.cs @@ -0,0 +1,94 @@ +using System.Text.Json; +using System.Timers; +using embeddingsearch; +using Python.Runtime; + +namespace Indexer.Models; + +public class PythonScriptable : IScriptable +{ + public ScriptToolSet ToolSet { get; set; } + public PyObject? pyToolSet; + public PyModule scope; + public dynamic sys; + public string source; + public PythonScriptable(ScriptToolSet toolSet) + { + Runtime.PythonDLL = @"libpython3.12.so"; + if (!PythonEngine.IsInitialized) + { + PythonEngine.Initialize(); + PythonEngine.BeginAllowThreads(); + } + ToolSet = toolSet; + source = File.ReadAllText(ToolSet.filePath); + string fullPath = Path.GetFullPath(ToolSet.filePath); + string? scriptDir = Path.GetDirectoryName(fullPath); + using (Py.GIL()) + { + scope = Py.CreateScope(); + sys = Py.Import("sys"); + if (scriptDir is not null) + { + sys.path.append(scriptDir); + } + } + Init(); + } + + public void Init() + { + using (Py.GIL()) + { + pyToolSet = ToolSet.ToPython(); + scope.Set("toolset", pyToolSet); + scope.Exec(source); + scope.Exec("init(toolset)"); + } + } + + public void Update(ICallbackInfos callbackInfos) + { + PythonEngine.Initialize(); + using (Py.GIL()) + { + ToolSet.callbackInfos = callbackInfos; + pyToolSet = ToolSet.ToPython(); + scope.Set("toolset", pyToolSet); + scope.Exec("update(toolset)"); + } + PythonEngine.Shutdown(); + } + + public bool IsScript(string fileName) + { + return fileName.EndsWith(".py"); + } +} + +/* + TODO Add the following languages + - Javascript + - Golang (reconsider) +*/ + +public class ScriptToolSet +{ + public string filePath; + public Client.Client client; + public ICallbackInfos? callbackInfos; + + // IConfiguration - Access to connection strings, ollama, etc. maybe? + public ScriptToolSet(string filePath, Client.Client client) + { + this.filePath = filePath; + this.client = client; + } +} + +public class IntervalCallbackInfos : ICallbackInfos +{ + public object? sender; + public required ElapsedEventArgs e; + +} \ No newline at end of file diff --git a/src/Indexer/Models/Worker.cs b/src/Indexer/Models/Worker.cs new file mode 100644 index 0000000..f8153d1 --- /dev/null +++ b/src/Indexer/Models/Worker.cs @@ -0,0 +1,45 @@ +namespace Indexer.Models; + +public class WorkerCollection +{ + public List Workers; + public List types; + public WorkerCollection() + { + Workers = []; + types = [typeof(PythonScriptable)]; + } +} + +public class Worker +{ + public WorkerConfig Config { get; set; } + public IScriptable Scriptable { get; set; } + + public Worker(WorkerConfig workerConfig, IScriptable scriptable) + { + this.Config = workerConfig; + this.Scriptable = scriptable; + } +} + +public class WorkerCollectionConfig +{ + public required List Worker { get; set; } +} + +public class WorkerConfig +{ + public required string Name { get; set; } + public required List Searchdomains { get; set; } + public required string Script { get; set; } + public required List Calls { get; set; } +} + +public class Call +{ + public required string Type { get; set; } + public long? Interval { get; set; } // For Type: Interval + public string? Path { get; set; } // For Type: FileSystemWatcher +} + diff --git a/src/Indexer/Program.cs b/src/Indexer/Program.cs new file mode 100644 index 0000000..cddc340 --- /dev/null +++ b/src/Indexer/Program.cs @@ -0,0 +1,31 @@ +using Indexer.Models; +using Indexer.Services; +using server; + +var builder = WebApplication.CreateBuilder(args); + +// Add services to the container. + +builder.Services.AddControllers(); +// Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle +builder.Services.AddEndpointsApiExplorer(); +builder.Services.AddSwaggerGen(); +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); +builder.Services.AddHostedService(); +var app = builder.Build(); + +// Configure the HTTP request pipeline. +if (app.Environment.IsDevelopment()) +{ + app.UseSwagger(); + app.UseSwaggerUI(); +} +else +{ + app.UseMiddleware(); +} + +// app.UseHttpsRedirection(); + +app.Run(); diff --git a/src/Indexer/Properties/launchSettings.json b/src/Indexer/Properties/launchSettings.json new file mode 100644 index 0000000..9d1cf96 --- /dev/null +++ b/src/Indexer/Properties/launchSettings.json @@ -0,0 +1,41 @@ +{ + "$schema": "http://json.schemastore.org/launchsettings.json", + "iisSettings": { + "windowsAuthentication": false, + "anonymousAuthentication": true, + "iisExpress": { + "applicationUrl": "http://localhost:36687", + "sslPort": 44337 + } + }, + "profiles": { + "http": { + "commandName": "Project", + "dotnetRunMessages": true, + "launchBrowser": true, + "launchUrl": "swagger", + "applicationUrl": "http://localhost:5210", + "environmentVariables": { + "ASPNETCORE_ENVIRONMENT": "Development" + } + }, + "https": { + "commandName": "Project", + "dotnetRunMessages": true, + "launchBrowser": true, + "launchUrl": "swagger", + "applicationUrl": "https://localhost:7282;http://localhost:5210", + "environmentVariables": { + "ASPNETCORE_ENVIRONMENT": "Development" + } + }, + "IIS Express": { + "commandName": "IISExpress", + "launchBrowser": true, + "launchUrl": "swagger", + "environmentVariables": { + "ASPNETCORE_ENVIRONMENT": "Development" + } + } + } +} diff --git a/src/Indexer/Scripts/example.py b/src/Indexer/Scripts/example.py new file mode 100644 index 0000000..338910e --- /dev/null +++ b/src/Indexer/Scripts/example.py @@ -0,0 +1,50 @@ +import os +from tools import * +import json +from dataclasses import asdict + +example_content = "./Scripts/example_content" +example_searchdomain = "example" +example_counter = 0 +models = ["bge-m3", "mxbai-embed-large"] + +def init(toolset: Toolset): + global example_counter + print("Py-DEBUG@init") + print("This is the init function from the python example script") + print(f"example_counter: {example_counter}") + searchdomainlist:SearchdomainListResults = toolset.client.SearchdomainListAsync().Result + print("Currently these searchdomains exist") + for searchdomain in searchdomainlist.Searchdomains: + print(f" - {searchdomain}") + index_files(toolset) + +def update(toolset: Toolset): + global example_counter + print("Py-DEBUG@update") + print("This is the update function from the python example script") + callbackInfos:ICallbackInfos = toolset.callbackInfos + if (callbackInfos is IntervalCallbackInfos): + print("It was called via an interval callback") + example_counter += 1 + mycounter = example_counter + print(f"example_counter: {example_counter}") + index_files(toolset) + +def index_files(toolset: Toolset): + jsonEntities:list = [] + for filename in os.listdir(example_content): + qualified_filepath = example_content + "/" + filename + with open(qualified_filepath, "r") as file: + title = file.readline() + text = file.read() + datapoints:list = [ + JSONDatapoint("filename", qualified_filepath, "wavg", models), + JSONDatapoint("title", title, "wavg", models), + JSONDatapoint("text", text, "wavg", models) + ] + jsonEntity:dict = asdict(JSONEntity(qualified_filepath, "wavg", example_searchdomain, {}, datapoints)) + jsonEntities.append(jsonEntity) + jsonstring = json.dumps(jsonEntities) + result:EntityIndexResult = toolset.client.EntityIndexAsync(jsonstring).Result + print(f"Update was successful: {result.Success}") \ No newline at end of file diff --git a/src/Indexer/Scripts/generate_example_content.py b/src/Indexer/Scripts/generate_example_content.py new file mode 100644 index 0000000..5ee1316 --- /dev/null +++ b/src/Indexer/Scripts/generate_example_content.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +""" +Generate ten brief-overview files for a given topic using an Ollama model. + +▪ The directory ./files is used as a mini knowledge-base. +▪ Two Python functions are exposed to the model as *tools*: + • list_files() – return [{name, title}, …] for everything in ./files + • create_file() – create/overwrite a file and write the content supplied +▪ The model is instructed to call create_file() ten times (one per sub-topic) + and put the title on the first line of each file. +""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import List, Dict, Any + +import ollama # pip install -U ollama + + +# ---------- constant configuration ------------------------------------------------ + +FILES_DIR = Path(__file__).parent / "example_content" +FILES_DIR.mkdir(exist_ok=True) + + +# ---------- tool functions --------------------------------------------------------- + +def list_files() -> List[Dict[str, str]]: + """ + List every regular file in ./example_content together with its first line (title). + + Returns + ------- + list[dict] + Each element has: {"name": "", "title": ""} + """ + results: List[Dict[str, str]] = [] + for path in FILES_DIR.iterdir(): + if path.is_file(): + with path.open("r", encoding="utf-8", errors="ignore") as fh: + title = fh.readline().rstrip("\n") + results.append({"name": path.name, "title": title}) + return results + + +def create_file(filename: str, content: str) -> str: + """ + Create (or overwrite) a file inside ./files. + + Parameters + ---------- + filename : str + A simple name like "quantum_entanglement.md". Any path components + beyond the basename are stripped for safety. + content : str + The full text to write – the first line *must* be the title. + + Returns + ------- + str + Absolute path of the file that was written. + """ + safe_name = os.path.basename(filename) + if not safe_name: + raise ValueError("filename cannot be empty") + path = FILES_DIR / safe_name + path.write_text(content, encoding="utf-8") + return str(path.resolve()) + + +# ---------- main driver ------------------------------------------------------------ + +def run(topic: str, *, model: str = "qwen3:latest", temperature: float = 0.2) -> None: + """Ask the model to create ten overview files about *topic*.""" + + system_prompt = ( + "You are a file-writing assistant. For each of ten distinct sub-topics " + "related to the given topic you will call the `create_file` tool to " + "write a Markdown file that contains at least 5 sentences. Use a short, " + "snake_case filename ending with '.md'. The very first line of the " + "file **must** be the title (capitalized). After you have created all " + "ten files, reply with only the single word DONE." + ) + + messages: List[Dict[str, Any]] = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": f"Topic: {topic}"}, + ] + + tools = [list_files, create_file] + available = {f.__name__: f for f in tools} + + # initial call + response = ollama.chat(model=model, + messages=messages, + tools=tools, + options={"temperature": temperature}) + + for call in response.message.tool_calls or []: + fn_name = call.function.name + fn_args = call.function.arguments + result = available[fn_name](**fn_args) # Run tool calls + messages.append({"role": "tool", + "name": fn_name, + "content": json.dumps(result, ensure_ascii=False)}) + +# ---------- CLI entry-point -------------------------------------------------------- + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Generate ten overview files for a topic using Ollama + tool calling") + parser.add_argument("topic", help="Main subject area, e.g. 'quantum computing'") + parser.add_argument("--model", default="qwen3:latest", + help="Local Ollama model to use (default: qwen3:latest)") + args = parser.parse_args() + + run(args.topic, model=args.model) \ No newline at end of file diff --git a/src/Indexer/Scripts/tools.py b/src/Indexer/Scripts/tools.py new file mode 100644 index 0000000..d7dd763 --- /dev/null +++ b/src/Indexer/Scripts/tools.py @@ -0,0 +1,130 @@ +from dataclasses import dataclass +import array +from typing import Optional + +@dataclass +class JSONDatapoint: + Name:str + Text:str + Probmethod_embedding:str + Model:list[str] + +@dataclass +class JSONEntity: + Name:str + Probmethod:str + Searchdomain:str + Attributes:dict + Datapoints:array.array[JSONDatapoint] + +#Model - Searchdomain +@dataclass +class SearchdomainListResults: + Searchdomains:list[str] + +@dataclass +class SearchdomainCreateResults: + Success:bool + id:int|None + +@dataclass +class SearchdomainUpdateResults: + Success:bool + +@dataclass +class SearchdomainDeleteResults: + Success:bool + DeletedEntities:int + +#Model - Entity +@dataclass +class EntityQueryResult: + name:str + ValueError:float + +@dataclass +class EntityQueryResults: + Results:list[EntityQueryResult] + +@dataclass +class EntityIndexResult: + Success:bool + +@dataclass +class AttributeResult: + Name:str + Value:str + +@dataclass +class EmbeddingResult: + Model:str + Embeddings:array.array[float] + +@dataclass +class DatapointResult: + Name:str + ProbMethod:str + Embeddings:list[EmbeddingResult]|None + +@dataclass +class EntityListResults: + Name:str + Attributes:list[AttributeResult] + Datapoints:list[DatapointResult] + +@dataclass +class EntityDeleteResults: + Success:bool + +# Model - Client +@dataclass +class Client: + baseUri:str + apiKey:str + searchdomain:str + async def SearchdomainListAsync() -> SearchdomainListResults: + pass + async def SearchdomainDeleteAsync() -> SearchdomainDeleteResults: + pass + async def SearchdomainCreateAsync() -> SearchdomainCreateResults: + pass + async def SearchdomainCreateAsync(searchdomain:str) -> SearchdomainCreateResults: + pass + async def SearchdomainUpdateAsync(newName:str, settings:str) -> SearchdomainUpdateResults: + pass + async def SearchdomainUpdateAsync(searchdomain:str, newName:str, settings:str) -> SearchdomainUpdateResults: + pass + async def EntityQueryAsync(query:str) -> EntityQueryResults: + pass + async def EntityQueryAsync(searchdomain:str, query:str) -> EntityQueryResults: + pass + #async def EntityIndexAsync(jsonEntity): # -> EntityIndexResult:#:NetList[JSONEntity]) -> EntityIndexResult: #TODO fix clr issues, i.e. make this work + # pass + #async def EntityIndexAsync(searchdomain:str, jsonEntity:list[JSONEntity]) -> EntityIndexResult: + # pass + async def EntityIndexAsync(jsonEntity:str) -> EntityIndexResult: + pass + async def EntityIndexAsync(searchdomain:str, jsonEntity:str) -> EntityIndexResult: + pass + async def EntityListAsync(returnEmbeddings:bool = False) -> EntityListResults: + pass + async def EntityListAsync(searchdomain:str, returnEmbeddings:bool = False) -> EntityListResults: + pass + async def EntityDeleteAsync(searchdomain:str, entityName:str) -> EntityDeleteResults: + pass +class ICallbackInfos: + pass + + +@dataclass +class IntervalCallbackInfos(ICallbackInfos): + sender: Optional[object] + e: object + +@dataclass +class Toolset: + filePath:str + client:Client + callbackInfos: Optional[ICallbackInfos] = None + + diff --git a/src/Indexer/Services/IndexerService.cs b/src/Indexer/Services/IndexerService.cs new file mode 100644 index 0000000..b5ece9e --- /dev/null +++ b/src/Indexer/Services/IndexerService.cs @@ -0,0 +1,102 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using Indexer.Exceptions; +using Indexer.Models; +using System.Timers; +using Microsoft.AspNetCore.Http.HttpResults; +using embeddingsearch; +using Python.Runtime; + +namespace Indexer.Services; + +public class IndexerService : IHostedService +{ + private readonly WorkerCollection workerCollection; + private readonly IConfiguration _config; + private readonly Client.Client client; + + public IndexerService(WorkerCollection workerCollection, IConfiguration configuration, Client.Client client) + { + this._config = configuration; + this.client = client; + this.workerCollection = workerCollection; + // Load and configure all workers + var sectionMain = _config.GetSection("EmbeddingsearchIndexer"); + + WorkerCollectionConfig? sectionWorker = (WorkerCollectionConfig?) sectionMain.Get(typeof(WorkerCollectionConfig)); //GetValue("Worker"); + if (sectionWorker is not null) + { + foreach (WorkerConfig workerConfig in sectionWorker.Worker) + { + if (client.searchdomain == "" && workerConfig.Searchdomains.Count >= 1) + { + client.searchdomain = workerConfig.Searchdomains.First(); + } + ScriptToolSet toolSet = new(workerConfig.Script, client); + Worker worker = new(workerConfig, GetScriptable(toolSet)); + workerCollection.Workers.Add(worker); + foreach (Call call in workerConfig.Calls) + { + switch (call.Type) + { + case "interval": + if (call.Interval is null) + { + throw new IndexerConfigurationException($"Interval not set for a Call in Worker \"{workerConfig.Name}\""); + } + var timer = new System.Timers.Timer((double)call.Interval); + timer.Elapsed += (sender, e) => worker.Scriptable.Update(new IntervalCallbackInfos() { sender = sender, e = e }); + timer.AutoReset = true; + timer.Enabled = true; + break; + case "schedule": // TODO implement scheduled tasks using Quartz + throw new NotImplementedException("schedule not implemented yet"); + case "fileupdate": + if (call.Path is null) + { + throw new IndexerConfigurationException($"Path not set for a Call in Worker \"{workerConfig.Name}\""); + } + throw new NotImplementedException("fileupdate not implemented yet"); + //break; + default: + throw new IndexerConfigurationException($"Unknown Type specified for a Call in Worker \"{workerConfig.Name}\""); + } + } + } + } + else + { + throw new IndexerConfigurationException("Unable to find section \"Worker\""); + } + } + + public IScriptable GetScriptable(ScriptToolSet toolSet) + { + string fileName = toolSet.filePath; + foreach (Type type in workerCollection.types) + { + IScriptable? instance = (IScriptable?)Activator.CreateInstance(type, toolSet); + if (instance is not null && instance.IsScript(fileName)) + { + return instance; + } + } + + throw new UnknownScriptLanguageException(fileName); + } + public Task StartAsync(CancellationToken cancellationToken) + { + /*foreach (Worker worker in workerCollection.Workers) + { + worker.Scriptable.Init(); + }*/ + return Task.CompletedTask; + } + + public Task StopAsync(CancellationToken cancellationToken) + { + return Task.CompletedTask; + } +} \ No newline at end of file diff --git a/src/Indexer/appsettings.Development.json b/src/Indexer/appsettings.Development.json new file mode 100644 index 0000000..875f93a --- /dev/null +++ b/src/Indexer/appsettings.Development.json @@ -0,0 +1,29 @@ +{ + "Logging": { + "LogLevel": { + "Default": "Information", + "Microsoft.AspNetCore": "Warning" + } + }, + "Embeddingsearch": { + "BaseUri": "http://localhost:5146" + }, + "EmbeddingsearchIndexer": { + "Worker": + [ + { + "Name": "example", + "Searchdomains": [ + "example" + ], + "Script": "Scripts/example.py", + "Calls": [ + { + "Type": "interval", + "Interval": 10000 + } + ] + } + ] + } +} diff --git a/src/Indexer/appsettings.json b/src/Indexer/appsettings.json new file mode 100644 index 0000000..4d56694 --- /dev/null +++ b/src/Indexer/appsettings.json @@ -0,0 +1,9 @@ +{ + "Logging": { + "LogLevel": { + "Default": "Information", + "Microsoft.AspNetCore": "Warning" + } + }, + "AllowedHosts": "*" +} diff --git a/src/embeddingsearch/JSONModels.cs b/src/embeddingsearch/JSONModels.cs index 5e8fd2f..ee96461 100644 --- a/src/embeddingsearch/JSONModels.cs +++ b/src/embeddingsearch/JSONModels.cs @@ -2,17 +2,17 @@ namespace embeddingsearch; public class JSONEntity { - public required string name { get; set; } - public required string probmethod { get; set; } - public required string searchdomain { get; set; } - public required Dictionary attributes { get; set; } - public required JSONDatapoint[] datapoints { get; set; } + public required string Name { get; set; } + public required string Probmethod { get; set; } + public required string Searchdomain { get; set; } + public required Dictionary Attributes { get; set; } + public required JSONDatapoint[] Datapoints { get; set; } } public class JSONDatapoint { - public required string name { get; set; } - public required string text { get; set; } - public required string probmethod_embedding { get; set; } - public required string[] model { get; set; } + public required string Name { get; set; } + public required string Text { get; set; } + public required string Probmethod_embedding { get; set; } + public required string[] Model { get; set; } } \ No newline at end of file diff --git a/src/embeddingsearch/Searchdomain.cs b/src/embeddingsearch/Searchdomain.cs index 29ccdf2..86967c4 100644 --- a/src/embeddingsearch/Searchdomain.cs +++ b/src/embeddingsearch/Searchdomain.cs @@ -268,25 +268,25 @@ public class Searchdomain { return null; } - if (HasEntity(jsonEntity.name)) + if (HasEntity(jsonEntity.Name)) { - RemoveEntity(jsonEntity.name); + RemoveEntity(jsonEntity.Name); } - int id_entity = DatabaseInsertEntity(jsonEntity.name, jsonEntity.probmethod, id); - foreach (KeyValuePair attribute in jsonEntity.attributes) + int id_entity = DatabaseInsertEntity(jsonEntity.Name, jsonEntity.Probmethod, id); + foreach (KeyValuePair attribute in jsonEntity.Attributes) { DatabaseInsertAttribute(attribute.Key, attribute.Value, id_entity); } List datapoints = []; - foreach (JSONDatapoint jsonDatapoint in jsonEntity.datapoints) + foreach (JSONDatapoint jsonDatapoint in jsonEntity.Datapoints) { - Dictionary embeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.text, [.. jsonDatapoint.model], ollama, embeddingCache); - var probMethod_embedding = probmethods.GetMethod(jsonDatapoint.probmethod_embedding) ?? throw new Exception($"Unknown probmethod name {jsonDatapoint.probmethod_embedding}"); - Datapoint datapoint = new(jsonDatapoint.name, probMethod_embedding, [.. embeddings.Select(kv => (kv.Key, kv.Value))]); - int id_datapoint = DatabaseInsertDatapoint(jsonDatapoint.name, jsonDatapoint.probmethod_embedding, id_entity); + Dictionary embeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.Text, [.. jsonDatapoint.Model], ollama, embeddingCache); + var probMethod_embedding = probmethods.GetMethod(jsonDatapoint.Probmethod_embedding) ?? throw new Exception($"Unknown probmethod name {jsonDatapoint.Probmethod_embedding}"); + Datapoint datapoint = new(jsonDatapoint.Name, probMethod_embedding, [.. embeddings.Select(kv => (kv.Key, kv.Value))]); + int id_datapoint = DatabaseInsertDatapoint(jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, id_entity); foreach ((string, float[]) embedding in datapoint.embeddings) { DatabaseInsertEmbedding(id_datapoint, embedding.Item1, BytesFromFloatArray(embedding.Item2)); @@ -294,8 +294,8 @@ public class Searchdomain datapoints.Add(datapoint); } - var probMethod = probmethods.GetMethod(jsonEntity.probmethod) ?? throw new Exception($"Unknown probmethod name {jsonEntity.probmethod}"); - Entity entity = new(jsonEntity.attributes, probMethod, datapoints, jsonEntity.name) + var probMethod = probmethods.GetMethod(jsonEntity.Probmethod) ?? throw new Exception($"Unknown probmethod name {jsonEntity.Probmethod}"); + Entity entity = new(jsonEntity.Attributes, probMethod, datapoints, jsonEntity.Name) { id = id_entity }; @@ -314,15 +314,15 @@ public class Searchdomain Dictionary> toBeCached = []; foreach (JSONEntity jSONEntity in jsonEntities) { - foreach (JSONDatapoint datapoint in jSONEntity.datapoints) + foreach (JSONDatapoint datapoint in jSONEntity.Datapoints) { - foreach (string model in datapoint.model) + foreach (string model in datapoint.Model) { if (!toBeCached.ContainsKey(model)) { toBeCached[model] = []; } - toBeCached[model].Add(datapoint.text); + toBeCached[model].Add(datapoint.Text); } } } diff --git a/src/server/Controllers/EntityController.cs b/src/server/Controllers/EntityController.cs index 9b29702..da54c7f 100644 --- a/src/server/Controllers/EntityController.cs +++ b/src/server/Controllers/EntityController.cs @@ -2,6 +2,7 @@ using Microsoft.AspNetCore.Mvc; using embeddingsearch; using System.Text.Json; using Models; +using System.Text.Json.Nodes; namespace server.Controllers; [ApiController] @@ -39,33 +40,30 @@ public class EntityController : ControllerBase return Ok(new EntityQueryResults(){Results = queryResults}); } - [HttpGet("Index")] - public ActionResult Index(string searchdomain, string jsonEntity) + [HttpPost("Index")] + public ActionResult Index(string searchdomain, [FromBody] List? jsonEntity) { Searchdomain searchdomain_; try { searchdomain_ = _domainManager.GetSearchdomain(searchdomain); - } catch (Exception) - { - return Ok(new EntityIndexResult() {Success = false}); } - List? jsonEntities = JsonSerializer.Deserialize?>(jsonEntity); - if (jsonEntities is not null) + catch (Exception) { - - List? entities = searchdomain_.EntitiesFromJSON(jsonEntity); - if (entities is not null) - { - return Ok(new EntityIndexResult() {Success = true}); - } - else - { - _logger.LogDebug("Unable to deserialize an entity"); - } + return Ok(new EntityIndexResult() { Success = false }); } - - return Ok(new EntityIndexResult() {Success = false}); + List? entities = searchdomain_.EntitiesFromJSON(JsonSerializer.Serialize(jsonEntity)); + if (entities is not null) + { + _domainManager.InvalidateSearchdomainCache(searchdomain); + return Ok(new EntityIndexResult() { Success = true }); + } + else + { + _logger.LogDebug("Unable to deserialize an entity"); + } + + return Ok(new EntityIndexResult() { Success = false }); } [HttpGet("List")] diff --git a/src/server/SearchdomainManager.cs b/src/server/SearchdomainManager.cs index b79fc28..891760a 100644 --- a/src/server/SearchdomainManager.cs +++ b/src/server/SearchdomainManager.cs @@ -42,6 +42,11 @@ public class SearchomainManager } } + public void InvalidateSearchdomainCache(string searchdomain) + { + searchdomains.Remove(searchdomain); + } + public List ListSearchdomains() { DbDataReader reader = ExecuteSQLCommand("SELECT name FROM searchdomain", []);