Implemented indexer, cleanup
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -9,4 +9,7 @@ src/server/obj
|
||||
src/Client/bin
|
||||
src/Client/obj
|
||||
src/Models/bin
|
||||
src/Models/obj
|
||||
src/Models/obj
|
||||
src/Indexer/bin
|
||||
src/Indexer/obj
|
||||
src/Indexer/Scripts/__pycache__
|
||||
6
.vscode/tasks.json
vendored
6
.vscode/tasks.json
vendored
@@ -7,7 +7,7 @@
|
||||
"type": "process",
|
||||
"args": [
|
||||
"build",
|
||||
"${workspaceFolder}/src/testapp/testapp.sln",
|
||||
"${workspaceFolder}/embeddingsearch.sln",
|
||||
"/property:GenerateFullPaths=true",
|
||||
"/consoleloggerparameters:NoSummary;ForceNoAlign"
|
||||
],
|
||||
@@ -19,7 +19,7 @@
|
||||
"type": "process",
|
||||
"args": [
|
||||
"publish",
|
||||
"${workspaceFolder}/src/testapp/testapp.sln",
|
||||
"${workspaceFolder}/embeddingsearch.sln",
|
||||
"/property:GenerateFullPaths=true",
|
||||
"/consoleloggerparameters:NoSummary;ForceNoAlign"
|
||||
],
|
||||
@@ -33,7 +33,7 @@
|
||||
"watch",
|
||||
"run",
|
||||
"--project",
|
||||
"${workspaceFolder}/src/testapp/testapp.sln"
|
||||
"${workspaceFolder}/embeddingsearch.sln"
|
||||
],
|
||||
"problemMatcher": "$msCompile"
|
||||
}
|
||||
|
||||
@@ -17,6 +17,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Client", "src\Client\Client
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Models", "src\Models\Models.csproj", "{4468F2B1-425E-441C-B288-C3403BB771CB}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Indexer", "src\Indexer\Indexer.csproj", "{5361FD10-E85C-496C-9BEF-9232F767F904}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -46,6 +48,10 @@ Global
|
||||
{4468F2B1-425E-441C-B288-C3403BB771CB}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{4468F2B1-425E-441C-B288-C3403BB771CB}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{4468F2B1-425E-441C-B288-C3403BB771CB}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{5361FD10-E85C-496C-9BEF-9232F767F904}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{5361FD10-E85C-496C-9BEF-9232F767F904}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{5361FD10-E85C-496C-9BEF-9232F767F904}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{5361FD10-E85C-496C-9BEF-9232F767F904}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(NestedProjects) = preSolution
|
||||
{67AA89C0-3630-4994-B4EE-FC86CFF407DB} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF}
|
||||
@@ -54,5 +60,6 @@ Global
|
||||
{D61A2C50-B46C-42BA-B75D-E84D8FA28C29} = {BC4F3063-B921-4C4A-A7CE-11FAF5B73D50}
|
||||
{A8EBB748-5BBA-47EB-840D-E398365C52A2} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF}
|
||||
{4468F2B1-425E-441C-B288-C3403BB771CB} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF}
|
||||
{5361FD10-E85C-496C-9BEF-9232F767F904} = {6AA0A9E0-A361-4E86-BA02-D5F6779C6DEF}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
||||
@@ -104,8 +104,9 @@ public class Client
|
||||
|
||||
public async Task<EntityIndexResult> EntityIndexAsync(string searchdomain, string jsonEntity)
|
||||
{
|
||||
var url = $"{baseUri}/Entity/Index?apiKey={HttpUtility.UrlEncode(apiKey)}&searchdomain={HttpUtility.UrlEncode(searchdomain)}&jsonEntity={HttpUtility.UrlEncode(jsonEntity)}";
|
||||
return await GetUrlAndProcessJson<EntityIndexResult>(url);
|
||||
var url = $"{baseUri}/Entity/Index?apiKey={HttpUtility.UrlEncode(apiKey)}&searchdomain={HttpUtility.UrlEncode(searchdomain)}";
|
||||
var content = new StringContent(jsonEntity, Encoding.UTF8, "application/json");
|
||||
return await PostUrlAndProcessJson<EntityIndexResult>(url, content);//new FormUrlEncodedContent(values));
|
||||
}
|
||||
|
||||
public async Task<EntityListResults> EntityListAsync(bool returnEmbeddings = false)
|
||||
@@ -134,8 +135,19 @@ public class Client
|
||||
{
|
||||
using var client = new HttpClient();
|
||||
var response = await client.GetAsync(url);
|
||||
string content = await response.Content.ReadAsStringAsync();
|
||||
var result = JsonSerializer.Deserialize<T>(content)
|
||||
string responseContent = await response.Content.ReadAsStringAsync();
|
||||
var result = JsonSerializer.Deserialize<T>(responseContent)
|
||||
?? throw new Exception($"Failed to deserialize JSON to type {typeof(T).Name}");
|
||||
return result;
|
||||
}
|
||||
/// <summary>
/// Sends <paramref name="content"/> to <paramref name="url"/> via HTTP POST and
/// deserializes the JSON response body into <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type the response body is deserialized into.</typeparam>
/// <param name="url">Absolute request URL, including any query string.</param>
/// <param name="content">Request body to post.</param>
/// <returns>The deserialized response.</returns>
/// <exception cref="Exception">The response body deserialized to <c>null</c>.</exception>
private static async Task<T> PostUrlAndProcessJson<T>(string url, HttpContent content)
{
    using var client = new HttpClient();
    using var response = await client.PostAsync(url, content);
    string responseContent = await response.Content.ReadAsStringAsync();

    // The response body is no longer echoed to stdout: that was leftover debug
    // output (labelled with the wrong method name) that printed every server reply.
    var result = JsonSerializer.Deserialize<T>(responseContent)
        ?? throw new Exception($"Failed to deserialize JSON to type {typeof(T).Name}");
    return result;
}
|
||||
|
||||
14
src/Indexer/Exceptions/IndexerExceptions.cs
Normal file
14
src/Indexer/Exceptions/IndexerExceptions.cs
Normal file
@@ -0,0 +1,14 @@
|
||||
namespace Indexer.Exceptions;
|
||||
|
||||
/// <summary>
/// Thrown when the indexer configuration is missing required values or
/// contains values that cannot be used.
/// </summary>
public class IndexerConfigurationException : Exception
{
    private const string DefaultMessage = "Configuration is incomplete or was set up incorrectly";

    /// <summary>Creates the exception with the generic default message.</summary>
    public IndexerConfigurationException()
        : base(DefaultMessage)
    {
    }

    /// <summary>Creates the exception with a specific message.</summary>
    /// <param name="message">Description of the configuration problem.</param>
    public IndexerConfigurationException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with a specific message and the exception that caused it,
    /// so the original failure is not lost when it is wrapped.
    /// </summary>
    /// <param name="message">Description of the configuration problem.</param>
    /// <param name="innerException">The underlying cause.</param>
    public IndexerConfigurationException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
18
src/Indexer/Exceptions/WorkerExceptions.cs
Normal file
18
src/Indexer/Exceptions/WorkerExceptions.cs
Normal file
@@ -0,0 +1,18 @@
|
||||
namespace Indexer.Exceptions;
|
||||
|
||||
/// <summary>
/// Thrown when no registered script host accepts a script file.
/// </summary>
public class UnknownScriptLanguageException : Exception
{
    /// <summary>Path of the script that could not be matched, if it was supplied.</summary>
    public string? FileName { get; }

    /// <summary>Creates the exception with the default message.</summary>
    /// <param name="fileName">Path of the offending script, or <c>null</c> when unknown.</param>
    public UnknownScriptLanguageException(string? fileName = null)
        : this("Unable to determine script language", fileName)
    {
    }

    /// <summary>Creates the exception with a caller-provided message.</summary>
    /// <param name="message">Text describing the failure.</param>
    /// <param name="fileName">Path of the offending script, or <c>null</c> when unknown.</param>
    public UnknownScriptLanguageException(string message, string? fileName = null)
        : base(message)
    {
        this.FileName = fileName;
    }
}
|
||||
19
src/Indexer/Indexer.csproj
Normal file
19
src/Indexer/Indexer.csproj
Normal file
@@ -0,0 +1,19 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk.Web">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net8.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.14" />
|
||||
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.6.2" />
|
||||
<PackageReference Include="Python" Version="3.13.3" />
|
||||
<PackageReference Include="Pythonnet" Version="3.0.5" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\server\server.csproj" />
|
||||
<ProjectReference Include="..\Client\Client.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
12
src/Indexer/Models/Interfaces.cs
Normal file
12
src/Indexer/Models/Interfaces.cs
Normal file
@@ -0,0 +1,12 @@
|
||||
namespace Indexer.Models;
|
||||
|
||||
/// <summary>
/// Contract for a script host: an object that wraps one script file and
/// exposes its initialisation and update entry points.
/// </summary>
public interface IScriptable
{
    /// <summary>Reports whether this host handles the given script file.</summary>
    /// <param name="filePath">Path of the script file to test.</param>
    bool IsScript(string filePath);

    /// <summary>Runs the script's initialisation entry point.</summary>
    void Init();

    /// <summary>Runs the script's update entry point.</summary>
    /// <param name="callbackInfos">Information about what triggered the update.</param>
    void Update(ICallbackInfos callbackInfos);

    /// <summary>Objects handed to the script (script path, client, callback data).</summary>
    ScriptToolSet ToolSet { get; set; }
}
|
||||
|
||||
/// <summary>
/// Marker interface for the data passed to <c>IScriptable.Update</c>
/// describing what triggered the call. Declares no members.
/// </summary>
public interface ICallbackInfos
{
}
|
||||
|
||||
94
src/Indexer/Models/Script.cs
Normal file
94
src/Indexer/Models/Script.cs
Normal file
@@ -0,0 +1,94 @@
|
||||
using System.Text.Json;
|
||||
using System.Timers;
|
||||
using embeddingsearch;
|
||||
using Python.Runtime;
|
||||
|
||||
namespace Indexer.Models;
|
||||
|
||||
/// <summary>
/// <see cref="IScriptable"/> implementation that hosts a Python script through pythonnet.
/// The script source is executed in its own scope and must define
/// <c>init(toolset)</c> and <c>update(toolset)</c>.
/// </summary>
public class PythonScriptable : IScriptable
{
    /// <summary>Objects exposed to the script as the <c>toolset</c> variable.</summary>
    public ScriptToolSet ToolSet { get; set; }

    /// <summary>Python view of <see cref="ToolSet"/>; refreshed before each script call.</summary>
    public PyObject? pyToolSet;

    /// <summary>Scope in which the script source was executed.</summary>
    public PyModule scope;

    /// <summary>The imported Python <c>sys</c> module.</summary>
    public dynamic sys;

    /// <summary>Full text of the script file.</summary>
    public string source;

    /// <summary>
    /// Starts the Python engine if necessary, loads the script named by
    /// <paramref name="toolSet"/>, adds the script's directory to <c>sys.path</c>
    /// and runs <see cref="Init"/>.
    /// </summary>
    /// <param name="toolSet">Script path and helpers to expose to the script.</param>
    public PythonScriptable(ScriptToolSet toolSet)
    {
        if (!PythonEngine.IsInitialized)
        {
            // The DLL path is only assigned before the first initialisation: it is a
            // process-wide setting, and assigning it for every additional script
            // instance after the engine is running is at best redundant.
            Runtime.PythonDLL = @"libpython3.12.so";
            PythonEngine.Initialize();
            // Release the GIL taken by Initialize so other threads can acquire it via Py.GIL().
            PythonEngine.BeginAllowThreads();
        }

        ToolSet = toolSet;
        source = File.ReadAllText(ToolSet.filePath);
        string fullPath = Path.GetFullPath(ToolSet.filePath);
        string? scriptDir = Path.GetDirectoryName(fullPath);

        using (Py.GIL())
        {
            scope = Py.CreateScope();
            sys = Py.Import("sys");
            if (scriptDir is not null)
            {
                // Lets the script import modules located next to it.
                sys.path.append(scriptDir);
            }
        }

        Init();
    }

    /// <summary>
    /// Executes the script source in <see cref="scope"/> and then calls its <c>init(toolset)</c>.
    /// </summary>
    public void Init()
    {
        using (Py.GIL())
        {
            pyToolSet = ToolSet.ToPython();
            scope.Set("toolset", pyToolSet);
            scope.Exec(source);
            scope.Exec("init(toolset)");
        }
    }

    /// <summary>
    /// Stores <paramref name="callbackInfos"/> on the tool set and calls the script's
    /// <c>update(toolset)</c>.
    /// </summary>
    /// <param name="callbackInfos">Information about what triggered this update.</param>
    public void Update(ICallbackInfos callbackInfos)
    {
        // The engine is deliberately NOT initialised/shut down per call any more:
        // shutting it down here tore down the interpreter that owns `scope`, so every
        // later update (and every other script instance) ran against a dead runtime.
        using (Py.GIL())
        {
            ToolSet.callbackInfos = callbackInfos;
            pyToolSet = ToolSet.ToPython();
            scope.Set("toolset", pyToolSet);
            scope.Exec("update(toolset)");
        }
    }

    /// <summary>Reports whether <paramref name="fileName"/> has the <c>.py</c> extension.</summary>
    /// <param name="fileName">File name or path to test.</param>
    public bool IsScript(string fileName)
    {
        // Ordinal: a file extension is an identifier, not linguistic text.
        return fileName.EndsWith(".py", StringComparison.Ordinal);
    }
}
|
||||
|
||||
/*
|
||||
TODO Add the following languages
|
||||
- Javascript
|
||||
- Golang (reconsider)
|
||||
*/
|
||||
|
||||
/// <summary>
/// Bundle of objects handed to a script: the script's own path, the API client
/// and, once an update is triggered, the callback information.
/// </summary>
/// <param name="filePath">Path of the script file.</param>
/// <param name="client">Client the script can use.</param>
// Idea noted by the author: possibly also expose IConfiguration (connection strings, ollama, etc.).
public class ScriptToolSet(string filePath, Client.Client client)
{
    /// <summary>Path of the script file.</summary>
    public string filePath = filePath;

    /// <summary>Client the script can use.</summary>
    public Client.Client client = client;

    /// <summary>Callback data; not set by the constructor.</summary>
    public ICallbackInfos? callbackInfos;
}
|
||||
|
||||
/// <summary>
/// Callback information carrying the arguments of a timer <c>Elapsed</c> event.
/// </summary>
public class IntervalCallbackInfos : ICallbackInfos
{
    /// <summary>Event arguments of the elapsed event; must be set on creation.</summary>
    public required ElapsedEventArgs e;

    /// <summary>Event sender, if any.</summary>
    public object? sender;
}
|
||||
45
src/Indexer/Models/Worker.cs
Normal file
45
src/Indexer/Models/Worker.cs
Normal file
@@ -0,0 +1,45 @@
|
||||
namespace Indexer.Models;
|
||||
|
||||
/// <summary>
/// Holds the configured workers together with the script host types that
/// may be used to run their scripts.
/// </summary>
public class WorkerCollection
{
    /// <summary>Workers registered so far; starts empty.</summary>
    public List<Worker> Workers = [];

    /// <summary>Candidate <see cref="IScriptable"/> implementations.</summary>
    public List<Type> types = [typeof(PythonScriptable)];
}
|
||||
|
||||
/// <summary>
/// Pairs a worker's configuration with the script host that executes its script.
/// </summary>
/// <param name="workerConfig">Configuration of the worker.</param>
/// <param name="scriptable">Script host bound to the worker's script.</param>
public class Worker(WorkerConfig workerConfig, IScriptable scriptable)
{
    /// <summary>Configuration of the worker.</summary>
    public WorkerConfig Config { get; set; } = workerConfig;

    /// <summary>Script host bound to the worker's script.</summary>
    public IScriptable Scriptable { get; set; } = scriptable;
}
|
||||
|
||||
/// <summary>
/// Configuration model holding the list of worker definitions
/// (the <c>Worker</c> key).
/// </summary>
public class WorkerCollectionConfig
{
    /// <summary>All configured workers.</summary>
    public required List<WorkerConfig> Worker
    {
        get;
        set;
    }
}
|
||||
|
||||
/// <summary>
/// Configuration of a single worker: its name, script, search domains and
/// the triggers that run it.
/// </summary>
public class WorkerConfig
{
    /// <summary>Name of the worker.</summary>
    public required string Name { get; set; }

    /// <summary>Path of the script the worker runs.</summary>
    public required string Script { get; set; }

    /// <summary>Search domains associated with the worker.</summary>
    public required List<string> Searchdomains { get; set; }

    /// <summary>Triggers that invoke the worker's script.</summary>
    public required List<Call> Calls { get; set; }
}
|
||||
|
||||
/// <summary>
/// One trigger definition for a worker. Which optional member applies
/// depends on <see cref="Type"/>.
/// </summary>
public class Call
{
    /// <summary>Kind of trigger.</summary>
    public required string Type { get; set; }

    /// <summary>For Type: FileSystemWatcher.</summary>
    public string? Path { get; set; }

    /// <summary>For Type: Interval.</summary>
    public long? Interval { get; set; }
}
|
||||
|
||||
31
src/Indexer/Program.cs
Normal file
31
src/Indexer/Program.cs
Normal file
@@ -0,0 +1,31 @@
|
||||
using Indexer.Models;
|
||||
using Indexer.Services;
|
||||
using server;
|
||||
|
||||
var builder = WebApplication.CreateBuilder(args);
var services = builder.Services;

// Service registrations: MVC controllers, Swagger/OpenAPI
// (see https://aka.ms/aspnetcore/swashbuckle), the shared client and
// worker collection, and the hosted indexer service.
services.AddControllers();
services.AddEndpointsApiExplorer();
services.AddSwaggerGen();
services.AddSingleton<Client.Client>();
services.AddSingleton<WorkerCollection>();
services.AddHostedService<IndexerService>();

var app = builder.Build();

// HTTP pipeline: API-key middleware outside Development, Swagger UI in Development.
if (!app.Environment.IsDevelopment())
{
    app.UseMiddleware<ApiKeyMiddleware>();
}
else
{
    app.UseSwagger();
    app.UseSwaggerUI();
}

// HTTPS redirection is currently switched off:
// app.UseHttpsRedirection();

app.Run();
|
||||
41
src/Indexer/Properties/launchSettings.json
Normal file
41
src/Indexer/Properties/launchSettings.json
Normal file
@@ -0,0 +1,41 @@
|
||||
{
|
||||
"$schema": "http://json.schemastore.org/launchsettings.json",
|
||||
"iisSettings": {
|
||||
"windowsAuthentication": false,
|
||||
"anonymousAuthentication": true,
|
||||
"iisExpress": {
|
||||
"applicationUrl": "http://localhost:36687",
|
||||
"sslPort": 44337
|
||||
}
|
||||
},
|
||||
"profiles": {
|
||||
"http": {
|
||||
"commandName": "Project",
|
||||
"dotnetRunMessages": true,
|
||||
"launchBrowser": true,
|
||||
"launchUrl": "swagger",
|
||||
"applicationUrl": "http://localhost:5210",
|
||||
"environmentVariables": {
|
||||
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||
}
|
||||
},
|
||||
"https": {
|
||||
"commandName": "Project",
|
||||
"dotnetRunMessages": true,
|
||||
"launchBrowser": true,
|
||||
"launchUrl": "swagger",
|
||||
"applicationUrl": "https://localhost:7282;http://localhost:5210",
|
||||
"environmentVariables": {
|
||||
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||
}
|
||||
},
|
||||
"IIS Express": {
|
||||
"commandName": "IISExpress",
|
||||
"launchBrowser": true,
|
||||
"launchUrl": "swagger",
|
||||
"environmentVariables": {
|
||||
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
50
src/Indexer/Scripts/example.py
Normal file
50
src/Indexer/Scripts/example.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import os
|
||||
from tools import *
|
||||
import json
|
||||
from dataclasses import asdict
|
||||
|
||||
example_content = "./Scripts/example_content"
|
||||
example_searchdomain = "example"
|
||||
example_counter = 0
|
||||
models = ["bge-m3", "mxbai-embed-large"]
|
||||
|
||||
def init(toolset: Toolset):
    """Entry point called once after the script is loaded.

    Prints the current counter, lists the searchdomains reported by the
    client and then indexes the example files.
    """
    print("Py-DEBUG@init")
    print("This is the init function from the python example script")
    print(f"example_counter: {example_counter}")

    # `.Result` is read off the value returned by the client call
    # (presumably a .NET Task when run inside the indexer — confirm).
    listing: SearchdomainListResults = toolset.client.SearchdomainListAsync().Result
    print("Currently these searchdomains exist")
    for name in listing.Searchdomains:
        print(f" - {name}")

    index_files(toolset)
|
||||
|
||||
def update(toolset: Toolset):
    """Entry point called for every configured trigger.

    Reports whether the call came from an interval trigger, increments the
    module-level ``example_counter`` and re-indexes the example files.
    """
    global example_counter
    print("Py-DEBUG@update")
    print("This is the update function from the python example script")

    callbackInfos: ICallbackInfos = toolset.callbackInfos
    # The previous `callbackInfos is IntervalCallbackInfos` compared an instance
    # with a class by identity and could never be true. The type-name comparison
    # also covers an object that is not an instance of the Python stub class
    # (presumably the .NET object passed by the host — confirm).
    is_interval = isinstance(callbackInfos, IntervalCallbackInfos) or (
        type(callbackInfos).__name__ == "IntervalCallbackInfos"
    )
    if is_interval:
        print("It was called via an interval callback")

    example_counter += 1
    print(f"example_counter: {example_counter}")
    index_files(toolset)
|
||||
|
||||
def index_files(toolset: Toolset):
    """Index every regular file in ``example_content`` as one entity.

    Each file becomes an entity named after its path, with three datapoints:
    the path, the first line (title) and the remaining text. All entities are
    sent to the client in a single JSON array.
    """
    jsonEntities: list = []
    # Sorted so the order of the submitted entities does not depend on the file system.
    for filename in sorted(os.listdir(example_content)):
        qualified_filepath = example_content + "/" + filename
        if not os.path.isfile(qualified_filepath):
            # Sub-directories and other non-files cannot be opened as text.
            continue
        # Explicit encoding: the default depends on the platform locale.
        with open(qualified_filepath, "r", encoding="utf-8") as file:
            # readline() keeps the line terminator; it is not part of the title.
            title = file.readline().rstrip("\n")
            text = file.read()
        datapoints: list = [
            JSONDatapoint("filename", qualified_filepath, "wavg", models),
            JSONDatapoint("title", title, "wavg", models),
            JSONDatapoint("text", text, "wavg", models),
        ]
        jsonEntity: dict = asdict(
            JSONEntity(qualified_filepath, "wavg", example_searchdomain, {}, datapoints)
        )
        jsonEntities.append(jsonEntity)

    jsonstring = json.dumps(jsonEntities)
    result: EntityIndexResult = toolset.client.EntityIndexAsync(jsonstring).Result
    print(f"Update was successful: {result.Success}")
|
||||
122
src/Indexer/Scripts/generate_example_content.py
Normal file
122
src/Indexer/Scripts/generate_example_content.py
Normal file
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate ten brief-overview files for a given topic using an Ollama model.
|
||||
|
||||
▪ The directory ./example_content (next to this script) is used as a mini knowledge-base.
|
||||
▪ Two Python functions are exposed to the model as *tools*:
|
||||
• list_files() – return [{name, title}, …] for everything in ./example_content
|
||||
• create_file() – create/overwrite a file and write the content supplied
|
||||
▪ The model is instructed to call create_file() ten times (one per sub-topic)
|
||||
and put the title on the first line of each file.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
|
||||
import ollama # pip install -U ollama
|
||||
|
||||
|
||||
# ---------- constant configuration ------------------------------------------------
|
||||
|
||||
FILES_DIR = Path(__file__).parent / "example_content"
|
||||
FILES_DIR.mkdir(exist_ok=True)
|
||||
|
||||
|
||||
# ---------- tool functions ---------------------------------------------------------
|
||||
|
||||
def list_files() -> List[Dict[str, str]]:
    """
    Describe every regular file found in FILES_DIR.

    Returns
    -------
    list[dict]
        One ``{"name": <file name>, "title": <first line>}`` entry per file;
        the title is the first line without its trailing newline ('' for an
        empty file).
    """
    entries: List[Dict[str, str]] = []
    for entry in FILES_DIR.iterdir():
        if not entry.is_file():
            continue
        with entry.open("r", encoding="utf-8", errors="ignore") as handle:
            first_line = handle.readline()
        entries.append({"name": entry.name, "title": first_line.rstrip("\n")})
    return entries
|
||||
|
||||
|
||||
def create_file(filename: str, content: str) -> str:
    """
    Create (or overwrite) a file inside FILES_DIR (./example_content).

    Parameters
    ----------
    filename : str
        A simple name like "quantum_entanglement.md". Any path components
        beyond the basename are stripped for safety.
    content : str
        The full text to write – the first line *must* be the title.

    Returns
    -------
    str
        Absolute path of the file that was written.

    Raises
    ------
    ValueError
        If the name is empty after stripping, or is "." or "..".
    """
    safe_name = os.path.basename(filename)
    if not safe_name:
        raise ValueError("filename cannot be empty")
    if safe_name in (".", ".."):
        # basename() leaves these untouched; they name directories, not files.
        raise ValueError(f"filename is not a valid file name: {safe_name!r}")
    path = FILES_DIR / safe_name
    path.write_text(content, encoding="utf-8")
    return str(path.resolve())
|
||||
|
||||
|
||||
# ---------- main driver ------------------------------------------------------------
|
||||
|
||||
def run(topic: str, *, model: str = "qwen3:latest", temperature: float = 0.2,
        max_rounds: int = 20) -> None:
    """Ask the model to create ten overview files about *topic*.

    The conversation is continued until the model answers without requesting
    a tool, or until *max_rounds* model calls have been made. Previously only
    the tool calls of the first reply were executed and their results were
    never sent back, so the model could not proceed past one round.

    Parameters
    ----------
    topic : str
        Main subject the files should cover.
    model : str
        Name of the Ollama model to use.
    temperature : float
        Sampling temperature passed to the model.
    max_rounds : int
        Upper bound on model calls, so a model that keeps requesting tools
        cannot loop forever.
    """

    system_prompt = (
        "You are a file-writing assistant. For each of ten distinct sub-topics "
        "related to the given topic you will call the `create_file` tool to "
        "write a Markdown file that contains at least 5 sentences. Use a short, "
        "snake_case filename ending with '.md'. The very first line of the "
        "file **must** be the title (capitalized). After you have created all "
        "ten files, reply with only the single word DONE."
    )

    # Holds plain dicts plus the assistant message objects returned by ollama.
    messages: List[Any] = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Topic: {topic}"},
    ]

    tools = [list_files, create_file]
    available = {f.__name__: f for f in tools}

    for _ in range(max_rounds):
        response = ollama.chat(model=model,
                               messages=messages,
                               tools=tools,
                               options={"temperature": temperature})

        tool_calls = response.message.tool_calls or []
        if not tool_calls:
            break  # plain answer (e.g. DONE): nothing left to execute

        # The assistant turn that requested the tools must precede their results.
        messages.append(response.message)

        for call in tool_calls:
            fn_name = call.function.name
            fn_args = call.function.arguments
            tool = available.get(fn_name)
            if tool is None:
                # Report a hallucinated tool name back to the model instead of crashing.
                result: Any = {"error": f"unknown tool: {fn_name}"}
            else:
                try:
                    result = tool(**fn_args)  # Run tool calls
                except (TypeError, ValueError) as exc:
                    # Bad arguments from the model: let it see the error and retry.
                    result = {"error": str(exc)}
            messages.append({"role": "tool",
                             "name": fn_name,
                             "content": json.dumps(result, ensure_ascii=False)})
|
||||
|
||||
# ---------- CLI entry-point --------------------------------------------------------
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: one positional topic plus an optional model name.
    import argparse

    cli = argparse.ArgumentParser(
        description="Generate ten overview files for a topic using Ollama + tool calling",
    )
    cli.add_argument(
        "topic",
        help="Main subject area, e.g. 'quantum computing'",
    )
    cli.add_argument(
        "--model",
        default="qwen3:latest",
        help="Local Ollama model to use (default: qwen3:latest)",
    )
    options = cli.parse_args()

    run(options.topic, model=options.model)
|
||||
130
src/Indexer/Scripts/tools.py
Normal file
130
src/Indexer/Scripts/tools.py
Normal file
@@ -0,0 +1,130 @@
|
||||
from dataclasses import dataclass
|
||||
import array
|
||||
from typing import Optional
|
||||
|
||||
@dataclass
|
||||
class JSONDatapoint:
|
||||
Name:str
|
||||
Text:str
|
||||
Probmethod_embedding:str
|
||||
Model:list[str]
|
||||
|
||||
@dataclass
|
||||
class JSONEntity:
|
||||
Name:str
|
||||
Probmethod:str
|
||||
Searchdomain:str
|
||||
Attributes:dict
|
||||
Datapoints:array.array[JSONDatapoint]
|
||||
|
||||
#Model - Searchdomain
|
||||
@dataclass
|
||||
class SearchdomainListResults:
|
||||
Searchdomains:list[str]
|
||||
|
||||
@dataclass
|
||||
class SearchdomainCreateResults:
|
||||
Success:bool
|
||||
id:int|None
|
||||
|
||||
@dataclass
|
||||
class SearchdomainUpdateResults:
|
||||
Success:bool
|
||||
|
||||
@dataclass
|
||||
class SearchdomainDeleteResults:
|
||||
Success:bool
|
||||
DeletedEntities:int
|
||||
|
||||
#Model - Entity
|
||||
@dataclass
|
||||
class EntityQueryResult:
|
||||
name:str
|
||||
ValueError:float
|
||||
|
||||
@dataclass
|
||||
class EntityQueryResults:
|
||||
Results:list[EntityQueryResult]
|
||||
|
||||
@dataclass
|
||||
class EntityIndexResult:
|
||||
Success:bool
|
||||
|
||||
@dataclass
|
||||
class AttributeResult:
|
||||
Name:str
|
||||
Value:str
|
||||
|
||||
@dataclass
|
||||
class EmbeddingResult:
|
||||
Model:str
|
||||
Embeddings:array.array[float]
|
||||
|
||||
@dataclass
|
||||
class DatapointResult:
|
||||
Name:str
|
||||
ProbMethod:str
|
||||
Embeddings:list[EmbeddingResult]|None
|
||||
|
||||
@dataclass
|
||||
class EntityListResults:
|
||||
Name:str
|
||||
Attributes:list[AttributeResult]
|
||||
Datapoints:list[DatapointResult]
|
||||
|
||||
@dataclass
|
||||
class EntityDeleteResults:
|
||||
Success:bool
|
||||
|
||||
# Model - Client
|
||||
@dataclass
|
||||
class Client:
|
||||
baseUri:str
|
||||
apiKey:str
|
||||
searchdomain:str
|
||||
async def SearchdomainListAsync() -> SearchdomainListResults:
|
||||
pass
|
||||
async def SearchdomainDeleteAsync() -> SearchdomainDeleteResults:
|
||||
pass
|
||||
async def SearchdomainCreateAsync() -> SearchdomainCreateResults:
|
||||
pass
|
||||
async def SearchdomainCreateAsync(searchdomain:str) -> SearchdomainCreateResults:
|
||||
pass
|
||||
async def SearchdomainUpdateAsync(newName:str, settings:str) -> SearchdomainUpdateResults:
|
||||
pass
|
||||
async def SearchdomainUpdateAsync(searchdomain:str, newName:str, settings:str) -> SearchdomainUpdateResults:
|
||||
pass
|
||||
async def EntityQueryAsync(query:str) -> EntityQueryResults:
|
||||
pass
|
||||
async def EntityQueryAsync(searchdomain:str, query:str) -> EntityQueryResults:
|
||||
pass
|
||||
#async def EntityIndexAsync(jsonEntity): # -> EntityIndexResult:#:NetList[JSONEntity]) -> EntityIndexResult: #TODO fix clr issues, i.e. make this work
|
||||
# pass
|
||||
#async def EntityIndexAsync(searchdomain:str, jsonEntity:list[JSONEntity]) -> EntityIndexResult:
|
||||
# pass
|
||||
async def EntityIndexAsync(jsonEntity:str) -> EntityIndexResult:
|
||||
pass
|
||||
async def EntityIndexAsync(searchdomain:str, jsonEntity:str) -> EntityIndexResult:
|
||||
pass
|
||||
async def EntityListAsync(returnEmbeddings:bool = False) -> EntityListResults:
|
||||
pass
|
||||
async def EntityListAsync(searchdomain:str, returnEmbeddings:bool = False) -> EntityListResults:
|
||||
pass
|
||||
async def EntityDeleteAsync(searchdomain:str, entityName:str) -> EntityDeleteResults:
|
||||
pass
|
||||
class ICallbackInfos:
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntervalCallbackInfos(ICallbackInfos):
|
||||
sender: Optional[object]
|
||||
e: object
|
||||
|
||||
@dataclass
|
||||
class Toolset:
|
||||
filePath:str
|
||||
client:Client
|
||||
callbackInfos: Optional[ICallbackInfos] = None
|
||||
|
||||
|
||||
102
src/Indexer/Services/IndexerService.cs
Normal file
102
src/Indexer/Services/IndexerService.cs
Normal file
@@ -0,0 +1,102 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
using Indexer.Exceptions;
|
||||
using Indexer.Models;
|
||||
using System.Timers;
|
||||
using Microsoft.AspNetCore.Http.HttpResults;
|
||||
using embeddingsearch;
|
||||
using Python.Runtime;
|
||||
|
||||
namespace Indexer.Services;
|
||||
|
||||
/// <summary>
/// Hosted service that reads the <c>EmbeddingsearchIndexer</c> configuration section,
/// creates one <see cref="Worker"/> per configured entry and wires up the triggers
/// ("Calls") that invoke each worker's script.
/// </summary>
public class IndexerService : IHostedService
{
    private readonly WorkerCollection workerCollection;
    private readonly IConfiguration _config;
    private readonly Client.Client client;

    // Interval timers created for configured calls; kept so StopAsync can stop and dispose them.
    private readonly List<System.Timers.Timer> timers = [];

    /// <summary>
    /// Loads all workers from configuration and starts their triggers.
    /// </summary>
    /// <param name="workerCollection">Collection the created workers are added to.</param>
    /// <param name="configuration">Application configuration.</param>
    /// <param name="client">Client passed on to every script.</param>
    /// <exception cref="IndexerConfigurationException">The configuration is missing or invalid.</exception>
    /// <exception cref="NotImplementedException">A call of type "schedule" or "fileupdate" is configured.</exception>
    public IndexerService(WorkerCollection workerCollection, IConfiguration configuration, Client.Client client)
    {
        this._config = configuration;
        this.client = client;
        this.workerCollection = workerCollection;

        // Load and configure all workers
        var sectionMain = _config.GetSection("EmbeddingsearchIndexer");
        if (sectionMain.Get(typeof(WorkerCollectionConfig)) is not WorkerCollectionConfig { Worker: not null } sectionWorker)
        {
            // Covers both a missing section and a section without a "Worker" list.
            throw new IndexerConfigurationException("Unable to find section \"Worker\"");
        }

        foreach (WorkerConfig workerConfig in sectionWorker.Worker)
        {
            ConfigureWorker(workerConfig);
        }
    }

    // Creates the worker for one configuration entry and schedules all of its calls.
    private void ConfigureWorker(WorkerConfig workerConfig)
    {
        if (string.IsNullOrWhiteSpace(workerConfig.Script))
        {
            throw new IndexerConfigurationException($"Script not set for Worker \"{workerConfig.Name}\"");
        }
        if (workerConfig.Calls is null)
        {
            throw new IndexerConfigurationException($"Calls not set for Worker \"{workerConfig.Name}\"");
        }

        // The first search domain of the first worker that has one becomes the
        // client's search domain while the client has none.
        if (client.searchdomain == "" && workerConfig.Searchdomains is { Count: >= 1 } searchdomains)
        {
            client.searchdomain = searchdomains.First();
        }

        ScriptToolSet toolSet = new(workerConfig.Script, client);
        Worker worker = new(workerConfig, GetScriptable(toolSet));
        workerCollection.Workers.Add(worker);

        foreach (Call call in workerConfig.Calls)
        {
            ScheduleCall(worker, call);
        }
    }

    // Sets up the trigger described by one Call entry.
    private void ScheduleCall(Worker worker, Call call)
    {
        string workerName = worker.Config.Name;
        switch (call.Type)
        {
            case "interval":
                if (call.Interval is not long intervalMs)
                {
                    throw new IndexerConfigurationException($"Interval not set for a Call in Worker \"{workerName}\"");
                }
                if (intervalMs is <= 0 or > int.MaxValue)
                {
                    // The timer rejects such values with an ArgumentException; report it as a configuration error.
                    throw new IndexerConfigurationException($"Interval out of range for a Call in Worker \"{workerName}\"");
                }
                var timer = new System.Timers.Timer((double)intervalMs) { AutoReset = true };
                timer.Elapsed += (sender, e) => RunUpdate(worker, new IntervalCallbackInfos() { sender = sender, e = e });
                timers.Add(timer);
                timer.Enabled = true;
                break;
            case "schedule": // TODO implement scheduled tasks using Quartz
                throw new NotImplementedException("schedule not implemented yet");
            case "fileupdate":
                if (call.Path is null)
                {
                    throw new IndexerConfigurationException($"Path not set for a Call in Worker \"{workerName}\"");
                }
                throw new NotImplementedException("fileupdate not implemented yet");
            default:
                throw new IndexerConfigurationException($"Unknown Type specified for a Call in Worker \"{workerName}\"");
        }
    }

    // Runs one script update. System.Timers.Timer suppresses exceptions thrown by
    // Elapsed handlers, so failures are written to stderr here instead of vanishing.
    private static void RunUpdate(Worker worker, ICallbackInfos callbackInfos)
    {
        try
        {
            worker.Scriptable.Update(callbackInfos);
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Update of Worker \"{worker.Config.Name}\" failed: {ex}");
        }
    }

    /// <summary>
    /// Returns the first script host from <c>workerCollection.types</c> whose
    /// <see cref="IScriptable.IsScript"/> accepts the tool set's file path.
    /// </summary>
    /// <param name="toolSet">Tool set naming the script file.</param>
    /// <exception cref="UnknownScriptLanguageException">No host accepts the file.</exception>
    public IScriptable GetScriptable(ScriptToolSet toolSet)
    {
        string fileName = toolSet.filePath;
        foreach (Type type in workerCollection.types)
        {
            // NOTE(review): the host is constructed before IsScript is consulted, so its
            // constructor runs even for files it will not accept.
            IScriptable? instance = (IScriptable?)Activator.CreateInstance(type, toolSet);
            if (instance is not null && instance.IsScript(fileName))
            {
                return instance;
            }
        }

        throw new UnknownScriptLanguageException(fileName);
    }

    /// <summary>No start-up work: workers and timers are created in the constructor.</summary>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        return Task.CompletedTask;
    }

    /// <summary>Stops and disposes all interval timers so no script runs after shutdown.</summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        foreach (System.Timers.Timer timer in timers)
        {
            timer.Stop();
            timer.Dispose();
        }
        timers.Clear();
        return Task.CompletedTask;
    }
}
|
||||
29
src/Indexer/appsettings.Development.json
Normal file
29
src/Indexer/appsettings.Development.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
"Microsoft.AspNetCore": "Warning"
|
||||
}
|
||||
},
|
||||
"Embeddingsearch": {
|
||||
"BaseUri": "http://localhost:5146"
|
||||
},
|
||||
"EmbeddingsearchIndexer": {
|
||||
"Worker":
|
||||
[
|
||||
{
|
||||
"Name": "example",
|
||||
"Searchdomains": [
|
||||
"example"
|
||||
],
|
||||
"Script": "Scripts/example.py",
|
||||
"Calls": [
|
||||
{
|
||||
"Type": "interval",
|
||||
"Interval": 10000
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
9
src/Indexer/appsettings.json
Normal file
9
src/Indexer/appsettings.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
"Microsoft.AspNetCore": "Warning"
|
||||
}
|
||||
},
|
||||
"AllowedHosts": "*"
|
||||
}
|
||||
@@ -2,17 +2,17 @@ namespace embeddingsearch;
|
||||
|
||||
public class JSONEntity
|
||||
{
|
||||
public required string name { get; set; }
|
||||
public required string probmethod { get; set; }
|
||||
public required string searchdomain { get; set; }
|
||||
public required Dictionary<string, string> attributes { get; set; }
|
||||
public required JSONDatapoint[] datapoints { get; set; }
|
||||
public required string Name { get; set; }
|
||||
public required string Probmethod { get; set; }
|
||||
public required string Searchdomain { get; set; }
|
||||
public required Dictionary<string, string> Attributes { get; set; }
|
||||
public required JSONDatapoint[] Datapoints { get; set; }
|
||||
}
|
||||
|
||||
public class JSONDatapoint
|
||||
{
|
||||
public required string name { get; set; }
|
||||
public required string text { get; set; }
|
||||
public required string probmethod_embedding { get; set; }
|
||||
public required string[] model { get; set; }
|
||||
public required string Name { get; set; }
|
||||
public required string Text { get; set; }
|
||||
public required string Probmethod_embedding { get; set; }
|
||||
public required string[] Model { get; set; }
|
||||
}
|
||||
@@ -268,25 +268,25 @@ public class Searchdomain
|
||||
{
|
||||
return null;
|
||||
}
|
||||
if (HasEntity(jsonEntity.name))
|
||||
if (HasEntity(jsonEntity.Name))
|
||||
{
|
||||
RemoveEntity(jsonEntity.name);
|
||||
RemoveEntity(jsonEntity.Name);
|
||||
|
||||
}
|
||||
int id_entity = DatabaseInsertEntity(jsonEntity.name, jsonEntity.probmethod, id);
|
||||
foreach (KeyValuePair<string, string> attribute in jsonEntity.attributes)
|
||||
int id_entity = DatabaseInsertEntity(jsonEntity.Name, jsonEntity.Probmethod, id);
|
||||
foreach (KeyValuePair<string, string> attribute in jsonEntity.Attributes)
|
||||
{
|
||||
DatabaseInsertAttribute(attribute.Key, attribute.Value, id_entity);
|
||||
}
|
||||
|
||||
List<Datapoint> datapoints = [];
|
||||
|
||||
foreach (JSONDatapoint jsonDatapoint in jsonEntity.datapoints)
|
||||
foreach (JSONDatapoint jsonDatapoint in jsonEntity.Datapoints)
|
||||
{
|
||||
Dictionary<string, float[]> embeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.text, [.. jsonDatapoint.model], ollama, embeddingCache);
|
||||
var probMethod_embedding = probmethods.GetMethod(jsonDatapoint.probmethod_embedding) ?? throw new Exception($"Unknown probmethod name {jsonDatapoint.probmethod_embedding}");
|
||||
Datapoint datapoint = new(jsonDatapoint.name, probMethod_embedding, [.. embeddings.Select(kv => (kv.Key, kv.Value))]);
|
||||
int id_datapoint = DatabaseInsertDatapoint(jsonDatapoint.name, jsonDatapoint.probmethod_embedding, id_entity);
|
||||
Dictionary<string, float[]> embeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.Text, [.. jsonDatapoint.Model], ollama, embeddingCache);
|
||||
var probMethod_embedding = probmethods.GetMethod(jsonDatapoint.Probmethod_embedding) ?? throw new Exception($"Unknown probmethod name {jsonDatapoint.Probmethod_embedding}");
|
||||
Datapoint datapoint = new(jsonDatapoint.Name, probMethod_embedding, [.. embeddings.Select(kv => (kv.Key, kv.Value))]);
|
||||
int id_datapoint = DatabaseInsertDatapoint(jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, id_entity);
|
||||
foreach ((string, float[]) embedding in datapoint.embeddings)
|
||||
{
|
||||
DatabaseInsertEmbedding(id_datapoint, embedding.Item1, BytesFromFloatArray(embedding.Item2));
|
||||
@@ -294,8 +294,8 @@ public class Searchdomain
|
||||
datapoints.Add(datapoint);
|
||||
}
|
||||
|
||||
var probMethod = probmethods.GetMethod(jsonEntity.probmethod) ?? throw new Exception($"Unknown probmethod name {jsonEntity.probmethod}");
|
||||
Entity entity = new(jsonEntity.attributes, probMethod, datapoints, jsonEntity.name)
|
||||
var probMethod = probmethods.GetMethod(jsonEntity.Probmethod) ?? throw new Exception($"Unknown probmethod name {jsonEntity.Probmethod}");
|
||||
Entity entity = new(jsonEntity.Attributes, probMethod, datapoints, jsonEntity.Name)
|
||||
{
|
||||
id = id_entity
|
||||
};
|
||||
@@ -314,15 +314,15 @@ public class Searchdomain
|
||||
Dictionary<string, List<string>> toBeCached = [];
|
||||
foreach (JSONEntity jSONEntity in jsonEntities)
|
||||
{
|
||||
foreach (JSONDatapoint datapoint in jSONEntity.datapoints)
|
||||
foreach (JSONDatapoint datapoint in jSONEntity.Datapoints)
|
||||
{
|
||||
foreach (string model in datapoint.model)
|
||||
foreach (string model in datapoint.Model)
|
||||
{
|
||||
if (!toBeCached.ContainsKey(model))
|
||||
{
|
||||
toBeCached[model] = [];
|
||||
}
|
||||
toBeCached[model].Add(datapoint.text);
|
||||
toBeCached[model].Add(datapoint.Text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ using Microsoft.AspNetCore.Mvc;
|
||||
using embeddingsearch;
|
||||
using System.Text.Json;
|
||||
using Models;
|
||||
using System.Text.Json.Nodes;
|
||||
namespace server.Controllers;
|
||||
|
||||
[ApiController]
|
||||
@@ -39,33 +40,30 @@ public class EntityController : ControllerBase
|
||||
return Ok(new EntityQueryResults(){Results = queryResults});
|
||||
}
|
||||
|
||||
[HttpGet("Index")]
|
||||
public ActionResult<EntityIndexResult> Index(string searchdomain, string jsonEntity)
|
||||
[HttpPost("Index")]
|
||||
public ActionResult<EntityIndexResult> Index(string searchdomain, [FromBody] List<JSONEntity>? jsonEntity)
|
||||
{
|
||||
Searchdomain searchdomain_;
|
||||
try
|
||||
{
|
||||
searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
|
||||
} catch (Exception)
|
||||
{
|
||||
return Ok(new EntityIndexResult() {Success = false});
|
||||
}
|
||||
List<JSONEntity>? jsonEntities = JsonSerializer.Deserialize<List<JSONEntity>?>(jsonEntity);
|
||||
if (jsonEntities is not null)
|
||||
catch (Exception)
|
||||
{
|
||||
|
||||
List<Entity>? entities = searchdomain_.EntitiesFromJSON(jsonEntity);
|
||||
if (entities is not null)
|
||||
{
|
||||
return Ok(new EntityIndexResult() {Success = true});
|
||||
}
|
||||
else
|
||||
{
|
||||
_logger.LogDebug("Unable to deserialize an entity");
|
||||
}
|
||||
return Ok(new EntityIndexResult() { Success = false });
|
||||
}
|
||||
|
||||
return Ok(new EntityIndexResult() {Success = false});
|
||||
List<Entity>? entities = searchdomain_.EntitiesFromJSON(JsonSerializer.Serialize(jsonEntity));
|
||||
if (entities is not null)
|
||||
{
|
||||
_domainManager.InvalidateSearchdomainCache(searchdomain);
|
||||
return Ok(new EntityIndexResult() { Success = true });
|
||||
}
|
||||
else
|
||||
{
|
||||
_logger.LogDebug("Unable to deserialize an entity");
|
||||
}
|
||||
|
||||
return Ok(new EntityIndexResult() { Success = false });
|
||||
}
|
||||
|
||||
[HttpGet("List")]
|
||||
|
||||
@@ -42,6 +42,11 @@ public class SearchomainManager
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Removes the entry keyed by <paramref name="searchdomain"/> from the <c>searchdomains</c> collection.
/// </summary>
/// <param name="searchdomain">Key of the entry to remove from <c>searchdomains</c>.</param>
// NOTE(review): presumably this forces the searchdomain to be reloaded on its next lookup — confirm against GetSearchdomain, which is not visible here.
public void InvalidateSearchdomainCache(string searchdomain)
|
||||
{
|
||||
searchdomains.Remove(searchdomain);
|
||||
}
|
||||
|
||||
public List<string> ListSearchdomains()
|
||||
{
|
||||
DbDataReader reader = ExecuteSQLCommand("SELECT name FROM searchdomain", []);
|
||||
|
||||
Reference in New Issue
Block a user