diff --git a/docs/Indexer.md b/docs/Indexer.md index 8e96e9b..9c0eb23 100644 --- a/docs/Indexer.md +++ b/docs/Indexer.md @@ -177,8 +177,10 @@ probmethod_entity = "DictionaryWeightedAverage:{\"title\": 2, \"filename\": 0.1, To ease scripting, tools.py contains all definitions of the .NET objects passed to the script. This includes attributes and methods. These are not yet defined in a way that makes them 100% interactible with the Dotnet CLR, meaning some methods that require anything more than strings or other simple data types to be passed are not yet supported. (WIP) -### Required elements -Here is an overview of required elements by example: +### Supported file extensions +- .py +### Code elements +Here is an overview of code elements by example: ```python from tools import * # Import all tools that are provided for ease of scripting @@ -197,6 +199,68 @@ Currently, `Toolset`, as provided by the IndexerService to the Python script, co 1. (only for `update`, not `init`) `callbackInfos` - an object that provides all information regarding the callback. (e.g. what file was updated) 2. `client` - a .NET object that has the functions as described in `src/Indexer/Scripts/tools.py`. It's the client that - according to the configuration - communicates with the search server and executes the API calls. 3. `filePath` - the path to the script, as specified in the configuration +## C# (Roslyn) +### Supported file extensions +- .csx +### Code elements +**important hint:** As shown in the last two lines of the example code, simply declaring the class is **not** enough. One must also return an object of said class! +```csharp +// #load directives are disregarded at compile time. 
They are currently used for syntax highlighting only +#load "../../Client/Client.cs" +#load "../Models/Script.cs" +#load "../Models/Interfaces.cs" +#load "../Models/WorkerResults.cs" +#load "../../Shared/Models/SearchdomainResults.cs" +#load "../../Shared/Models/JSONModels.cs" +#load "../../Shared/Models/EntityResults.cs" + +using Shared.Models; +using System.Collections.Generic; +using System.Linq; +using Microsoft.Extensions.Logging; + +// Required: a class that extends Indexer.Models.IScript +public class ExampleScript : Indexer.Models.IScript +{ + public Indexer.Models.ScriptToolSet ToolSet; + public Client.Client client; + + // Optional: constructor + public ExampleScript() + { + //System.Console.WriteLine("DEBUG@example.cs - Constructor"); // logger not passed here yet + } + + // Required: Init method as required to extend IScript + public int Init(Indexer.Models.ScriptToolSet toolSet) + { + ToolSet = toolSet; + ToolSet.Logger.LogInformation("DEBUG@example.csx - Init"); + return 0; // Required: int error value return + } + + // Required: Update method as required to extend IScript + public int Update(Indexer.Models.ICallbackInfos callbackInfos) + { + ToolSet.Logger.LogInformation("DEBUG@example.csx - Update"); + EntityQueryResults test = ToolSet.Client.EntityQueryAsync(defaultSearchdomain, "DNA").Result; + var firstResult = test.Results.ToArray()[0]; + ToolSet.Logger.LogInformation(firstResult.Name); + ToolSet.Logger.LogInformation(firstResult.Value.ToString()); + return 0; // Required: int error value return + } + + // Required: Stop method as required to extend IScript + public int Stop() + { + ToolSet.Logger.LogInformation("DEBUG@example.csx - Stop"); + return 0; // Required: int error value return + } +} + +// Required: return an instance of your IScript-extending class +return new ExampleScript(); +``` ## Golang TODO ## Javascript diff --git a/src/Indexer/Indexer.csproj b/src/Indexer/Indexer.csproj index 5978d32..8c2a30b 100644 --- a/src/Indexer/Indexer.csproj +++ 
b/src/Indexer/Indexer.csproj @@ -9,6 +9,7 @@ + diff --git a/src/Indexer/Models/Interfaces.cs b/src/Indexer/Models/Interfaces.cs index d906069..b4b8815 100644 --- a/src/Indexer/Models/Interfaces.cs +++ b/src/Indexer/Models/Interfaces.cs @@ -1,15 +1,22 @@ namespace Indexer.Models; +public interface IScript +{ + int Init(ScriptToolSet toolSet); + int Update(ICallbackInfos callbackInfos); + int Stop(); +} + public interface IScriptable { ScriptToolSet ToolSet { get; set; } ScriptUpdateInfo UpdateInfo { get; set; } ILogger _logger { get; set; } - void Init(); - void Update(ICallbackInfos callbackInfos); - void Stop(); + int Init(); + int Update(ICallbackInfos callbackInfos); + int Stop(); - bool IsScript(string filePath); + abstract static bool IsScript(string filePath); } public interface ICallbackInfos { } diff --git a/src/Indexer/Models/Script.cs b/src/Indexer/Models/Script.cs index f650d00..f114a9e 100644 --- a/src/Indexer/Models/Script.cs +++ b/src/Indexer/Models/Script.cs @@ -1,115 +1,23 @@ using System.Timers; -using Python.Runtime; namespace Indexer.Models; -public class PythonScriptable : IScriptable -{ - public ScriptToolSet ToolSet { get; set; } - public PyObject? pyToolSet; - public PyModule scope; - public dynamic sys; - public string source; - public bool SourceLoaded { get; set; } - public ScriptUpdateInfo UpdateInfo { get; set; } - public ILogger _logger { get; set; } - public PythonScriptable(ScriptToolSet toolSet, ILogger logger) - { - _logger = logger; - SourceLoaded = false; - Runtime.PythonDLL ??= @"libpython3.12.so"; - if (!PythonEngine.IsInitialized) - { - PythonEngine.Initialize(); - PythonEngine.BeginAllowThreads(); - } - ToolSet = toolSet; - source = File.ReadAllText(ToolSet.filePath); - string fullPath = Path.GetFullPath(ToolSet.filePath); - string? 
scriptDir = Path.GetDirectoryName(fullPath); - using (Py.GIL()) - { - scope = Py.CreateScope(); - sys = Py.Import("sys"); - if (scriptDir is not null) - { - sys.path.append(scriptDir); - } - } - Init(); - } - - public void Init() - { - ExecFunction("init"); - } - - public void Update(ICallbackInfos callbackInfos) - { - ExecFunction("update"); - } - - public void Stop() - { - ExecFunction("stop"); - } - - public void ExecFunction(string name, ICallbackInfos? callbackInfos = null) - { - int retryCounter = 0; - retry: - try - { - using (Py.GIL()) - { - pyToolSet = ToolSet.ToPython(); - pyToolSet.SetAttr("callbackInfos", callbackInfos.ToPython()); - scope.Set("toolset", pyToolSet); - if (!SourceLoaded) - { - scope.Exec(source); - SourceLoaded = true; - } - scope.Exec($"{name}(toolset)"); - } - } - catch (Exception ex) - { - UpdateInfo = new() { DateTime = DateTime.Now, Successful = false, Exception = ex }; - if (retryCounter < 3) - { - _logger.LogWarning("Execution of {name} function in script {Toolset.filePath} failed to an exception {ex.Message}", [name, ToolSet.filePath, ex.Message]); - retryCounter++; - goto retry; - } - _logger.LogError("Execution of {name} function in script {Toolset.filePath} failed to an exception {ex.Message}", [name, ToolSet.filePath, ex.Message]); - } - UpdateInfo = new() { DateTime = DateTime.Now, Successful = true }; - } - - public bool IsScript(string fileName) - { - return fileName.EndsWith(".py"); - } -} - -/* - TODO Add the following languages - - Javascript - - Golang (reconsider) -*/ - public class ScriptToolSet { - public string filePath; - public Client.Client client; - public ICallbackInfos? callbackInfos; + public string FilePath; + public Client.Client Client; + public ILogger Logger; + public ICallbackInfos? CallbackInfos; + public IConfiguration Configuration; + public string Name; - // IConfiguration - Access to connection strings, ollama, etc. maybe? 
- public ScriptToolSet(string filePath, Client.Client client) + public ScriptToolSet(string filePath, Client.Client client, ILogger logger, IConfiguration configuration, string name) { - this.filePath = filePath; - this.client = client; + Configuration = configuration; + Name = name; + FilePath = filePath; + Client = client; + Logger = logger; } } diff --git a/src/Indexer/Models/Worker.cs b/src/Indexer/Models/Worker.cs index 41c762b..df0b835 100644 --- a/src/Indexer/Models/Worker.cs +++ b/src/Indexer/Models/Worker.cs @@ -1,4 +1,5 @@ using Microsoft.Extensions.Diagnostics.HealthChecks; +using Indexer.Scriptables; using Indexer.Exceptions; using Quartz; using Quartz.Impl; @@ -16,7 +17,7 @@ public class WorkerCollection public WorkerCollection(ILogger logger, IConfiguration configuration, Client.Client client) { Workers = []; - types = [typeof(PythonScriptable)]; + types = [typeof(PythonScriptable), typeof(CSharpScriptable)]; _logger = logger; _configuration = configuration; this.client = client; @@ -38,7 +39,7 @@ public class WorkerCollection { foreach (WorkerConfig workerConfig in sectionWorker.Worker) { - ScriptToolSet toolSet = new(workerConfig.Script, client); + ScriptToolSet toolSet = new(workerConfig.Script, client, _logger, _configuration, workerConfig.Name); InitializeWorker(toolSet, workerConfig); } } @@ -153,17 +154,23 @@ public class WorkerCollection public IScriptable GetScriptable(ScriptToolSet toolSet) { - string fileName = toolSet.filePath; + string fileName = toolSet.FilePath ?? throw new IndexerConfigurationException($"\"Script\" not set for Worker \"{toolSet.Name}\""); foreach (Type type in types) - { - IScriptable? instance = (IScriptable?)Activator.CreateInstance(type, [toolSet, _logger]); - if (instance is not null && instance.IsScript(fileName)) { - return instance; + System.Reflection.MethodInfo? method = type.GetMethod("IsScript"); + bool? isInstance = method is not null ? 
(bool?)method.Invoke(null, [fileName]) : null; + if (isInstance == true) + { + IScriptable? instance = (IScriptable?)Activator.CreateInstance(type, [toolSet, _logger]); + if (instance is null) + { + _logger.LogError("Unable to initialize script: \"{fileName}\"", fileName); + throw new Exception($"Unable to initialize script: \"{fileName}\""); + } + return instance; + } } - } _logger.LogError("Unable to determine the script's language: \"{fileName}\"", fileName); - throw new UnknownScriptLanguageException(fileName); } } @@ -346,8 +353,8 @@ public class IntervalCall : ICall { if (!Scriptable.UpdateInfo.Successful) { - _logger.LogWarning("HealthCheck revealed: The last execution of \"{name}\" was not successful", Scriptable.ToolSet.filePath); - return HealthCheckResult.Unhealthy($"HealthCheck revealed: The last execution of \"{Scriptable.ToolSet.filePath}\" was not successful"); + _logger.LogWarning("HealthCheck revealed: The last execution of \"{name}\" was not successful", Scriptable.ToolSet.FilePath); + return HealthCheckResult.Unhealthy($"HealthCheck revealed: The last execution of \"{Scriptable.ToolSet.FilePath}\" was not successful"); } double timerInterval = Timer.Interval; // In ms DateTime lastRunDateTime = Scriptable.UpdateInfo.DateTime; @@ -355,8 +362,8 @@ public class IntervalCall : ICall double millisecondsSinceLastExecution = now.Subtract(lastRunDateTime).TotalMilliseconds; if (millisecondsSinceLastExecution >= 2 * timerInterval) { - _logger.LogWarning("HealthCheck revealed: Since the last execution of \"{name}\" more than twice the interval has passed", Scriptable.ToolSet.filePath); - return HealthCheckResult.Unhealthy($"HealthCheck revealed: Since the last execution of \"{Scriptable.ToolSet.filePath}\" more than twice the interval has passed"); + _logger.LogWarning("HealthCheck revealed: Since the last execution of \"{name}\" more than twice the interval has passed", Scriptable.ToolSet.FilePath); + return HealthCheckResult.Unhealthy($"HealthCheck 
revealed: Since the last execution of \"{Scriptable.ToolSet.FilePath}\" more than twice the interval has passed"); } return HealthCheckResult.Healthy(); } diff --git a/src/Indexer/Scriptables/CSharpScriptable.cs b/src/Indexer/Scriptables/CSharpScriptable.cs new file mode 100644 index 0000000..dab33f3 --- /dev/null +++ b/src/Indexer/Scriptables/CSharpScriptable.cs @@ -0,0 +1,75 @@ +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp.Scripting; +using Microsoft.CodeAnalysis.Scripting; +using Indexer.Models; + +namespace Indexer.Scriptables; + +public class CSharpScriptable : IScriptable +{ + public ScriptToolSet ToolSet { get; set; } + public ScriptUpdateInfo UpdateInfo { get; set; } + public ILogger _logger { get; set; } + public IScript script; + public CSharpScriptable(ScriptToolSet toolSet, ILogger logger) + { + _logger = logger; + ToolSet = toolSet; + + try + { + script = LoadScriptAsync(ToolSet).Result; + Init(); + } + catch (Exception ex) + { + _logger.LogCritical("Exception loading the script {ToolSet.filePath} CSharpScriptable {ex}", [ToolSet.FilePath, ex.StackTrace]); + throw; + } + + + } + + public int Init() + { + return script.Init(ToolSet); + } + + public int Update(ICallbackInfos callbackInfos) + { + return script.Update(callbackInfos); + } + + public int Stop() + { + return script.Stop(); + } + public async Task LoadScriptAsync(ScriptToolSet toolSet) + { + string path = toolSet.FilePath; + var fileText = File.ReadAllText(path); + var code = string.Join("\n", fileText.Split("\n").Select(line => line.StartsWith("#load") ? "// " + line : line)); // CRUTCH! 
enables syntax highlighting via "#load" directive + + var options = ScriptOptions.Default + .WithReferences(typeof(IScript).Assembly) + .WithImports("System") + .WithImports("System.Linq") + .WithImports("System.Console") + .WithImports("System.Collections") + .WithImports("Indexer.Models"); + try + { + return await CSharpScript.EvaluateAsync(code, options); + } + catch (Exception ex) + { + _logger.LogCritical("Exception loading the script {ToolSet.filePath} CSharpScriptable {ex.Message} {ex.StackTrace}", [ToolSet.FilePath, ex.Message, ex.StackTrace]); + throw; + } + } + + public static bool IsScript(string fileName) + { + return fileName.EndsWith(".cs") || fileName.EndsWith(".csx"); + } +} diff --git a/src/Indexer/Scriptables/PythonScriptable.cs b/src/Indexer/Scriptables/PythonScriptable.cs new file mode 100644 index 0000000..d0a1a05 --- /dev/null +++ b/src/Indexer/Scriptables/PythonScriptable.cs @@ -0,0 +1,101 @@ +using Python.Runtime; +using Indexer.Models; + +namespace Indexer.Scriptables; + +public class PythonScriptable : IScriptable +{ + public ScriptToolSet ToolSet { get; set; } + public PyObject? pyToolSet; + public PyModule scope; + public dynamic sys; + public string source; + public bool SourceLoaded { get; set; } + public ScriptUpdateInfo UpdateInfo { get; set; } + public ILogger _logger { get; set; } + public PythonScriptable(ScriptToolSet toolSet, ILogger logger) + { + string? runtime = toolSet.Configuration.GetValue("EmbeddingsearchIndexer:PythonRuntime"); + if (runtime is not null) + { + Runtime.PythonDLL ??= runtime; + } + _logger = logger; + SourceLoaded = false; + if (!PythonEngine.IsInitialized) + { + PythonEngine.Initialize(); + PythonEngine.BeginAllowThreads(); + } + ToolSet = toolSet; + source = File.ReadAllText(ToolSet.FilePath); + string fullPath = Path.GetFullPath(ToolSet.FilePath); + string? 
scriptDir = Path.GetDirectoryName(fullPath); + using (Py.GIL()) + { + scope = Py.CreateScope(); + sys = Py.Import("sys"); + if (scriptDir is not null) + { + sys.path.append(scriptDir); + } + } + Init(); + } + + public int Init() + { + return ExecFunction("init"); + } + + public int Update(ICallbackInfos callbackInfos) + { + return ExecFunction("update"); + } + + public int Stop() + { + return ExecFunction("stop"); + } + + public int ExecFunction(string name, ICallbackInfos? callbackInfos = null) + { + int error = 0; + int retryCounter = 0; + retry: + try + { + using (Py.GIL()) + { + pyToolSet = ToolSet.ToPython(); + pyToolSet.SetAttr("callbackInfos", callbackInfos.ToPython()); + scope.Set("toolset", pyToolSet); + if (!SourceLoaded) + { + scope.Exec(source); + SourceLoaded = true; + } + scope.Exec($"{name}(toolset)"); + } + } + catch (Exception ex) + { + UpdateInfo = new() { DateTime = DateTime.Now, Successful = false, Exception = ex }; + if (retryCounter < 3) + { + _logger.LogWarning("Execution of {name} function in script {Toolset.filePath} failed to an exception {ex.Message}", [name, ToolSet.FilePath, ex.Message]); + retryCounter++; + goto retry; + } + _logger.LogError("Execution of {name} function in script {Toolset.filePath} failed to an exception {ex.Message}", [name, ToolSet.FilePath, ex.Message]); + error = 1; + } + UpdateInfo = new() { DateTime = DateTime.Now, Successful = true }; + return error; + } + + public static bool IsScript(string fileName) + { + return fileName.EndsWith(".py"); + } +} diff --git a/src/Indexer/Scripts/example.csx b/src/Indexer/Scripts/example.csx new file mode 100644 index 0000000..33b30a6 --- /dev/null +++ b/src/Indexer/Scripts/example.csx @@ -0,0 +1,69 @@ +#load "../../Client/Client.cs" +#load "../Models/Script.cs" +#load "../Models/Interfaces.cs" +#load "../Models/WorkerResults.cs" +#load "../../Shared/Models/SearchdomainResults.cs" +#load "../../Shared/Models/JSONModels.cs" +#load "../../Shared/Models/EntityResults.cs" + +using 
Shared.Models; +using System.Collections.Generic; +using System.Linq; +using Microsoft.Extensions.Logging; + +public class ExampleScript : Indexer.Models.IScript +{ + public Indexer.Models.ScriptToolSet ToolSet; + public Client.Client client; + string defaultSearchdomain; + string exampleContent; + string probMethod; + string similarityMethod; + string exampleSearchdomain; + int exampleCounter; + List models; + string probmethodDatapoint; + string probmethodEntity; + + public ExampleScript() + { + //System.Console.WriteLine("DEBUG@example.cs - Constructor"); // logger not passed here yet + exampleContent = "./Scripts/example_content"; + probMethod = "HVEWAvg"; + similarityMethod = "Cosine"; + exampleSearchdomain = "example_" + probMethod; + exampleCounter = 0; + models = ["ollama:bge-m3", "ollama:mxbai-embed-large"]; + probmethodDatapoint = probMethod; + probmethodEntity = probMethod; + } + + public int Init(Indexer.Models.ScriptToolSet toolSet) + { + ToolSet = toolSet; + ToolSet.Logger.LogInformation("DEBUG@example.csx - Init"); + SearchdomainListResults searchdomains = ToolSet.Client.SearchdomainListAsync().Result; + defaultSearchdomain = searchdomains.Searchdomains.First(); + var searchdomainList = string.Join("\n", searchdomains.Searchdomains); + ToolSet.Logger.LogInformation(searchdomainList); + return 0; + } + + public int Update(Indexer.Models.ICallbackInfos callbackInfos) + { + ToolSet.Logger.LogInformation("DEBUG@example.csx - Update"); + EntityQueryResults test = ToolSet.Client.EntityQueryAsync(defaultSearchdomain, "DNA").Result; + var firstResult = test.Results.ToArray()[0]; + ToolSet.Logger.LogInformation(firstResult.Name); + ToolSet.Logger.LogInformation(firstResult.Value.ToString()); + return 0; + } + + public int Stop() + { + ToolSet.Logger.LogInformation("DEBUG@example.csx - Stop"); + return 0; + } +} + +return new ExampleScript(); \ No newline at end of file diff --git a/src/Indexer/Scripts/example.py b/src/Indexer/Scripts/example.py index 
7af1cd0..eb18df6 100644 --- a/src/Indexer/Scripts/example.py +++ b/src/Indexer/Scripts/example.py @@ -21,10 +21,10 @@ def init(toolset: Toolset): print("Py-DEBUG@init") print("This is the init function from the python example script") print(f"example_counter: {example_counter}") - searchdomainlist:SearchdomainListResults = toolset.client.SearchdomainListAsync().Result + searchdomainlist:SearchdomainListResults = toolset.Client.SearchdomainListAsync().Result if example_searchdomain not in searchdomainlist.Searchdomains: - toolset.client.SearchdomainCreateAsync(example_searchdomain).Result - searchdomainlist = toolset.client.SearchdomainListAsync().Result + toolset.Client.SearchdomainCreateAsync(example_searchdomain).Result + searchdomainlist = toolset.Client.SearchdomainListAsync().Result print("Currently these searchdomains exist:") for searchdomain in searchdomainlist.Searchdomains: print(f" - {searchdomain}") @@ -34,7 +34,7 @@ def update(toolset: Toolset): global example_counter print("Py-DEBUG@update") print("This is the update function from the python example script") - callbackInfos:ICallbackInfos = toolset.callbackInfos + callbackInfos:ICallbackInfos = toolset.CallbackInfos if (str(callbackInfos) == "Indexer.Models.IntervalCallbackInfos"): print("It was called via an interval callback") else: @@ -59,6 +59,6 @@ def index_files(toolset: Toolset): jsonEntities.append(jsonEntity) jsonstring = json.dumps(jsonEntities) timer_start = time.time() - result:EntityIndexResult = toolset.client.EntityIndexAsync(jsonstring).Result + result:EntityIndexResult = toolset.Client.EntityIndexAsync(jsonstring).Result timer_end = time.time() print(f"Update was successful: {result.Success} - and was done in {timer_end - timer_start} seconds.") \ No newline at end of file diff --git a/src/Indexer/appsettings.Development.json b/src/Indexer/appsettings.Development.json index 386d6e1..eab8a9e 100644 --- a/src/Indexer/appsettings.Development.json +++ 
b/src/Indexer/appsettings.Development.json @@ -20,6 +20,23 @@ "Interval": 30000 } ] + }, + { + "Name": "csharpExample", + "Script": "Scripts/example.csx", + "Calls": [ + { + "Type": "runonce" + }, + { + "Type": "schedule", + "Schedule": "0 0/5 * * * ?" + }, + { + "Type": "fileupdate", + "Path": "./Scripts/example_content" + } + ] } ] } diff --git a/src/Indexer/appsettings.json b/src/Indexer/appsettings.json index 1ccb58f..19252ea 100644 --- a/src/Indexer/appsettings.json +++ b/src/Indexer/appsettings.json @@ -23,7 +23,8 @@ "::1" ], "LogFolder": "./logs" - } + }, + "PythonRuntime": "libpython3.12.so" }, "AllowedHosts": "*" }