Major restructuring, removed searchdomain field from Entity/Index, moved embeddingcache to SearchdomainManager, improved logging

This commit is contained in:
2025-06-22 02:11:09 +02:00
parent d2f4210e72
commit 9a6ee4ad75
9 changed files with 365 additions and 273 deletions

View File

@@ -88,22 +88,12 @@ public class Client
public async Task<EntityIndexResult> EntityIndexAsync(List<Server.JSONEntity> jsonEntity) public async Task<EntityIndexResult> EntityIndexAsync(List<Server.JSONEntity> jsonEntity)
{ {
return await EntityIndexAsync(searchdomain, jsonEntity); return await EntityIndexAsync(JsonSerializer.Serialize(jsonEntity));
}
public async Task<EntityIndexResult> EntityIndexAsync(string searchdomain, List<Server.JSONEntity> jsonEntity)
{
return await EntityIndexAsync(searchdomain, JsonSerializer.Serialize(jsonEntity));
} }
public async Task<EntityIndexResult> EntityIndexAsync(string jsonEntity) public async Task<EntityIndexResult> EntityIndexAsync(string jsonEntity)
{ {
return await EntityIndexAsync(searchdomain, jsonEntity); var url = $"{baseUri}/Entity/Index?apiKey={HttpUtility.UrlEncode(apiKey)}";
}
public async Task<EntityIndexResult> EntityIndexAsync(string searchdomain, string jsonEntity)
{
var url = $"{baseUri}/Entity/Index?apiKey={HttpUtility.UrlEncode(apiKey)}&searchdomain={HttpUtility.UrlEncode(searchdomain)}";
var content = new StringContent(jsonEntity, Encoding.UTF8, "application/json"); var content = new StringContent(jsonEntity, Encoding.UTF8, "application/json");
return await PostUrlAndProcessJson<EntityIndexResult>(url, content);//new FormUrlEncodedContent(values)); return await PostUrlAndProcessJson<EntityIndexResult>(url, content);//new FormUrlEncodedContent(values));
} }

View File

@@ -32,10 +32,6 @@ public class IndexerService : IHostedService
foreach (WorkerConfig workerConfig in sectionWorker.Worker) foreach (WorkerConfig workerConfig in sectionWorker.Worker)
{ {
_logger.LogInformation("Initializing worker: {Name}", workerConfig.Name); _logger.LogInformation("Initializing worker: {Name}", workerConfig.Name);
if (client.searchdomain == "" && workerConfig.Searchdomains.Count >= 1)
{
client.searchdomain = workerConfig.Searchdomains.First();
}
ScriptToolSet toolSet = new(workerConfig.Script, client); ScriptToolSet toolSet = new(workerConfig.Script, client);
Worker worker = new(workerConfig.Name, workerConfig, GetScriptable(toolSet)); Worker worker = new(workerConfig.Name, workerConfig, GetScriptable(toolSet));
workerCollection.Workers.Add(worker); workerCollection.Workers.Add(worker);

View File

@@ -41,22 +41,28 @@ public class EntityController : ControllerBase
} }
[HttpPost("Index")] [HttpPost("Index")]
public ActionResult<EntityIndexResult> Index(string searchdomain, [FromBody] List<JSONEntity>? jsonEntity) public ActionResult<EntityIndexResult> Index([FromBody] List<JSONEntity>? jsonEntities)
{ {
Searchdomain searchdomain_; List<Entity>? entities = SearchdomainHelper.EntitiesFromJSON(
try [],
_domainManager.embeddingCache,
_domainManager.client,
_domainManager.helper,
JsonSerializer.Serialize(jsonEntities));
if (entities is not null && jsonEntities is not null)
{ {
searchdomain_ = _domainManager.GetSearchdomain(searchdomain); List<string> invalidatedSearchdomains = [];
foreach (var jsonEntity in jsonEntities)
{
string jsonEntityName = jsonEntity.Name;
if (entities.Select(x => x.name == jsonEntityName).Any()
&& !invalidatedSearchdomains.Contains(jsonEntityName))
{
string jsonEntitySearchdomain = jsonEntity.Searchdomain;
invalidatedSearchdomains.Add(jsonEntitySearchdomain);
_domainManager.InvalidateSearchdomainCache(jsonEntitySearchdomain);
} }
catch (Exception)
{
_logger.LogError("Unable to retrieve the searchdomain {searchdomain} - it likely does not exist yet", [searchdomain]);
return Ok(new EntityIndexResult() { Success = false });
} }
List<Entity>? entities = searchdomain_.EntitiesFromJSON(JsonSerializer.Serialize(jsonEntity));
if (entities is not null)
{
_domainManager.InvalidateSearchdomainCache(searchdomain);
return Ok(new EntityIndexResult() { Success = true }); return Ok(new EntityIndexResult() { Success = true });
} }
else else
@@ -118,22 +124,13 @@ public class EntityController : ControllerBase
[HttpGet("Delete")] [HttpGet("Delete")]
public ActionResult<EntityDeleteResults> Delete(string searchdomain, string entityName) public ActionResult<EntityDeleteResults> Delete(string searchdomain, string entityName)
{ {
Searchdomain searchdomain_; Entity? entity_ = SearchdomainHelper.CacheGetEntity([], entityName);
try
{
searchdomain_ = _domainManager.GetSearchdomain(searchdomain);
} catch (Exception)
{
_logger.LogError("Unable to delete the entity {entityName} in {searchdomain} - the searchdomain likely does not exist", [entityName, searchdomain]);
return Ok(new EntityDeleteResults() {Success = false});
}
Entity? entity_ = searchdomain_.GetEntity(entityName);
if (entity_ is null) if (entity_ is null)
{ {
_logger.LogError("Unable to delete the entity {entityName} in {searchdomain} - it was not found under the specified name", [entityName, searchdomain]); _logger.LogError("Unable to delete the entity {entityName} in {searchdomain} - it was not found under the specified name", [entityName, searchdomain]);
return Ok(new EntityDeleteResults() {Success = false}); return Ok(new EntityDeleteResults() {Success = false});
} }
searchdomain_.RemoveEntity(entityName); DatabaseHelper.RemoveEntity([], _domainManager.helper, entityName, searchdomain);
return Ok(new EntityDeleteResults() {Success = true}); return Ok(new EntityDeleteResults() {Success = true});
} }
} }

View File

@@ -1,3 +1,4 @@
using ElmahCore;
using Microsoft.AspNetCore.Mvc; using Microsoft.AspNetCore.Mvc;
using Server.Models; using Server.Models;
@@ -58,11 +59,13 @@ public class SearchdomainController : ControllerBase
{ {
success = true; success = true;
deletedEntries = _domainManager.DeleteSearchdomain(searchdomain); deletedEntries = _domainManager.DeleteSearchdomain(searchdomain);
} catch (Exception) }
catch (Exception ex)
{ {
_logger.LogError("Unable to delete searchdomain {searchdomain}", [searchdomain]); _logger.LogError("Unable to delete searchdomain {searchdomain}", [searchdomain]);
success = false; success = false;
deletedEntries = 0; deletedEntries = 0;
ElmahExtensions.RaiseError(ex);
} }
return Ok(new SearchdomainDeleteResults(){Success = success, DeletedEntities = deletedEntries}); return Ok(new SearchdomainDeleteResults(){Success = success, DeletedEntities = deletedEntries});
} }

View File

@@ -0,0 +1,160 @@
using System.Data.Common;
using System.Text;
namespace Server;
public static class DatabaseHelper
{
public static void DatabaseInsertEmbeddingBulk(SQLHelper helper, int id_datapoint, List<(string model, byte[] embedding)> data)
{
Dictionary<string, object> parameters = [];
parameters["id_datapoint"] = id_datapoint;
var query = new StringBuilder("INSERT INTO embedding (id_datapoint, model, embedding) VALUES ");
foreach (var (model, embedding) in data)
{
string modelParam = $"model_{Guid.NewGuid()}".Replace("-", "");
string embeddingParam = $"embedding_{Guid.NewGuid()}".Replace("-", "");
parameters[modelParam] = model;
parameters[embeddingParam] = embedding;
query.Append($"(@id_datapoint, @{modelParam}, @{embeddingParam}), ");
}
query.Length -= 2; // remove trailing comma
helper.ExecuteSQLNonQuery(query.ToString(), parameters);
}
public static int DatabaseInsertSearchdomain(SQLHelper helper, string name)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "settings", "{}"} // TODO add settings. It's not used yet, but maybe it's needed someday...
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO searchdomain (name, settings) VALUES (@name, @settings)", parameters);
}
public static int DatabaseInsertEntity(SQLHelper helper, string name, string probmethod, int id_searchdomain)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "probmethod", probmethod },
{ "id_searchdomain", id_searchdomain }
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO entity (name, probmethod, id_searchdomain) VALUES (@name, @probmethod, @id_searchdomain)", parameters);
}
public static int DatabaseInsertAttribute(SQLHelper helper, string attribute, string value, int id_entity)
{
Dictionary<string, dynamic> parameters = new()
{
{ "attribute", attribute },
{ "value", value },
{ "id_entity", id_entity }
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO attribute (attribute, value, id_entity) VALUES (@attribute, @value, @id_entity)", parameters);
}
public static int DatabaseInsertDatapoint(SQLHelper helper, string name, string probmethod_embedding, string hash, int id_entity)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "probmethod_embedding", probmethod_embedding },
{ "hash", hash },
{ "id_entity", id_entity }
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO datapoint (name, probmethod_embedding, hash, id_entity) VALUES (@name, @probmethod_embedding, @hash, @id_entity)", parameters);
}
public static int DatabaseInsertEmbedding(SQLHelper helper, int id_datapoint, string model, byte[] embedding)
{
Dictionary<string, dynamic> parameters = new()
{
{ "id_datapoint", id_datapoint },
{ "model", model },
{ "embedding", embedding }
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO embedding (id_datapoint, model, embedding) VALUES (@id_datapoint, @model, @embedding)", parameters);
}
public static int GetSearchdomainID(SQLHelper helper, string searchdomain)
{
Dictionary<string, dynamic> parameters = new()
{
{ "searchdomain", searchdomain}
};
lock (helper.connection)
{
DbDataReader reader = helper.ExecuteSQLCommand("SELECT id FROM searchdomain WHERE name = @searchdomain", parameters);
bool success = reader.Read();
int result = success ? reader.GetInt32(0) : 0;
reader.Close();
if (success)
{
return result;
}
else
{
throw new Exception($"Unable to retrieve searchdomain ID for {searchdomain}"); // TODO implement logging here; add logger via method injection
}
}
}
public static void RemoveEntity(List<Entity> entityCache, SQLHelper helper, string name, string searchdomain)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "searchdomain", GetSearchdomainID(helper, searchdomain)}
};
helper.ExecuteSQLNonQuery("DELETE embedding.* FROM embedding JOIN datapoint dp ON id_datapoint = dp.id JOIN entity ON id_entity = entity.id WHERE entity.name = @name AND entity.id_searchdomain = @searchdomain", parameters);
helper.ExecuteSQLNonQuery("DELETE datapoint.* FROM datapoint JOIN entity ON id_entity = entity.id WHERE entity.name = @name AND entity.id_searchdomain = @searchdomain", parameters);
helper.ExecuteSQLNonQuery("DELETE attribute.* FROM attribute JOIN entity ON id_entity = entity.id WHERE entity.name = @name AND entity.id_searchdomain = @searchdomain", parameters);
helper.ExecuteSQLNonQuery("DELETE FROM entity WHERE name = @name AND entity.id_searchdomain = @searchdomain", parameters);
entityCache.RemoveAll(entity => entity.name == name);
}
public static bool HasEntity(SQLHelper helper, string name, string searchdomain)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "searchdomain", GetSearchdomainID(helper, searchdomain)}
};
lock (helper.connection)
{
DbDataReader reader = helper.ExecuteSQLCommand("SELECT COUNT(*) FROM entity WHERE name = @name AND id_searchdomain = @searchdomain", parameters);
bool success = reader.Read();
bool result = success && reader.GetInt32(0) > 0;
reader.Close();
if (success)
{
return result;
}
else
{
throw new Exception($"Unable to determine whether an entity named {name} exists for {searchdomain}"); // TODO implement logging here; add logger via method injection
}
}
}
public static int? GetEntityID(SQLHelper helper, string name, string searchdomain)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "searchdomain", GetSearchdomainID(helper, searchdomain)}
};
lock (helper.connection)
{
DbDataReader reader = helper.ExecuteSQLCommand("SELECT id FROM entity WHERE name = @name AND id_searchdomain = @searchdomain", parameters);
bool success = reader.Read();
int? result = success ? reader.GetInt32(0) : 0;
reader.Close();
return result;
}
}
}

View File

@@ -6,10 +6,19 @@ namespace Server;
public class SQLHelper public class SQLHelper
{ {
public MySqlConnection connection; public MySqlConnection connection;
public SQLHelper(MySqlConnection connection) public string connectionString;
public SQLHelper(MySqlConnection connection, string connectionString)
{ {
this.connection = connection; this.connection = connection;
this.connectionString = connectionString;
} }
public SQLHelper DuplicateConnection()
{
MySqlConnection newConnection = new(connectionString);
return new SQLHelper(newConnection, connectionString);
}
public DbDataReader ExecuteSQLCommand(string query, Dictionary<string, dynamic> parameters) public DbDataReader ExecuteSQLCommand(string query, Dictionary<string, dynamic> parameters)
{ {
lock (connection) lock (connection)

View File

@@ -0,0 +1,146 @@
using System.Collections.Concurrent;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using MySql.Data.MySqlClient;
using OllamaSharp;
namespace Server;
public static class SearchdomainHelper
{
public static byte[] BytesFromFloatArray(float[] floats)
{
var byteArray = new byte[floats.Length * 4];
var floatArray = floats.ToArray();
Buffer.BlockCopy(floatArray, 0, byteArray, 0, byteArray.Length);
return byteArray;
}
public static float[] FloatArrayFromBytes(byte[] bytes)
{
var floatArray = new float[bytes.Length / 4];
Buffer.BlockCopy(bytes, 0, floatArray, 0, bytes.Length);
return floatArray;
}
public static bool CacheHasEntity(List<Entity> entityCache, string name)
{
return CacheGetEntity(entityCache, name) is not null;
}
public static Entity? CacheGetEntity(List<Entity> entityCache, string name)
{
foreach (Entity entity in entityCache)
{
if (entity.name == name)
{
return entity;
}
}
return null;
}
public static List<Entity>? EntitiesFromJSON(List<Entity> entityCache, Dictionary<string, Dictionary<string, float[]>> embeddingCache, OllamaApiClient ollama, SQLHelper helper, string json)
{
List<JSONEntity>? jsonEntities = JsonSerializer.Deserialize<List<JSONEntity>>(json);
if (jsonEntities is null)
{
return null;
}
Dictionary<string, List<string>> toBeCached = [];
foreach (JSONEntity jSONEntity in jsonEntities)
{
foreach (JSONDatapoint datapoint in jSONEntity.Datapoints)
{
foreach (string model in datapoint.Model)
{
if (!toBeCached.ContainsKey(model))
{
toBeCached[model] = [];
}
toBeCached[model].Add(datapoint.Text);
}
}
}
ConcurrentQueue<Entity> retVal = [];
Parallel.ForEach(jsonEntities, jSONEntity =>
{
var tempHelper = helper.DuplicateConnection();
var entity = EntityFromJSON(entityCache, embeddingCache, ollama, tempHelper, jSONEntity);
if (entity is not null)
{
retVal.Enqueue(entity);
}
});
return [.. retVal];
}
public static Entity? EntityFromJSON(List<Entity> entityCache, Dictionary<string, Dictionary<string, float[]>> embeddingCache, OllamaApiClient ollama, SQLHelper helper, JSONEntity jsonEntity) //string json)
{
Dictionary<string, Dictionary<string, float[]>> embeddingsLUT = [];
int? preexistingEntityID = DatabaseHelper.GetEntityID(helper, jsonEntity.Name, jsonEntity.Searchdomain);
if (preexistingEntityID is not null)
{
lock (helper.connection)
{
Dictionary<string, dynamic> parameters = new()
{
{ "id", preexistingEntityID }
};
System.Data.Common.DbDataReader reader = helper.ExecuteSQLCommand("SELECT e.model, e.embedding, d.hash FROM datapoint d JOIN embedding e ON d.id = e.id_datapoint WHERE d.id_entity = @id", parameters);
while (reader.Read())
{
string model = reader.GetString(0);
long length = reader.GetBytes(1, 0, null, 0, 0);
byte[] embeddingBytes = new byte[length];
reader.GetBytes(1, 0, embeddingBytes, 0, (int)length);
float[] embeddingValues = FloatArrayFromBytes(embeddingBytes);
string hash = reader.GetString(2);
if (!embeddingsLUT.ContainsKey(hash))
{
embeddingsLUT[hash] = [];
}
embeddingsLUT[hash].TryAdd(model, embeddingValues);
}
reader.Close();
}
DatabaseHelper.RemoveEntity(entityCache, helper, jsonEntity.Name, jsonEntity.Searchdomain); // TODO only remove entity if there is actually a change somewhere. Perhaps create 3 datapoint lists to operate with: 1. delete, 2. update, 3. create
}
int id_entity = DatabaseHelper.DatabaseInsertEntity(helper, jsonEntity.Name, jsonEntity.Probmethod, DatabaseHelper.GetSearchdomainID(helper, jsonEntity.Searchdomain));
foreach (KeyValuePair<string, string> attribute in jsonEntity.Attributes)
{
DatabaseHelper.DatabaseInsertAttribute(helper, attribute.Key, attribute.Value, id_entity); // TODO implement bulk insert to reduce number of queries
}
List<Datapoint> datapoints = [];
foreach (JSONDatapoint jsonDatapoint in jsonEntity.Datapoints)
{
string hash = Convert.ToBase64String(SHA256.HashData(Encoding.UTF8.GetBytes(jsonDatapoint.Text)));
Dictionary<string, float[]> embeddings = embeddingsLUT.ContainsKey(hash) ? embeddingsLUT[hash] : [];
if (embeddings.Count == 0)
{
embeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.Text, [.. jsonDatapoint.Model], ollama, embeddingCache);
}
var probMethod_embedding = Probmethods.GetMethod(jsonDatapoint.Probmethod_embedding) ?? throw new Exception($"Unknown probmethod name {jsonDatapoint.Probmethod_embedding}");
Datapoint datapoint = new(jsonDatapoint.Name, probMethod_embedding, hash, [.. embeddings.Select(kv => (kv.Key, kv.Value))]);
int id_datapoint = DatabaseHelper.DatabaseInsertDatapoint(helper, jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, hash, id_entity); // TODO make this a bulk add action to reduce number of queries
List<(string model, byte[] embedding)> data = [];
foreach ((string, float[]) embedding in datapoint.embeddings)
{
data.Add((embedding.Item1, BytesFromFloatArray(embedding.Item2)));
}
DatabaseHelper.DatabaseInsertEmbeddingBulk(helper, id_datapoint, data);
datapoints.Add(datapoint);
}
var probMethod = Probmethods.GetMethod(jsonEntity.Probmethod) ?? throw new Exception($"Unknown probmethod name {jsonEntity.Probmethod}");
Entity entity = new(jsonEntity.Attributes, probMethod, datapoints, jsonEntity.Name)
{
id = id_entity
};
entityCache.Add(entity);
return entity;
}
}

View File

@@ -41,18 +41,18 @@ public class Searchdomain
// TODO Add settings and update cli/program.cs, as well as DatabaseInsertSearchdomain() // TODO Add settings and update cli/program.cs, as well as DatabaseInsertSearchdomain()
public Searchdomain(string searchdomain, string connectionString, OllamaApiClient ollama, string provider = "sqlserver", bool runEmpty = false) public Searchdomain(string searchdomain, string connectionString, OllamaApiClient ollama, Dictionary<string, Dictionary<string, float[]>> embeddingCache, string provider = "sqlserver", bool runEmpty = false)
{ {
_connectionString = connectionString; _connectionString = connectionString;
_provider = provider.ToLower(); _provider = provider.ToLower();
this.searchdomain = searchdomain; this.searchdomain = searchdomain;
this.ollama = ollama; this.ollama = ollama;
this.embeddingCache = embeddingCache;
searchCache = []; searchCache = [];
entityCache = []; entityCache = [];
embeddingCache = [];
connection = new MySqlConnection(connectionString); connection = new MySqlConnection(connectionString);
connection.Open(); connection.Open();
helper = new SQLHelper(connection); helper = new SQLHelper(connection, connectionString);
modelsInUse = []; // To make the compiler shut up - it is set in UpdateSearchDomain() don't worry // yeah, about that... modelsInUse = []; // To make the compiler shut up - it is set in UpdateSearchDomain() don't worry // yeah, about that...
if (!runEmpty) if (!runEmpty)
{ {
@@ -78,13 +78,13 @@ public class Searchdomain
embeddingReader.GetBytes(3, 0, embedding, 0, (int) length); embeddingReader.GetBytes(3, 0, embedding, 0, (int) length);
if (embedding_unassigned.TryGetValue(id_datapoint, out Dictionary<string, float[]>? embedding_unassigned_id_datapoint)) if (embedding_unassigned.TryGetValue(id_datapoint, out Dictionary<string, float[]>? embedding_unassigned_id_datapoint))
{ {
embedding_unassigned[id_datapoint][model] = FloatArrayFromBytes(embedding); embedding_unassigned[id_datapoint][model] = SearchdomainHelper.FloatArrayFromBytes(embedding);
} }
else else
{ {
embedding_unassigned[id_datapoint] = new() embedding_unassigned[id_datapoint] = new()
{ {
[model] = FloatArrayFromBytes(embedding) [model] = SearchdomainHelper.FloatArrayFromBytes(embedding)
}; };
} }
} }
@@ -219,218 +219,4 @@ public class Searchdomain
reader.Close(); reader.Close();
return this.id; return this.id;
} }
public static float[] FloatArrayFromBytes(byte[] bytes)
{
var floatArray = new float[bytes.Length / 4];
Buffer.BlockCopy(bytes, 0, floatArray, 0, bytes.Length);
return floatArray;
}
public static byte[] BytesFromFloatArray(float[] floats)
{
var byteArray = new byte[floats.Length * 4];
var floatArray = floats.ToArray();
Buffer.BlockCopy(floatArray, 0, byteArray, 0, byteArray.Length);
return byteArray;
}
public Entity? GetEntity(string name)
{
foreach (Entity entity in entityCache)
{
if (entity.name == name)
{
return entity;
}
}
return null;
}
public bool HasEntity(string name)
{
return GetEntity(name) is not null;
}
public Entity? EntityFromJSON(string json)
{
JSONEntity? jsonEntity = JsonSerializer.Deserialize<JSONEntity>(json);
if (jsonEntity is null)
{
return null;
}
bool hasPreexistingEntity = HasEntity(jsonEntity.Name);
Entity? preexistingEntity = null;
if (hasPreexistingEntity)
{
preexistingEntity = GetEntity(jsonEntity.Name);
RemoveEntity(jsonEntity.Name); // TODO only remove entity if there is actually a change somewhere. Perhaps create 3 datapoint lists to operate with: 1. delete, 2. update, 3. create
}
int id_entity = DatabaseInsertEntity(jsonEntity.Name, jsonEntity.Probmethod, id);
foreach (KeyValuePair<string, string> attribute in jsonEntity.Attributes)
{
DatabaseInsertAttribute(attribute.Key, attribute.Value, id_entity);
}
List<Datapoint> datapoints = [];
foreach (JSONDatapoint jsonDatapoint in jsonEntity.Datapoints)
{
Dictionary<string, float[]> embeddings = [];
string hash = Convert.ToBase64String(SHA256.HashData(Encoding.UTF8.GetBytes(jsonDatapoint.Text)));
if (hasPreexistingEntity && preexistingEntity is not null)
{
IEnumerable<Datapoint> preexistingDatapoints = preexistingEntity.datapoints.Where(x => x.name == jsonDatapoint.Name && x.hash == hash);
if (preexistingDatapoints.Any())
{
var preexistingDatapoint = preexistingDatapoints.First();
embeddings = preexistingDatapoint.embeddings.ToDictionary(item => item.Item1, item => item.Item2);
}
}
if (embeddings.Count == 0)
{
embeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.Text, [.. jsonDatapoint.Model], ollama, embeddingCache);
}
var probMethod_embedding = Probmethods.GetMethod(jsonDatapoint.Probmethod_embedding) ?? throw new Exception($"Unknown probmethod name {jsonDatapoint.Probmethod_embedding}");
Datapoint datapoint = new(jsonDatapoint.Name, probMethod_embedding, hash, [.. embeddings.Select(kv => (kv.Key, kv.Value))]);
int id_datapoint = DatabaseInsertDatapoint(jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, hash, id_entity);
List<(string model, byte[] embedding)> data = [];
foreach ((string, float[]) embedding in datapoint.embeddings)
{
data.Add((embedding.Item1, BytesFromFloatArray(embedding.Item2)));
}
DatabaseInsertEmbeddingBulk(id_datapoint, data);
datapoints.Add(datapoint);
}
var probMethod = Probmethods.GetMethod(jsonEntity.Probmethod) ?? throw new Exception($"Unknown probmethod name {jsonEntity.Probmethod}");
Entity entity = new(jsonEntity.Attributes, probMethod, datapoints, jsonEntity.Name)
{
id = id_entity
};
entityCache.Add(entity);
return entity;
}
public List<Entity>? EntitiesFromJSON(string json)
{
List<JSONEntity>? jsonEntities = JsonSerializer.Deserialize<List<JSONEntity>>(json);
if (jsonEntities is null)
{
return null;
}
Dictionary<string, List<string>> toBeCached = [];
foreach (JSONEntity jSONEntity in jsonEntities)
{
foreach (JSONDatapoint datapoint in jSONEntity.Datapoints)
{
foreach (string model in datapoint.Model)
{
if (!toBeCached.ContainsKey(model))
{
toBeCached[model] = [];
}
toBeCached[model].Add(datapoint.Text);
}
}
}
ConcurrentQueue<Entity> retVal = [];
Parallel.ForEach(jsonEntities, jSONEntity =>
{
var entity = EntityFromJSON(JsonSerializer.Serialize(jSONEntity));
if (entity is not null)
{
retVal.Enqueue(entity);
}
});
return retVal.ToList();
}
public void RemoveEntity(string name)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name }
};
helper.ExecuteSQLNonQuery("DELETE embedding.* FROM embedding JOIN datapoint dp ON id_datapoint = dp.id JOIN entity ON id_entity = entity.id WHERE entity.name = @name", parameters);
helper.ExecuteSQLNonQuery("DELETE datapoint.* FROM datapoint JOIN entity ON id_entity = entity.id WHERE entity.name = @name", parameters);
helper.ExecuteSQLNonQuery("DELETE attribute.* FROM attribute JOIN entity ON id_entity = entity.id WHERE entity.name = @name", parameters);
helper.ExecuteSQLNonQuery("DELETE FROM entity WHERE name = @name", parameters);
entityCache.RemoveAll(entity => entity.name == name);
}
public int DatabaseInsertSearchdomain(string name)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "settings", "{}"} // TODO add settings. It's not used yet, but maybe it's needed someday...
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO searchdomain (name, settings) VALUES (@name, @settings)", parameters);
}
public int DatabaseInsertEntity(string name, string probmethod, int id_searchdomain)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "probmethod", probmethod },
{ "id_searchdomain", id_searchdomain }
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO entity (name, probmethod, id_searchdomain) VALUES (@name, @probmethod, @id_searchdomain)", parameters);
}
public int DatabaseInsertAttribute(string attribute, string value, int id_entity)
{
Dictionary<string, dynamic> parameters = new()
{
{ "attribute", attribute },
{ "value", value },
{ "id_entity", id_entity }
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO attribute (attribute, value, id_entity) VALUES (@attribute, @value, @id_entity)", parameters);
}
public int DatabaseInsertDatapoint(string name, string probmethod_embedding, string hash, int id_entity)
{
Dictionary<string, dynamic> parameters = new()
{
{ "name", name },
{ "probmethod_embedding", probmethod_embedding },
{ "hash", hash },
{ "id_entity", id_entity }
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO datapoint (name, probmethod_embedding, hash, id_entity) VALUES (@name, @probmethod_embedding, @hash, @id_entity)", parameters);
}
public int DatabaseInsertEmbedding(int id_datapoint, string model, byte[] embedding)
{
Dictionary<string, dynamic> parameters = new()
{
{ "id_datapoint", id_datapoint },
{ "model", model },
{ "embedding", embedding }
};
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO embedding (id_datapoint, model, embedding) VALUES (@id_datapoint, @model, @embedding)", parameters);
}
public void DatabaseInsertEmbeddingBulk(int id_datapoint, List<(string model, byte[] embedding)> data)
{
Dictionary<string, object> parameters = [];
parameters["id_datapoint"] = id_datapoint;
var query = new StringBuilder("INSERT INTO embedding (id_datapoint, model, embedding) VALUES ");
foreach (var (model, embedding) in data)
{
string modelParam = $"model_{Guid.NewGuid()}".Replace("-", "");
string embeddingParam = $"embedding_{Guid.NewGuid()}".Replace("-", "");
parameters[modelParam] = model;
parameters[embeddingParam] = embedding;
query.Append($"(@id_datapoint, @{modelParam}, @{embeddingParam}), ");
}
query.Length -= 2; // remove trailing comma
helper.ExecuteSQLNonQuery(query.ToString(), parameters);
}
} }

View File

@@ -14,14 +14,16 @@ public class SearchdomainManager
private readonly IConfiguration _config; private readonly IConfiguration _config;
private readonly string ollamaURL; private readonly string ollamaURL;
private readonly string connectionString; private readonly string connectionString;
private OllamaApiClient client; public OllamaApiClient client;
private MySqlConnection connection; private MySqlConnection connection;
private SQLHelper helper; public SQLHelper helper;
public Dictionary<string, Dictionary<string, float[]>> embeddingCache;
public SearchdomainManager(ILogger<SearchdomainManager> logger, IConfiguration config) public SearchdomainManager(ILogger<SearchdomainManager> logger, IConfiguration config)
{ {
_logger = logger; _logger = logger;
_config = config; _config = config;
embeddingCache = [];
ollamaURL = _config.GetSection("Embeddingsearch")["OllamaURL"] ?? ""; ollamaURL = _config.GetSection("Embeddingsearch")["OllamaURL"] ?? "";
connectionString = _config.GetSection("Embeddingsearch").GetConnectionString("SQL") ?? ""; connectionString = _config.GetSection("Embeddingsearch").GetConnectionString("SQL") ?? "";
if (ollamaURL.IsNullOrEmpty() || connectionString.IsNullOrEmpty()) if (ollamaURL.IsNullOrEmpty() || connectionString.IsNullOrEmpty())
@@ -31,7 +33,7 @@ public class SearchdomainManager
client = new(new Uri(ollamaURL)); client = new(new Uri(ollamaURL));
connection = new MySqlConnection(connectionString); connection = new MySqlConnection(connectionString);
connection.Open(); connection.Open();
helper = new SQLHelper(connection); helper = new SQLHelper(connection, connectionString);
try try
{ {
DatabaseMigrations.Migrate(helper); DatabaseMigrations.Migrate(helper);
@@ -51,7 +53,7 @@ public class SearchdomainManager
} }
try try
{ {
return SetSearchdomain(searchdomain, new Searchdomain(searchdomain, connectionString, client)); return SetSearchdomain(searchdomain, new Searchdomain(searchdomain, connectionString, client, embeddingCache));
} }
catch (MySqlException) catch (MySqlException)
{ {
@@ -66,6 +68,8 @@ public class SearchdomainManager
} }
public List<string> ListSearchdomains() public List<string> ListSearchdomains()
{
lock (helper.connection)
{ {
DbDataReader reader = helper.ExecuteSQLCommand("SELECT name FROM searchdomain", []); DbDataReader reader = helper.ExecuteSQLCommand("SELECT name FROM searchdomain", []);
List<string> results = []; List<string> results = [];
@@ -76,6 +80,7 @@ public class SearchdomainManager
reader.Close(); reader.Close();
return results; return results;
} }
}
public int CreateSearchdomain(string searchdomain, string settings = "{}") public int CreateSearchdomain(string searchdomain, string settings = "{}")
{ {
@@ -98,7 +103,7 @@ public class SearchdomainManager
int counter = 0; int counter = 0;
while (searchdomain_.entityCache.Count > 0) while (searchdomain_.entityCache.Count > 0)
{ {
searchdomain_.RemoveEntity(searchdomain_.entityCache.First().name); DatabaseHelper.RemoveEntity(searchdomain_.entityCache, helper, searchdomain_.entityCache.First().name, searchdomain);
counter += 1; counter += 1;
} }
_logger.LogDebug($"Number of entities deleted as part of deleting the searchdomain \"{searchdomain}\": {counter}"); _logger.LogDebug($"Number of entities deleted as part of deleting the searchdomain \"{searchdomain}\": {counter}");