From 4aabc3bae03c95d7e54dee16085e8388bf828f20 Mon Sep 17 00:00:00 2001 From: LD-Reborn Date: Thu, 12 Feb 2026 20:57:01 +0100 Subject: [PATCH] Fixed DatabaseInsertEmbeddingBulk, Added attributes bulk edit and delete, Fixed entityCache not multithreading safe, fixed EntityFromJSON missing bulk inserts, Added retry logic for BulkExecuteNonQuery, added MaxRequestBodySize configuration --- src/Server/Controllers/EntityController.cs | 3 +- src/Server/Datapoint.cs | 10 + src/Server/Helper/DatabaseHelper.cs | 62 +++++- src/Server/Helper/SQLHelper.cs | 42 ++-- src/Server/Helper/SearchdomainHelper.cs | 214 ++++++++++++++------- src/Server/Models/ConfigModels.cs | 1 + src/Server/Probmethods.cs | 9 +- src/Server/Searchdomain.cs | 30 +-- src/Server/SimilarityMethods.cs | 9 +- 9 files changed, 271 insertions(+), 109 deletions(-) diff --git a/src/Server/Controllers/EntityController.cs b/src/Server/Controllers/EntityController.cs index 9fbfc53..cc72a33 100644 --- a/src/Server/Controllers/EntityController.cs +++ b/src/Server/Controllers/EntityController.cs @@ -153,7 +153,8 @@ public class EntityController : ControllerBase } searchdomain_.ReconciliateOrInvalidateCacheForDeletedEntity(entity_); _databaseHelper.RemoveEntity([], _domainManager.helper, entityName, searchdomain); - searchdomain_.entityCache.RemoveAll(entity => entity.name == entityName); + Entity toBeRemoved = searchdomain_.entityCache.First(entity => entity.name == entityName); + searchdomain_.entityCache = [.. searchdomain_.entityCache.Except([toBeRemoved])]; return Ok(new EntityDeleteResults() {Success = true}); } } diff --git a/src/Server/Datapoint.cs b/src/Server/Datapoint.cs index c4c742d..8240252 100644 --- a/src/Server/Datapoint.cs +++ b/src/Server/Datapoint.cs @@ -1,4 +1,5 @@ using Shared; +using Shared.Models; namespace Server; @@ -10,6 +11,15 @@ public class Datapoint public List<(string, float[])> embeddings; public string hash; + public Datapoint(string name, ProbMethodEnum probMethod, SimilarityMethodEnum similarityMethod, string hash, List<(string, float[])> embeddings) + { + this.name = name; + this.probMethod = new ProbMethod(probMethod); + this.similarityMethod = new SimilarityMethod(similarityMethod); + this.hash = hash; + this.embeddings = embeddings; + } + public Datapoint(string name, ProbMethod probMethod, SimilarityMethod similarityMethod, string hash, List<(string, float[])> embeddings) { this.name = name; diff --git a/src/Server/Helper/DatabaseHelper.cs b/src/Server/Helper/DatabaseHelper.cs index b543347..ed0d5ab 100644 --- a/src/Server/Helper/DatabaseHelper.cs +++ b/src/Server/Helper/DatabaseHelper.cs @@ -39,14 +39,15 @@ public class DatabaseHelper(ILogger logger) helper.ExecuteSQLNonQuery(query.ToString(), parameters); } - public static int DatabaseInsertEmbeddingBulk(SQLHelper helper, List<(string hash, string model, byte[] embedding)> data) + public static int DatabaseInsertEmbeddingBulk(SQLHelper helper, List<(string name, string model, byte[] embedding)> data, int id_entity) { return helper.BulkExecuteNonQuery( - "INSERT INTO embedding (id_datapoint, model, embedding) SELECT d.id, @model, @embedding FROM datapoint d WHERE d.hash = @hash", + "INSERT INTO embedding (id_datapoint, model, embedding) SELECT d.id, @model, @embedding FROM datapoint d WHERE d.name = @name AND d.id_entity = @id_entity ORDER BY d.id LIMIT 1", // TODO: fix limitation - entity must not have 2 datapoints with the same content, i.e. hash data.Select(element => new object[] { new MySqlParameter("@model", element.model), new MySqlParameter("@embedding", element.embedding), - new MySqlParameter("@hash", element.hash) + new MySqlParameter("@name", element.name), + new MySqlParameter("@id_entity", id_entity) }) ); } @@ -96,6 +97,29 @@ public class DatabaseHelper(ILogger logger) ); } + public static int DatabaseUpdateAttributes(SQLHelper helper, List<(string attribute, string value, int id_entity)> values) + { + return helper.BulkExecuteNonQuery( + "UPDATE attribute SET value=@value WHERE id_entity=@id_entity AND attribute=@attribute", + values.Select(element => new object[] { + new MySqlParameter("@attribute", element.attribute), + new MySqlParameter("@value", element.value), + new MySqlParameter("@id_entity", element.id_entity) + }) + ); + } + + public static int DatabaseDeleteAttributes(SQLHelper helper, List<(string attribute, int id_entity)> values) + { + return helper.BulkExecuteNonQuery( + "DELETE FROM attribute WHERE id_entity=@id_entity AND attribute=@attribute", + values.Select(element => new object[] { + new MySqlParameter("@attribute", element.attribute), + new MySqlParameter("@id_entity", element.id_entity) + }) + ); + } + public static int DatabaseInsertDatapoints(SQLHelper helper, List<(string name, ProbMethodEnum probmethod_embedding, SimilarityMethodEnum similarityMethod, string hash)> values, int id_entity) { return helper.BulkExecuteNonQuery( @@ -123,6 +147,38 @@ public class DatabaseHelper(ILogger logger) return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO datapoint (name, probmethod_embedding, similaritymethod, hash, id_entity) VALUES (@name, @probmethod_embedding, @similaritymethod, @hash, @id_entity)", parameters); } + public static (int datapoints, int embeddings) DatabaseDeleteDatapoints(SQLHelper helper, List values, int id_entity) + { + int embeddings = helper.BulkExecuteNonQuery( + "DELETE e FROM embedding e JOIN datapoint d ON e.id_datapoint=d.id WHERE d.name=@datapointName AND d.id_entity=@entityId", + values.Select(element => new object[] { + new MySqlParameter("@datapointName", element), + new MySqlParameter("@entityId", id_entity) + }) + ); + int datapoints = helper.BulkExecuteNonQuery( + "DELETE FROM datapoint WHERE name=@datapointName AND id_entity=@entityId", + values.Select(element => new object[] { + new MySqlParameter("@datapointName", element), + new MySqlParameter("@entityId", id_entity) + }) + ); + return (datapoints: datapoints, embeddings: embeddings); + } + + public static int DatabaseUpdateDatapoint(SQLHelper helper, List<(string name, string probmethod_embedding, string similarityMethod)> values, int id_entity) + { + return helper.BulkExecuteNonQuery( + "UPDATE datapoint SET probmethod_embedding=@probmethod, similaritymethod=@similaritymethod WHERE id_entity=@entityId AND name=@datapointName", + values.Select(element => new object[] { + new MySqlParameter("@probmethod", element.probmethod_embedding), + new MySqlParameter("@similaritymethod", element.similarityMethod), + new MySqlParameter("@entityId", id_entity), + new MySqlParameter("@datapointName", element.name) + }) + ); + } + public static int DatabaseInsertEmbedding(SQLHelper helper, int id_datapoint, string model, byte[] embedding) { Dictionary parameters = new() diff --git a/src/Server/Helper/SQLHelper.cs b/src/Server/Helper/SQLHelper.cs index de7e276..0fb1ec4 100644 --- a/src/Server/Helper/SQLHelper.cs +++ b/src/Server/Helper/SQLHelper.cs @@ -87,22 +87,38 @@ public class SQLHelper:IDisposable EnsureConnected(); EnsureDbReaderIsClosed(); - using var transaction = connection.BeginTransaction(); - using var command = connection.CreateCommand(); - - command.CommandText = sql; - command.Transaction = transaction; - int affectedRows = 0; - - foreach (var parameters in parameterSets) + int retries = 0; + + while (retries <= 3) { - command.Parameters.Clear(); - command.Parameters.AddRange(parameters); - affectedRows += command.ExecuteNonQuery(); - } + try + { + using var transaction = connection.BeginTransaction(); + using var command = connection.CreateCommand(); - transaction.Commit(); + command.CommandText = sql; + command.Transaction = transaction; + + foreach (var parameters in parameterSets) + { + command.Parameters.Clear(); + command.Parameters.AddRange(parameters); + affectedRows += command.ExecuteNonQuery(); + } + + transaction.Commit(); + break; + } + catch (Exception) + { + retries++; + if (retries > 3) + throw; + Thread.Sleep(10); + } + } + return affectedRows; } } diff --git a/src/Server/Helper/SearchdomainHelper.cs b/src/Server/Helper/SearchdomainHelper.cs index ae2a553..8f38047 100644 --- a/src/Server/Helper/SearchdomainHelper.cs +++ b/src/Server/Helper/SearchdomainHelper.cs @@ -29,12 +29,12 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp return floatArray; } - public static bool CacheHasEntity(List entityCache, string name) + public static bool CacheHasEntity(ConcurrentBag entityCache, string name) { return CacheGetEntity(entityCache, name) is not null; } - public static Entity? CacheGetEntity(List entityCache, string name) + public static Entity? CacheGetEntity(ConcurrentBag entityCache, string name) { foreach (Entity entity in entityCache) { @@ -111,10 +111,14 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp { using SQLHelper helper = searchdomainManager.helper.DuplicateConnection(); Searchdomain searchdomain = searchdomainManager.GetSearchdomain(jsonEntity.Searchdomain); - List entityCache = searchdomain.entityCache; + ConcurrentBag entityCache = searchdomain.entityCache; AIProvider aIProvider = searchdomain.aIProvider; EnumerableLruCache> embeddingCache = searchdomain.embeddingCache; - Entity? preexistingEntity = entityCache.FirstOrDefault(entity => entity.name == jsonEntity.Name); + Entity? preexistingEntity; + lock (entityCache) + { + preexistingEntity = entityCache.FirstOrDefault(entity => entity.name == jsonEntity.Name); + } bool invalidateSearchCache = false; if (preexistingEntity is not null) @@ -127,7 +131,10 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp } Dictionary attributes = jsonEntity.Attributes; - // Attribute + // Attribute - get changes + List<(string attribute, string newValue, int entityId)> updatedAttributes = []; + List<(string attribute, int entityId)> deletedAttributes = []; + List<(string attributeKey, string attribute, int entityId)> addedAttributes = []; foreach (KeyValuePair attributesKV in preexistingEntity.attributes.ToList()) { string oldAttributeKey = attributesKV.Key; @@ -135,25 +142,10 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp bool newHasAttribute = jsonEntity.Attributes.TryGetValue(oldAttributeKey, out string? newAttribute); if (newHasAttribute && newAttribute is not null && newAttribute != oldAttribute) { - // Attribute - Updated - Dictionary parameters = new() - { - { "newValue", newAttribute }, - { "entityId", preexistingEntityID }, - { "attribute", oldAttributeKey} - }; - helper.ExecuteSQLNonQuery("UPDATE attribute SET value=@newValue WHERE id_entity=@entityId AND attribute=@attribute", parameters); - preexistingEntity.attributes[oldAttributeKey] = newAttribute; + updatedAttributes.Add((attribute: oldAttributeKey, newValue: newAttribute, entityId: (int)preexistingEntityID)); } else if (!newHasAttribute) { - // Attribute - Deleted - Dictionary parameters = new() - { - { "entityId", preexistingEntityID }, - { "attribute", oldAttributeKey} - }; - helper.ExecuteSQLNonQuery("DELETE FROM attribute WHERE id_entity=@entityId AND attribute=@attribute", parameters); - preexistingEntity.attributes.Remove(oldAttributeKey); + deletedAttributes.Add((attribute: oldAttributeKey, entityId: (int)preexistingEntityID)); } } foreach (var attributesKV in jsonEntity.Attributes) @@ -164,12 +156,48 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp if (!preexistingHasAttribute) { // Attribute - New - DatabaseHelper.DatabaseInsertAttribute(helper, newAttributeKey, newAttribute, (int)preexistingEntityID); - preexistingEntity.attributes.Add(newAttributeKey, newAttribute); + addedAttributes.Add((attributeKey: newAttributeKey, attribute: newAttribute, entityId: (int)preexistingEntityID)); } } - // Datapoint + + // Attribute - apply changes + if (updatedAttributes.Count != 0) + { + // Update + DatabaseHelper.DatabaseUpdateAttributes(helper, updatedAttributes); + lock (preexistingEntity.attributes) + { + updatedAttributes.ForEach(attribute => preexistingEntity.attributes[attribute.attribute] = attribute.newValue); + } + } + if (deletedAttributes.Count != 0) + { + // Delete + DatabaseHelper.DatabaseDeleteAttributes(helper, deletedAttributes); + lock (preexistingEntity.attributes) + { + deletedAttributes.ForEach(attribute => preexistingEntity.attributes.Remove(attribute.attribute)); + } + } + if (addedAttributes.Count != 0) + { + // Insert + DatabaseHelper.DatabaseInsertAttributes(helper, addedAttributes); + lock (preexistingEntity.attributes) + { + addedAttributes.ForEach(attribute => preexistingEntity.attributes.Add(attribute.attributeKey, attribute.attribute)); + } + } + + // Datapoint - get changes + List deletedDatapointInstances = []; + List deletedDatapoints = []; + List<(string datapointName, int entityId, JSONDatapoint jsonDatapoint, string hash)> updatedDatapointsText = []; + List<(string datapointName, string probMethod, string similarityMethod, int entityId, JSONDatapoint jsonDatapoint)> updatedDatapointsNonText = []; + List createdDatapointInstances = []; + List<(string name, ProbMethodEnum probmethod_embedding, SimilarityMethodEnum similarityMethod, string hash, Dictionary embeddings, JSONDatapoint datapoint)> createdDatapoints = []; + foreach (Datapoint datapoint_ in preexistingEntity.datapoints.ToList()) { Datapoint datapoint = datapoint_; // To enable replacing the datapoint reference as foreach iterators cannot be overwritten @@ -177,48 +205,43 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp if (!newEntityHasDatapoint) { // Datapoint - Deleted - Dictionary parameters = new() - { - { "datapointName", datapoint.name }, - { "entityId", preexistingEntityID} - }; - helper.ExecuteSQLNonQuery("DELETE e FROM embedding e JOIN datapoint d ON e.id_datapoint=d.id WHERE d.name=@datapointName AND d.id_entity=@entityId", parameters); - helper.ExecuteSQLNonQuery("DELETE FROM datapoint WHERE id_entity=@entityId AND name=@datapointName", parameters); - preexistingEntity.datapoints.Remove(datapoint); + deletedDatapointInstances.Add(datapoint); + deletedDatapoints.Add(datapoint.name); invalidateSearchCache = true; } else { JSONDatapoint? newEntityDatapoint = jsonEntity.Datapoints.FirstOrDefault(x => x.Name == datapoint.name); - if (newEntityDatapoint is not null && newEntityDatapoint.Text is not null) + string? hash = newEntityDatapoint?.Text is not null ? GetHash(newEntityDatapoint) : null; + if ( + newEntityDatapoint is not null + && newEntityDatapoint.Text is not null + && hash is not null + && hash != datapoint.hash) { // Datapoint - Updated (text) - Dictionary parameters = new() + updatedDatapointsText.Add(new() { - { "datapointName", datapoint.name }, - { "entityId", preexistingEntityID} - }; - helper.ExecuteSQLNonQuery("DELETE e FROM embedding e JOIN datapoint d ON e.id_datapoint=d.id WHERE d.name=@datapointName AND d.id_entity=@entityId", parameters); - helper.ExecuteSQLNonQuery("DELETE FROM datapoint WHERE id_entity=@entityId AND name=@datapointName", parameters); - preexistingEntity.datapoints.Remove(datapoint); - Datapoint newDatapoint = DatabaseInsertDatapointWithEmbeddings(helper, searchdomain, newEntityDatapoint, (int)preexistingEntityID); - preexistingEntity.datapoints.Add(newDatapoint); - datapoint = newDatapoint; + datapointName = newEntityDatapoint.Name, + entityId = (int)preexistingEntityID, + jsonDatapoint = newEntityDatapoint, + hash = hash + }); invalidateSearchCache = true; } - if (newEntityDatapoint is not null && (newEntityDatapoint.Probmethod_embedding != datapoint.probMethod.probMethodEnum || newEntityDatapoint.SimilarityMethod != datapoint.similarityMethod.similarityMethodEnum)) + if ( + newEntityDatapoint is not null + && (newEntityDatapoint.Probmethod_embedding != datapoint.probMethod.probMethodEnum + || newEntityDatapoint.SimilarityMethod != datapoint.similarityMethod.similarityMethodEnum)) { // Datapoint - Updated (probmethod or similaritymethod) - Dictionary parameters = new() + updatedDatapointsNonText.Add(new() { - { "probmethod", newEntityDatapoint.Probmethod_embedding.ToString() }, - { "similaritymethod", newEntityDatapoint.SimilarityMethod.ToString() }, - { "datapointName", datapoint.name }, - { "entityId", preexistingEntityID} - }; - helper.ExecuteSQLNonQuery("UPDATE datapoint SET probmethod_embedding=@probmethod, similaritymethod=@similaritymethod WHERE id_entity=@entityId AND name=@datapointName", parameters); - Datapoint preexistingDatapoint = preexistingEntity.datapoints.First(x => x == datapoint); // The for loop is a copy. This retrieves the original such that it can be updated. - preexistingDatapoint.probMethod = new(newEntityDatapoint.Probmethod_embedding, _logger); - preexistingDatapoint.similarityMethod = new(newEntityDatapoint.SimilarityMethod, _logger); + datapointName = newEntityDatapoint.Name, + entityId = (int)preexistingEntityID, + probMethod = newEntityDatapoint.Probmethod_embedding.ToString(), + similarityMethod = newEntityDatapoint.SimilarityMethod.ToString(), + jsonDatapoint = newEntityDatapoint + }); invalidateSearchCache = true; } } @@ -229,11 +252,66 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp if (!oldEntityHasDatapoint) { // Datapoint - New - Datapoint datapoint = DatabaseInsertDatapointWithEmbeddings(helper, searchdomain, jsonDatapoint, (int)preexistingEntityID); - preexistingEntity.datapoints.Add(datapoint); + createdDatapoints.Add(new() + { + name = jsonDatapoint.Name, + probmethod_embedding = jsonDatapoint.Probmethod_embedding, + similarityMethod = jsonDatapoint.SimilarityMethod, + hash = GetHash(jsonDatapoint), + embeddings = Datapoint.GetEmbeddings( + jsonDatapoint.Text ?? throw new Exception("jsonDatapoint.Text must not be null when retrieving embeddings"), + [.. jsonDatapoint.Model], + aIProvider, + embeddingCache + ), + datapoint = jsonDatapoint + }); invalidateSearchCache = true; } } + + // Datapoint - apply changes + // Deleted + if (deletedDatapointInstances.Count != 0) + { + DatabaseHelper.DatabaseDeleteDatapoints(helper, deletedDatapoints, (int)preexistingEntityID); + deletedDatapointInstances.ForEach(datapoint => preexistingEntity.datapoints.Remove(datapoint)); + } + // Created + if (createdDatapoints.Count != 0) + { + List datapoint = DatabaseInsertDatapointsWithEmbeddings(helper, searchdomain, [.. createdDatapoints.Select(element => (element.datapoint, element.hash))], (int)preexistingEntityID); + createdDatapoints.ForEach(datapoint => preexistingEntity.datapoints.Add(new( + datapoint.name, + datapoint.probmethod_embedding, + datapoint.similarityMethod, + datapoint.hash, + [.. datapoint.embeddings.Select(element => (element.Key, element.Value))]) + )); + } + // Datapoint - Updated (text) + if (updatedDatapointsText.Count != 0) + { + DatabaseHelper.DatabaseDeleteDatapoints(helper, [.. updatedDatapointsText.Select(datapoint => datapoint.datapointName)], (int)preexistingEntityID); + updatedDatapointsText.ForEach(datapoint => preexistingEntity.datapoints.RemoveAll(x => x.name == datapoint.datapointName)); + List datapoints = DatabaseInsertDatapointsWithEmbeddings(helper, searchdomain, [.. updatedDatapointsText.Select(element => (datapoint: element.jsonDatapoint, hash: element.hash))], (int)preexistingEntityID); + preexistingEntity.datapoints.AddRange(datapoints); + } + // Datapoint - Updated (probmethod or similaritymethod) + if (updatedDatapointsNonText.Count != 0) + { + DatabaseHelper.DatabaseUpdateDatapoint( + helper, + [.. updatedDatapointsNonText.Select(element => (element.datapointName, element.probMethod, element.similarityMethod))], + (int)preexistingEntityID + ); + updatedDatapointsNonText.ForEach(element => + { + Datapoint preexistingDatapoint = preexistingEntity.datapoints.First(x => x.name == element.datapointName); + preexistingDatapoint.probMethod = new(element.jsonDatapoint.Probmethod_embedding); + preexistingDatapoint.similarityMethod = new(element.jsonDatapoint.SimilarityMethod); + }); + } if (invalidateSearchCache) { @@ -256,7 +334,6 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp } DatabaseHelper.DatabaseInsertAttributes(helper, toBeInsertedAttributes); - List datapoints = []; List<(JSONDatapoint datapoint, string hash)> toBeInsertedDatapoints = []; foreach (JSONDatapoint jsonDatapoint in jsonEntity.Datapoints) { @@ -267,7 +344,7 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp hash = hash }); } - List datapoint = DatabaseInsertDatapointsWithEmbeddings(helper, searchdomain, toBeInsertedDatapoints, id_entity); + List datapoints = DatabaseInsertDatapointsWithEmbeddings(helper, searchdomain, toBeInsertedDatapoints, id_entity); var probMethod = Probmethods.GetMethod(jsonEntity.Probmethod) ?? throw new ProbMethodNotFoundException(jsonEntity.Probmethod); Entity entity = new(jsonEntity.Attributes, probMethod, jsonEntity.Probmethod.ToString(), datapoints, jsonEntity.Name) @@ -285,7 +362,7 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp { List result = []; List<(string name, ProbMethodEnum probmethod_embedding, SimilarityMethodEnum similarityMethod, string hash)> toBeInsertedDatapoints = []; - List<(string hash, string model, byte[] embedding)> toBeInsertedEmbeddings = []; + List<(string name, string model, byte[] embedding)> toBeInsertedEmbeddings = []; foreach ((JSONDatapoint datapoint, string hash) value in values) { Datapoint datapoint = BuildDatapointFromJsonDatapoint(value.datapoint, id_entity, searchdomain, value.hash); @@ -300,7 +377,7 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp { toBeInsertedEmbeddings.Add(new() { - hash = value.hash, + name = datapoint.name, model = embedding.Item1, embedding = BytesFromFloatArray(embedding.Item2) }); @@ -308,8 +385,8 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp result.Add(datapoint); } - DatabaseHelper.DatabaseInsertDatapoints(helper, toBeInsertedDatapoints, id_entity); - DatabaseHelper.DatabaseInsertEmbeddingBulk(helper, toBeInsertedEmbeddings); + int insertedDatapoints = DatabaseHelper.DatabaseInsertDatapoints(helper, toBeInsertedDatapoints, id_entity); + int insertedEmbeddings = DatabaseHelper.DatabaseInsertEmbeddingBulk(helper, toBeInsertedEmbeddings, id_entity); return result; } @@ -319,7 +396,7 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp { throw new Exception("jsonDatapoint.Text must not be null at this point"); } - hash ??= Convert.ToBase64String(SHA256.HashData(Encoding.UTF8.GetBytes(jsonDatapoint.Text))); + hash ??= GetHash(jsonDatapoint); Datapoint datapoint = BuildDatapointFromJsonDatapoint(jsonDatapoint, id_entity, searchdomain, hash); int id_datapoint = DatabaseHelper.DatabaseInsertDatapoint(helper, jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, jsonDatapoint.SimilarityMethod, hash, id_entity); // TODO make this a bulk add action to reduce number of queries List<(string model, byte[] embedding)> data = []; @@ -331,6 +408,11 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp return datapoint; } + public string GetHash(JSONDatapoint jsonDatapoint) + { + return Convert.ToBase64String(SHA256.HashData(Encoding.UTF8.GetBytes(jsonDatapoint.Text ?? throw new Exception("jsonDatapoint.Text must not be null to compute hash")))); + } + public Datapoint BuildDatapointFromJsonDatapoint(JSONDatapoint jsonDatapoint, int entityId, Searchdomain searchdomain, string? hash = null) { if (jsonDatapoint.Text is null) @@ -342,8 +424,8 @@ public class SearchdomainHelper(ILogger logger, DatabaseHelp hash ??= Convert.ToBase64String(SHA256.HashData(Encoding.UTF8.GetBytes(jsonDatapoint.Text))); DatabaseHelper.DatabaseInsertDatapoint(helper, jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, jsonDatapoint.SimilarityMethod, hash, entityId); Dictionary embeddings = Datapoint.GetEmbeddings(jsonDatapoint.Text, [.. jsonDatapoint.Model], searchdomain.aIProvider, embeddingCache); - var probMethod_embedding = new ProbMethod(jsonDatapoint.Probmethod_embedding, logger) ?? throw new ProbMethodNotFoundException(jsonDatapoint.Probmethod_embedding); - var similarityMethod = new SimilarityMethod(jsonDatapoint.SimilarityMethod, logger) ?? throw new SimilarityMethodNotFoundException(jsonDatapoint.SimilarityMethod); + var probMethod_embedding = new ProbMethod(jsonDatapoint.Probmethod_embedding) ?? throw new ProbMethodNotFoundException(jsonDatapoint.Probmethod_embedding); + var similarityMethod = new SimilarityMethod(jsonDatapoint.SimilarityMethod) ?? throw new SimilarityMethodNotFoundException(jsonDatapoint.SimilarityMethod); return new Datapoint(jsonDatapoint.Name, probMethod_embedding, similarityMethod, hash, [.. embeddings.Select(kv => (kv.Key, kv.Value))]); } diff --git a/src/Server/Models/ConfigModels.cs b/src/Server/Models/ConfigModels.cs index 1c318c8..e50df62 100644 --- a/src/Server/Models/ConfigModels.cs +++ b/src/Server/Models/ConfigModels.cs @@ -12,6 +12,7 @@ public class EmbeddingSearchOptions : ApiKeyOptions public required SimpleAuthOptions SimpleAuth { get; set; } public required CacheOptions Cache { get; set; } public required bool UseHttpsRedirection { get; set; } + public int? MaxRequestBodySize { get; set; } } public class AiProvider diff --git a/src/Server/Probmethods.cs b/src/Server/Probmethods.cs index 439f9cd..8794756 100644 --- a/src/Server/Probmethods.cs +++ b/src/Server/Probmethods.cs @@ -10,16 +10,11 @@ public class ProbMethod public ProbMethodEnum probMethodEnum; public string name; - public ProbMethod(ProbMethodEnum probMethodEnum, ILogger logger) + public ProbMethod(ProbMethodEnum probMethodEnum) { this.probMethodEnum = probMethodEnum; this.name = probMethodEnum.ToString(); - Probmethods.probMethodDelegate? probMethod = Probmethods.GetMethod(name); - if (probMethod is null) - { - logger.LogError("Unable to retrieve probMethod {name}", [name]); - throw new ProbMethodNotFoundException(probMethodEnum); - } + Probmethods.probMethodDelegate? probMethod = Probmethods.GetMethod(name) ?? throw new ProbMethodNotFoundException(probMethodEnum); method = probMethod; } } diff --git a/src/Server/Searchdomain.cs b/src/Server/Searchdomain.cs index 83e6e44..d0c718e 100644 --- a/src/Server/Searchdomain.cs +++ b/src/Server/Searchdomain.cs @@ -7,6 +7,7 @@ using Server.Helper; using Shared; using Shared.Models; using AdaptiveExpressions; +using System.Collections.Concurrent; namespace Server; @@ -19,7 +20,7 @@ public class Searchdomain public int id; public SearchdomainSettings settings; public EnumerableLruCache queryCache; // Key: query, Value: Search results for that query (with timestamp) - public List entityCache; + public ConcurrentBag entityCache; public List modelsInUse; public EnumerableLruCache> embeddingCache; private readonly MySqlConnection connection; @@ -105,8 +106,8 @@ public class Searchdomain typeof(SimilarityMethodEnum), similarityMethodString ); - ProbMethod probmethod = new(probmethodEnum, _logger); - SimilarityMethod similarityMethod = new(similairtyMethodEnum, _logger); + ProbMethod probmethod = new(probmethodEnum); + SimilarityMethod similarityMethod = new(similairtyMethodEnum); if (embedding_unassigned.TryGetValue(id, out Dictionary? embeddings) && probmethod is not null) { embedding_unassigned.Remove(id); @@ -173,7 +174,6 @@ public class Searchdomain Dictionary queryEmbeddings = GetQueryEmbeddings(query); List<(float, string)> result = []; - foreach (Entity entity in entityCache) { result.Add((EvaluateEntityAgainstQueryEmbeddings(entity, queryEmbeddings), entity.name)); @@ -219,7 +219,10 @@ public class Searchdomain public void UpdateModelsInUse() { - modelsInUse = GetModels(entityCache.ToList()); + lock (modelsInUse) + { + modelsInUse = GetModels(entityCache); + } } private static float EvaluateEntityAgainstQueryEmbeddings(Entity entity, Dictionary queryEmbeddings) @@ -240,21 +243,24 @@ public class Searchdomain return entity.probMethod(datapointProbs); } - public static List GetModels(List entities) + public static List GetModels(ConcurrentBag entities) { List result = []; - lock (entities) + foreach (Entity entity in entities) { - foreach (Entity entity in entities) + lock (entity) { foreach (Datapoint datapoint in entity.datapoints) { - foreach ((string, float[]) tuple in datapoint.embeddings) + lock (entity.datapoints) { - string model = tuple.Item1; - if (!result.Contains(model)) + foreach ((string, float[]) tuple in datapoint.embeddings) { - result.Add(model); + string model = tuple.Item1; + if (!result.Contains(model)) + { + result.Add(model); + } } } } diff --git a/src/Server/SimilarityMethods.cs b/src/Server/SimilarityMethods.cs index 3d750ca..7d28527 100644 --- a/src/Server/SimilarityMethods.cs +++ b/src/Server/SimilarityMethods.cs @@ -9,16 +9,11 @@ public class SimilarityMethod public SimilarityMethodEnum similarityMethodEnum; public string name; - public SimilarityMethod(SimilarityMethodEnum similarityMethodEnum, ILogger logger) + public SimilarityMethod(SimilarityMethodEnum similarityMethodEnum) { this.similarityMethodEnum = similarityMethodEnum; this.name = similarityMethodEnum.ToString(); - SimilarityMethods.similarityMethodDelegate? probMethod = SimilarityMethods.GetMethod(name); - if (probMethod is null) - { - logger.LogError("Unable to retrieve similarityMethod {name}", [name]); - throw new Exception("Unable to retrieve similarityMethod"); - } + SimilarityMethods.similarityMethodDelegate? probMethod = SimilarityMethods.GetMethod(name) ?? throw new Exception($"Unable to retrieve similarityMethod {name}"); method = probMethod; } }