9 Commits

8 changed files with 135 additions and 10 deletions

View File

@@ -116,12 +116,14 @@ public class EntityController : ControllerBase
else
{
_logger.LogError("Unable to deserialize an entity");
ElmahCore.ElmahExtensions.RaiseError(new Exception("Unable to deserialize an entity"));
return Ok(new EntityIndexResult() { Success = false, Message = "Unable to deserialize an entity"});
}
} catch (Exception ex)
{
if (ex.InnerException is not null) ex = ex.InnerException;
_logger.LogError("Unable to index the provided entities. {ex.Message} - {ex.StackTrace}", [ex.Message, ex.StackTrace]);
ElmahCore.ElmahExtensions.RaiseError(ex);
return Ok(new EntityIndexResult() { Success = false, Message = ex.Message });
}
@@ -142,6 +144,11 @@ public class EntityController : ControllerBase
if (entity_ is null)
{
_logger.LogError("Unable to delete the entity {entityName} in {searchdomain} - it was not found under the specified name", [entityName, searchdomain]);
ElmahCore.ElmahExtensions.RaiseError(
new Exception(
$"Unable to delete the entity {entityName} in {searchdomain} - it was not found under the specified name"
)
);
return Ok(new EntityDeleteResults() {Success = false, Message = "Entity not found"});
}
searchdomain_.ReconciliateOrInvalidateCacheForDeletedEntity(entity_);

View File

@@ -1,6 +1,3 @@
using AdaptiveExpressions;
using OllamaSharp;
using OllamaSharp.Models;
using Shared;
namespace Server;
@@ -80,6 +77,10 @@ public class Datapoint
}
}
}
if (toBeGenerated.Count == 0)
{
continue;
}
IEnumerable<float[]> generatedEmbeddings = GenerateEmbeddings([.. toBeGenerated], model, aIProvider, embeddingCache);
if (generatedEmbeddings.Count() != toBeGenerated.Count)
{

View File

@@ -1,4 +1,3 @@
using System.Configuration;
using System.Data.Common;
using System.Text;
using System.Text.Json;
@@ -40,6 +39,19 @@ public class DatabaseHelper(ILogger<DatabaseHelper> logger)
helper.ExecuteSQLNonQuery(query.ToString(), parameters);
}
/// <summary>
/// Inserts embeddings in bulk by running one INSERT ... SELECT per tuple in
/// <paramref name="data"/> through <see cref="SQLHelper.BulkExecuteNonQuery"/>.
/// </summary>
/// <param name="helper">SQL helper that executes the statements.</param>
/// <param name="data">One (hash, model, embedding) tuple per embedding to store.</param>
/// <returns>The value returned by <c>helper.BulkExecuteNonQuery</c>.</returns>
public static int DatabaseInsertEmbeddingBulk(SQLHelper helper, List<(string hash, string model, byte[] embedding)> data)
{
    // The target datapoint id is resolved inside the statement from the hash.
    // NOTE(review): the SELECT matches every datapoint row whose hash equals @hash;
    // if hashes are not unique in the datapoint table, one tuple inserts more than
    // one embedding row - confirm against the schema and the callers.
    const string insertSql = "INSERT INTO embedding (id_datapoint, model, embedding) SELECT d.id, @model, @embedding FROM datapoint d WHERE d.hash = @hash";

    // Lazily projected: one parameter array per tuple, built when the helper enumerates.
    IEnumerable<object[]> parameterSets =
        from row in data
        select new object[]
        {
            new MySqlParameter("@model", row.model),
            new MySqlParameter("@embedding", row.embedding),
            new MySqlParameter("@hash", row.hash)
        };

    return helper.BulkExecuteNonQuery(insertSql, parameterSets);
}
public static int DatabaseInsertSearchdomain(SQLHelper helper, string name, SearchdomainSettings settings = new())
{
Dictionary<string, dynamic> parameters = new()
@@ -72,6 +84,32 @@ public class DatabaseHelper(ILogger<DatabaseHelper> logger)
return helper.ExecuteSQLCommandGetInsertedID("INSERT INTO attribute (attribute, value, id_entity) VALUES (@attribute, @value, @id_entity)", parameters);
}
/// <summary>
/// Inserts attribute rows in bulk, one INSERT per tuple in <paramref name="values"/>,
/// through <see cref="SQLHelper.BulkExecuteNonQuery"/>.
/// </summary>
/// <param name="helper">SQL helper that executes the statements.</param>
/// <param name="values">One (attribute, value, id_entity) tuple per row to insert.</param>
/// <returns>The value returned by <c>helper.BulkExecuteNonQuery</c>.</returns>
public static int DatabaseInsertAttributes(SQLHelper helper, List<(string attribute, string value, int id_entity)> values)
{
    const string insertSql = "INSERT INTO attribute (attribute, value, id_entity) VALUES (@attribute, @value, @id_entity)";

    // Maps one tuple onto the named parameters used by insertSql.
    static object[] ToParameterSet((string attribute, string value, int id_entity) row)
    {
        return new object[]
        {
            new MySqlParameter("@attribute", row.attribute),
            new MySqlParameter("@value", row.value),
            new MySqlParameter("@id_entity", row.id_entity)
        };
    }

    IEnumerable<object[]> parameterSets = values.Select(ToParameterSet);
    return helper.BulkExecuteNonQuery(insertSql, parameterSets);
}
/// <summary>
/// Inserts datapoint rows in bulk for a single entity, one INSERT per tuple in
/// <paramref name="values"/>, through <see cref="SQLHelper.BulkExecuteNonQuery"/>.
/// </summary>
/// <param name="helper">SQL helper that executes the statements.</param>
/// <param name="values">One (name, probmethod_embedding, similarityMethod, hash) tuple per datapoint.</param>
/// <param name="id_entity">Entity id written to every inserted row.</param>
/// <returns>The value returned by <c>helper.BulkExecuteNonQuery</c>.</returns>
public static int DatabaseInsertDatapoints(SQLHelper helper, List<(string name, ProbMethodEnum probmethod_embedding, SimilarityMethodEnum similarityMethod, string hash)> values, int id_entity)
{
    const string insertSql = "INSERT INTO datapoint (name, probmethod_embedding, similaritymethod, hash, id_entity) VALUES (@name, @probmethod_embedding, @similaritymethod, @hash, @id_entity)";

    // NOTE(review): the two enum values are handed to MySqlParameter as-is (no ToString());
    // confirm the provider sends the representation the columns expect.
    IEnumerable<object[]> parameterSets = values.Select(row => new object[]
    {
        new MySqlParameter("@name", row.name),
        new MySqlParameter("@probmethod_embedding", row.probmethod_embedding),
        new MySqlParameter("@similaritymethod", row.similarityMethod),
        new MySqlParameter("@hash", row.hash),
        new MySqlParameter("@id_entity", id_entity)
    });

    return helper.BulkExecuteNonQuery(insertSql, parameterSets);
}
public static int DatabaseInsertDatapoint(SQLHelper helper, string name, ProbMethodEnum probmethod_embedding, SimilarityMethodEnum similarityMethod, string hash, int id_entity)
{
Dictionary<string, dynamic> parameters = new()
@@ -144,7 +182,7 @@ public class DatabaseHelper(ILogger<DatabaseHelper> logger)
helper.ExecuteSQLNonQuery("DELETE embedding.* FROM embedding JOIN datapoint dp ON id_datapoint = dp.id JOIN entity ON id_entity = entity.id WHERE entity.id_searchdomain = @searchdomain", parameters);
helper.ExecuteSQLNonQuery("DELETE datapoint.* FROM datapoint JOIN entity ON id_entity = entity.id WHERE entity.id_searchdomain = @searchdomain", parameters);
helper.ExecuteSQLNonQuery("DELETE attribute.* FROM attribute JOIN entity ON id_entity = entity.id WHERE entity.id_searchdomain = @searchdomain", parameters);
helper.ExecuteSQLNonQuery("DELETE FROM attribute WHERE id_entity IN (SELECT entity.id FROM entity WHERE id_searchdomain = @searchdomain)", parameters);
return helper.ExecuteSQLNonQuery("DELETE FROM entity WHERE entity.id_searchdomain = @searchdomain", parameters);
}

View File

@@ -80,6 +80,33 @@ public class SQLHelper:IDisposable
}
}
/// <summary>
/// Executes the same non-query statement once per parameter set, all inside a single
/// transaction on the shared connection, and commits after the last execution.
/// </summary>
/// <param name="sql">Command text that is executed for every parameter set.</param>
/// <param name="parameterSets">
/// Sequence of parameter arrays; each array replaces the command's parameters for one execution.
/// </param>
/// <returns>The sum of the row counts reported by the individual executions.</returns>
/// <exception cref="ArgumentNullException"><paramref name="parameterSets"/> is null.</exception>
public int BulkExecuteNonQuery(string sql, IEnumerable<object[]> parameterSets)
{
    // Fail fast on a missing sequence: without this guard the null would only surface
    // as a NullReferenceException after the lock is held and a transaction is open.
    ArgumentNullException.ThrowIfNull(parameterSets);

    lock (connection)
    {
        EnsureConnected();
        EnsureDbReaderIsClosed();

        // If an execution throws, Commit() is never reached and the using declarations
        // dispose the command and the transaction.
        // NOTE(review): presumably the provider rolls back an uncommitted transaction on
        // dispose - confirm for the connection type in use.
        using var transaction = connection.BeginTransaction();
        using var command = connection.CreateCommand();
        command.CommandText = sql;
        command.Transaction = transaction;

        int affectedRows = 0;
        foreach (var parameters in parameterSets)
        {
            // The command object is reused, so parameters from the previous iteration are dropped first.
            command.Parameters.Clear();
            command.Parameters.AddRange(parameters);
            affectedRows += command.ExecuteNonQuery();
        }

        transaction.Commit();
        return affectedRows;
    }
}
public bool EnsureConnected()
{
if (connection.State != System.Data.ConnectionState.Open)

View File

@@ -245,18 +245,29 @@ public class SearchdomainHelper(ILogger<SearchdomainHelper> logger, DatabaseHelp
else
{
int id_entity = DatabaseHelper.DatabaseInsertEntity(helper, jsonEntity.Name, jsonEntity.Probmethod, _databaseHelper.GetSearchdomainID(helper, jsonEntity.Searchdomain));
List<(string attribute, string value, int id_entity)> toBeInsertedAttributes = [];
foreach (KeyValuePair<string, string> attribute in jsonEntity.Attributes)
{
DatabaseHelper.DatabaseInsertAttribute(helper, attribute.Key, attribute.Value, id_entity); // TODO implement bulk insert to reduce number of queries
toBeInsertedAttributes.Add(new() {
attribute = attribute.Key,
value = attribute.Value,
id_entity = id_entity
});
}
DatabaseHelper.DatabaseInsertAttributes(helper, toBeInsertedAttributes);
List<Datapoint> datapoints = [];
List<(JSONDatapoint datapoint, string hash)> toBeInsertedDatapoints = [];
foreach (JSONDatapoint jsonDatapoint in jsonEntity.Datapoints)
{
string hash = Convert.ToBase64String(SHA256.HashData(Encoding.UTF8.GetBytes(jsonDatapoint.Text)));
Datapoint datapoint = DatabaseInsertDatapointWithEmbeddings(helper, searchdomain, jsonDatapoint, id_entity, hash);
datapoints.Add(datapoint);
toBeInsertedDatapoints.Add(new()
{
datapoint = jsonDatapoint,
hash = hash
});
}
List<Datapoint> datapoint = DatabaseInsertDatapointsWithEmbeddings(helper, searchdomain, toBeInsertedDatapoints, id_entity);
var probMethod = Probmethods.GetMethod(jsonEntity.Probmethod) ?? throw new ProbMethodNotFoundException(jsonEntity.Probmethod);
Entity entity = new(jsonEntity.Attributes, probMethod, jsonEntity.Probmethod.ToString(), datapoints, jsonEntity.Name)
@@ -270,6 +281,38 @@ public class SearchdomainHelper(ILogger<SearchdomainHelper> logger, DatabaseHelp
}
}
/// <summary>
/// Builds a <see cref="Datapoint"/> for every (JSON datapoint, hash) pair, then writes all
/// datapoint rows and all of their embeddings with one bulk call each.
/// </summary>
/// <param name="helper">SQL helper passed on to the bulk insert methods.</param>
/// <param name="searchdomain">Searchdomain passed on to <c>BuildDatapointFromJsonDatapoint</c>.</param>
/// <param name="values">The JSON datapoints to insert, each paired with its hash.</param>
/// <param name="id_entity">Id of the entity the datapoints are inserted for.</param>
/// <returns>The built datapoints, in the order of <paramref name="values"/>.</returns>
public List<Datapoint> DatabaseInsertDatapointsWithEmbeddings(SQLHelper helper, Searchdomain searchdomain, List<(JSONDatapoint datapoint, string hash)> values, int id_entity)
{
    List<Datapoint> builtDatapoints = [];
    List<(string name, ProbMethodEnum probmethod_embedding, SimilarityMethodEnum similarityMethod, string hash)> datapointRows = [];
    List<(string hash, string model, byte[] embedding)> embeddingRows = [];

    foreach ((JSONDatapoint jsonDatapoint, string hash) in values)
    {
        Datapoint built = BuildDatapointFromJsonDatapoint(jsonDatapoint, id_entity, searchdomain, hash);

        datapointRows.Add((
            built.name,
            built.probMethod.probMethodEnum,
            built.similarityMethod.similarityMethodEnum,
            hash));

        // Embedding rows carry only the hash as their link to the datapoint row.
        // NOTE(review): confirm the bulk embedding insert resolves a hash to exactly one datapoint.
        foreach ((string model, float[] vector) pair in built.embeddings)
        {
            embeddingRows.Add((hash, pair.model, BytesFromFloatArray(pair.vector)));
        }

        builtDatapoints.Add(built);
    }

    // Datapoints are written before embeddings.
    DatabaseHelper.DatabaseInsertDatapoints(helper, datapointRows, id_entity);
    DatabaseHelper.DatabaseInsertEmbeddingBulk(helper, embeddingRows);
    return builtDatapoints;
}
public Datapoint DatabaseInsertDatapointWithEmbeddings(SQLHelper helper, Searchdomain searchdomain, JSONDatapoint jsonDatapoint, int id_entity, string? hash = null)
{
if (jsonDatapoint.Text is null)

View File

@@ -35,6 +35,12 @@ EmbeddingSearchOptions configuration = configurationSection.Get<EmbeddingSearchO
builder.Services.Configure<EmbeddingSearchOptions>(configurationSection);
builder.Services.Configure<ApiKeyOptions>(configurationSection);
// Configure Kestrel: limit the accepted request body size. The configured
// MaxRequestBodySize wins when it is set; otherwise fall back to 50 MiB.
builder.WebHost.ConfigureKestrel(options =>
    options.Limits.MaxRequestBodySize = configuration.MaxRequestBodySize ?? 50 * 1024 * 1024);
// Migrate database
var helper = new SQLHelper(new MySql.Data.MySqlClient.MySqlConnection(configuration.ConnectionStrings.SQL), configuration.ConnectionStrings.SQL);
DatabaseMigrations.Migrate(helper);

View File

@@ -219,7 +219,7 @@ public class Searchdomain
/// <summary>
/// Recomputes <c>modelsInUse</c> by passing a copy of <c>entityCache</c> to <c>GetModels</c>.
/// </summary>
public void UpdateModelsInUse()
{
// NOTE(review): the two assignments below differ only in how entityCache is copied
// (collection expression vs. ToList()), and the second overwrites the first. This looks
// like the before/after pair of a diff hunk - confirm which single line is intended.
modelsInUse = GetModels([.. entityCache]);
modelsInUse = GetModels(entityCache.ToList());
}
private static float EvaluateEntityAgainstQueryEmbeddings(Entity entity, Dictionary<string, float[]> queryEmbeddings)

View File

@@ -16,5 +16,8 @@
"Application": "Embeddingsearch.Server"
}
},
"Embeddingsearch": {
"MaxRequestBodySize": 524288000
},
"AllowedHosts": "*"
}