Added SimilarityMethod to datapoint; Added euclidian distance, manhattan distance, pearson correlation; improved CosineSimilarity result using a remap
This commit is contained in:
@@ -48,6 +48,7 @@ public static class SearchdomainHelper
|
||||
return null;
|
||||
}
|
||||
|
||||
// toBeCached: model -> [datapoint.text * n]
|
||||
Dictionary<string, List<string>> toBeCached = [];
|
||||
foreach (JSONEntity jSONEntity in jsonEntities)
|
||||
{
|
||||
@@ -79,7 +80,7 @@ public static class SearchdomainHelper
|
||||
|
||||
public static Entity? EntityFromJSON(List<Entity> entityCache, Dictionary<string, Dictionary<string, float[]>> embeddingCache, AIProvider aIProvider, SQLHelper helper, ILogger logger, JSONEntity jsonEntity) //string json)
|
||||
{
|
||||
Dictionary<string, Dictionary<string, float[]>> embeddingsLUT = [];
|
||||
Dictionary<string, Dictionary<string, float[]>> embeddingsLUT = []; // embeddingsLUT: hash -> model -> [embeddingValues * n]
|
||||
int? preexistingEntityID = DatabaseHelper.GetEntityID(helper, jsonEntity.Name, jsonEntity.Searchdomain);
|
||||
if (preexistingEntityID is not null)
|
||||
{
|
||||
@@ -139,9 +140,10 @@ public static class SearchdomainHelper
|
||||
{
|
||||
embeddings = Datapoint.GenerateEmbeddings(jsonDatapoint.Text, [.. jsonDatapoint.Model], aIProvider, embeddingCache);
|
||||
}
|
||||
var probMethod_embedding = new ProbMethod(jsonDatapoint.Probmethod_embedding, logger) ?? throw new Exception($"Unknown probmethod name {jsonDatapoint.Probmethod_embedding}");
|
||||
Datapoint datapoint = new(jsonDatapoint.Name, probMethod_embedding, hash, [.. embeddings.Select(kv => (kv.Key, kv.Value))]);
|
||||
int id_datapoint = DatabaseHelper.DatabaseInsertDatapoint(helper, jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, hash, id_entity); // TODO make this a bulk add action to reduce number of queries
|
||||
var probMethod_embedding = new ProbMethod(jsonDatapoint.Probmethod_embedding, logger) ?? throw new Exception($"Unknown probMethod name {jsonDatapoint.Probmethod_embedding}");
|
||||
var similarityMethod = new SimilarityMethod(jsonDatapoint.SimilarityMethod, logger) ?? throw new Exception($"Unknown similarityMethod name {jsonDatapoint.SimilarityMethod}");
|
||||
Datapoint datapoint = new(jsonDatapoint.Name, probMethod_embedding, similarityMethod, hash, [.. embeddings.Select(kv => (kv.Key, kv.Value))]);
|
||||
int id_datapoint = DatabaseHelper.DatabaseInsertDatapoint(helper, jsonDatapoint.Name, jsonDatapoint.Probmethod_embedding, jsonDatapoint.SimilarityMethod, hash, id_entity); // TODO make this a bulk add action to reduce number of queries
|
||||
List<(string model, byte[] embedding)> data = [];
|
||||
foreach ((string, float[]) embedding in datapoint.embeddings)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user