From 6e0d82ad3ce1e53a05f1d6118e9eb5147db45de5 Mon Sep 17 00:00:00 2001 From: EzFeDezy Date: Wed, 23 Apr 2025 23:44:23 +0200 Subject: [PATCH] Fixed malformed JSON, added To-do --- README.md | 71 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index bb70ef7..d1b7f1e 100644 --- a/README.md +++ b/README.md @@ -71,45 +71,47 @@ Deletes a searchdomain and its corresponding entites. ## Entity ### Create / Index entity -`src/cli/bin/Debug/net8.0/cli -h $mysql_ip -p $mysql_port -U $mysql_username -P $mysql_password --entity --index -o $ollama_URL -s $searchdomain_name -e $entity_as_JSON` -Creates the entity using the json string as specified under $entity_as_JSON +`src/cli/bin/Debug/net8.0/cli -h $mysql_ip -p $mysql_port -U $mysql_username -P $mysql_password --entity --index -o $ollama_URL -s $searchdomain_name -e $entities_as_JSON` +Creates the entities using the json string as specified under $entities_as_JSON Example: -- Linux: `src/cli/bin/Debug/net8.0/cli -h $mysql_ip -p $mysql_port -U $mysql_username -P $mysql_password --entity --index -o $ollama_URL -s $searchdomain_name -e '{"name": "myfile.txt", "probmethod": "weighted_average", "searchdomain": "mysearchdomain", "attributes": {"mimetype": "text-plain"}, "datapoints": [{"name": "text", "text": "this is the full text", "probmethod_embedding": "weighted_average", "model": ["bge-m3", "nomic-embed-text", "paraphrase-multilingual"]}, {"name": "filepath", "text": "/home/myuser/myfile.txt", "probmethod_embedding": "weighted_average", "model": ["bge-m3", "nomic-embed-text", "paraphrase-multilingual"]}]}'` -- Powershell: `src/cli/bin/Debug/net8.0/cli -h $mysql_ip -p $mysql_port -U $mysql_username -P $mysql_password --entity --index -o $ollama_URL -s $searchdomain_name -e '{\"name\": \"myfile.txt\", \"probmethod\": \"weighted_average\", \"searchdomain\": \"mysearchdomain\", \"attributes\": {\"mimetype\": \"text-plain\"}, \"datapoints\": [{\"name\": \"text\", \"text\": \"this is the full text\", \"probmethod_embedding\": \"weighted_average\", \"model\": [\"bge-m3\", \"nomic-embed-text\", \"paraphrase-multilingual\"]}, {\"name\": \"filepath\", \"text\": \"\/home\/myuser\/myfile.txt\", \"probmethod_embedding\": \"weighted_average\", \"model\": [\"bge-m3\", \"nomic-embed-text\", \"paraphrase-multilingual\"]}]}'` +- Linux: `src/cli/bin/Debug/net8.0/cli -h $mysql_ip -p $mysql_port -U $mysql_username -P $mysql_password --entity --index -o $ollama_URL -s $searchdomain_name -e '[{"name": "myfile.txt", "probmethod": "weighted_average", "searchdomain": "mysearchdomain", "attributes": {"mimetype": "text-plain"}, "datapoints": [{"name": "text", "text": "this is the full text", "probmethod_embedding": "weighted_average", "model": ["bge-m3", "nomic-embed-text", "paraphrase-multilingual"]}, {"name": "filepath", "text": "/home/myuser/myfile.txt", "probmethod_embedding": "weighted_average", "model": ["bge-m3", "nomic-embed-text", "paraphrase-multilingual"]}]}]'` +- Powershell: `src/cli/bin/Debug/net8.0/cli -h $mysql_ip -p $mysql_port -U $mysql_username -P $mysql_password --entity --index -o $ollama_URL -s $searchdomain_name -e '[{\"name\": \"myfile.txt\", \"probmethod\": \"weighted_average\", \"searchdomain\": \"mysearchdomain\", \"attributes\": {\"mimetype\": \"text-plain\"}, \"datapoints\": [{\"name\": \"text\", \"text\": \"this is the full text\", \"probmethod_embedding\": \"weighted_average\", \"model\": [\"bge-m3\", \"nomic-embed-text\", \"paraphrase-multilingual\"]}, {\"name\": \"filepath\", \"text\": \"\/home\/myuser\/myfile.txt\", \"probmethod_embedding\": \"weighted_average\", \"model\": [\"bge-m3\", \"nomic-embed-text\", \"paraphrase-multilingual\"]}]}]'` Only the json: ```json -{ - "name": "myfile.txt", - "probmethod": "weighted_average", - "searchdomain": "mysearchdomain", - "attributes": { - "mimetype": "text-plain" - }, - "datapoints": [ - { - "name": "text", - "text": "this is the full text", - "probmethod_embedding": "weighted_average", - "model": [ - "bge-m3", - "nomic-embed-text", - "paraphrase-multilingual" - ] +[ + { + "name": "myfile.txt", + "probmethod": "weighted_average", + "searchdomain": "mysearchdomain", + "attributes": { + "mimetype": "text-plain" }, - { - "name": "filepath", - "text": "/home/myuser/myfile.txt", - "probmethod_embedding": "weighted_average", - "model": [ - "bge-m3", - "nomic-embed-text", - "paraphrase-multilingual" - ] - } - ] -} + "datapoints": [ + { + "name": "text", + "text": "this is the full text", + "probmethod_embedding": "weighted_average", + "model": [ + "bge-m3", + "nomic-embed-text", + "paraphrase-multilingual" + ] + }, + { + "name": "filepath", + "text": "/home/myuser/myfile.txt", + "probmethod_embedding": "weighted_average", + "model": [ + "bge-m3", + "nomic-embed-text", + "paraphrase-multilingual" + ] + } + ] + } +] ``` ### Evaluate query (i.e. "search"; that what you're here for) `src/cli/bin/Debug/net8.0/cli -h $mysql_ip -p $mysql_port -U $mysql_username -P $mysql_password --entity --evaluate -o $ollama_URL -s $searchdomain_name -q $query_string [-n $max_results]` @@ -134,7 +136,8 @@ Deletes the entity specified by `$entity_name`. | Unhandled exception. MySql.Data.MySqlClient.MySqlException (0x80004005): Authentication to host 'localhost' for user 'embeddingsearch' using method 'caching_sha2_password' failed with message: Access denied for user 'embeddingsearch'@'localhost' (using password: YES) | TBD | # To-do -- Implement environment variable use +- Add "Click-Through" result evaluation (For each entity: store a list of queries that led to the entity being chosen by the user. Then at query-time choose the best-fitting entry and maybe use it as another datapoint? Or use a separate weight function?) +- Implement environment variable use in CLI - fix the `--help` functionality - Rename `cli` to something unique but still short, e.g. `escli`? - Improve error messaging for when retrieving a searchdomain fails.