[go: nahoru, domu]

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CHECK-2179 initial push on using context in text like other media #249

Merged
merged 3 commits into from
Aug 11, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions app/main/lib/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ def truncate_query(query, clause_count):
else:
return None

def merge_contexts(body, found_doc):
    """Merge the contexts of an already-stored document into ``body``.

    Args:
        body: Document about to be written. If it has no non-empty
            ``contexts`` list, one is seeded from its ``context`` key (when
            that key is present). The dict is mutated in place.
        found_doc: Existing document; its stored contexts are read from
            ``found_doc["_source"]["contexts"]``.

    Returns:
        The same ``body`` object, whose ``contexts`` list now also holds
        every stored context that was not already in it.
    """
    if not body.get("contexts"):
        # A body may arrive without any context at all; start from an empty
        # list instead of raising KeyError on the missing "context" key.
        body["contexts"] = [body["context"]] if "context" in body else []
    for context in found_doc["_source"].get("contexts", []):
        # Contexts are compared by equality, so no duplicates are appended.
        if context not in body["contexts"]:
            body["contexts"].append(context)
    return body

def store_document(body, doc_id):
es = Elasticsearch(app.config['ELASTICSEARCH_URL'])
if doc_id:
Expand All @@ -70,7 +78,7 @@ def store_document(body, doc_id):
if found_doc:
result = es.update(
id=doc_id,
body={"doc": body},
body={"doc": merge_contexts(body, found_doc)},
index=app.config['ELASTICSEARCH_SIMILARITY']
)
else:
Expand All @@ -92,10 +100,26 @@ def store_document(body, doc_id):
'success': success
}

def delete_document(doc_id, quiet):
def delete_context_from_found_doc(context, found_doc, doc_id):
    """Drop one context from a stored document and persist what remains.

    Args:
        context: The context entry to remove; entries are compared with ``!=``.
        found_doc: Mapping holding the document's current ``contexts`` list.
            It is mutated in place and sent as the partial-update payload.
        doc_id: Id of the Elasticsearch document to update.

    Returns:
        The response of the Elasticsearch ``update`` call.
    """
    # NOTE(review): merge_contexts reads contexts from found_doc["_source"],
    # whereas this helper reads the top level of found_doc -- confirm which
    # shape callers actually pass in.
    found_doc["contexts"] = [row for row in found_doc.get("contexts", []) if context != row]
    # Build the client locally, as the sibling functions do; `es` was
    # referenced here without being defined in this function.
    es = Elasticsearch(app.config['ELASTICSEARCH_URL'])
    return es.update(
        id=doc_id,
        body={"doc": found_doc},
        index=app.config['ELASTICSEARCH_SIMILARITY']
    )

def delete_document(doc_id, context, quiet):
es = Elasticsearch(app.config['ELASTICSEARCH_URL'])
try:
return es.delete(index=app.config['ELASTICSEARCH_SIMILARITY'], id=doc_id)
found_doc = es.get(index=app.config['ELASTICSEARCH_SIMILARITY'], id=doc_id)
except elasticsearch.exceptions.NotFoundError:
found_doc = None
try:
if found_doc and context in found_doc.get("contexts", []) and len(found_doc.get("contexts", [])) > 1:
return delete_context_from_found_doc(context, found_doc, doc_id)
else:
return es.delete(index=app.config['ELASTICSEARCH_SIMILARITY'], id=doc_id)
except:
if quiet:
return {
Expand Down
2 changes: 1 addition & 1 deletion app/main/lib/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def delete_item(item, similarity_type):
elif similarity_type == "image":
response = delete_image(item)
elif similarity_type == "text":
response = delete_text(item.get("doc_id"), item.get("quiet", False))
response = delete_text(item.get("doc_id"), item.get("context", {}), item.get("quiet", False))
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for delete was {response}")
return response

Expand Down
12 changes: 12 additions & 0 deletions app/main/lib/similarity_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@

from app.main import db

def drop_context_from_text_record(record, context):
    """Remove ``context`` from a text record's contexts and commit the change.

    Args:
        record: Mapping-style record; its ``contexts`` entry is replaced by
            the list of entries that are not equal to ``context``.
        context: The context entry to remove.

    Returns:
        True once the session commit has succeeded.

    Raises:
        Exception: Whatever the commit raised, re-raised after the session
            has been rolled back.
    """
    remaining = []
    for entry in record.get("contexts", []):
        if context != entry:
            remaining.append(entry)
    record["contexts"] = remaining
    db.session.add(record)
    try:
        db.session.commit()
    except Exception as error:
        # Undo the pending change before surfacing the failure to the caller.
        db.session.rollback()
        raise error
    return True

def drop_context_from_record(record, context):
deleted = False
record.context = [row for row in record.context if context != row]
Expand Down
7 changes: 5 additions & 2 deletions app/main/lib/text_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
from app.main.lib.elasticsearch import language_to_analyzer, generate_matches, truncate_query, store_document, delete_document
from app.main.lib.shared_models.shared_model import SharedModel
ELASTICSEARCH_DEFAULT_LIMIT = 10000
def delete_text(doc_id, quiet):
return delete_document(doc_id, quiet)
def delete_text(doc_id, context, quiet):
    """Delete a text document, or one of its contexts, via ``delete_document``.

    Args:
        doc_id: Id of the document to act on.
        context: Context passed through to ``delete_document``.
        quiet: Flag passed through to ``delete_document`` unchanged.

    Returns:
        Whatever ``delete_document`` returns.
    """
    outcome = delete_document(doc_id, context, quiet)
    return outcome

def get_document_body(body):
for model_key in body.pop("models", []):
body['model_'+model_key] = 1
context = body.get("context", {})
if context:
body["contexts"] = [context]
if model_key != 'elasticsearch':
model = SharedModel.get_client(model_key)
vector = model.get_shared_model_response(body['content'])
Expand Down
23 changes: 21 additions & 2 deletions app/test/test_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ def test_elasticsearch_delete_200(self):
with self.client:
delete_response = self.client.delete(
'/text/similarity/',
data=json.dumps({"doc_id": "abcdef", "quiet": True}),
data=json.dumps({"doc_id": "abcdef", "quiet": True, 'context': { 'dbid': 54 }}),
content_type='application/json'
)
result = json.loads(delete_response.data.decode())
Expand All @@ -361,7 +361,26 @@ def test_elasticsearch_delete_text(self):
doc = [e for e in results["hits"]["hits"] if e["_source"]['content'] == term['text']][0]
delete_response = self.client.delete(
'/text/similarity/',
data=json.dumps({"doc_id": doc["_id"]}),
data=json.dumps({"doc_id": doc["_id"], 'context': { 'dbid': 54 }}),
content_type='application/json'
)
result = json.loads(delete_response.data.decode())
self.assertEqual('deleted', result['result'])
with self.client:
term = { 'doc_id': '123', 'text': 'how to slice a banana', 'model': 'elasticsearch', 'context': { 'dbid': 54 } }
post_response = self.client.post('/text/similarity/', data=json.dumps(term), content_type='application/json')
term = { 'doc_id': '123', 'text': 'how to slice a banana', 'model': 'elasticsearch', 'context': { 'dbid': 55 } }
post_response = self.client.post('/text/similarity/', data=json.dumps(term), content_type='application/json')
es = Elasticsearch(app.config['ELASTICSEARCH_URL'])
es.indices.refresh(index=app.config['ELASTICSEARCH_SIMILARITY'])
result = json.loads(post_response.data.decode())
self.assertEqual(True, result['success'])
es = Elasticsearch(app.config['ELASTICSEARCH_URL'])
results = es.search(body={"query": {"match_all": {}}},index=app.config['ELASTICSEARCH_SIMILARITY'])
doc = [e for e in results["hits"]["hits"] if e["_source"]['content'] == term['text']][0]
delete_response = self.client.delete(
'/text/similarity/',
data=json.dumps({"doc_id": doc["_id"], 'context': { 'dbid': 54 }}),
content_type='application/json'
)
result = json.loads(delete_response.data.decode())
Expand Down