From 5bccc8c63dd0458e56e6ac1461fb83690ef5257e Mon Sep 17 00:00:00 2001 From: Adam Soos Date: Fri, 5 Apr 2024 17:12:37 +0200 Subject: [PATCH 01/11] WS-3151: fix description --- examples/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/events.py b/examples/events.py index 7e5147a..828dd45 100644 --- a/examples/events.py +++ b/examples/events.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -Example code to call Rosette API to get entities from a piece of text. +Example code to call Rosette API to get events from a piece of text. """ import argparse From 0c93c5b2c7c3f4c0decdc0e1a6d17468538d086c Mon Sep 17 00:00:00 2001 From: Adam Soos Date: Fri, 5 Apr 2024 17:14:02 +0200 Subject: [PATCH 02/11] WS-3151: add coref option example --- examples/entities.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/entities.py b/examples/entities.py index e6e2a50..30a2ea5 100644 --- a/examples/entities.py +++ b/examples/entities.py @@ -22,6 +22,7 @@ def run(key, alt_url='https://api.rosette.com/rest/v1/'): # api.set_option('calculateSalience','true') # api.set_option('linkEntities','false') + # api.set_option('useIndocServer', True) entities_text_data = "The Securities and Exchange Commission today announced the leadership of the agency’s trial unit. Bridget Fitzpatrick has been named Chief Litigation Counsel of the SEC and David Gottesman will continue to serve as the agency’s Deputy Chief Litigation Counsel. Since December 2016, Ms. Fitzpatrick and Mr. Gottesman have served as Co-Acting Chief Litigation Counsel. In that role, they were jointly responsible for supervising the trial unit at the agency’s Washington D.C. headquarters as well as coordinating with litigators in the SEC’s 11 regional offices around the country." 
params = DocumentParameters() From c9c01338f3f233b45acb093e6274d429b1c8b1ef Mon Sep 17 00:00:00 2001 From: Adam Soos Date: Fri, 5 Apr 2024 17:15:26 +0200 Subject: [PATCH 03/11] WS-3151: add record similarity top level object and example --- examples/record_similarity.py | 112 ++++++++++++++++++++++++++++++++++ rosette/api.py | 33 +++++++++- 2 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 examples/record_similarity.py diff --git a/examples/record_similarity.py b/examples/record_similarity.py new file mode 100644 index 0000000..454aacd --- /dev/null +++ b/examples/record_similarity.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Rosette API to get similarity score between a list of records +""" + +import argparse +import json +import os + +from rosette.api import API, RecordSimilarityParameters, RosetteException + + +def run(key, alt_url='https://api.rosette.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + fields = { + "primaryName": { + "type": "rni_name", + "weight": 0.5 + }, + "dob": { + "type": "rni_date", + "weight": 0.2 + }, + "addr": { + "type": "rni_address", + "weight": 0.5 + }, + "dob2": { + "type": "rni_date", + "weight": 0.1 + } + } + properties = { + "threshold": 0.1, + "includeExplainInfo": False + } + records = { + "left": [ + { + "primaryName": { + "text": "Ethan R", + "entityType": "PERSON", + "language": "eng", + "languageOfOrigin": "eng", + "script": "Latn" + }, + "dob": "1993-04-16", + "addr": "123 Roadlane Ave", + "dob2": { + "date": "1993/04/16" + } + }, + { + "dob": { + "date": "1993-04-16" + }, + "primaryName": { + "text": "Evan R" + } + } + ], + "right": [ + { + "dob": { + "date": "1993-04-16" + }, + "primaryName": { + "text": "Seth R", + "language": "eng" + } + }, + { + "primaryName": "Ivan R", + "dob": { + "date": "1993-04-16" + }, + "addr": { + "address": "123 Roadlane Ave" + }, + "dob2": { + "date": "1993/04/16" + } + } + ] 
+ } + params = RecordSimilarityParameters() + params["fields"] = fields + params["properties"] = properties + params["records"] = records + + #params["parameters"] = {"conflictScore": "0.9", "deletionScore": "0.2"} + + try: + return api.record_similarity(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Rosette API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://api.rosette.com/rest/v1/') + +if __name__ == '__main__': + # ARGS = PARSER.parse_args() + RESULT = run("key", "http://localhost:8181/rest/v1/") + print(RESULT) diff --git a/rosette/api.py b/rosette/api.py index f3fec57..cb98445 100644 --- a/rosette/api.py +++ b/rosette/api.py @@ -344,6 +344,27 @@ def validate(self): "Required Name De-Duplication parameter is missing: names", repr("names")) +class RecordSimilarityParameters(_RequestParametersBase): + """Parameter object for C{record-similarity} endpoint. + Required: + C{records} A list of C{record} objects + C{properties} A C{property} object + C{fields} A dictionary of C{field} objects + """ + + def __init__(self): + self.use_multipart = False + _RequestParametersBase.__init__(self, ("fields", "properties", "records")) + + def validate(self): + """Internal. 
Do not use.""" + for option in "fields", "properties", "records": # required + if self[option] is None: + raise RosetteException( + "missingParameter", + "Required Name Similarity parameter is missing: " + option, + repr(option)) + class EndpointCaller(object): """L{EndpointCaller} objects are invoked via their instance methods to obtain results @@ -592,7 +613,8 @@ def __init__( 'TOKENS': 'tokens', 'TOPICS': 'topics', 'TRANSLITERATION': 'transliteration', - 'EVENTS': 'events' + 'EVENTS': 'events', + 'RECORD_SIMILARITY': 'record-similarity' } def __del__(self): @@ -966,6 +988,15 @@ def name_deduplication(self, parameters): @return: A python dictionary containing the results of de-duplication""" return EndpointCaller(self, self.endpoints['NAME_DEDUPLICATION']).call(parameters, NameDeduplicationParameters) + def record_similarity(self, parameters): + """ + Create an L{EndpointCaller} to get similarity score between a list of records and call it. + @param parameters: An object specifying the data, + and possible metadata, to be processed by the record matcher. 
+ @type parameters: L{RecordSimilarityParameters} + @return: A python dictionary containing the results of record matching.""" + return EndpointCaller(self, self.endpoints['RECORD_SIMILARITY']).call(parameters, RecordSimilarityParameters) + def text_embedding(self, parameters): """ deprecated Create an L{EndpointCaller} to identify text vectors found in the texts From b1f50fb9f7c14609900b71e02e4346e3b7f4a034 Mon Sep 17 00:00:00 2001 From: Adam Soos Date: Mon, 8 Apr 2024 11:55:50 +0200 Subject: [PATCH 04/11] WS-3151: unit tests --- examples/record_similarity.py | 4 +-- rosette/api.py | 3 +- tests/test_rosette_api.py | 62 +++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/examples/record_similarity.py b/examples/record_similarity.py index 454aacd..42d59ea 100644 --- a/examples/record_similarity.py +++ b/examples/record_similarity.py @@ -34,8 +34,8 @@ def run(key, alt_url='https://api.rosette.com/rest/v1/'): } } properties = { - "threshold": 0.1, - "includeExplainInfo": False + "threshold": 0.7, + "includeExplainInfo": True } records = { "left": [ diff --git a/rosette/api.py b/rosette/api.py index cb98445..32e8c99 100644 --- a/rosette/api.py +++ b/rosette/api.py @@ -344,6 +344,7 @@ def validate(self): "Required Name De-Duplication parameter is missing: names", repr("names")) + class RecordSimilarityParameters(_RequestParametersBase): """Parameter object for C{record-similarity} endpoint. 
Required: @@ -362,7 +363,7 @@ def validate(self): if self[option] is None: raise RosetteException( "missingParameter", - "Required Name Similarity parameter is missing: " + option, + "Required Record Similarity parameter is missing: " + option, repr(option)) diff --git a/tests/test_rosette_api.py b/tests/test_rosette_api.py index c05ea05..9b75d20 100644 --- a/tests/test_rosette_api.py +++ b/tests/test_rosette_api.py @@ -29,6 +29,7 @@ NameTranslationParameters, NameSimilarityParameters, NameDeduplicationParameters, + RecordSimilarityParameters, RosetteException) _ISPY3 = sys.version_info[0] == 3 @@ -484,6 +485,10 @@ def test_the_name_requests_with_text(api, json_response): result = api.address_similarity("should fail") assert e_rosette.value.status == 'incompatible' + with pytest.raises(RosetteException) as e_rosette: + result = api.record_similarity("should fail") + assert e_rosette.value.status == 'incompatible' + httpretty.disable() httpretty.reset() @@ -958,4 +963,61 @@ def test_the_events_endpoint(api, json_response, doc_params): result = api.events(doc_params) assert result["name"] == "Rosette" httpretty.disable() + httpretty.reset() + +# Test the record similarity endpoint + + +def test_the_record_similarity_endpoint(api, json_response): + """Test the record similarity endpoint""" + httpretty.enable() + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/record-similarity", + body=json_response, status=200, content_type="application/json") + + params = RecordSimilarityParameters() + params["fields"] = {} + params["properties"] = {} + params["records"] = [] + result = api.record_similarity(params) + assert result["name"] == "Rosette" + httpretty.disable() + httpretty.reset() + + +# Tests for required record-similarities parameters +def test_for_record_similarity_required_parameters(api, json_response): + """Test record similarity parameters""" + httpretty.enable() + httpretty.register_uri(httpretty.POST, 
"https://api.rosette.com/rest/v1/record-similarity", + body=json_response, status=200, content_type="application/json") + + params = RecordSimilarityParameters() + + with pytest.raises(RosetteException) as e_rosette: + api.record_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert e_rosette.value.message == 'Required Record Similarity parameter is missing: fields' + + params["fields"] = {} + + with pytest.raises(RosetteException) as e_rosette: + api.record_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert e_rosette.value.message == 'Required Record Similarity parameter is missing: properties' + + params["properties"] = {} + + with pytest.raises(RosetteException) as e_rosette: + api.record_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert e_rosette.value.message == 'Required Record Similarity parameter is missing: records' + + params["records"] = [] + + result = api.record_similarity(params) + assert result["name"] == "Rosette" + httpretty.disable() httpretty.reset() \ No newline at end of file From f7c6b75aa4b8729431fadb567561b7908782aa82 Mon Sep 17 00:00:00 2001 From: Adam Soos Date: Mon, 8 Apr 2024 12:23:09 +0200 Subject: [PATCH 05/11] WS-3151: copyright years and version number --- docs/source/conf.py | 6 +++--- rosette/__init__.py | 2 +- rosette/api.py | 4 ++-- tests/test_rosette_api.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index c95582f..2ec8928 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -47,7 +47,7 @@ # General information about the project. project = '' -copyright = '2022, Basis Technology' +copyright = '2024, Basis Technology' author = 'Basis Technology' # The version info for the project you're documenting, acts as replacement for @@ -55,9 +55,9 @@ # built documents. # # The short X.Y version. 
-version = '1.28.0' +version = '1.29.0' # The full version, including alpha/beta/rc tags. -release = '1.28.0' +release = '1.29.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/rosette/__init__.py b/rosette/__init__.py index 72eac6f..8944820 100644 --- a/rosette/__init__.py +++ b/rosette/__init__.py @@ -12,4 +12,4 @@ limitations under the License. """ -__version__ = '1.28.0' +__version__ = '1.29.0' diff --git a/rosette/api.py b/rosette/api.py index 32e8c99..199e405 100644 --- a/rosette/api.py +++ b/rosette/api.py @@ -3,7 +3,7 @@ """ Python client for the Rosette API. -Copyright (c) 2014-2022 Basis Technology Corporation. +Copyright (c) 2014-2024 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ _APPLICATION_JSON = 'application/json' _BINDING_LANGUAGE = 'python' -_BINDING_VERSION = '1.28.0' +_BINDING_VERSION = '1.29.0' _CONCURRENCY_HEADER = 'x-rosetteapi-concurrency' _CUSTOM_HEADER_PREFIX = 'X-RosetteAPI-' _CUSTOM_HEADER_PATTERN = re.compile('^' + _CUSTOM_HEADER_PREFIX) diff --git a/tests/test_rosette_api.py b/tests/test_rosette_api.py index 9b75d20..88e58a2 100644 --- a/tests/test_rosette_api.py +++ b/tests/test_rosette_api.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -Copyright (c) 2014-2022 Basis Technology Corporation. +Copyright (c) 2014-2024 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
From df50abcdde2388d1fd03fc130a641528f146031d Mon Sep 17 00:00:00 2001 From: Adam Soos Date: Mon, 8 Apr 2024 14:48:27 +0200 Subject: [PATCH 06/11] WS-3151: change jenkins cleanup to uid guid --- CI.Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CI.Jenkinsfile b/CI.Jenkinsfile index 28697c2..811c36a 100644 --- a/CI.Jenkinsfile +++ b/CI.Jenkinsfile @@ -37,7 +37,7 @@ def runSonnarForPythonVersion(sourceDir, ver){ ${sonarExec} && \ echo && \ echo [INFO] Re-permission files for cleanup. && \ - chown -R jenkins:jenkins /source\"" + chown -R 9960:9960 /source\"" } node ("docker-light") { From 35bb8bb4db5bf7920ff1dc7c71cba84db2666528 Mon Sep 17 00:00:00 2001 From: Adam Soos Date: Thu, 11 Apr 2024 17:09:43 +0200 Subject: [PATCH 07/11] WS-3151: fix description, fix example --- examples/record_similarity.py | 6 ++---- rosette/api.py | 7 ++++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/record_similarity.py b/examples/record_similarity.py index 42d59ea..2ae4c1e 100644 --- a/examples/record_similarity.py +++ b/examples/record_similarity.py @@ -91,8 +91,6 @@ def run(key, alt_url='https://api.rosette.com/rest/v1/'): params["properties"] = properties params["records"] = records - #params["parameters"] = {"conflictScore": "0.9", "deletionScore": "0.2"} - try: return api.record_similarity(params) except RosetteException as exception: @@ -107,6 +105,6 @@ def run(key, alt_url='https://api.rosette.com/rest/v1/'): default='https://api.rosette.com/rest/v1/') if __name__ == '__main__': - # ARGS = PARSER.parse_args() - RESULT = run("key", "http://localhost:8181/rest/v1/") + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) print(RESULT) diff --git a/rosette/api.py b/rosette/api.py index 199e405..39f3467 100644 --- a/rosette/api.py +++ b/rosette/api.py @@ -348,9 +348,10 @@ def validate(self): class RecordSimilarityParameters(_RequestParametersBase): """Parameter object for C{record-similarity} endpoint. 
Required: - C{records} A list of C{record} objects - C{properties} A C{property} object - C{fields} A dictionary of C{field} objects + C{records} The records to be compared; where each left record is compared to the associated right record. + C{properties} Parameters used in the call + C{fields} The definition of the fields used in the comparison. There must be a minimum of 1 field and + can have a maximum of 5 fields. """ def __init__(self): From e62c1c261843ad570d85021ee40488976425fded Mon Sep 17 00:00:00 2001 From: Seth Gransky Date: Wed, 17 Apr 2024 17:52:47 -0500 Subject: [PATCH 08/11] WS-3151: README/branding updates. Events example string. --- README.md | 18 ++++++------------ examples/events.py | 2 +- setup.py | 4 ++-- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index fc7daf3..ceb1212 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,10 @@ - - ---- + +# Rosette by Babel Street [![PyPI version](https://badge.fury.io/py/rosette-api.svg)](https://badge.fury.io/py/rosette-api) [![Python Versions](https://img.shields.io/pypi/pyversions/rosette-api.svg?color=dark%20green&label=Python%20Versions)](https://img.shields.io/pypi/pyversions/rosette-api.svg?color=dark%20green&label=Python%20Versions) -## Rosette API -The Rosette Text Analytics Platform uses natural language processing, statistical modeling, and machine learning to -analyze unstructured and semi-structured text across 364 language-encoding-script combinations, revealing valuable -information and actionable data. Rosette provides endpoints for extracting entities and relationships, translating and -comparing the similarity of names, categorizing and adding linguistic tags to text and more. +Rosette uses natural language processing, statistical modeling, and machine learning to analyze unstructured and semi-structured text across hundreds of language-script combinations, revealing valuable information and actionable data. 
Rosette provides endpoints for extracting entities and relationships, translating and comparing the similarity of names, categorizing and adding linguistic tags to text and more. Rosette Server is the on-premises installation of Rosette, with access to Rosette's functions as RESTful web service endpoints. This solves cloud security worries and allows customization (models/indexes) as needed for your business. ## Rosette API Access - Rosette Cloud [Sign Up](https://developer.rosette.com/signup) @@ -25,11 +20,10 @@ in the [examples](https://github.com/rosette-api/python/tree/develop/examples) d #### Documentation & Support - [Binding API](https://rosette-api.github.io/python/) -- [Rosette Platform API](https://developer.rosette.com/features-and-functions) +- [Rosette Platform API](https://docs.babelstreet.com/API/en/index-en.html) - [Binding Release Notes](https://github.com/rosette-api/python/wiki/Release-Notes) -- [Rosette Platform Release Notes](https://support.rosette.com/hc/en-us/articles/360018354971-Release-Notes) -- [Binding/Rosette Platform Compatibility](https://developer.rosette.com/features-and-functions?python#) -- [Support](https://support.rosette.com) +- [Rosette Platform Release Notes](https://babelstreet.my.site.com/support/s/article/Rosette-Cloud-Release-Notes) +- [Support](https://babelstreet.my.site.com/support/s/) - [Binding License: Apache 2.0](https://github.com/rosette-api/python/blob/develop/LICENSE.txt) ## Binding Developer Information diff --git a/examples/events.py b/examples/events.py index 828dd45..832f6e3 100644 --- a/examples/events.py +++ b/examples/events.py @@ -15,7 +15,7 @@ def run(key, alt_url='https://api.rosette.com/rest/v1/'): # Create an API instance api = API(user_key=key, service_url=alt_url) - events_text_data = "I am looking for flights to Super Bowl 2022 in Inglewood, LA." + events_text_data = "Alice has a flight to Budapest. She has not booked a hotel." 
params = DocumentParameters() params["content"] = events_text_data diff --git a/setup.py b/setup.py index 551de4f..309ec07 100755 --- a/setup.py +++ b/setup.py @@ -7,8 +7,8 @@ NAME = "rosette_api" DESCRIPTION = "Rosette API Python client SDK" -AUTHOR = "Basis Technology Corp." -AUTHOR_EMAIL = "support@rosette.com" +AUTHOR = "Rosette by Babel Street" +AUTHOR_EMAIL = "helpdesk@babelstreet.com" HOMEPAGE = "https://github.com/rosette-api/python" VERSION = rosette.__version__ From a5121cb388d9e7f97439ed98754a5fba38128cb4 Mon Sep 17 00:00:00 2001 From: Seth Gransky Date: Fri, 19 Apr 2024 11:24:51 -0500 Subject: [PATCH 09/11] WS-3151: Revert example payhload. There is a separate negation example. --- examples/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/events.py b/examples/events.py index 832f6e3..828dd45 100644 --- a/examples/events.py +++ b/examples/events.py @@ -15,7 +15,7 @@ def run(key, alt_url='https://api.rosette.com/rest/v1/'): # Create an API instance api = API(user_key=key, service_url=alt_url) - events_text_data = "Alice has a flight to Budapest. She has not booked a hotel." + events_text_data = "I am looking for flights to Super Bowl 2022 in Inglewood, LA." params = DocumentParameters() params["content"] = events_text_data From 37de35a3e4de8969c531fa950e2b39f9e9f13e3a Mon Sep 17 00:00:00 2001 From: Seth Gransky Date: Fri, 19 Apr 2024 11:25:14 -0500 Subject: [PATCH 10/11] WS-3151: Use a supported Python in the examples. 
--- examples/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/README.md b/examples/README.md index 5928376..e52b2cb 100644 --- a/examples/README.md +++ b/examples/README.md @@ -32,7 +32,7 @@ python ping.py -k $API_KEY ``` git clone git@github.com:rosette-api/python.git cd python/examples -docker run -it -v $(pwd):/source --entrypoint bash python:3.6-slim +docker run -it -v $(pwd):/source --entrypoint bash python:3.12-slim cd /source pip install rosette_api python ping.py -k $API_KEY @@ -42,7 +42,7 @@ python ping.py -k $API_KEY ``` git clone git@github.com:rosette-api/python.git cd python -docker run -it -v $(pwd):/source --entrypoint bash python:3.6-slim +docker run -it -v $(pwd):/source --entrypoint bash python:3.12-slim cd /source python setup.py install cd examples From b579e0a711f461b4ed1b646be30579b4b9d5f2b5 Mon Sep 17 00:00:00 2001 From: Seth Gransky Date: Fri, 19 Apr 2024 11:43:31 -0500 Subject: [PATCH 11/11] WS-3151: change records param in unit test from array to dict. 
--- tests/test_rosette_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_rosette_api.py b/tests/test_rosette_api.py index 88e58a2..a0e2b3d 100644 --- a/tests/test_rosette_api.py +++ b/tests/test_rosette_api.py @@ -977,7 +977,7 @@ def test_the_record_similarity_endpoint(api, json_response): params = RecordSimilarityParameters() params["fields"] = {} params["properties"] = {} - params["records"] = [] + params["records"] = {} result = api.record_similarity(params) assert result["name"] == "Rosette" httpretty.disable() @@ -1015,9 +1015,9 @@ def test_for_record_similarity_required_parameters(api, json_response): assert e_rosette.value.status == 'missingParameter' assert e_rosette.value.message == 'Required Record Similarity parameter is missing: records' - params["records"] = [] + params["records"] = {} result = api.record_similarity(params) assert result["name"] == "Rosette" httpretty.disable() - httpretty.reset() \ No newline at end of file + httpretty.reset()