diff --git a/CI.Jenkinsfile b/CI.Jenkinsfile index f6aaf1e..811c36a 100644 --- a/CI.Jenkinsfile +++ b/CI.Jenkinsfile @@ -34,7 +34,10 @@ def runSonnarForPythonVersion(sourceDir, ver){ pip3 install tox && \ cd /source && \ tox && \ - ${sonarExec}\"" + ${sonarExec} && \ + echo && \ + echo [INFO] Re-permission files for cleanup. && \ + chown -R 9960:9960 /source\"" } node ("docker-light") { diff --git a/README.md b/README.md index fc7daf3..ceb1212 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,10 @@ - - ---- + +# Rosette by Babel Street [![PyPI version](https://badge.fury.io/py/rosette-api.svg)](https://badge.fury.io/py/rosette-api) [![Python Versions](https://img.shields.io/pypi/pyversions/rosette-api.svg?color=dark%20green&label=Python%20Versions)](https://img.shields.io/pypi/pyversions/rosette-api.svg?color=dark%20green&label=Python%20Versions) -## Rosette API -The Rosette Text Analytics Platform uses natural language processing, statistical modeling, and machine learning to -analyze unstructured and semi-structured text across 364 language-encoding-script combinations, revealing valuable -information and actionable data. Rosette provides endpoints for extracting entities and relationships, translating and -comparing the similarity of names, categorizing and adding linguistic tags to text and more. +Rosette uses natural language processing, statistical modeling, and machine learning to analyze unstructured and semi-structured text across hundreds of language-script combinations, revealing valuable information and actionable data. Rosette provides endpoints for extracting entities and relationships, translating and comparing the similarity of names, categorizing and adding linguistic tags to text and more. Rosette Server is the on-premises installation of Rosette, with access to Rosette's functions as RESTful web service endpoints. This solves cloud security worries and allows customization (models/indexes) as needed for your business. 
## Rosette API Access - Rosette Cloud [Sign Up](https://developer.rosette.com/signup) @@ -25,11 +20,10 @@ in the [examples](https://github.com/rosette-api/python/tree/develop/examples) d #### Documentation & Support - [Binding API](https://rosette-api.github.io/python/) -- [Rosette Platform API](https://developer.rosette.com/features-and-functions) +- [Rosette Platform API](https://docs.babelstreet.com/API/en/index-en.html) - [Binding Release Notes](https://github.com/rosette-api/python/wiki/Release-Notes) -- [Rosette Platform Release Notes](https://support.rosette.com/hc/en-us/articles/360018354971-Release-Notes) -- [Binding/Rosette Platform Compatibility](https://developer.rosette.com/features-and-functions?python#) -- [Support](https://support.rosette.com) +- [Rosette Platform Release Notes](https://babelstreet.my.site.com/support/s/article/Rosette-Cloud-Release-Notes) +- [Support](https://babelstreet.my.site.com/support/s/) - [Binding License: Apache 2.0](https://github.com/rosette-api/python/blob/develop/LICENSE.txt) ## Binding Developer Information diff --git a/docs/source/conf.py b/docs/source/conf.py index c95582f..2ec8928 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -47,7 +47,7 @@ # General information about the project. project = '' -copyright = '2022, Basis Technology' +copyright = '2024, Basis Technology' author = 'Basis Technology' # The version info for the project you're documenting, acts as replacement for @@ -55,9 +55,9 @@ # built documents. # # The short X.Y version. -version = '1.28.0' +version = '1.29.0' # The full version, including alpha/beta/rc tags. -release = '1.28.0' +release = '1.29.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/examples/README.md b/examples/README.md index 5928376..e52b2cb 100644 --- a/examples/README.md +++ b/examples/README.md @@ -32,7 +32,7 @@ python ping.py -k $API_KEY ``` git clone git@github.com:rosette-api/python.git cd python/examples -docker run -it -v $(pwd):/source --entrypoint bash python:3.6-slim +docker run -it -v $(pwd):/source --entrypoint bash python:3.12-slim cd /source pip install rosette_api python ping.py -k $API_KEY @@ -42,7 +42,7 @@ python ping.py -k $API_KEY ``` git clone git@github.com:rosette-api/python.git cd python -docker run -it -v $(pwd):/source --entrypoint bash python:3.6-slim +docker run -it -v $(pwd):/source --entrypoint bash python:3.12-slim cd /source python setup.py install cd examples diff --git a/examples/entities.py b/examples/entities.py index e6e2a50..30a2ea5 100644 --- a/examples/entities.py +++ b/examples/entities.py @@ -22,6 +22,7 @@ def run(key, alt_url='https://api.rosette.com/rest/v1/'): # api.set_option('calculateSalience','true') # api.set_option('linkEntities','false') + # api.set_option('useIndocServer', True) entities_text_data = "The Securities and Exchange Commission today announced the leadership of the agency’s trial unit. Bridget Fitzpatrick has been named Chief Litigation Counsel of the SEC and David Gottesman will continue to serve as the agency’s Deputy Chief Litigation Counsel. Since December 2016, Ms. Fitzpatrick and Mr. Gottesman have served as Co-Acting Chief Litigation Counsel. In that role, they were jointly responsible for supervising the trial unit at the agency’s Washington D.C. headquarters as well as coordinating with litigators in the SEC’s 11 regional offices around the country." params = DocumentParameters() diff --git a/examples/events.py b/examples/events.py index 7e5147a..828dd45 100644 --- a/examples/events.py +++ b/examples/events.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -Example code to call Rosette API to get entities from a piece of text. 
+Example code to call Rosette API to get events from a piece of text. """ import argparse diff --git a/examples/record_similarity.py b/examples/record_similarity.py new file mode 100644 index 0000000..2ae4c1e --- /dev/null +++ b/examples/record_similarity.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +""" +Example code to call Rosette API to get similarity score between a list of records +""" + +import argparse +import json +import os + +from rosette.api import API, RecordSimilarityParameters, RosetteException + + +def run(key, alt_url='https://api.rosette.com/rest/v1/'): + """ Run the example """ + # Create an API instance + api = API(user_key=key, service_url=alt_url) + + fields = { + "primaryName": { + "type": "rni_name", + "weight": 0.5 + }, + "dob": { + "type": "rni_date", + "weight": 0.2 + }, + "addr": { + "type": "rni_address", + "weight": 0.5 + }, + "dob2": { + "type": "rni_date", + "weight": 0.1 + } + } + properties = { + "threshold": 0.7, + "includeExplainInfo": True + } + records = { + "left": [ + { + "primaryName": { + "text": "Ethan R", + "entityType": "PERSON", + "language": "eng", + "languageOfOrigin": "eng", + "script": "Latn" + }, + "dob": "1993-04-16", + "addr": "123 Roadlane Ave", + "dob2": { + "date": "1993/04/16" + } + }, + { + "dob": { + "date": "1993-04-16" + }, + "primaryName": { + "text": "Evan R" + } + } + ], + "right": [ + { + "dob": { + "date": "1993-04-16" + }, + "primaryName": { + "text": "Seth R", + "language": "eng" + } + }, + { + "primaryName": "Ivan R", + "dob": { + "date": "1993-04-16" + }, + "addr": { + "address": "123 Roadlane Ave" + }, + "dob2": { + "date": "1993/04/16" + } + } + ] + } + params = RecordSimilarityParameters() + params["fields"] = fields + params["properties"] = properties + params["records"] = records + + try: + return api.record_similarity(params) + except RosetteException as exception: + print(exception) + + +PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, + 
description='Calls the ' + + os.path.splitext(os.path.basename(__file__))[0] + ' endpoint') +PARSER.add_argument('-k', '--key', help='Rosette API Key', required=True) +PARSER.add_argument('-u', '--url', help="Alternative API URL", + default='https://api.rosette.com/rest/v1/') + +if __name__ == '__main__': + ARGS = PARSER.parse_args() + RESULT = run(ARGS.key, ARGS.url) + print(RESULT) diff --git a/rosette/__init__.py b/rosette/__init__.py index 72eac6f..8944820 100644 --- a/rosette/__init__.py +++ b/rosette/__init__.py @@ -12,4 +12,4 @@ limitations under the License. """ -__version__ = '1.28.0' +__version__ = '1.29.0' diff --git a/rosette/api.py b/rosette/api.py index f3fec57..39f3467 100644 --- a/rosette/api.py +++ b/rosette/api.py @@ -3,7 +3,7 @@ """ Python client for the Rosette API. -Copyright (c) 2014-2022 Basis Technology Corporation. +Copyright (c) 2014-2024 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ _APPLICATION_JSON = 'application/json' _BINDING_LANGUAGE = 'python' -_BINDING_VERSION = '1.28.0' +_BINDING_VERSION = '1.29.0' _CONCURRENCY_HEADER = 'x-rosetteapi-concurrency' _CUSTOM_HEADER_PREFIX = 'X-RosetteAPI-' _CUSTOM_HEADER_PATTERN = re.compile('^' + _CUSTOM_HEADER_PREFIX) @@ -345,6 +345,29 @@ def validate(self): repr("names")) +class RecordSimilarityParameters(_RequestParametersBase): + """Parameter object for C{record-similarity} endpoint. + Required: + C{records} The records to be compared; where each left record is compared to the associated right record. + C{properties} Parameters used in the call + C{fields} The definition of the fields used in the comparison. There must be a minimum of 1 field and + can have a maximum of 5 fields. + """ + + def __init__(self): + self.use_multipart = False + _RequestParametersBase.__init__(self, ("fields", "properties", "records")) + + def validate(self): + """Internal. 
Do not use.""" + for option in "fields", "properties", "records": # required + if self[option] is None: + raise RosetteException( + "missingParameter", + "Required Record Similarity parameter is missing: " + option, + repr(option)) + + class EndpointCaller(object): """L{EndpointCaller} objects are invoked via their instance methods to obtain results from the Rosette server described by the L{API} object from which they @@ -592,7 +615,8 @@ def __init__( 'TOKENS': 'tokens', 'TOPICS': 'topics', 'TRANSLITERATION': 'transliteration', - 'EVENTS': 'events' + 'EVENTS': 'events', + 'RECORD_SIMILARITY': 'record-similarity' } def __del__(self): @@ -966,6 +990,15 @@ def name_deduplication(self, parameters): @return: A python dictionary containing the results of de-duplication""" return EndpointCaller(self, self.endpoints['NAME_DEDUPLICATION']).call(parameters, NameDeduplicationParameters) + def record_similarity(self, parameters): + """ + Create an L{EndpointCaller} to get similarity score between a list of records and call it. + @param parameters: An object specifying the data, + and possible metadata, to be processed by the record matcher. + @type parameters: L{RecordSimilarityParameters} + @return: A python dictionary containing the results of record matching.""" + return EndpointCaller(self, self.endpoints['RECORD_SIMILARITY']).call(parameters, RecordSimilarityParameters) + def text_embedding(self, parameters): """ deprecated Create an L{EndpointCaller} to identify text vectors found in the texts diff --git a/setup.py b/setup.py index 551de4f..309ec07 100755 --- a/setup.py +++ b/setup.py @@ -7,8 +7,8 @@ NAME = "rosette_api" DESCRIPTION = "Rosette API Python client SDK" -AUTHOR = "Basis Technology Corp." 
-AUTHOR_EMAIL = "support@rosette.com" +AUTHOR = "Rosette by Babel Street" +AUTHOR_EMAIL = "helpdesk@babelstreet.com" HOMEPAGE = "https://github.com/rosette-api/python" VERSION = rosette.__version__ diff --git a/tests/test_rosette_api.py b/tests/test_rosette_api.py index c05ea05..a0e2b3d 100644 --- a/tests/test_rosette_api.py +++ b/tests/test_rosette_api.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -Copyright (c) 2014-2022 Basis Technology Corporation. +Copyright (c) 2014-2024 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ NameTranslationParameters, NameSimilarityParameters, NameDeduplicationParameters, + RecordSimilarityParameters, RosetteException) _ISPY3 = sys.version_info[0] == 3 @@ -484,6 +485,10 @@ def test_the_name_requests_with_text(api, json_response): result = api.address_similarity("should fail") assert e_rosette.value.status == 'incompatible' + with pytest.raises(RosetteException) as e_rosette: + result = api.record_similarity("should fail") + assert e_rosette.value.status == 'incompatible' + httpretty.disable() httpretty.reset() @@ -958,4 +963,61 @@ def test_the_events_endpoint(api, json_response, doc_params): result = api.events(doc_params) assert result["name"] == "Rosette" httpretty.disable() - httpretty.reset() \ No newline at end of file + httpretty.reset() + +# Test the record similarity endpoint + + +def test_the_record_similarity_endpoint(api, json_response): + """Test the record similarity endpoint""" + httpretty.enable() + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/record-similarity", + body=json_response, status=200, content_type="application/json") + + params = RecordSimilarityParameters() + params["fields"] = {} + params["properties"] = {} + params["records"] = {} + result = api.record_similarity(params) + assert result["name"] == "Rosette" + httpretty.disable() + httpretty.reset() 
+ + +# Tests for required record-similarity parameters +def test_for_record_similarity_required_parameters(api, json_response): + """Test record similarity parameters""" + httpretty.enable() + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/record-similarity", + body=json_response, status=200, content_type="application/json") + + params = RecordSimilarityParameters() + + with pytest.raises(RosetteException) as e_rosette: + api.record_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert e_rosette.value.message == 'Required Record Similarity parameter is missing: fields' + + params["fields"] = {} + + with pytest.raises(RosetteException) as e_rosette: + api.record_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert e_rosette.value.message == 'Required Record Similarity parameter is missing: properties' + + params["properties"] = {} + + with pytest.raises(RosetteException) as e_rosette: + api.record_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert e_rosette.value.message == 'Required Record Similarity parameter is missing: records' + + params["records"] = {} + + result = api.record_similarity(params) + assert result["name"] == "Rosette" + httpretty.disable() + httpretty.reset()