Skip to content

Commit

Permalink
CLS2-997 Create job for EYB Lead & Company+Contact matching (#5726)
Browse files Browse the repository at this point in the history
* Create a job to link eyb leads to companies that runs on completion of the user ingestion job

Co-authored-by: Oliver Roberts <oliverjwroberts@gmail.com>
Co-authored-by: Santosh Dasari <santoshdasa12345@users.noreply.github.com>
Co-authored-by: Sam Dasari <santoshdasa12345@users.noreply.github.com>

* Refactor services.py

* Refactor services.py

* Further flake8

* Tests

* Link function + test

* More testing

* flake8

* more flake

* Adding fixture to test

* One test fixed (db refresh needed)

* Progress, but not really

* Testing the creation of new companies and contacts after linking EYB triage and user data e2e

* More tests; cleanup

* Better testing. Better error handling

    Co-authored-by: Oliver Roberts <oliverjwroberts@gmail.com>
    Co-authored-by: Santosh Dasari <santoshdasa12345@users.noreply.github.com>
    Co-authored-by: Sophie Wenban <74198488+swenban@users.noreply.github.com>

* Test cases for unhappy paths and exceptions.

    Co-authored-by: Oliver Roberts <oliverjwroberts@gmail.com>
    Co-authored-by: Santosh Dasari <santoshdasa12345@users.noreply.github.com>
    Co-authored-by: Sophie Wenban <74198488+swenban@users.noreply.github.com>

* Remove comments to prevent future maintenance faff

---------

Co-authored-by: Sophie Wenban <s.maclennan@hotmail.co.uk>
Co-authored-by: Oliver Roberts <oliverjwroberts@gmail.com>
Co-authored-by: Santosh Dasari <santoshdasa12345@users.noreply.github.com>
  • Loading branch information
4 people authored Oct 28, 2024
1 parent d6478be commit bd03b78
Show file tree
Hide file tree
Showing 4 changed files with 294 additions and 16 deletions.
49 changes: 37 additions & 12 deletions datahub/investment_lead/services.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,37 @@
import logging

from django.db.models import F, Q

from datahub.company.models.company import Company
from datahub.company.models.contact import Contact
from datahub.investment_lead.models import EYBLead

logger = logging.getLogger(__name__)


def link_leads_to_companies():
    """
    Link each EYB lead returned by get_leads_to_process() to a company and contact.

    For every lead, a company is matched or created first and then the lead is
    added as a contact on that company unless a matching contact already exists.
    A failure on one lead is logged and the remaining leads are still processed;
    nothing is raised to the caller.
    """
    for eyb_lead in get_leads_to_process():
        try:
            match_or_create_company_for_eyb_lead(eyb_lead)
            create_or_skip_eyb_lead_as_company_contact(eyb_lead)
        except Exception as e:
            # Deliberately broad: one bad lead must not abort the whole batch.
            # logger.exception logs at ERROR level like logger.error did, but
            # also records the traceback so the failure can be diagnosed.
            logger.exception(
                f'Error linking EYB lead {eyb_lead.pk} to company/contact: {e}.',
            )


def get_leads_to_process():
    """
    Build the queryset of EYB leads that still need company/contact linking.

    A lead is included when it is not archived, has no company linked and its
    user_hashed_uuid equals its triage_hashed_uuid.
    """
    hashed_uuids_match = Q(user_hashed_uuid=F('triage_hashed_uuid'))
    unarchived_leads = EYBLead.objects.filter(archived=False)
    return unarchived_leads.filter(hashed_uuids_match, company__isnull=True)


def raise_exception_for_eyb_lead_without_company(eyb_lead: EYBLead):
"""
Check for required attributes on EYB Lead to ensure there is a company
Expand Down Expand Up @@ -59,7 +84,7 @@ def add_new_company_from_eyb_lead(eyb_lead: EYBLead):
return company


def process_eyb_lead(eyb_lead):
def match_or_create_company_for_eyb_lead(eyb_lead):
"""Matches an EYB lead with an existing Company via DnB number
Args:
Expand Down Expand Up @@ -92,17 +117,6 @@ def email_matches_contact_on_eyb_lead_company(eyb_lead: EYBLead):
return count >= 1


def create_or_skip_eyb_lead_as_company_contact(eyb_lead: EYBLead):
    """
    Ensure the company linked to an EYB lead has a contact for that lead.

    raise_exception_for_eyb_lead_without_company() is called first to validate
    the lead. A new contact is only created when no contact on the lead's
    company already matches the lead's email.
    """
    raise_exception_for_eyb_lead_without_company(eyb_lead)

    if email_matches_contact_on_eyb_lead_company(eyb_lead):
        # A matching contact already exists, so there is nothing to create.
        return

    create_company_contact_for_eyb_lead(eyb_lead)


def create_company_contact_for_eyb_lead(eyb_lead: EYBLead):
"""
Given an EYB lead with a linked company record:
Expand All @@ -123,3 +137,14 @@ def create_company_contact_for_eyb_lead(eyb_lead: EYBLead):
contact.save()

return contact


def create_or_skip_eyb_lead_as_company_contact(eyb_lead: EYBLead):
    """
    Ensure the company linked to an EYB lead has a contact for that lead.

    raise_exception_for_eyb_lead_without_company() is called first to validate
    the lead. A new contact is only created when no contact on the lead's
    company already matches the lead's email.
    """
    raise_exception_for_eyb_lead_without_company(eyb_lead)

    if email_matches_contact_on_eyb_lead_company(eyb_lead):
        # A matching contact already exists, so there is nothing to create.
        return

    create_company_contact_for_eyb_lead(eyb_lead)
4 changes: 4 additions & 0 deletions datahub/investment_lead/tasks/ingest_eyb_user.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging

from datahub.investment_lead.serializers import CreateEYBLeadUserSerializer
from datahub.investment_lead.services import link_leads_to_companies
from datahub.investment_lead.tasks.ingest_eyb_common import (
BaseEYBDataIngestionTask,
BaseEYBFileIngestionTask,
Expand Down Expand Up @@ -38,6 +39,9 @@ def ingest_eyb_user_data(bucket, file):
task.ingest(bucket, file)
logger.info(f'Ingesting file: {file} finished')

link_leads_to_companies()
logger.info('Linked leads to companies')


class EYBUserDataIngestionTask(BaseEYBDataIngestionTask):
"""Long running job to read the user file contents and ingest the records."""
104 changes: 100 additions & 4 deletions datahub/investment_lead/test/test_services.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
import logging

from unittest.mock import patch

import pytest

from django.db import IntegrityError

from datahub.company.models.company import Company
from datahub.company.models.contact import Contact
from datahub.company.test.factories import CompanyFactory, ContactFactory
from datahub.investment_lead import services
from datahub.investment_lead.services import (
add_new_company_from_eyb_lead,
create_or_skip_eyb_lead_as_company_contact,
email_matches_contact_on_eyb_lead_company,
find_match_by_duns_number,
process_eyb_lead,
get_leads_to_process,
link_leads_to_companies,
match_or_create_company_for_eyb_lead,
)
from datahub.investment_lead.test.factories import EYBLeadFactory
from datahub.investment_lead.test.factories import EYBLeadFactory, generate_hashed_uuid
from datahub.investment_lead.test.utils import (
assert_eyb_lead_matches_company,
assert_eyb_lead_matches_contact,
Expand Down Expand Up @@ -43,7 +50,7 @@ def test_attach_existing_company_from_eyb_lead(self):
company = CompanyFactory(duns_number='123456789')
eyb_lead = EYBLeadFactory(duns_number='123456789')

result = process_eyb_lead(eyb_lead)
result = match_or_create_company_for_eyb_lead(eyb_lead)

assert eyb_lead.company is not None
assert eyb_lead.company == company
Expand All @@ -52,7 +59,7 @@ def test_attach_existing_company_from_eyb_lead(self):
def test_add_new_company_from_eyb_lead(self):
eyb_lead = EYBLeadFactory(duns_number=None)

company = process_eyb_lead(eyb_lead)
company = match_or_create_company_for_eyb_lead(eyb_lead)

company = Company.objects.get(pk=company.pk)
assert_eyb_lead_matches_company(company, eyb_lead)
Expand Down Expand Up @@ -168,3 +175,92 @@ def test_create_contact_on_company(self):
assert eyb_lead.company.contacts.count() == count + 1
contact = eyb_lead.company.contacts.first()
assert_eyb_lead_matches_contact(contact, eyb_lead)

def test_get_leads_to_process(self):
    """get_leads_to_process returns exactly the lead with matching UUIDs and no company."""
    # Leads that must not appear in the results
    EYBLeadFactory()
    EYBLeadFactory(archived=True)
    EYBLeadFactory(
        triage_hashed_uuid='a hashed uuid',
        user_hashed_uuid='another hashed uuid',
    )

    # The one lead that must appear in the results
    shared_uuid = generate_hashed_uuid()
    wanted_lead = EYBLeadFactory(
        triage_hashed_uuid=shared_uuid,
        user_hashed_uuid=shared_uuid,
        company=None,
    )

    leads = get_leads_to_process()

    # Exactly one result is expected
    assert leads.count() == 1
    found_lead = leads[0]
    assert found_lead == wanted_lead
    assert found_lead.company is None

def test_link_leads_to_companies_raises_exception_company(self, caplog):
    """A lead that cannot be linked to a company produces an error log entry."""
    shared_uuid = generate_hashed_uuid()
    eyb_lead = EYBLeadFactory(
        company=None,
        triage_hashed_uuid=shared_uuid,
        user_hashed_uuid=shared_uuid,
        address_country_id=None,  # this forces link_leads_to_companies into exception
    )
    expected_log = f'Error linking EYB lead {eyb_lead.pk} to company/contact'

    assert eyb_lead.company is None

    # Linking the company (and therefore creating the contact) is expected to fail
    with caplog.at_level(logging.ERROR):
        link_leads_to_companies()
        assert expected_log in caplog.text

    # NOTE(review): eyb_lead is not reloaded from the database before this
    # check, so it only inspects the in-memory instance - confirm intended.
    assert eyb_lead.company is None

@patch(
    'datahub.investment_lead.services.match_or_create_company_for_eyb_lead',
    return_value=None,
)
def test_link_leads_to_companies_raises_exception_contact(self, mock_method, caplog):
    """With company matching patched out, an error is logged and no contact is created."""
    shared_uuid = generate_hashed_uuid()
    eyb_lead = EYBLeadFactory(
        company=None,
        triage_hashed_uuid=shared_uuid,
        user_hashed_uuid=shared_uuid,
    )
    expected_log = f'Error linking EYB lead {eyb_lead.pk} to company/contact'

    assert eyb_lead.company is None
    assert Contact.objects.count() == 0

    # The company step is a no-op here, so the contact step is expected to fail
    with caplog.at_level(logging.ERROR):
        link_leads_to_companies()
        assert expected_log in caplog.text

    assert eyb_lead.company is None
    assert Contact.objects.count() == 0

def test_link_leads_to_companies(self):
    """A lead sharing a DUNS number with a company is linked to it and gains a contact."""
    duns_number = '123'
    hashed_uuid = '123123123'
    eyb_lead = EYBLeadFactory(
        duns_number=duns_number,
        company=None,
        triage_hashed_uuid=hashed_uuid,
        user_hashed_uuid=hashed_uuid,
    )
    existing_company = CompanyFactory(duns_number=duns_number)

    assert eyb_lead.company is None

    # Link the company and create the contact, then reload the lead
    # so the assertions below see what was persisted.
    link_leads_to_companies()
    eyb_lead.refresh_from_db()

    # Company linked assertions
    assert eyb_lead.company is not None
    assert eyb_lead.company == existing_company

    # Contact linked assertions
    company_contacts = eyb_lead.company.contacts
    assert company_contacts.count() == 1
    assert_eyb_lead_matches_contact(company_contacts.first(), eyb_lead)
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import pytest

from moto import mock_aws

from datahub.company.models import Company, Contact
from datahub.company.test.factories import CompanyFactory, ContactFactory
from datahub.investment_lead.models import EYBLead
from datahub.investment_lead.tasks.ingest_eyb_common import (
BUCKET,
)
from datahub.investment_lead.tasks.ingest_eyb_triage import (
ingest_eyb_triage_data,
TRIAGE_PREFIX,
)
from datahub.investment_lead.tasks.ingest_eyb_user import (
ingest_eyb_user_data,
USER_PREFIX,
)
from datahub.investment_lead.test.factories import (
eyb_lead_triage_record_faker,
eyb_lead_user_record_faker,
generate_hashed_uuid,
)
from datahub.investment_lead.test.test_tasks.utils import (
file_contents_faker,
setup_s3_bucket,
)
from datahub.investment_lead.test.utils import (
assert_eyb_lead_matches_company,
assert_eyb_lead_matches_contact,
)


pytestmark = pytest.mark.django_db


@pytest.fixture
def test_triage_file_path():
    """Path of the triage file used for the S3 setup and the triage ingestion call."""
    # NOTE(review): a '/' is inserted after the prefix here, unlike in
    # test_user_file_path - confirm which form matches the prefix constants.
    file_name = 'triage.jsonl.gz'
    return f'{TRIAGE_PREFIX}/{file_name}'


@pytest.fixture
def test_user_file_path():
    """Path of the user file used for the S3 setup and the user ingestion call."""
    file_name = 'user.jsonl.gz'
    return f'{USER_PREFIX}{file_name}'


class TestEYBCompanyContactLinking:
    """End-to-end tests: ingest triage and user files, then check company/contact linking."""

    @staticmethod
    def _upload_and_ingest(triage_path, user_path, triage_records, user_records):
        """Put both record sets in the bucket, then run the triage and user ingestion."""
        triage_file_contents = file_contents_faker(records=triage_records)
        user_file_contents = file_contents_faker(records=user_records)

        setup_s3_bucket(
            BUCKET,
            [triage_path, user_path],
            [triage_file_contents, user_file_contents],
        )

        # Triage is ingested first; the user ingestion then runs the linking step.
        ingest_eyb_triage_data(BUCKET, triage_path)
        ingest_eyb_user_data(BUCKET, user_path)

    @mock_aws
    def test_create_company_and_contact_success(
        self, test_triage_file_path, test_user_file_path,
    ):
        """
        Ingest triage and user data with no pre-existing company or contact
        and verify that both are created and linked to the EYB lead.
        """
        leads_before = EYBLead.objects.count()
        companies_before = Company.objects.count()
        contacts_before = Contact.objects.count()

        # Both records carry the same hashed UUID
        shared_uuid = generate_hashed_uuid()
        triage_records = [
            eyb_lead_triage_record_faker({'hashedUuid': shared_uuid}),
        ]
        user_records = [
            eyb_lead_user_record_faker({'hashedUuid': shared_uuid}),
        ]

        self._upload_and_ingest(
            test_triage_file_path,
            test_user_file_path,
            triage_records,
            user_records,
        )

        assert EYBLead.objects.count() == leads_before + 1
        assert Company.objects.count() == companies_before + 1
        assert Contact.objects.count() == contacts_before + 1

        eyb_lead = EYBLead.objects.all()[0]
        created_company = Company.objects.all()[0]
        assert_eyb_lead_matches_company(created_company, eyb_lead)

        company_contacts = eyb_lead.company.contacts
        assert company_contacts.count() == 1
        assert_eyb_lead_matches_contact(company_contacts.first(), eyb_lead)

    @mock_aws
    def test_linking_existing_company_contact_success(
        self, test_triage_file_path, test_user_file_path,
    ):
        """
        Ingest triage and user data when a matching company and contact already
        exist and verify that they are linked to the EYB lead, not duplicated.
        """
        existing_company = CompanyFactory(duns_number='123')
        existing_contact = ContactFactory(
            company=existing_company,
            email='foo@bar.com',
        )

        leads_before = EYBLead.objects.count()
        companies_before = Company.objects.count()
        contacts_before = Contact.objects.count()

        shared_uuid = generate_hashed_uuid()
        triage_records = [
            eyb_lead_triage_record_faker({'hashedUuid': shared_uuid}),
        ]
        # The user record reuses the existing company's DUNS number and name
        # and the existing contact's email.
        user_records = [
            eyb_lead_user_record_faker({
                'hashedUuid': shared_uuid,
                'dunsNumber': '123',
                'companyName': existing_company.name,
                'email': 'foo@bar.com',
            }),
        ]

        self._upload_and_ingest(
            test_triage_file_path,
            test_user_file_path,
            triage_records,
            user_records,
        )

        # A lead is added, but no new company or contact
        assert EYBLead.objects.count() == leads_before + 1
        assert Company.objects.count() == companies_before
        assert Contact.objects.count() == contacts_before

        eyb_lead = EYBLead.objects.all()[0]
        stored_company = Company.objects.all()[0]
        assert eyb_lead.company == stored_company

        company_contacts = eyb_lead.company.contacts
        assert company_contacts.count() == 1
        assert existing_contact == company_contacts.first()

0 comments on commit bd03b78

Please sign in to comment.