"""Consecutive digit swap detection.

Overview
--------
This project provides a solution for detecting consecutive digit swaps in
phone numbers, e.g. ``070 1234 5678`` vs ``070 2134 5678``.

Implementation
--------------
1. Modify this code to run in a custom script Python container
   (https://docs.treasuredata.com/articles/#!pd/python-custom-scripting-example).
2. Copy and paste the code into a custom script in Treasure Workflows.

Considerations
--------------
The same check can be used to detect any consecutive character swap, e.g. in
email addresses or usernames.

Questions
---------
Please feel free to reach out to apac-se@treasure-data.com with any questions
you have about using this code.
"""

# Provenance: commit 4658ab0cd8ce8482da758ee3f6cbea865af1b4aa,
# "add digit swap detection solution" (tdroberto, 2024-12-30), which added
# data-box/digit_swap/README.md and data-box/digit_swap/digit_swap.py.

import csv

import pandas as pd


def _is_consecutive_swap(first, second):
    """Return True if *second* equals *first* with two adjacent characters swapped."""
    if len(first) != len(second):
        return False

    differing = [i for i, (a, b) in enumerate(zip(first, second)) if a != b]
    if len(differing) != 2:
        return False

    i, j = differing
    # The two mismatches must be neighbours and must mirror each other.
    return j == i + 1 and first[i] == second[j] and first[j] == second[i]


def check_consecutive_digit_swap(input_path='data.csv', output_path='res.csv'):
    """Write every row whose two phone numbers differ by one adjacent swap.

    Reads *input_path* as CSV with all columns loaded as strings (so leading
    zeros survive), compares the ``ph1`` and ``ph2`` columns row by row, and
    writes the matching pairs to *output_path* under a ``phone1,phone2``
    header. The number of matches is printed and returned.

    Args:
        input_path: CSV file containing ``ph1`` and ``ph2`` columns.
        output_path: CSV file to create (or overwrite) with the matches.

    Returns:
        The number of rows detected as consecutive swaps.

    Raises:
        FileNotFoundError: If *input_path* does not exist.
        KeyError: If the ``ph1`` or ``ph2`` column is missing.
    """
    df = pd.read_csv(input_path, dtype=str)
    cnt = 0

    # Open the output exactly once: reopening with 'w' would truncate the
    # file and discard the header row that was just written.
    with open(output_path, 'w', newline='') as out:
        writer = csv.writer(out, lineterminator='\n')
        writer.writerow(['phone1', 'phone2'])

        for phone1, phone2 in zip(df['ph1'], df['ph2']):
            # Empty cells are read as NaN (a float) even with dtype=str;
            # they cannot be compared character by character, so skip them.
            if pd.isna(phone1) or pd.isna(phone2):
                continue
            if _is_consecutive_swap(phone1, phone2):
                cnt += 1
                writer.writerow([phone1, phone2])

    print(cnt)
    return cnt


if __name__ == "__main__":
    check_consecutive_digit_swap()