-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtransform_data.py
61 lines (46 loc) · 1.48 KB
/
transform_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import re
import pandas as pd
from loguru import logger
def clean_data(data: dict[str, list]) -> pd.DataFrame:
    """
    Build a dataframe from the scraped data and normalise its numeric columns.

    :param data: Dictionary of scraped data; it is handed to ``pd.DataFrame`` as is.
    :return: Pandas dataframe in which the "Damage", "Start Up Frame",
        "Block Frame", "Hit Frame" and "Counter Hit Frame" columns have each
        been passed through ``clean_numbers``.
    """
    logger.info('Cleaning data...')
    frame = pd.DataFrame(data)

    # Columns are processed in this fixed order, one ``apply`` per column.
    numeric_columns = (
        "Damage",
        "Start Up Frame",
        "Block Frame",
        "Hit Frame",
        "Counter Hit Frame",
    )
    for column in numeric_columns:
        frame[column] = frame[column].apply(clean_numbers)

    return frame
def clean_numbers(input_str: str) -> float:
    """
    Clean number data.

    Every integer found in the text (digits with an optional leading ``-``)
    is extracted and the arithmetic mean of those integers is returned.
    Note that a hyphen directly in front of digits is read as a minus sign,
    so ``"10-12"`` yields the integers 10 and -12.

    :param input_str: Input data as String. Non-string cells are tolerated:
        values convertible with ``float()`` are returned as that float, while
        ``None``, NaN and anything else non-convertible count as "no numbers".
    :return: Float - the mean of the extracted integers, or ``0.0`` when
        there is nothing to extract.
    """
    if not isinstance(input_str, str):
        # ``re.findall`` raises TypeError on non-text input, which would abort
        # a whole column ``apply`` because of a single missing cell.
        try:
            value = float(input_str)
        except (TypeError, ValueError):
            return 0.0
        # NaN is the only float that is not equal to itself.
        return 0.0 if value != value else value

    # Extract numbers using regular expressions and convert them to integers
    numbers = [int(num) for num in re.findall(r'-?\d+', input_str)]
    if not numbers:
        # Return a float here as well so the result type is always the same.
        return 0.0
    # Calculate average
    return sum(numbers) / len(numbers)
def clean_first_start_up_column(input_str: str) -> float:
    """
    Reduce a First Start Up Frame value to its leading number.

    :param input_str: Input data as String.
    :return: The first integer found in the text (an optional ``-`` followed
        by digits), or 0 when the text contains no digits.
    """
    first_number = re.search(r'-?\d+', input_str)
    if first_number is None:
        # Nothing numeric in the text.
        return 0
    return int(first_number.group(0))
# Script entry point: deliberately a no-op, so running this module directly
# does nothing beyond defining the functions above.
if __name__ == '__main__':
    pass