-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathDATIX_analysis.py
73 lines (59 loc) · 2.06 KB
/
DATIX_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""
HSMA Group 6 - Hackathon 2, 22/10/2024
1. Set up GitHub x
2. Create ignore file for data + README file x
2.1 Create Environment x
3. Data load
4. Explore data
5. Data Pre-processing
6. Create features and scripts for overall information + NLP, with Streamlit as
the output for interaction with the end user.
7. Create Streamlit front-end interface.
8. SPC from py.plot.dot?
"""
# %%
# 1. Import packages
#Dataframe tools
import pandas as pd
import numpy as np
#Preprocessing of datetimes
from datetime import datetime
#Data Visualisation
import plotly.express as px
import matplotlib.pyplot as plt
# NLP tools
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
### SpaCy NLTK for NER
#import en_core_web_sm
# pip install spacy==3.2.0
# python -m spacy download en_core_web_sm
# Image handling (PIL images can be converted to numpy arrays)
from PIL import Image
#
# %%
# Load the data file into a DataFrame.
# A forward slash keeps the relative path portable across Windows, macOS and
# Linux; a backslash before "T" is an invalid string escape sequence and only
# resolves as a path separator on Windows.
datix_df = pd.read_csv("data/Test_data.csv")
# %%
# Preview the first five rows (shown when the cell is run interactively).
datix_df.head(5)
# %%
# Keep the column labels of the loaded data for reference.
datix_headers = datix_df.columns
# %%
# Data pre-processing
# Encode the "Actual Harm" category as ordinal integers in a new
# "Actual_Harm_ord" column. Values that are not keys of the mapping are left
# unchanged by Series.replace.
# NOTE(review): two labels contain a non-breaking space (\xa0); presumably this
# matches the raw data exactly - confirm before normalising the keys.
actual_harm_levels = {
    "None (No harm)": 0,
    "Low (Minimal harm: required extra observation or minor treatment)": 1,
    "Moderate (Short-term harm: required further treatment or procedure)": 2,
    "Severe\xa0(Permanent or long-term harm: e.g. fractured NOF)": 3,
    "Death (Not caused by a safety incident)": 4,
    "Death\xa0(Caused by a safety incident)": 5,
}
try:
    # A single replace call with the mapping performs all six substitutions.
    datix_df["Actual_Harm_ord"] = datix_df["Actual Harm"].replace(actual_harm_levels)
except KeyError:
    # Only a missing "Actual Harm" column is handled here; any other error is
    # allowed to propagate instead of being silently swallowed.
    print("Column 'Actual Harm' not found; 'Actual_Harm_ord' was not created.")
# Inspect the distinct values of the main categorical columns.
# Selecting a single column yields a Series, which has no `.columns`
# attribute (accessing it raises AttributeError), so `.unique()` is used.
# NOTE(review): listing unique values is presumably the intended check - confirm.
datix_df["Division"].unique()
datix_df["Specialty"].unique()
datix_df["Category"].unique()
datix_df["Sub Category"].unique()
# %%