-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWeek4.R
100 lines (85 loc) · 3.3 KB
/
Week4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
library(here)
library(dplyr)
data = read.csv(here("Datasets", "Police_Department_Incident_Reports__2018_to_Present.csv"))
View(data)
#Only Get Missing Persons theb only unique incidents
data_MP = data.frame(subset(data, data$Incident.Category == "Missing Person" &
data$Report.Type.Description == "Initial"))
View(data_MP)
#Change Incident.Time column to lubridate readable
library(lubridate)
data_MP$Incident.Time = hm(data_MP$Incident.Time)
#Lubridate other time columns
data_MP$Incident.Datetime = parse_date_time(data_MP$Incident.Datetime,
orders = "mdy HM")
data_MP$Incident.Date = parse_date_time(data_MP$Incident.Date,
orders = "mdy")
#Take out all 2023 entries
data_MP = data.frame(subset(data_MP, year(data_MP$Incident.Datetime) != 2023))
#How many have no neighborhood?
Neighborhood_Count = data.frame(table(data_MP$Analysis.Neighborhood))
data_MP_useful = data.frame("No_Neighborhood_Count",
Neighborhood_Count$Freq[1])
#Rearrange Neighborhood_Count table for aesthetics
names(Neighborhood_Count)[1] = "Neighborhood"
names(Neighborhood_Count)[2] = "Count"
#Rename data_MP_useful columns so it makes sense (ease of mind)
names(data_MP_useful)[1] = "Field"
names(data_MP_useful)[2] = "Value"
#Make a list for all DFs & assign latest/first two dataframes then remove
#dfs from environment + rename them in list
all_df_list = list()
all_df_list[[1]] = data_MP_useful
names(all_df_list)[1] = "No_Neighborhood_Count"
all_df_list[[2]] = Neighborhood_Count
names(all_df_list)[2] = "Neighborhood_Count"
rm(data_MP_useful)
rm(Neighborhood_Count)
#Prepare for counting how many crimes per month
Month_df = data.frame(month(data_MP$Incident.Datetime))
Month_df = data.frame(table(Month_df))
names(Month_df)[1] = "Month"
names(Month_df)[2] = "Count"
Month_df = Month_df %>% arrange(Month)
all_df_list[[3]] = Month_df
names(all_df_list)[3] = "Month_df"
rm(Month_df)
#Counting Distribution for Day of Week
Week_df = data.frame(weekdays(data_MP$Incident.Datetime))
Week_df = data.frame(table(Week_df), c(5,1,6,7,4,2,3))
names(Week_df)[1] = "Week"
names(Week_df)[2] = "Count"
names(Week_df)[3] = "Number"
Week_df = Week_df %>% arrange(Number)
all_df_list[[4]] = Week_df
names(all_df_list)[4] = "Week_df"
rm(Week_df)
#Counting for Time of Day
Hour_df = data.frame(hour(data_MP$Incident.Datetime))
Hour_df = data.frame(table(Hour_df))
names(Hour_df)[1] = "Hour"
names(Hour_df)[2] = "Count"
all_df_list[[5]] = Hour_df
names(all_df_list)[5] = "Hour_df"
rm(Hour_df)
#How many were found?
#!!! This one, you must use supplemental reports as well
data_MP_with_supplement = data.frame(subset(data, data$Incident.Category == "Missing Person"))
Found_df = data.frame(subset(data_MP_with_supplement,
data_MP_with_supplement$Incident.Description == "Found Person"))
Found_df = data.frame(table(Found_df$Incident.Description))
names(Found_df)[1] = "Found"
names(Found_df)[2] = "Count"
all_df_list[[6]] = Found_df
names(all_df_list)[6] = "Found_df"
rm(Found_df)
rm(data_MP_with_supplement)
#Arranging for top neighborhood data
#Rename 1st row, if not it won't work
all_df_list$Neighborhood_Count$Neighborhood[[1]] = "Empty"
all_df_list$Neighborhood_Count = arrange(all_df_list$Neighborhood_Count,
desc(all_df_list$Neighborhood_Count$Count))
Top5 = data.frame(all_df_list$Neighborhood_Count[1:5,])
all_df_list[[7]] = Top5
rm(Top5)
names(all_df_list)[7] = "Top 5"