-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.Rhistory
69 lines (69 loc) · 2.71 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Session 1: exploratory comparison of the metadata table against the file
# names listed from txt/.
library(tidyverse)

# Load the master metadata, print its column names, and keep only the rows
# whose Transcript column equals TRUE.
metadata <- read.csv(file = "IndianaNewspapersMasterMetadata.csv", header = TRUE)
colnames(metadata)
metadata <- filter(metadata, Transcript == TRUE)

# One-column data frame (column `txt`) holding the names returned by
# list.files(); the column is then renamed to `Filename` so it can serve as
# the join key.
txt <- data.frame(txt = list.files(path = "txt/"))
View(txt)
View(txt)
txt <- rename(txt, Filename = "txt")
View(txt)

# First join attempt, made before the `hasfile` marker column exists.
test <- metadata %>% full_join(txt, by = "Filename")
View(test)

# Mark every listed file, then redo the full join so the marker is carried
# into the joined table.
txt[["hasfile"]] <- TRUE
test <- metadata %>% full_join(txt, by = "Filename")
View(txt)
View(test)

# Inspect the join key: distinct values, then a per-row repeat flag.
unique(test[["Filename"]])
duplicated(test[["Filename"]])
test[["duplicate"]] <- duplicated(test[["Filename"]])
View(test)

# Same repeat check, this time keeping every row of `txt` (left join).
test <- txt %>% left_join(metadata, by = "Filename")
test[["duplicate"]] <- duplicated(test[["Filename"]])
View(txt)
# Session 2: rebuild both tables from scratch, then list the entries that
# appear on only one side of the Filename key.
metadata <- read.csv(file = "IndianaNewspapersMasterMetadata.csv", header = TRUE)
metadata <- metadata %>% filter(Transcript == TRUE)

# File names listed from txt/, flagged with `hasfile` and keyed by `Filename`.
txt <- list.files(path = "txt/")
txt <- as.data.frame(txt)
txt$hasfile <- TRUE
txt <- txt %>% rename("Filename" = "txt")

# Two failed commands from the original history were removed here:
#   * a `filter()` call with no data frame argument, which errors at run time
#     and therefore never modified `metadata`;
#   * a `!%in%` expression, which is not valid R and prevents the file from
#     being parsed at all.
# The working form of the second command follows.

# Listed files that have no row in the metadata.
Extrafiles <- txt %>% filter(!Filename %in% metadata$Filename)
View(txt)
View(Extrafiles)
View(metadata)

# Metadata rows whose Filename is not among the listed files.
Extrametadata <- metadata %>% filter(!Filename %in% txt$Filename)
# Session 3: recompute the mismatch tables and write them out as CSV reports.

# Metadata rows whose Transcript column equals TRUE.
metadata <- filter(
  read.csv(file = "IndianaNewspapersMasterMetadata.csv", header = TRUE),
  Transcript == TRUE
)

# File names listed from txt/, flagged with `hasfile` and keyed by `Filename`.
txt <- data.frame(txt = list.files(path = "txt/"))
txt[["hasfile"]] <- TRUE
txt <- rename(txt, Filename = "txt")

# Entries present on one side of the Filename key only.
Extrafiles <- filter(txt, !Filename %in% metadata$Filename)
Extrametadata <- filter(metadata, !Filename %in% txt$Filename)

# Flag repeated Filename values (duplicated() marks the second and later
# occurrences) and collect the flagged rows.
metadata[["duplicated"]] <- duplicated(metadata[["Filename"]])
dups <- filter(metadata, duplicated)

# Write the three reports to the working directory.
write.csv(Extrametadata, "metadata-nomatchingfiles.csv")
write.csv(Extrafiles, "Files-NoMetadata.csv")
write.csv(dups, "duplicated-metadata.csv")
# Session 4: fresh run that regenerates the mismatch reports and saves the
# filtered metadata back to disk.
library(tidyverse)

# Metadata rows whose Transcript column equals TRUE.
metadata <- read.csv(file = "IndianaNewspapersMasterMetadata.csv", header = TRUE)
metadata <- metadata %>% filter(Transcript == TRUE)

# File names listed from txt/, flagged with `hasfile` and keyed by `Filename`.
# (The original history also listed txt/ once before loading the tidyverse;
# that result was overwritten here, so the extra call was dropped.)
txt <- list.files(path = "txt/")
txt <- as.data.frame(txt)
txt$hasfile <- TRUE
txt <- txt %>% rename("Filename" = "txt")

# Entries present on one side of the Filename key only.
Extrafiles <- txt %>% filter(!Filename %in% metadata$Filename)
Extrametadata <- metadata %>% filter(!Filename %in% txt$Filename)

# Flag repeated Filename values (duplicated() marks the second and later
# occurrences) and collect the flagged rows.
metadata$duplicated <- duplicated(metadata$Filename)
dups <- metadata %>% filter(duplicated == TRUE)

# Write the three reports to the working directory.
write.csv(Extrametadata, file = "metadata-nomatchingfiles.csv")
write.csv(Extrafiles, file = "Files-NoMetadata.csv")
write.csv(dups, file = "duplicated-metadata.csv")

# Save the metadata table back to the master file.
# Removed from the original history: a write.csv() call passing `header`,
# which is not a write.csv()/write.table() argument and errors, and a write
# to a misspelled file name that only left a stray copy on disk.
# row.names = FALSE keeps the file round-trippable: with the default, every
# read.csv()/write.csv() cycle would add another row-number column.
# NOTE(review): this replaces the input file with only the Transcript == TRUE
# rows plus the helper `duplicated` column -- confirm that is intended.
write.csv(
  metadata,
  file = "IndianaNewspapersMasterMetadata.csv",
  row.names = FALSE
)
View(Extrametadata)
View(metadata)