-
Notifications
You must be signed in to change notification settings - Fork 0
/
u-boat_cleaner.rmd
124 lines (112 loc) · 4 KB
/
u-boat_cleaner.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
```{r}
# Packages attached for this notebook.
# tidyverse supplies read_csv()/write_csv(), mutate(), %>% and str_to_title();
# skimr supplies skim(). survival, survminer and fitdistrplus are attached
# here but not called in any of the chunks visible in this file.
library(tidyverse)
library(skimr)
library(survival)
library(survminer)
# NOTE(review): fitdistrplus is believed to attach MASS, whose select() would
# mask dplyr::select() when loaded after tidyverse - confirm before using
# select() in this notebook.
library(fitdistrplus)
# NOTE(review): despite its name, `root` holds the path of this notebook file,
# not a directory, and no visible chunk reads it - confirm it is still needed.
root = 'C://Users//corma//Documents//GitHub//WW2_Analysis//u-boat_cleaner.rmd'
```
```{r}
# Load the raw U-boat table (uboats.csv, resolved against the working
# directory) and print a per-column overview of what was read.
uboats <- readr::read_csv("uboats.csv")
skimr::skim(uboats)
```
```{r}
# Fate_Date may contain embedded newlines; strip them so the date parses.
uboats$Fate_Date <- gsub("\n", "", uboats$Fate_Date)

# Parse both date columns ("day month-name year"; %B matches month names in
# the current locale) and derive Lifespan, the number of days from
# commissioning to fate.
uboats <- uboats %>%
  mutate(
    Commissioned = as.Date(Commissioned, format = "%d %B %Y"),
    Fate_Date = as.Date(Fate_Date, format = "%d %B %Y"),
    Lifespan = as.numeric(difftime(Fate_Date, Commissioned, units = "days"))
  )
head(uboats$Lifespan)

# Strip newlines from the text columns only. gsub() returns character, so
# running it over every column would turn the Date columns parsed above (and
# any numeric columns) back into strings.
uboats <- uboats %>%
  mutate(across(where(is.character), ~ gsub("\n", "", .x)))
```
```{r}
# Derive the analysis columns in a single pass:
#   event         - 1 when Fate_Event is one of the listed outcomes, else 0
#   notable       - 1 when a notable commander is recorded; a missing value
#                   or the literal string "NA" counts as 0
#   Last_Flotilla - the "[1]" footnote marker removed, then title-cased
uboats <- uboats %>%
  mutate(
    event = ifelse(
      Fate_Event %in% c("Sunk", "Scuttled", "Surrendered", "Captured", "Missing"),
      1,
      0
    ),
    notable = ifelse(
      is.na(`Notable Commanders`) | `Notable Commanders` == "NA",
      0,
      1
    ),
    Last_Flotilla = str_to_title(gsub("\\[1\\]", "", Last_Flotilla))
  )
```
```{r}
# Persist the cleaned table as uboats_cleaned.csv in the working directory.
readr::write_csv(uboats, "uboats_cleaned.csv")
```
```{r}
#' Days in commission that fall inside WW1 or WW2
#'
#' Counts how many days of the interval from `commissioned_date` to
#' `fate_date` overlap the WW1 window (1914-07-28 to 1918-11-11) plus the WW2
#' window (1939-09-01 to 1945-09-02).
#'
#' The function is vectorised, so it can be used directly on whole columns,
#' e.g. `mutate(Active_Service = calc_war_service(Commissioned, Fate_Date))`.
#'
#' @param commissioned_date Date vector (or anything `as.Date()` accepts, such
#'   as "YYYY-MM-DD" strings) giving the commissioning date.
#' @param fate_date Date vector (same rules) giving the date of the boat's
#'   fate. Recycled against `commissioned_date` by the usual R rules.
#' @return Numeric vector of war-service days. `-1` marks rows that cannot be
#'   scored: either date is missing, or the boat was commissioned after the
#'   end of WW2. A fate date earlier than the commissioning date contributes
#'   0 days (never a negative count).
calc_war_service <- function(commissioned_date, fate_date) {
  ww1_st <- as.Date("1914-07-28")
  ww1_end <- as.Date("1918-11-11")
  ww2_st <- as.Date("1939-09-01")
  ww2_end <- as.Date("1945-09-02")

  # Work on whole-day counts. Going through as.Date() first means character
  # dates are treated as calendar days rather than local-time instants.
  comm_day <- as.numeric(as.Date(commissioned_date))
  fate_day <- as.numeric(as.Date(fate_date))

  # Length in days of the overlap between [comm_day, fate_day] and one war
  # window; disjoint (or inverted) intervals give 0.
  overlap_days <- function(war_st, war_end) {
    span <- pmin(fate_day, as.numeric(war_end)) -
      pmax(comm_day, as.numeric(war_st))
    pmax(span, 0)
  }

  war_service <- overlap_days(ww1_st, ww1_end) + overlap_days(ww2_st, ww2_end)

  # Sentinel for rows that cannot be scored. `TRUE | NA` is TRUE, so this
  # mask never contains NA itself.
  unusable <- is.na(comm_day) | is.na(fate_day) |
    comm_day > as.numeric(ww2_end)
  war_service[unusable] <- -1

  war_service
}
```
```{r}
# for (i in 1:nrow(uboats)) {
# uboats$Active_Service[i] = calc_war_service(uboats$Commissioned[i], uboats$Fate_Date[i])
# }
# write_csv(uboats, "uboats_cleaned.csv")
```
```{r}
# # filter for uboats where Active_Service > lifespan
# bad_span_uboats = uboats %>% filter(Active_Service > Lifespan)
# if (nrow(bad_span_uboats) > 0) {
# bad_span_uboats
# } else {
# print("There are no uboats where Active_Service > Lifespan")
# }
```