-
Notifications
You must be signed in to change notification settings - Fork 0
/
New_Hampshire_Data_LONG_2022_2023.R
91 lines (59 loc) · 5.45 KB
/
New_Hampshire_Data_LONG_2022_2023.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
######################################################################
###
### R syntax to produce New Hampshire long data file for 2022-2023
###
######################################################################
### Load data.table package
### library() is used instead of require(): require() only warns and returns
### FALSE when the package is missing, so the script would carry on and fail
### later at fread() with a misleading "could not find function" error.
library(data.table)

### Load base data file (every one of the 31 columns is read in as character)
New_Hampshire_Data_LONG_2022_2023 <- fread(
    "Data/Base_Files/New_Hampshire_Data_LONG_2022_2023.txt",
    colClasses=rep("character", 31))

### Labels used further down when the IEP_STATUS and ACHIEVEMENT_LEVEL factor
### levels are overwritten
my.iep.labels <- c(
    "Students with Disabilities (SWD/IEP)",
    "Students without Disabilities (Non-SWD/IEP)")
my.achievement.level.labels <- c(
    "Below Proficient",
    "Approaching Proficient",
    "Proficient",
    "Above Proficient")

### Rename raw columns (old name -> new name, matched by position in the two vectors)
### NOTE(review): the last two pairs swap SCHOOL_NUMBER and
### SCHOOL_NUMBER_TESTING_YEAR -- presumably deliberate; confirm with the data owner.
setnames(
    New_Hampshire_Data_LONG_2022_2023,
    c("VAID_CASE", "DISNUMBER", "ETHNIC", "SCHOOL_NUMBER", "SCHOOL_NUMBER_TESTING_YEAR"),
    c("VALID_CASE", "DISTRICT_NUMBER", "ETHNICITY", "SCHOOL_NUMBER_TESTING_YEAR", "SCHOOL_NUMBER"))
### CONTENT_AREA: relabel the raw codes as MATHEMATICS / READING / SCIENCE.
### The labels are applied by position to the sorted unique raw values.
### The relabelling is done on a temporary vector inside `j` so the column is
### updated by reference; the previous `levels(DT$CONTENT_AREA) <- ...` form
### goes through `$<-` on the data.table, which copies the whole table.
New_Hampshire_Data_LONG_2022_2023[, CONTENT_AREA := {
    content.area.factor <- as.factor(CONTENT_AREA)
    levels(content.area.factor) <- c("MATHEMATICS", "READING", "SCIENCE")
    as.character(content.area.factor)
}]

### YEAR: recode "2023" to the "2022_2023" form
New_Hampshire_Data_LONG_2022_2023[YEAR=="2023", YEAR:="2022_2023"]

### Drop the *_TESTING_YEAR columns (removed in a single by-reference call)
New_Hampshire_Data_LONG_2022_2023[,
    c("DISTRICT_NUMBER_TESTING_YEAR",
      "DISTRICT_NAME_TESTING_YEAR",
      "SCHOOL_NUMBER_TESTING_YEAR",
      "SCHOOL_NAME_TESTING_YEAR"):=NULL]
### ETHNICITY: convert to factor
New_Hampshire_Data_LONG_2022_2023[, ETHNICITY := factor(ETHNICITY)]

### ELL: keep the raw multi-category value in ELL_MULTI_CATEGORY_STATUS and
### rebuild ELL_STATUS as a two-category character variable.
### Literal "NULL" strings are treated as missing.
New_Hampshire_Data_LONG_2022_2023[ELL_STATUS == "NULL", ELL_STATUS := NA_character_]
New_Hampshire_Data_LONG_2022_2023[, ELL_MULTI_CATEGORY_STATUS := factor(ELL_STATUS)]

### Remove and re-create ELL_STATUS; every row starts out as Non-EL
New_Hampshire_Data_LONG_2022_2023[, ELL_STATUS := NULL]
New_Hampshire_Data_LONG_2022_2023[, ELL_STATUS := "Non-English Language Learners (Non-EL)"]

### Any non-missing category other than "Not ELL" is flagged as EL; rows with a
### missing category are not selected by the filter and therefore stay Non-EL
New_Hampshire_Data_LONG_2022_2023[
    ELL_MULTI_CATEGORY_STATUS != "Not ELL",
    ELL_STATUS := "English Language Learners (EL) with Composite >= 4.0"]

### Store the multi-category variable as character
New_Hampshire_Data_LONG_2022_2023[, ELL_MULTI_CATEGORY_STATUS := as.character(ELL_MULTI_CATEGORY_STATUS)]
### IEP_STATUS / FREE_REDUCED_LUNCH_STATUS -> factor, SCALE_SCORE -> numeric
New_Hampshire_Data_LONG_2022_2023[, `:=`(
    IEP_STATUS = factor(IEP_STATUS),
    FREE_REDUCED_LUNCH_STATUS = factor(FREE_REDUCED_LUNCH_STATUS),
    SCALE_SCORE = as.numeric(SCALE_SCORE))]

### Overwrite the factor levels in place. setattr() assigns the labels by
### position and does not check how many levels the data actually contain.
### NOTE(review): assumes the sorted raw codes line up with the labels in this
### order -- confirm against the data dictionary.
setattr(
    New_Hampshire_Data_LONG_2022_2023$IEP_STATUS,
    "levels",
    my.iep.labels)
setattr(
    New_Hampshire_Data_LONG_2022_2023$FREE_REDUCED_LUNCH_STATUS,
    "levels",
    c("Economically Disadvantaged (SES)", "Not Economically Disadvantaged (Non-SES)"))
### ACHIEVEMENT_LEVEL: prefix non-missing values with "Level", then replace the
### resulting factor levels (by position) with the descriptive labels and store
### the result as character
New_Hampshire_Data_LONG_2022_2023[
    !is.na(ACHIEVEMENT_LEVEL),
    ACHIEVEMENT_LEVEL := paste0("Level ", ACHIEVEMENT_LEVEL)]
New_Hampshire_Data_LONG_2022_2023[, ACHIEVEMENT_LEVEL := {
    achievement.level.factor <- factor(ACHIEVEMENT_LEVEL)
    ### setattr() swaps the labels in without validating the number of levels
    setattr(achievement.level.factor, "levels", my.achievement.level.labels)
    as.character(achievement.level.factor)
}]

### TEST_STATUS is not kept
New_Hampshire_Data_LONG_2022_2023[, TEST_STATUS := NULL]
### Enrollment status variables -> factor
### STATE_ENROLLMENT_STATUS is mapped explicitly (0 = No, 1 = Yes); the district
### and school variables are converted first and relabelled by position below
New_Hampshire_Data_LONG_2022_2023[, `:=`(
    STATE_ENROLLMENT_STATUS = factor(
        STATE_ENROLLMENT_STATUS,
        levels=0:1,
        labels=c("Enrolled State: No", "Enrolled State: Yes")),
    DISTRICT_ENROLLMENT_STATUS = factor(DISTRICT_ENROLLMENT_STATUS),
    SCHOOL_ENROLLMENT_STATUS = factor(SCHOOL_ENROLLMENT_STATUS))]

### In-place, positional relabelling of the district and school factor levels
### NOTE(review): assumes the sorted raw codes are No then Yes -- confirm.
setattr(
    New_Hampshire_Data_LONG_2022_2023$DISTRICT_ENROLLMENT_STATUS,
    "levels",
    c("Enrolled District: No", "Enrolled District: Yes"))
setattr(
    New_Hampshire_Data_LONG_2022_2023$SCHOOL_ENROLLMENT_STATUS,
    "levels",
    c("Enrolled School: No", "Enrolled School: Yes"))
### EMH_LEVEL and GENDER: literal "NULL" strings become missing values
New_Hampshire_Data_LONG_2022_2023[EMH_LEVEL == "NULL", EMH_LEVEL := NA_character_]
New_Hampshire_Data_LONG_2022_2023[GENDER == "NULL", GENDER := NA_character_]

### GENDER is stored as a factor (EMH_LEVEL stays character)
New_Hampshire_Data_LONG_2022_2023[, GENDER := factor(GENDER)]
### STUDENT_GROUP: hierarchical grouping derived from ELL_STATUS, IEP_STATUS and
### FREE_REDUCED_LUNCH_STATUS. Every row starts in the "All Other" group; the
### three filters below are mutually exclusive and move matching rows to
### the EL, SWD (not EL) or SES (not EL or SWD) group. Rows with a missing value
### in a tested variable are not selected and keep their current group.
New_Hampshire_Data_LONG_2022_2023[,
    STUDENT_GROUP := "All Other Students Group (Not EL, Not SWD, Not SES)"]

New_Hampshire_Data_LONG_2022_2023[
    ELL_STATUS == "English Language Learners (EL) with Composite >= 4.0",
    STUDENT_GROUP := "English Language Learners (EL) Group with Composite >= 4.0"]

New_Hampshire_Data_LONG_2022_2023[
    ELL_STATUS == "Non-English Language Learners (Non-EL)" &
        IEP_STATUS == "Students with Disabilities (SWD/IEP)",
    STUDENT_GROUP := "Students with Disabilities (SWD/IEP) Group (not EL)"]

New_Hampshire_Data_LONG_2022_2023[
    ELL_STATUS == "Non-English Language Learners (Non-EL)" &
        IEP_STATUS == "Students without Disabilities (Non-SWD/IEP)" &
        FREE_REDUCED_LUNCH_STATUS == "Economically Disadvantaged (SES)",
    STUDENT_GROUP := "Economically Disadvantaged (SES) Group (not EL or SWD)"]

### Drop source columns that are not carried into the long file
New_Hampshire_Data_LONG_2022_2023[,
    c("InvalidSessions",
      "SAUName",
      "AlternateStudentID",
      "Typeofdata",
      "DateGenerated") := NULL]
### Identify VALID_CASES and INVALID_CASES
### Every record starts as a VALID_CASE; SCIENCE records and grade 11 records
### are then marked as INVALID_CASE
New_Hampshire_Data_LONG_2022_2023[, VALID_CASE := "VALID_CASE"]
New_Hampshire_Data_LONG_2022_2023[
    CONTENT_AREA == "SCIENCE" | GRADE == 11,
    VALID_CASE := "INVALID_CASE"]

### Save output
save(
    New_Hampshire_Data_LONG_2022_2023,
    file="Data/New_Hampshire_Data_LONG_2022_2023.Rdata")