-
Notifications
You must be signed in to change notification settings - Fork 1
/
redcap_to_sqlite.py
104 lines (79 loc) · 2.73 KB
/
redcap_to_sqlite.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python
# Imports
from peewee import *
import requests
import datetime
import time
# Timer API start
timerAPIStart = time.time()
# Set the REDCap API URL and Token before executing the script
redcap_api_url = 'https://redcapdemo.vanderbilt.edu/api/'
redcap_api_token = '[YOUR_REDCAP_API_TOKEN]'
# Db connection
db = SqliteDatabase("redcap.db")
class BaseModel(Model):
class Meta:
database = db
class Patient(BaseModel):
redcap_patient_id = IntegerField(unique=True)
ssn = CharField()
class Psa(BaseModel):
patient = ForeignKeyField(Patient, backref='psa_values', on_delete='CASCADE')
psa_date = DateField()
psa_value = FloatField()
db.connect()
# Truncate tables
truncateQuery1 = Patient.delete()
truncateQuery1.execute()
truncateQuery2 = Psa.delete()
truncateQuery2.execute()
# REDCap API request
data = {
'token': redcap_api_token,
'content': 'record',
'format': 'json',
'type': 'flat',
'csvDelimiter': '',
'rawOrLabel': 'raw',
'rawOrLabelHeaders': 'raw',
'exportCheckboxLabel': 'false',
'exportSurveyFields': 'false',
'exportDataAccessGroups': 'false',
'returnFormat': 'json'
}
result = requests.post(redcap_api_url, data=data)
# Timer API end
timerAPIEnd = time.time()
elapsedAPITime = timerAPIEnd - timerAPIStart
print('API Execution time:', elapsedAPITime, 'seconds')
# Timer SQLite start
timerSQLiteStart = time.time()
# JSON parsing
result_list = result.json()
patientsList = []
psaList = []
for record in result_list:
redcap_patient_id = int(record["record_id"])
if len(record["redcap_repeat_instrument"]) == 0:
# The record belongs to the parent table (Patient)
ssn = record["ssn"]
patientsList.append({'id': redcap_patient_id, 'redcap_patient_id': redcap_patient_id, 'ssn': ssn})
else:
# The record belongs to one of the child tables (Psa, Pet, Treatment, Event)
# Create a new record in the corresponding child table
if record["redcap_repeat_instrument"] == "psa":
psa_date = record["psa_date"]
psa_value = record["psa_value"]
psaList.append({'patient_id': redcap_patient_id, 'psa_date': datetime.datetime.strptime(psa_date, '%Y-%m-%d'), 'psa_value': float(psa_value)})
else:
print("Warning: unknown repeating instrument '" + "': the corresponding record was not saved")
with db.atomic():
for batch in chunked(patientsList, 10000):
Patient.insert_many(batch).execute()
for batch in chunked(psaList, 10000):
Psa.insert_many(batch).execute()
# Timer SQLite end
timerSQLiteEnd = time.time()
# Get the execution time
elapsedSQLiteTime = timerSQLiteEnd - timerSQLiteStart
print('SQLite Execution time:', elapsedSQLiteTime, 'seconds')