-
Notifications
You must be signed in to change notification settings - Fork 4
/
web.py
153 lines (120 loc) · 5.08 KB
/
web.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import datetime
import json
import os
from statistics import mean
import tests
import importlib
import jinja2
# Home directory of the user running the script.
HOME: str = os.path.expanduser("~")

# Names of the test classes to run, in execution order. Each name is looked
# up as an attribute of the `tests` module.
test_list: list = [
    "ZeroShotClassificationTest",
    "CountingTest",
    "DocumentOCRTest",
    "HandwritingOCRTest",
    "ExtractionOCRTest",
    "MathOCRTest",
    "ObjectDetectionTest",
    # "SetOfMarkTest",
    "GraphUnderstandingTest",
    "ColorRecognitionTest",
    "AnnotationQATest",
    "MeasurementTest",
    "EasyCaptchaTest",
    "PersuasiveAttackEasyCaptchaTest",
]

# IDs of the tests, appended in the same order as `test_list` is processed.
test_ids: list = []

# Outcome of the current run for every test, keyed by test ID.
current_results: dict = {}
# Run every configured test and record the outcome of this run per test ID.
tests_module = importlib.import_module("tests")

# Per-token prices used to estimate what a single test run cost.
input_token_price = 0.01 / 1000
output_token_price = 0.03 / 1000

for test_name in test_list:
    test_info = getattr(tests_module, test_name)
    print(f"Running {test_info.name} test...")

    test_id = test_info.id
    test_ids.append(test_id)

    score, response_time, result, tokens = test_info.test()

    # tokens[0] is multiplied by the input-token price and tokens[1] by the
    # output-token price.
    price = (input_token_price * tokens[0]) + (output_token_price * tokens[1])

    # Boolean outcomes become 1 / 0; any other score value is kept unchanged.
    if score is True:
        score = 1
    elif score is False:
        score = 0

    current_results[test_id] = {
        "score": score,
        "success": score == 1,
        "price": price,
        "pass_fail": "Pass" if score == 1 else "Fail",
        "response_time": response_time,
        "result": result,
    }

print("current_results", current_results)
# Persist the results of this run, or fall back to the ones already saved
# for today. Result files are named after the date in 2023-01-01 format.

# exist_ok avoids the check-then-create race of testing for the directory first.
os.makedirs("results", exist_ok=True)

today = datetime.datetime.now().strftime("%Y-%m-%d")
results_path = f"results/{today}.json"

if current_results:
    # This run produced results: store them as today's file.
    with open(results_path, "w") as file:
        json.dump(current_results, file, indent=4)
elif os.path.exists(results_path):
    # Nothing was run: reuse today's saved file. It must NOT be written
    # first, otherwise the saved results would be replaced by an empty
    # object before they could be read back.
    with open(results_path) as file:
        current_results = json.load(file)
    test_ids = list(current_results.keys())
else:
    # Reached only when there is neither a fresh run nor a saved file.
    print("No current results and no file found")
# Collect the static description of every test, keyed by test ID.
tests_module = importlib.import_module("tests")
results = {}

for index, test_id in enumerate(test_ids):
    # The class for test_ids[index] is taken from test_list[index], so both
    # lists are expected to be in the same order.
    test_info = getattr(tests_module, test_list[index])
    results[test_id] = {
        "name": test_info.name,
        "question": test_info.question,
        "prompt": test_info.prompt,
        "image": test_info.image,
        "method": test_info.method,
        # Tests that do not declare an author are credited to Roboflow.
        "author_name": getattr(test_info, "author_name", "Roboflow"),
        "author_url": getattr(test_info, "author_url", "https://roboflow.com"),
    }
# Start every test with an empty history.
for i in test_ids:
    results[i]["history"] = {
        "scores": [],
        "response_times": [],
        "success": [],
    }

# Replay every saved result file into the per-test history.
# os.listdir() returns names in arbitrary order; the files are named
# YYYY-MM-DD.json, so sorting the names yields chronological order. The
# trailing-window slices taken from these lists later depend on that.
for file in sorted(os.listdir("results")):
    path = f"results/{file}"
    # Skip sub-directories and anything that is not a JSON result file, so a
    # stray file in the directory does not abort the run in json.load().
    if os.path.isdir(path) or not file.endswith(".json"):
        continue

    with open(path) as f:
        data = json.load(f)

    for key, value in data.items():
        print(key, value)
        # Ignore saved entries for tests that are not part of this report.
        if key not in results:
            continue

        history = results[key]["history"]
        history["scores"].append(value["score"])
        history["response_times"].append(value["response_time"])
        history["days"] = len(history["scores"])

        # Test succeeded or not. Entries without a "success" field fall back
        # to "score equals 1".
        passed = value["success"] if "success" in value else value["score"] == 1
        history["success"].append(passed)
# Derive the overall averages and the trailing seven-entry statistics for
# every test from its history.
for test_id in test_ids:
    entry = results[test_id]
    history = entry["history"]

    average_score = mean(history["scores"])
    entry["average"] = {
        "score": average_score,
        "response_time": mean(history["response_times"]),
        # NOTE(review): computed from the average score, not from the
        # "success" history -- confirm this is intended.
        "success_percent": round(average_score * 100, 2),
    }

    # The last (up to) seven recorded entries of each series.
    recent_scores = history["scores"][-7:]
    recent_success = history["success"][-7:]
    score_average = mean(recent_scores)
    success_average = mean(recent_success)
    entry["seven_day"] = {
        "score": recent_scores,
        "score_average": score_average,
        "score_percent": round(score_average * 100, 0),
        "success": recent_success,
        "success_average": success_average,
        "success_percent": round(success_average * 100, 0),
    }
# Summarise the average response time across all tests.
response_times = [results[i]["average"]["response_time"] for i in test_ids]
print("response_times", response_times, test_ids)

info = {
    "average_time": round(mean(response_times), 2),
    # NOTE(review): this is the number of tests (one response time per test),
    # not a number of days -- confirm what the template expects here.
    "day_count": len(response_times),
}

# Dump both result structures for inspection.
separator = "- - - - -"
print(separator)
print(json.dumps(results, indent=4))
print(separator)
print(json.dumps(current_results, indent=4))
# Render the report page from the Jinja2 template.
# The template is read inside a context manager so its file handle is closed
# instead of being left open until garbage collection.
with open("template.html") as template_file:
    template = jinja2.Template(template_file.read())

# Human-readable date shown on the page, e.g. "January 01, 2023".
today = datetime.datetime.now().strftime("%B %d, %Y")

# render template
rendered = template.render(
    results=results,
    date=today,
    current_results=current_results,
    info=info,
)

# save rendered template to index.html
with open("index.html", "w") as file:
    file.write(rendered)