-
Notifications
You must be signed in to change notification settings - Fork 1
/
process_chatgpt_responses.py
91 lines (75 loc) · 2.78 KB
/
process_chatgpt_responses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os

import pandas as pd
chatgpt_dir = "/"  ## ENTER PATH TO WHERE YOU SAVED THE CHATGPT RESPONSES HERE
generator_list = [
    "ctrl", "fair_wmt19", "fair_wmt20", "gpt1", "gpt2_small", "gpt2_medium", "gpt2_large",
    "gpt2_pytorch", "gpt2_xl", "gpt3", "grover_base", "grover_large", "grover_mega",
    "pplm_distil", "pplm_gpt2", "transfo_xl", "xlm", "xlnet_base", "xlnet_large",
]

# Substrings that count as a mention of "AI" in a lower-cased response.
# The trailing character keeps plain words that merely contain "ai"
# (e.g. "said", "main") from matching.
_AI_MARKERS = ("ai ", "ai-", "ai.", "ai,")

# Fallback phrases, tried in this order, for responses the keyword rules
# could not settle (both "human" and "AI" mentioned, or neither).
_FALLBACK_PHRASES = (
    ("appears to be written by a human", "human"),
    ("appears to have been written by a human", "human"),
    ("appears to be generated by an ai", "ai"),
    ("appears to have been generated by an ai", "ai"),
    ("difficult to determine", "unclear"),
)


def classify_response(response):
    """Label one ChatGPT response with the rule-based system.

    The comparison is case-insensitive. Rules, in order:

    1. Any response containing "unclear" is labelled "unclear".
    2. A response mentioning exactly one of "human" / an AI marker gets
       that label.
    3. Otherwise (both or neither mentioned) the response is matched
       against the exact string "ai" and then the fallback phrases.

    Args:
        response: The response text (a ``str``).

    Returns:
        ``"human"``, ``"ai"`` or ``"unclear"``, or ``None`` when no rule
        applies and the response needs manual inspection.
    """
    text = response.lower()
    if "unclear" in text:
        return "unclear"

    mentions_human = "human" in text
    mentions_ai = any(marker in text for marker in _AI_MARKERS)
    if mentions_human != mentions_ai:
        return "human" if mentions_human else "ai"

    # Ambiguous: both keywords present, or neither. Fall back to the
    # hand-written rules.
    if text == "ai":
        return "ai"
    for phrase, label in _FALLBACK_PHRASES:
        if phrase in text:
            return label
    return None


def summarize_responses(responses):
    """Count the labels assigned to an iterable of raw responses.

    Non-string entries (such as the NaN pandas produces for an empty
    cell) and the literal "null" are skipped and do not contribute to
    any count.

    Args:
        responses: Iterable of raw cell values from the response column.

    Returns:
        A ``(counts, unresolved)`` tuple. ``counts`` maps ``"human"``,
        ``"ai"`` and ``"unclear"`` to the number of responses given that
        label; ``unresolved`` lists the lower-cased responses that no
        rule could label.
    """
    counts = {"human": 0, "ai": 0, "unclear": 0}
    unresolved = []
    for raw in responses:
        # Explicit type check instead of a bare ``except`` so that real
        # errors (e.g. a missing column) are not silently swallowed.
        if not isinstance(raw, str):
            continue
        text = raw.lower()
        if text == "null":
            continue
        label = classify_response(text)
        if label is None:
            unresolved.append(text)
        else:
            counts[label] += 1
    return counts, unresolved


def main():
    """Print the human / AI / unclear label percentages for every generator.

    Reads ``chatgpt_responses_<generator>_real_all.csv`` from
    ``chatgpt_dir`` for each entry of ``generator_list`` and classifies
    its ``chatgpt_response`` column. Percentages are relative to the
    number of responses that received a label.
    """
    for generator in generator_list:
        # os.path.join tolerates a directory given with or without a
        # trailing separator.
        file_path = os.path.join(
            chatgpt_dir, 'chatgpt_responses_' + generator + '_real_all.csv'
        )
        test_df = pd.read_csv(file_path)
        counts, updated_manual = summarize_responses(test_df['chatgpt_response'])
        total = sum(counts.values())

        ## printing results
        print("-"*20)
        print("Results for generator: " + generator)
        if total == 0:
            # Nothing could be labelled; avoid dividing by zero.
            print("No responses could be labeled; percentages are undefined.")
        else:
            print("Percentage of articles labeled as human: " + str(counts["human"]/total*100))
            print("Percentage of articles labeled as AI: " + str(counts["ai"]/total*100))
            print("Percentage of articles labeled as unclear: " + str(counts["unclear"]/total*100))
        ## Uncomment to check which responses could not be given a label by our rule-based system
        # print("To check manual list: ")
        # print(updated_manual)


if __name__ == "__main__":
    main()