-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_tfrecord.py
152 lines (134 loc) · 5.88 KB
/
create_tfrecord.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import sys
import os
BASE_PATH = os.path.abspath(os.path.dirname(__file__))
sys.path.append("../face_extraction/")
from extract_face import custom_resize, vggface2_preprocessing, extract_face
import tensorflow as tf
import numpy as np
import os
import csv
import cv2 as cv
from matplotlib import pyplot as plt
import progressbar
sys.path.append("../csv_preprocessing/")
from get_ages_from_csv import read_csv
# Shape handed to custom_resize when preparing test images
# (presumably height, width, channels -- confirm against custom_resize).
TARGET_SHAPE = (224, 224, 3)

# Upper bound given to the progress bar; the bar is advanced once per image
# written to the TFRecord.
MAX_VALUE = 169396
def _int64_feature(value):
    """Build a feature of int64 values to be stored in a TFRecord dataset.

    The single integer ``value`` is wrapped in a one-element
    ``tf.train.Int64List`` and returned inside a ``tf.train.Feature``.
    """
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def _bytes_feature(value):
    """Build a feature of bytes values to be stored in a TFRecord dataset.

    The single bytes object ``value`` is wrapped in a one-element
    ``tf.train.BytesList`` and returned inside a ``tf.train.Feature``.
    """
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def convert_to(dataset_path, ages,
               filename="E:/tfrecords/training_set_cropped.record"):
    """Create a TFRecord dataset from a directory of per-identity images.

    ``dataset_path`` is scanned for sub-directories (one per identity); every
    regular file found inside them is stored as one example composed of:
    - 'path': "<identity>/<file name>" encoded as utf-8
    - 'width' and 'height' of the image
    - 'label': the age, stored as int
    - 'image_raw': the image re-encoded as JPEG and stored as bytes

    Args:
        dataset_path: root directory of the dataset. The working directory
            of the process is changed to it and is not restored.
        ages: lookup indexed as ``ages[identity]["/" + file_name]`` whose
            values are convertible with ``int()``.
        filename: where the TFRecord is written. Defaults to the location
            that was previously hard-coded.

    Entries that are not where they are expected (a file at identity level,
    a directory at image level) and images OpenCV cannot read are reported
    on stdout and skipped.
    """
    cnt = 0
    # The writer is a context manager: leaving the block closes the file, so
    # every buffered record reaches the disk even if the loop raises.
    with tf.io.TFRecordWriter(filename) as writer:
        os.chdir(dataset_path)
        all_dirs = os.listdir(".")
        print("Have to process {} identities".format(MAX_VALUE))
        with progressbar.ProgressBar(max_value=MAX_VALUE) as bar:
            for d in all_dirs:
                if not os.path.isdir(d):
                    print("Unexpected file {}".format(d))
                    continue
                id_dir = os.path.join(os.getcwd(), d)  # identity
                for f in os.listdir(id_dir):  # read all jpgs
                    jpg_dir = os.path.join(id_dir, f)
                    if not os.path.isfile(jpg_dir):
                        print("Unexpected directory{}".format(f))
                        continue
                    # Same "<identity>/<file>" key as before, built from the
                    # two components directly instead of splitting the full
                    # path on a backslash (which only works on Windows).
                    path = d + "/" + f
                    age = ages[d]["/" + f]
                    img = cv.imread(jpg_dir, cv.IMREAD_UNCHANGED)
                    if img is None:
                        # imread signals failure with None, not an exception
                        print("Unreadable image {}".format(jpg_dir))
                        continue
                    # NOTE(review): imencode treats its input as BGR, so
                    # encoding the RGB-converted array stores the channels
                    # swapped. Kept as is because readers of the record may
                    # rely on it -- confirm against the training pipeline.
                    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
                    image_shape = img.shape
                    _, im_buf_arr = cv.imencode(".jpg", img)
                    image_string = im_buf_arr.tobytes()
                    example = tf.train.Example(features=tf.train.Features(feature={
                        'path': _bytes_feature(path.encode('utf-8')),
                        'width': _int64_feature(image_shape[1]),
                        'height': _int64_feature(image_shape[0]),
                        'label': _int64_feature(int(age)),
                        'image_raw': _bytes_feature(image_string)
                    }))
                    writer.write(example.SerializeToString())
                    cnt += 1
                    bar.update(cnt)
def convert_to_test(dataset_path, ages,
                    filename="E:/tfrecords/test_set_cropped.record",
                    gt_csv_path="E:/own_test_set_gt.csv"):  # path, ages
    """Create a TFRecord dataset of test examples plus a ground-truth CSV.

    Every example written to the TFRecord is composed of:
    - 'path': "<identity>/<file name>" encoded as utf-8
    - 'image_raw': the image stored as bytes encoded in jpg format

    Before being saved in the record, each image goes through
    ``vggface2_preprocessing`` and ``custom_resize`` (to ``TARGET_SHAPE``).
    In parallel, rows of type "<image_path>,<age>" are written to a CSV file
    to be compared, in the test phase, with the CSV of predictions generated
    by the trained CNN.

    Args:
        dataset_path: root directory of the dataset, holding one
            sub-directory per identity. The working directory of the process
            is changed to it and is not restored.
        ages: lookup indexed as ``ages[identity]["/" + file_name]``. It is
            only consulted when the CSV is being written.
        filename: where the TFRecord is written. Defaults to the location
            that was previously hard-coded.
        gt_csv_path: where the ground-truth CSV is written. Pass ``None`` to
            generate the TFRecord without saving the ages to a CSV.

    ATTENTION: if images aren't previously cropped, add face extraction by
    uncommenting the ``extract_face`` line marked below.

    Entries that are not where they are expected and images OpenCV cannot
    read are reported on stdout and skipped (no CSV row is written for them,
    so the CSV and the TFRecord list the same images).
    """
    import contextlib  # local: only needed to manage the optional CSV file

    cnt = 0
    # ExitStack closes, in reverse order, everything entered below -- most
    # importantly the TFRecord writer, which was previously never closed.
    with contextlib.ExitStack() as stack:
        writer = stack.enter_context(tf.io.TFRecordWriter(filename))
        os.chdir(dataset_path)
        all_dirs = os.listdir(".")
        print("Have to process {} identities".format(MAX_VALUE))
        bar = stack.enter_context(progressbar.ProgressBar(max_value=MAX_VALUE))

        csv_writer = None
        if gt_csv_path is not None:
            csv_file = stack.enter_context(
                open(gt_csv_path, mode='w', newline="", encoding="utf-8"))
            csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)

        for d in all_dirs:
            if not os.path.isdir(d):
                print("Unexpected file {}".format(d))
                continue
            id_dir = os.path.join(os.getcwd(), d)  # identity
            for f in os.listdir(id_dir):  # read all jpgs
                jpg_dir = os.path.join(id_dir, f)
                if not os.path.isfile(jpg_dir):
                    print("Unexpected directory{}".format(f))
                    continue
                path = d + "/" + f
                age = ages[d]["/" + f] if csv_writer is not None else None

                img = cv.imread(jpg_dir, cv.IMREAD_UNCHANGED)
                if img is None:
                    # imread signals failure with None, not an exception
                    print("Unreadable image {}".format(jpg_dir))
                    continue

                # write path-age to gt csv
                if csv_writer is not None:
                    csv_writer.writerow([path, age])

                # write path-image to tfrecord
                img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
                # preprocessing
                # img = extract_face(img)  # used only if images aren't already cropped for using MTCNN as detector
                img = vggface2_preprocessing(img)
                img = custom_resize(img, img.shape[0], img.shape[1], TARGET_SHAPE)
                img = np.asarray(img)
                _, im_buf_arr = cv.imencode(".jpg", img)
                image_string = im_buf_arr.tobytes()
                example = tf.train.Example(features=tf.train.Features(feature={
                    'path': _bytes_feature(path.encode('utf-8')),
                    'image_raw': _bytes_feature(image_string)
                }))
                writer.write(example.SerializeToString())
                cnt += 1
                bar.update(cnt)
# modify to indicate where the dataset to be saved into the TFRecord is stored
PATH_TO_CROPPED_TS = "E:/training_set_cropped"
# modify to indicate where the CSV file with age labels is stored
PATH_TO_CSV = "../train.age.detected.csv"
# indicates the kind of tfrecord to be created:
# False -> convert_to, True -> convert_to_test
test = False

print("Recovering ages...")
ages = read_csv(PATH_TO_CSV, test=test)
print("Recovering ages...DONE")

print("Creating TFRecord...")
if test:
    convert_to_test(PATH_TO_CROPPED_TS, ages)
else:
    convert_to(PATH_TO_CROPPED_TS, ages)
print("Creating TFRecord...DONE")