-
Notifications
You must be signed in to change notification settings - Fork 98
/
test_submit_multithreaded.py
85 lines (68 loc) · 2.33 KB
/
test_submit_multithreaded.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import cv2
import numpy as np
import pandas as pd
import threading
import queue
import tensorflow as tf
from tqdm import tqdm
import params
# Inference settings and model, all read from the project's `params` module.
input_size = params.input_size
batch_size = params.batch_size
orig_width = params.orig_width
orig_height = params.orig_height
threshold = params.threshold
model = params.model_factory()

# Ids of the images to predict: the 'img' column of the sample submission
# with everything from the first '.' onwards removed.
df_test = pd.read_csv('input/sample_submission.csv')
ids_test = df_test['img'].map(lambda s: s.split('.')[0])

# File names written to the 'img' column of the final submission.
# Built with a comprehension and a non-builtin loop name so the builtin
# `id` is not shadowed (and left rebound) at module scope.
names = ['{}.jpg'.format(img_id) for img_id in ids_test]
# https://www.kaggle.com/stainsby/fast-tested-rle
def run_length_encode(mask):
    """Encode a binary mask as a run-length string.

    The mask is flattened in row-major order and pixels are numbered from 1.
    The result is a space-separated sequence of ``start length`` pairs, one
    pair per run of mask pixels.

    Args:
        mask: numpy array, 1 (or True) - mask, 0 (or False) - background.

    Returns:
        The run-length encoding as a string; empty when the mask contains
        no foreground pixel.
    """
    pixels = np.asarray(mask).flatten()
    # Pad with one background pixel on each side so that a run touching the
    # first or the last pixel still yields both a start and an end
    # transition; without the padding such masks were encoded incorrectly.
    padded = np.concatenate(([0], pixels, [0]))
    # Index i of the comparison is the boundary between padded[i] and
    # padded[i + 1]; padded[i + 1] is original pixel i, i.e. 1-based pixel
    # number i + 1.
    runs = np.where(padded[1:] != padded[:-1])[0] + 1
    # Even positions hold run starts, odd positions hold the position just
    # past the run end: turn the latter into run lengths.
    runs[1::2] -= runs[:-1:2]
    return ' '.join(str(r) for r in runs)
# Run-length encoded masks, appended by `predictor` in prediction order and
# written to the 'rle_mask' column of the submission.
rles = []
# Load the weights to use for inference into the model.
model.load_weights(filepath='weights/best_weights.hdf5')
# Keep a handle on the default TensorFlow graph; `predictor` re-enters it
# with `graph.as_default()` before calling the model from its own thread.
# NOTE(review): presumably needed because the default graph is per-thread
# in TF1-style Keras - confirm against the TensorFlow version in use.
graph = tf.get_default_graph()
# Maximum number of batches the loader/predictor queue may hold at once.
q_size = 10
def data_loader(q, ):
    """Read the test images batch by batch and put each batch on ``q``.

    For every slice of ``batch_size`` ids in ``ids_test`` the images are
    read from ``input/test/<id>.jpg``, resized to
    ``(input_size, input_size)``, stacked into a float32 array, divided by
    255 and put on the queue.

    Args:
        q: queue receiving one array per batch, in the order of
            ``ids_test``.

    Raises:
        IOError: if an image cannot be read.
    """
    n_samples = len(ids_test)
    for start in range(0, n_samples, batch_size):
        end = min(start + batch_size, n_samples)
        x_batch = []
        for img_id in ids_test[start:end].values:
            path = 'input/test/{}.jpg'.format(img_id)
            img = cv2.imread(path)
            if img is None:
                # cv2.imread reports an unreadable file by returning None;
                # fail here with the offending path instead of letting
                # cv2.resize raise an unrelated-looking error.
                raise IOError('Could not read image: {}'.format(path))
            x_batch.append(cv2.resize(img, (input_size, input_size)))
        q.put(np.array(x_batch, np.float32) / 255)
def predictor(q, ):
    """Take image batches from ``q``, predict them and collect RLE masks.

    One batch is taken from the queue per ``batch_size`` step over
    ``ids_test``. Each prediction is resized to
    ``(orig_width, orig_height)``, compared against ``threshold`` and its
    run-length encoding is appended to the module-level ``rles`` list.

    Args:
        q: queue from which the input batches are taken.
    """
    batch_starts = range(0, len(ids_test), batch_size)
    for _ in tqdm(batch_starts):
        batch = q.get()
        # Run the model inside the graph captured at module level.
        with graph.as_default():
            batch_preds = model.predict_on_batch(batch)
        # Drop axis 3 of the predictions (it must have size 1).
        batch_preds = np.squeeze(batch_preds, axis=3)
        for pred in batch_preds:
            resized = cv2.resize(pred, (orig_width, orig_height))
            rles.append(run_length_encode(resized > threshold))
# Bounded queue handing batches from the loader thread to the predictor.
q = queue.Queue(maxsize=q_size)
t1 = threading.Thread(name='DataLoader', target=data_loader, args=(q,))
t2 = threading.Thread(name='Predictor', target=predictor, args=(q,))

print('Predicting on {} samples with batch_size = {}...'.format(len(ids_test), batch_size))
for worker in (t1, t2):
    worker.start()
# Block until loading and prediction have both completed.
for worker in (t1, t2):
    worker.join()

print("Generating submission file...")
df = pd.DataFrame({'img': names, 'rle_mask': rles})
df.to_csv('submit/submission.csv.gz', index=False, compression='gzip')