-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.ini
executable file
·124 lines (89 loc) · 2.9 KB
/
config.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
[preprocess]
# in_tensor_dir has one candidate path per environment; keep exactly one
# of them uncommented.
# raptor:
# in_tensor_dir : ./data/train-ready/pred-full/
# comet:
# in_tensor_dir : /scratch/mtari008/37154933/pred-full-deepnovo/
# expanse (currently active):
in_tensor_dir : /lclhome/mtari008/job_2436627/nist_massiv_80k_ch_graymass/
############ INPUT PARAMETERS ############
[input]
# File paths
msp_files : /lclhome/mtari008/DeepSNAP/data/msp
mgf_dir : /lclhome/mtari008/data/spectra/labeled
prep_dir : /lclhome/mtari008/data/deepatles/train_ready/nist-masive-deepnovo-5k-ch1-3-len7-30-200-mod-mass
# Alternative prep_dir: ProteomeTools data that most models are trained on.
# prep_dir : /lclhome/mtari008/data/deepatles/train_ready/pt-5k-ch1-3-len7-20-200-mod
val_dir : /lclhome/mtari008/data/deepatles/train_ready/nist-masive-deepnovo-5k-ch1-3-len7-30-200-mod-mass
# Size of the array used to store a spectrum.
spec_size : 50000
# Maximum charge value used when reading spectrum files.
charge : 5
# Whether to use modifications.
use_mods : True
# Maximum number of modifications per peptide.
num_mods : 5
# Number of species contained in the training dataset.
num_species : 9
# NOTE(review): master_port and rank look like distributed-run settings;
# confirm their meaning against the code that reads them.
master_port : 12346
rank : 1
############ DATABASE SEARCH PARAMETERS ############
# Comments in this section are kept on their own lines: a parser without
# inline-comment support would read trailing "# ..." text as part of the value.
[search]
mgf_dir : /lclhome/mtari008/data/spectra/unlabeled/uti-pxd004713/
prep_path : /lclhome/mtari008/data/deepatles/prep_spectra/uti-pxd004713
pep_dir : /lclhome/mtari008/data/peps/refup-single
out_pin_dir : /lclhome/mtari008/DeepAtles/percolator-refup-no-filt-uti-pxd004713
index_path : /lclhome/mtari008/DeepAtles/index
model_name : 512-embed-2-lstm-SnapLoss2D-80k-nist-massive-no-mc-semi-r2r2r-22.pt
# Batch sizes for the forward pass through the network.
spec_batch_size : 16384
pep_batch_size : 16384
# Batch size for database search.
search_spec_batch_size : 256
# Precursor tolerance to use during database search (Da or ppm; the unit
# is selected by precursor_tolerance_type).
precursor_tolerance : 7
# Unit of precursor_tolerance: either ppm or Da.
precursor_tolerance_type : ppm
# Number of top-scoring PSMs to keep.
keep_psms : 5
# Number of modified peptides to be generated to search against.
# Different from the num_mods in the [input] section.
num_mods : 1
# Charge to be used during search.
charge : 4
############ FILTERING PARAMETERS ############
[filter]
# Filter switches; all three are turned off in this configuration.
length_filter : False
# NOTE(review): presumably the allowed length deviation below/above the
# target when length_filter is on — confirm against the reader.
len_tol_neg : 0
len_tol_pos : 0
missed_cleavages_filter : False
modification_filter : False
############### OUT OF CORE PARAMETERS ##############
[ooc]
# NOTE(review): the unit of chunk_size is not stated in this file —
# confirm against the code that reads it.
chunk_size : 10000000
############ MACHINE LEARNING PARAMETERS ############
[ml]
batch_size : 1024
test_size : 0.2
max_spec_len : 200
# Peptide length bounds.
min_pep_len : 7
max_pep_len : 30
# Slightly larger than max_pep_len to leave room for modifications.
pep_seq_len : 36
max_clvs : 2
# Model size settings.
embedding_dim : 1024
encoder_layers : 4
num_heads : 16
train_count : 0
# Loss term weights.
ce_weight_clv : 1
ce_weight_mod : 1
mse_weight : 3
# Optimisation settings.
dropout : 0.3
lr : 0.0001
weight_decay : 0.0001
epochs : 500
margin : 0.2
read_split_listing : False
############ DEFAULT VALUES ############
# DO NOT CHANGE
# NOTE(review): Python's configparser only treats an upper-case [DEFAULT]
# section as a fallback for other sections. If that parser reads this file,
# this lower-case [default] is an ordinary section — confirm how the
# application consumes it.
[default]
msp_file : /data/human_consensus_final_true_lib.msp
mgf_files : /data/
spec_size : 8000
charge : 2
use_mods : False
batch_size : 1024