Skip to content

Commit

Permalink
Fixed a crash in the lookup step when the scan returns zero hits (hits = 0).
Browse files Browse the repository at this point in the history
  • Loading branch information
OpenHero committed Aug 27, 2013
1 parent c2f4bba commit 86498a0
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 57 deletions.
94 changes: 50 additions & 44 deletions gpu_blast/src/gpu_blastn_MB_and_smallNa.cu
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ Int4
global_size,
p_MBHashWrap->lookupArray);

getLastCudaError("gpu_blastn_scan_11_2mod4() execution failed.\n");
getLastCudaError("gpu_blastn_scan_11_2mod4_v3() execution failed.\n");

slogfile.KernelEnd();
slogfile.addTotalTime("scan_kernel_time", slogfile.KernelElaplsedTime(),false);
Expand All @@ -626,30 +626,34 @@ Int4
BlastOffsetPair* NCBI_RESTRICT offset_pairs, Int4 max_hits,
Int4* scan_range)
{
Int4 threadNum = 512;
Int4 blockNum = (total_hits + threadNum - 1)/threadNum;
dim3 gridDim(blockNum, 1);
dim3 blockDim(threadNum, 1);
if (total_hits > 0)
{
Int4 threadNum = 512;
Int4 blockNum = (total_hits + threadNum - 1)/threadNum;
dim3 gridDim(blockNum, 1);
dim3 blockDim(threadNum, 1);

checkCudaErrors(cudaMemset(p_scanMultiDBAuxWrap->over_hits_num, 0, sizeof(unsigned int))); //初始化为0
checkCudaErrors(cudaMemset(p_scanMultiDBAuxWrap->over_hits_num, 0, sizeof(unsigned int))); //初始化为0

slogfile.KernelStart();
slogfile.KernelStart();
//cout << total_hits << endl;

kernel_lookupInBigHashTable_v3<<<gridDim,blockDim>>>(
p_MBHashWrap->hashtable,
p_MBHashWrap->next_pos,
total_hits,
p_scanMultiDBAuxWrap->offsetPairs,
p_scanMultiDBAuxWrap->over_offset_pairs,
p_scanMultiDBAuxWrap->over_hits_num,
p_MBHashWrap->next_pos_len
);
getLastCudaError("kernel_lookupInBigHashTable() execution failed.\n");
slogfile.KernelEnd();
slogfile.addTotalTime("lookup_kernel_time", slogfile.KernelElaplsedTime(), false );
kernel_lookupInBigHashTable_v3<<<gridDim,blockDim>>>(
p_MBHashWrap->hashtable,
p_MBHashWrap->next_pos,
total_hits,
p_scanMultiDBAuxWrap->offsetPairs,
p_scanMultiDBAuxWrap->over_offset_pairs,
p_scanMultiDBAuxWrap->over_hits_num,
p_MBHashWrap->next_pos_len
);
getLastCudaError("kernel_lookupInBigHashTable_v3() execution failed.\n");
slogfile.KernelEnd();
slogfile.addTotalTime("lookup_kernel_time", slogfile.KernelElaplsedTime(), false );

checkCudaErrors(cudaMemcpy(&total_hits, p_scanMultiDBAuxWrap->over_hits_num, sizeof(unsigned int), cudaMemcpyDeviceToHost));
slogfile.addTotalNum("Kernel_lookupInBigHashTable hits", total_hits, false);
checkCudaErrors(cudaMemcpy(&total_hits, p_scanMultiDBAuxWrap->over_hits_num, sizeof(unsigned int), cudaMemcpyDeviceToHost));
slogfile.addTotalNum("Kernel_lookupInBigHashTable hits", total_hits, false);
}

return total_hits;
}
Expand Down Expand Up @@ -747,7 +751,7 @@ Int4
global_size,
p_MBHashWrap->lookupArray);

getLastCudaError("gpu_blastn_scan_11_2mod4() execution failed.\n");
getLastCudaError("gpu_blastn_scan_11_1mod4() execution failed.\n");

slogfile.KernelEnd();
slogfile.addTotalTime("scan_kernel_time", slogfile.KernelElaplsedTime(),false);
Expand Down Expand Up @@ -847,7 +851,7 @@ Int4
pv_array_bts,
global_size,
p_MBHashWrap->lookupArray);
getLastCudaError("gpu_blastn_scan_11_2mod4() execution failed.\n");
getLastCudaError("gpu_blastn_scan_Any_v3() execution failed.\n");
slogfile.KernelEnd();
slogfile.addTotalTime("scan_kernel_time", slogfile.KernelElaplsedTime(),false);

Expand All @@ -864,30 +868,32 @@ Int4
BlastOffsetPair* NCBI_RESTRICT offset_pairs, Int4 max_hits,
Int4* scan_range)
{
Int4 threadNum = 512;
Int4 blockNum = (total_hits + threadNum - 1)/threadNum;
dim3 gridDim(blockNum, 1);
dim3 blockDim(threadNum, 1);

checkCudaErrors(cudaMemset(p_scanMultiDBAuxWrap->over_hits_num, 0, sizeof(unsigned int))); //初始化为0
if (total_hits > 0)
{
Int4 threadNum = 512;
Int4 blockNum = (total_hits + threadNum - 1)/threadNum;
dim3 gridDim(blockNum, 1);
dim3 blockDim(threadNum, 1);

slogfile.KernelStart();
kernel_lookupInBigHashTable_v3<<<gridDim,blockDim>>>(
p_MBHashWrap->hashtable,
p_MBHashWrap->next_pos,
total_hits,
p_scanMultiDBAuxWrap->offsetPairs,
p_scanMultiDBAuxWrap->over_offset_pairs,
p_scanMultiDBAuxWrap->over_hits_num,
p_MBHashWrap->next_pos_len
);
checkCudaErrors(cudaMemset(p_scanMultiDBAuxWrap->over_hits_num, 0, sizeof(unsigned int))); //初始化为0

getLastCudaError("kernel_lookupInBigHashTable() execution failed.\n");
slogfile.KernelEnd();
slogfile.addTotalTime("lookup_kernel_time", slogfile.KernelElaplsedTime(), false );
checkCudaErrors(cudaMemcpy(&total_hits, p_scanMultiDBAuxWrap->over_hits_num, sizeof(unsigned int), cudaMemcpyDeviceToHost));
slogfile.addTotalNum("Kernel_lookupInBigHashTable hits", total_hits, false);
slogfile.KernelStart();
kernel_lookupInBigHashTable_v3<<<gridDim,blockDim>>>(
p_MBHashWrap->hashtable,
p_MBHashWrap->next_pos,
total_hits,
p_scanMultiDBAuxWrap->offsetPairs,
p_scanMultiDBAuxWrap->over_offset_pairs,
p_scanMultiDBAuxWrap->over_hits_num,
p_MBHashWrap->next_pos_len
);

getLastCudaError("kernel_lookupInBigHashTable_v3() execution failed.\n");
slogfile.KernelEnd();
slogfile.addTotalTime("lookup_kernel_time", slogfile.KernelElaplsedTime(), false );
checkCudaErrors(cudaMemcpy(&total_hits, p_scanMultiDBAuxWrap->over_hits_num, sizeof(unsigned int), cudaMemcpyDeviceToHost));
slogfile.addTotalNum("Kernel_lookupInBigHashTable hits", total_hits, false);
}
return total_hits;
}
Int4
Expand Down
25 changes: 12 additions & 13 deletions gpu_blast/src/gpu_blastn_pre_search_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1268,19 +1268,18 @@ Int4
}
else if (lookup_wrap->lut_type == eMBLookupTable)
{
//BlastMBLookupTable *mb_lt = (BlastMBLookupTable*)lookup_wrap->lut;
//mb_lt->lut_word_length;

//if (stat_length > 10000)
//{
// mb_lt->scansub_callback = new_lp_cb;
// mb_lt->extend_callback = new_lp_et;
//}
//else
//{
// mb_lt->scansub_callback = org_lp_cb;
// mb_lt->extend_callback = org_lp_et;
//}
BlastMBLookupTable *mb_lt = (BlastMBLookupTable*)lookup_wrap->lut;
mb_lt->lut_word_length;
if (stat_length > 1000)
{
mb_lt->scansub_callback = new_lp_cb;
mb_lt->extend_callback = new_lp_et;
}
else
{
mb_lt->scansub_callback = org_lp_cb;
mb_lt->extend_callback = org_lp_et;
}
}
}

Expand Down

0 comments on commit 86498a0

Please sign in to comment.