Skip to content

Commit

Permalink
Merge pull request #4430 from vgteam/output-rna-haps
Browse files Browse the repository at this point in the history
Add an option to output a haplotype GBWT from vg rna with node IDs that match the spliced graph
  • Loading branch information
jeizenga authored Oct 30, 2024
2 parents 26db084 + 488eed3 commit 4ee01a5
Showing 1 changed file with 24 additions and 4 deletions.
28 changes: 24 additions & 4 deletions src/subcommand/rna_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ void help_rna(char** argv) {
<< "\nOutput options:" << endl

<< " -b, --write-gbwt FILE write pantranscriptome transcript paths as GBWT index file" << endl
<< " -v, --write-hap-gbwt FILE write input haplotypes as a GBWT with node IDs matching the output graph" << endl
<< " -f, --write-fasta FILE write pantranscriptome transcript sequences as fasta file" << endl
<< " -i, --write-info FILE write pantranscriptome transcript info table as tsv file" << endl
<< " -q, --out-exclude-ref exclude reference transcripts from pantranscriptome output" << endl
Expand Down Expand Up @@ -88,6 +89,7 @@ int32_t main_rna(int32_t argc, char** argv) {
bool gbwt_add_bidirectional = false;
string fasta_out_filename = "";
string info_out_filename = "";
string hap_gbwt_out_filename = "";
int32_t num_threads = 1;
bool show_progress = false;

Expand All @@ -110,8 +112,9 @@ int32_t main_rna(int32_t argc, char** argv) {
{"remove-non-gene", no_argument, 0, 'd'},
{"do-not-sort", no_argument, 0, 'o'},
{"add-ref-paths", no_argument, 0, 'r'},
{"add-hap-paths", no_argument, 0, 'a'},
{"add-hap-paths", no_argument, 0, 'a'},
{"write-gbwt", required_argument, 0, 'b'},
{"write-hap-gbwt", required_argument, 0, 'v'},
{"write-fasta", required_argument, 0, 'f'},
{"write-info", required_argument, 0, 'i'},
{"out-ref-paths", no_argument, 0, 'u'},
Expand All @@ -124,7 +127,7 @@ int32_t main_rna(int32_t argc, char** argv) {
};

int32_t option_index = 0;
c = getopt_long(argc, argv, "n:m:y:s:l:zjec:k:dorab:f:i:uqgt:ph?", long_options, &option_index);
c = getopt_long(argc, argv, "n:m:y:s:l:zjec:k:dorab:v:f:i:uqgt:ph?", long_options, &option_index);

/* Detect the end of the options. */
if (c == -1)
Expand Down Expand Up @@ -188,10 +191,14 @@ int32_t main_rna(int32_t argc, char** argv) {
case 'a':
add_projected_transcript_paths = true;
break;

case 'b':
gbwt_out_filename = optarg;
break;

case 'v':
hap_gbwt_out_filename = optarg;
break;

case 'f':
fasta_out_filename = optarg;
Expand Down Expand Up @@ -486,6 +493,19 @@ int32_t main_rna(int32_t argc, char** argv) {
gbwt_builder.finish();
save_gbwt(gbwt_builder.index, gbwt_out_filename);
}

// Write a haplotype GBWT with node IDs updated to match the spliced graph.
if (!hap_gbwt_out_filename.empty()) {
if (!haplotype_index.get()) {
cerr << "[vg rna] Warning: not saving updated haplotypes to " << hap_gbwt_out_filename << " because haplotypes were not provided as input" << endl;
}
else {
ofstream hap_gbwt_ostream;
hap_gbwt_ostream.open(hap_gbwt_out_filename);

haplotype_index->serialize(hap_gbwt_ostream);
}
}

// Write transcript sequences in transcriptome as fasta file.
if (!fasta_out_filename.empty()) {
Expand All @@ -496,7 +516,7 @@ int32_t main_rna(int32_t argc, char** argv) {
transcriptome.write_transcript_sequences(&fasta_ostream, exclude_reference_transcripts);

fasta_ostream.close();
}
}

// Write transcript info in transcriptome as tsv file.
if (!info_out_filename.empty()) {
Expand Down

1 comment on commit 4ee01a5

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17392 seconds

Please sign in to comment.