{"items":[{"id":1,"description":"DNA sequences in the FASTA format, indexed FASTA (produced with samtools index), chromosome sizes file and FASTA dict (produced with samtools dict)","command_templates":["cp {{values.files[\"fasta\"]}} {{values.output_folder}}/{{values.genome_digest}}.fa.gz","if (file {{values.output_folder}}/{{values.genome_digest}}.fa.gz | grep -q compressed ) ; then gzip -df {{values.output_folder}}/{{values.genome_digest}}.fa.gz ; else mv {{values.output_folder}}/{{values.genome_digest}}.fa.gz {{values.output_folder}}/{{values.genome_digest}}.fa ; fi","samtools faidx {{values.output_folder}}/{{values.genome_digest}}.fa","cut -f 1,2 {{values.output_folder}}/{{values.genome_digest}}.fa.fai > {{values.output_folder}}/{{values.genome_digest}}.chrom.sizes"],"input_files":{"fasta":{"description":"DNA sequences in the FASTA format"}},"docker_image":"docker.io/databio/refgenie","default_asset":"samtools-{{values.custom_properties.version}}","name":"fasta","version":"0.0.1","output_asset_class_id":1,"input_params":null,"input_assets":null,"custom_properties":{"version":"samtools --version-only | awk -F+ '{print $1}'"}},{"id":2,"description":"Abundant sequences in the FASTA format -- PhiX spike-in, Poly(A), Poly(C) and adapter sequences","command_templates":["cp {{values.files[\"phix\"]}} {{values.output_folder}}/{{values.genome_digest}}_phix.fa","cp {{values.files[\"polyA\"]}} {{values.output_folder}}/{{values.genome_digest}}_polyA.fa","cp {{values.files[\"polyC\"]}} {{values.output_folder}}/{{values.genome_digest}}_polyC.fa","cp {{values.files[\"adapter_contam\"]}} {{values.output_folder}}/{{values.genome_digest}}_adapter_contam.fa"],"input_files":{"phix":{"description":"PhiX sequences in FASTA format"},"polyA":{"description":"Poly(A) sequences in FASTA format"},"polyC":{"description":"Poly(C) sequences in FASTA format"},"adapter_contam":{"description":"Adapter sequences in FASTA 
format"}},"docker_image":"docker.io/databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"abundant_sequences","version":"0.0.1","output_asset_class_id":2,"input_params":null,"input_assets":null,"custom_properties":{}},{"id":3,"description":"Genomic feature annotations in BED12 format","command_templates":["cp {{values.files[\"bedgz\"]}} {{values.output_folder}}/{{values.genome_digest}}.bed.gz"],"input_files":{"bedgz":{"description":"Genomic feature annotations in gzipped Browser Extensible Data (BED12) format"}},"docker_image":"databio/refgenie","default_asset":"default","name":"bed12","version":"0.0.1","output_asset_class_id":3,"input_params":{},"input_assets":{},"custom_properties":{}},{"id":4,"description":"Genome index for Bisulfite-Seq applications, produced by bismark_genome_preparation using bowtie1","command_templates":["ln -sf {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} {{values.output_folder}}","bismark_genome_preparation {{values.output_folder}} --parallel {{values.params[\"parallel\"]}}"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"bismark_bt1_index","version":"0.0.1","output_asset_class_id":4,"input_params":{"parallel":{"default":"4","description":"Number of threads to use for parallel computing"}},"input_assets":{"fasta":{"asset_class":"fasta","default":"fasta","description":"fasta asset for genome"}},"custom_properties":{"version":"bismark_genome_preparation --version | awk 'NR==4{print $5}' | cut -c2-"}},{"id":5,"description":"Genome index for Bisulfite-Seq applications, produced by bismark_genome_preparation using bowtie2","command_templates":["ln -sf {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} {{values.output_folder}}","bismark_genome_preparation --bowtie2 {{values.output_folder}} --parallel 
{{values.params[\"parallel\"]}}"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"bismark_bt2_index","version":"0.0.1","output_asset_class_id":5,"input_params":{"parallel":{"default":"4","description":"Number of threads to use for parallel computing"}},"input_assets":{"fasta":{"asset_class":"fasta","default":"fasta","description":"fasta asset for genome"}},"custom_properties":{"version":"bismark_genome_preparation --version | awk 'NR==4{print $5}' | cut -c2-"}},{"id":6,"description":"Atypical, unstructured, or high signal genomic regions present in next-generation sequencing experiments (e.g. from ENCODE)","command_templates":["cp {{values.files[\"blacklist\"]}} {{values.output_folder}}/{{values.genome_digest}}_blacklist.bed.gz"],"input_files":{"blacklist":{"description":"gzipped blacklist file"}},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"blacklist","version":"0.0.1","output_asset_class_id":6,"input_params":{},"input_assets":{},"custom_properties":{}},{"id":7,"description":"Genome index for bowtie, produced with bowtie-build","command_templates":["bowtie-build --threads {{values.params[\"threads\"]}} {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} {{values.output_folder}}/{{values.genome_digest}}"],"input_files":null,"docker_image":"docker.io/databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"bowtie1_index","version":"0.0.1","output_asset_class_id":7,"input_params":{"threads":{"description":"Number of threads to use","default":1}},"input_assets":{"fasta":{"asset_class":"fasta","description":"fasta asset for genome","default":"fasta"}},"custom_properties":{"version":"bowtie-build --version | awk 'NR==1{print $3}'"}},{"id":8,"description":"Genome index for bowtie2, produced with bowtie2-build","command_templates":["bowtie2-build --threads {{values.params[\"threads\"]}} 
{{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} {{values.output_folder}}/{{values.genome_digest}}"],"input_files":null,"docker_image":"docker.io/databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"bowtie2_index","version":"0.0.1","output_asset_class_id":8,"input_params":{"threads":{"description":"Number of threads to use","default":1}},"input_assets":{"fasta":{"asset_class":"fasta","description":"fasta asset for genome","default":"fasta"}},"custom_properties":{"version":"bowtie2-build --version | awk 'NR==1{print $3}'"}},{"id":9,"description":"Genome index for Burrows-Wheeler Alignment Tool, produced with bwa index","command_templates":["ln -sf {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} {{values.output_folder}}","bwa index {{values.output_folder}}/{{values.genome_digest}}.fa"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"bwa_index","version":"0.0.1","output_asset_class_id":9,"input_params":{},"input_assets":{"fasta":{"asset_class":"fasta","description":"fasta asset for genome","default":"fasta"}},"custom_properties":{"version":"bwa 2>&1 | grep Version | cut -d' ' -f2 | awk -F- '{print $1}'"}},{"id":10,"description":"Cell Ranger custom genome reference for read alignment and gene expression quantification","command_templates":["gunzip {{values.genome_folder}}/{{values.assets[\"gencode_gtf\"].seek_keys_dict[\"gtf\"]}} -c > {{values.output_folder}}/{{values.genome_digest}}.gtf","cellranger mkgtf {{values.output_folder}}/{{values.genome_digest}}.gtf {{values.output_folder}}/{{values.genome_digest}}_filtered.gtf","rm {{values.output_folder}}/{{values.genome_digest}}.gtf","cd {{values.output_folder}}; cellranger mkref --genome=ref --fasta={{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} --genes={{values.output_folder}}/{{values.genome_digest}}_filtered.gtf 
--nthreads={{values.params[\"threads\"]}}"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"cellranger_reference","version":"0.0.1","output_asset_class_id":10,"input_params":{"threads":{"default":"8","description":"Number of threads to use for parallel computing"}},"input_assets":{"gencode_gtf":{"asset_class":"gtf","default":"gencode_gtf","description":"Annotation file in Gene Transfer Format (GTF) from Gencode"},"fasta":{"asset_class":"fasta","default":"fasta","description":"fasta asset for genome"}},"custom_properties":{"version":"cellranger --version | awk -F- '{print $2}'"}},{"id":20,"description":"Genome index for kallisto, produced with kallisto index","command_templates":["kallisto index -i {{values.output_folder}}/{{values.genome_digest}}_kallisto_index.idx {{values.genome_folder}}/{{values.assets[\"fasta_txome\"].seek_keys_dict[\"fasta\"]}}"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"kallisto_index","version":"0.0.1","output_asset_class_id":18,"input_params":{},"input_assets":{"fasta_txome":{"asset_class":"fasta","default":"fasta_txome","description":"fasta asset for transcriptome"}},"custom_properties":{"version":"kallisto version | awk '{print $3}'"}},{"id":11,"description":"A database developed for functional prediction and annotation of all potential non-synonymous single-nucleotide variants (nsSNVs) in the human genome (Gencode release 29/Ensembl 94)","command_templates":["cp {{values.files[\"dbnsfp\"]}} {{values.output_folder}}/{{values.genome_digest}}.zip","unzip {{values.output_folder}}/{{values.genome_digest}}.zip -d {{values.output_folder}}","gunzip -v {{values.output_folder}}/*variant.chr*.gz","head -n1 {{values.output_folder}}/dbNSFP*_variant.chr1 > {{values.output_folder}}/{{values.genome_digest}}_dbNSFP.txt","cat {{values.output_folder}}/dbNSFP*variant.chr* | grep -v '#' >> 
{{values.output_folder}}/{{values.genome_digest}}_dbNSFP.txt","rm {{values.output_folder}}/dbNSFP*_variant.chr*","bgzip -@ {{values.params[\"threads\"]}} {{values.output_folder}}/{{values.genome_digest}}_dbNSFP.txt","tabix -s 1 -b 2 -e 2 {{values.output_folder}}/{{values.genome_digest}}_dbNSFP.txt.gz","rm `find {{values.output_folder}} -type f -not -path '{{values.output_folder}}/_refgenie_build*' -not -path '{{values.output_folder}}/{{values.genome_digest}}_dbNSFP.txt.*'`"],"input_files":{"dbnsfp":{"asset_class":"dbnsfp","description":"zipped dbNSFP database file"}},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"dbnsfp","version":"0.0.1","output_asset_class_id":11,"input_params":{"threads":{"default":"8","description":"Number of threads to use for parallel computing"}},"input_assets":{},"custom_properties":{"version":"tabix --version | awk '{print $3}' | head -1"}},{"id":12,"description":"The database of single nucleotide polymorphisms (SNPs) and multiple small-scale variations that include insertions/deletions, microsatellites, and non-polymorphic variants","command_templates":["cp {{values.files[\"dbsnp_vcf\"]}} {{values.output_folder}}/{{values.genome_digest}}_dbSNP.gz","cp {{values.files[\"dbsnp_tbi\"]}} {{values.output_folder}}/{{values.genome_digest}}_dbSNP.gz.tbi"],"input_files":{"dbsnp_vcf":{"description":"SNP database file in Variant Call Format (VCF)"},"dbsnp_tbi":{"description":"tabix index of the dbsnp.vcf file"}},"docker_image":"databio/refgenie","default_asset":"default","name":"dbsnp","version":"0.0.1","output_asset_class_id":12,"input_params":{},"input_assets":{},"custom_properties":{}},{"id":13,"description":"Ensembl GTF, TSS, and gene body annotation","command_templates":["cp {{values.files[\"ensembl_gtf\"]}} {{values.output_folder}}/{{values.genome_digest}}.gtf.gz","gzip -dcf {{values.output_folder}}/{{values.genome_digest}}.gtf.gz | grep 'exon_number \"1\";' | sed 's/^/chr/' | awk -v OFS='\\t' 
'{print $1, $4, $5, $20, $14, $7}' | sed 's/\";//g' | sed 's/\"//g' | awk '{if($6==\"+\"){print $1\"\\t\"$2+20\"\\t\"$2+120\"\\t\"$4\"\\t\"$5\"\\t\"$6}else{print $1\"\\t\"$3-120\"\\t\"$3-20\"\\t\"$4\"\\t\"$5\"\\t\"$6}}' | LC_COLLATE=C sort -k1,1 -k2,2n -u > {{values.output_folder}}/{{values.genome_digest}}_ensembl_TSS.bed","gzip -dcf {{values.output_folder}}/{{values.genome_digest}}.gtf.gz | awk '$3 == \"gene\"' | sed 's/^/chr/' | awk -v OFS='\\t' '{print $1, $4, $5, $14, $6, $7}' | sed 's/\";//g' | sed 's/\"//g' | awk '$4!=\"Metazoa_SRP\"' | awk '$4!=\"U3\"' | awk '$4!=\"7SK\"'  | awk '($3-$2)>200' | awk '{if($6==\"+\"){print $1\"\\t\"$2+500\"\\t\"$3\"\\t\"$4\"\\t\"$5\"\\t\"$6}else{print $1\"\\t\"$2\"\\t\"$3-500\"\\t\"$4\"\\t\"$5\"\\t\"$6}}' | awk '$3>$2' | LC_COLLATE=C sort -k4 -u > {{values.output_folder}}/{{values.genome_digest}}_ensembl_gene_body.bed"],"input_files":{"ensembl_gtf":{"description":"Annotation file in Gene Transfer Format (GTF) from Ensembl"}},"docker_image":"databio/refgenie","default_asset":"default","name":"ensembl_gtf","version":"0.0.1","output_asset_class_id":16,"input_params":{},"input_assets":{},"custom_properties":{}},{"id":14,"description":"A regulatory annotation file","command_templates":["cp {{values.files[\"gff\"]}} {{values.output_folder}}/{{values.genome_digest}}.gff.gz"],"input_files":{"gff":{"description":"Regulatory build annotation file in General Feature Format (GFF) from Ensembl"}},"docker_image":"databio/refgenie","default_asset":"default","name":"ensembl_rb","version":"0.0.1","output_asset_class_id":13,"input_params":{},"input_assets":{},"custom_properties":{}},{"id":15,"description":"Genome index for CpG sites, produced by the epilog DNA methylation caller","command_templates":["epilog index -- --infile {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} --outfile {{values.output_folder}}/{{values.genome_digest}}_{{values.params[\"context\"]}}.tsv --contexts {{values.params[\"context\"]}}","bgzip 
{{values.output_folder}}/{{values.genome_digest}}_{{values.params[\"context\"]}}.tsv","tabix -s 1 -b 2 -e 2 {{values.output_folder}}/{{values.genome_digest}}_{{values.params[\"context\"]}}.tsv.gz"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"epilog_index","version":"0.0.1","output_asset_class_id":14,"input_params":{"context":{"default":"CG","description":"Substring to index. One or more space-separated strings to index. e.g. 'CG' or 'CG CA CT CC'"}},"input_assets":{"fasta":{"asset_class":"fasta","default":"fasta","description":"fasta asset for genome"}},"custom_properties":{}},{"id":16,"description":"cDNA sequences in the FASTA format, indexed FASTA (produced with samtools faidx), chromosome sizes file and sequence dictionary (produced with samtools dict)","command_templates":["cp {{values.files[\"fasta_txome\"]}} {{values.output_folder}}/{{values.genome_digest}}.fa.gz","if (file {{values.output_folder}}/{{values.genome_digest}}.fa.gz | grep -q compressed ) ; then gzip -df {{values.output_folder}}/{{values.genome_digest}}.fa.gz ; else mv {{values.output_folder}}/{{values.genome_digest}}.fa.gz {{values.output_folder}}/{{values.genome_digest}}.fa ; fi","samtools faidx {{values.output_folder}}/{{values.genome_digest}}.fa","cut -f 1,2 {{values.output_folder}}/{{values.genome_digest}}.fa.fai > {{values.output_folder}}/{{values.genome_digest}}.chrom.sizes","cat {{values.output_folder}}/{{values.genome_digest}}.fa | samtools dict -o {{values.output_folder}}/{{values.genome_digest}}.dict"],"input_files":{"fasta_txome":{"description":"A transcriptome FASTA file. 
Gzipped or not."}},"docker_image":"docker.io/databio/refgenie","default_asset":"samtools-{{values.custom_properties.version}}","name":"fasta_txome","version":"0.0.1","output_asset_class_id":1,"input_params":{},"input_assets":{},"custom_properties":{"version":"samtools --version-only | awk -F+ '{print $1}'"}},{"id":17,"description":"Combined genomic feature annotation created using an Ensembl GTF annotation asset and an Ensembl regulatory build annotation asset","command_templates":["gzip -dcf {{values.genome_folder}}/{{values.assets[\"ensembl_gtf\"].seek_keys_dict[\"gtf\"]}} | awk '$3==\"exon\"' | grep -v 'pseudogene' | awk -v OFS='\\t' '{print \"chr\"$1, $4-1, $5, \"Exon\", $6, $7}' | awk '$2<$3' | env LC_COLLATE=C sort -k1,1 -k2,2n -k3,3n -u > {{values.output_folder}}/{{values.genome_digest}}_exons.bed","gzip -dcf {{values.genome_folder}}/{{values.assets[\"ensembl_gtf\"].seek_keys_dict[\"gtf\"]}} | awk '$3==\"exon\"' | grep -v 'pseudogene' | awk -v OFS='\\t' '{ split($20, a, \"\\\"\"); print \"chr\"$1, $4-1, $5, a[2], $6, $7}' | env LC_COLLATE=C sort -k1,1 -k2,2n -k3,3n -u | awk 'seen[$4]++ && seen[$4] > 1' | env LC_COLLATE=C sort -k1,1 -k2,2n -k3,3nr | env LC_COLLATE=C sort -k1,1 -k2,2n -u | env LC_COLLATE=C sort -k1,1 -k3,3n -u | awk -v OFS='\\t' '{if($4==prev4){new2=prev3+1;} {prev4=$4; prev3=$3; print $1, new2, $2-1, \"Intron\", $5, $6}}' | awk -F'\\t' '$2' | awk '$2<$3' | env LC_COLLATE=C sort -k1,1 -k2,2n -u > {{values.output_folder}}/{{values.genome_digest}}_introns.bed","gzip -dcf {{values.genome_folder}}/{{values.assets[\"ensembl_gtf\"].seek_keys_dict[\"gtf\"]}} | awk '$3==\"three_prime_utr\"' | grep -v 'pseudogene' | awk -v OFS='\\t' '{print \"chr\"$1, $4-1, $5, \"3'\\'' UTR\", $6, $7}' | awk '$2<$3' | env LC_COLLATE=C sort -k1,1 -k2,2n -u > {{values.output_folder}}/{{values.genome_digest}}_3utr.bed","gzip -dcf {{values.genome_folder}}/{{values.assets[\"ensembl_gtf\"].seek_keys_dict[\"gtf\"]}} | awk '$3==\"five_prime_utr\"' | grep -v 'pseudogene' | awk -v 
OFS='\\t' '{print \"chr\"$1, $4-1, $5, \"5'\\'' UTR\", $6, $7}' | awk '$2<$3' | env LC_COLLATE=C sort -k1,1 -k2,2n -u > {{values.output_folder}}/{{values.genome_digest}}_5utr.bed","gzip -dcf {{values.genome_folder}}/{{values.assets[\"ensembl_rb\"].seek_keys_dict[\"ensembl_rb\"]}} | awk '$3==\"promoter\"' | awk -v OFS='\\t' '{print \"chr\"$1, $4, $5, \"Promoter\", $6, $7}' | awk '$2<$3' | env LC_COLLATE=C sort -k1,1 -k2,2n -k3,3n -u > {{values.output_folder}}/{{values.genome_digest}}_promoter.bed","gzip -dcf {{values.genome_folder}}/{{values.assets[\"ensembl_rb\"].seek_keys_dict[\"ensembl_rb\"]}} | awk '$3==\"promoter_flanking_region\"' | awk -v OFS='\\t' '{print \"chr\"$1, $4, $5, \"Promoter Flanking Region\", $6, $7}' | awk '$2<$3' | env LC_COLLATE=C sort -k1,1 -k2,2n -k3,3n -u > {{values.output_folder}}/{{values.genome_digest}}_promoter_flanking.bed","gzip -dcf {{values.genome_folder}}/{{values.assets[\"ensembl_rb\"].seek_keys_dict[\"ensembl_rb\"]}} | awk '$3==\"enhancer\"' | awk -v OFS='\\t' '{print \"chr\"$1, $4, $5, \"Enhancer\", $6, $7}' | awk '$2<$3' | env LC_COLLATE=C sort -k1,1 -k2,2n -k3,3n -u > {{values.output_folder}}/{{values.genome_digest}}_enhancer.bed","cat {{values.output_folder}}/{{values.genome_digest}}_enhancer.bed {{values.output_folder}}/{{values.genome_digest}}_promoter.bed {{values.output_folder}}/{{values.genome_digest}}_promoter_flanking.bed {{values.output_folder}}/{{values.genome_digest}}_5utr.bed {{values.output_folder}}/{{values.genome_digest}}_3utr.bed {{values.output_folder}}/{{values.genome_digest}}_exons.bed {{values.output_folder}}/{{values.genome_digest}}_introns.bed | awk -F'\\t' '!seen[$1, $2, $3]++' > {{values.output_folder}}/{{values.genome_digest}}_annotations.bed","rm -f {{values.output_folder}}/{{values.genome_digest}}_enhancer.bed {{values.output_folder}}/{{values.genome_digest}}_promoter.bed {{values.output_folder}}/{{values.genome_digest}}_promoter_flanking.bed {{values.output_folder}}/{{values.genome_digest}}_5utr.bed 
{{values.output_folder}}/{{values.genome_digest}}_3utr.bed {{values.output_folder}}/{{values.genome_digest}}_exons.bed {{values.output_folder}}/{{values.genome_digest}}_introns.bed","gzip -f {{values.output_folder}}/{{values.genome_digest}}_annotations.bed"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"default","name":"feat_annotation","version":"0.0.1","output_asset_class_id":15,"input_params":{},"input_assets":{"ensembl_gtf":{"asset_class":"gtf","default":"ensembl_gtf","description":"Annotation file in Gene Transfer Format (GTF) from Ensembl"},"ensembl_rb":{"asset_class":"ensembl_rb","default":"ensembl_rb","description":"Regulatory annotation file in General Feature Format (GFF) from Ensembl"}},"custom_properties":{}},{"id":18,"description":"GTF annotation asset which provides access to all annotated transcripts","command_templates":["cp {{values.files[\"gencode_gtf\"]}} {{values.output_folder}}/{{values.genome_digest}}.gtf.gz"],"input_files":{"gencode_gtf":{"description":"Annotation file in Gene Transfer Format (GTF) from Gencode"}},"docker_image":"databio/refgenie","default_asset":"default","name":"gencode_gtf","version":"0.0.1","output_asset_class_id":16,"input_params":{},"input_assets":{},"custom_properties":{}},{"id":19,"description":"Genome index for HISAT2, produced with hisat2-build","command_templates":["hisat2-build {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} {{values.output_folder}}/{{values.genome_digest}}"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"hisat2_index","version":"0.0.1","output_asset_class_id":17,"input_params":{},"input_assets":{"fasta":{"asset_class":"fasta","default":"fasta","description":"fasta asset for genome"}},"custom_properties":{"version":"hisat2-build --version | awk 'NR==5{print $4}'"}},{"id":21,"description":"gene, TSS, exon, intron, and premature mRNA annotation files","command_templates":["cp 
{{values.files[\"refgene\"]}} {{values.output_folder}}/{{values.genome_digest}}_refGene.txt.gz","gzip -dcf {{values.output_folder}}/{{values.genome_digest}}_refGene.txt.gz | awk '{if($4==\"+\"){print $3\"\\\\t\"$5\"\\\\t\"$5\"\\\\t\"$13\"\\\\t.\\\\t\"$4}else{print $3\"\\\\t\"$6\"\\\\t\"$6\"\\\\t\"$13\"\\\\t.\\\\t\"$4}}' | LC_COLLATE=C sort -k1,1 -k2,2n -u > {{values.output_folder}}/{{values.genome_digest}}_TSS.bed","gzip -dcf {{values.output_folder}}/{{values.genome_digest}}_refGene.txt.gz | awk -v OFS='\\\\t' '$9>1' | awk -v OFS='\\\\t' '{ n = split($10, a, \",\"); split($11, b, \",\"); for(i=1; i<n; ++i) print $3, a[i], b[i], $13, i, $4 }' | awk -v OFS='\\\\t' '$6==\"+\" && $5!=1 {print $0} $6==\"-\" {print $0}' | awk '$4!=prev4 && prev6==\"-\" {prev4=$4; prev6=$6; delete line[NR-1]; idx-=1} {line[++idx]=$0; prev4=$4; prev6=$6} END {for (x=1; x<=idx; x++) print line[x]}' | LC_COLLATE=C sort -k1,1 -k2,2n -u > {{values.output_folder}}/{{values.genome_digest}}_exons.bed","gzip -dcf {{values.output_folder}}/{{values.genome_digest}}_refGene.txt.gz | awk -v OFS='\\\\t' '$9>1' | awk -F'\\\\t' '{ exonCount=int($9);split($10,exonStarts,\"[,]\"); split($11,exonEnds,\"[,]\"); for(i=1;i<exonCount;i++) {printf(\"%s\\\\t%s\\\\t%s\\\\t%s\\\\t%d\\\\t%s\\\\n\",$3,exonEnds[i],exonStarts[i+1],$13,($3==\"+\"?i:exonCount-i),$4);}}' | LC_COLLATE=C sort -k1,1 -k2,2n -u > {{values.output_folder}}/{{values.genome_digest}}_introns.bed","gzip -dcf {{values.output_folder}}/{{values.genome_digest}}_refGene.txt.gz | grep 'cmpl' | awk '{print $3\"\\\\t\"$5\"\\\\t\"$6\"\\\\t\"$13\"\\\\t.\\\\t\"$4}' | LC_COLLATE=C sort -k1,1 -k2,2n -u > {{values.output_folder}}/{{values.genome_digest}}_pre-mRNA.bed"],"input_files":{"refgene":{"description":"gzipped RefGene database annotation 
file"}},"docker_image":"databio/refgenie","default_asset":"default","name":"refgene_anno","version":"0.0.1","output_asset_class_id":19,"input_params":{},"input_assets":{},"custom_properties":{}},{"id":22,"description":"Transcriptome index for salmon, produced with salmon index","command_templates":["salmon index -t {{values.genome_folder}}/{{values.assets[\"fasta_txome\"].seek_keys_dict[\"fasta\"]}} -i {{values.output_folder}} -k {{values.params[\"kmer\"]}} -p {{values.params[\"threads\"]}}"],"input_files":{},"docker_image":"combinelab/salmon","default_asset":"{{values.custom_properties.version}}","name":"salmon_index","version":"0.0.1","output_asset_class_id":20,"input_params":{"threads":{"default":"8","description":"Number of threads to use for parallel computing"},"kmer":{"default":"31","description":"The length of kmer to use to create the indices"}},"input_assets":{"fasta_txome":{"asset_class":"fasta","default":"fasta_txome","description":"fasta asset for transcriptome"}},"custom_properties":{"version":"salmon --version | awk '{print $2}'"}},{"id":23,"description":"Transcriptome index for salmon, produced with salmon index using partial\nselective alignment method. Preparation includes transcriptome mapping to the genome\nand extraction of the relevant portion out from the genome and indexing it along\nwith the transcriptome. 
Recipe source -- https://github.com/COMBINE-lab/SalmonTools/blob/master/scripts/generateDecoyTranscriptome.sh\n","command_templates":["gunzip -c {{values.genome_folder}}/{{values.assets[\"ensembl_gtf\"].seek_keys_dict[\"gtf\"]}} > {{values.output_folder}}/{{values.genome_digest}}.gtf","awk -v OFS='\t' '{if ($3==\"exon\") {print $1,$4,$5}}' {{values.output_folder}}/{{values.genome_digest}}.gtf > {{values.output_folder}}/exons.bed","bedtools maskfasta -fi {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} -bed {{values.output_folder}}/exons.bed -fo {{values.output_folder}}/reference.masked.genome.fa","mashmap -r {{values.output_folder}}/reference.masked.genome.fa -q {{values.genome_folder}}/{{values.assets[\"fasta_txome\"].seek_keys_dict[\"fasta\"]}} -t {{values.params[\"threads\"]}} --pi 80 -s 500 -o {{values.output_folder}}/mashmap.out","awk -v OFS='\t' '{print $6,$8,$9}' {{values.output_folder}}/mashmap.out | sort -k1,1 -k2,2n - > {{values.output_folder}}/genome_found.sorted.bed","bedtools merge -i {{values.output_folder}}/genome_found.sorted.bed > {{values.output_folder}}/genome_found_merged.bed","bedtools getfasta -fi {{values.output_folder}}/reference.masked.genome.fa -bed {{values.output_folder}}/genome_found_merged.bed -fo {{values.output_folder}}/genome_found.fa","awk '{a=$0; getline; split(a, b, \":\"); r[b[1]] = r[b[1]]\"\"$0} END { for (k in r) { print k\"\\n\"r[k] }}' {{values.output_folder}}/genome_found.fa > {{values.output_folder}}/decoy.fa","cat {{values.genome_folder}}/{{values.assets[\"fasta_txome\"].seek_keys_dict[\"fasta\"]}} {{values.output_folder}}/decoy.fa > {{values.output_folder}}/gentrome.fa","grep '>' {{values.output_folder}}/decoy.fa | awk '{print substr($1,2); }' > {{values.output_folder}}/decoys.txt","rm {{values.output_folder}}/exons.bed {{values.output_folder}}/reference.masked.genome.fa {{values.output_folder}}/mashmap.out {{values.output_folder}}/genome_found.sorted.bed 
{{values.output_folder}}/genome_found_merged.bed {{values.output_folder}}/genome_found.fa {{values.output_folder}}/decoy.fa {{values.output_folder}}/reference.masked.genome.fa.fai","salmon index -t {{values.output_folder}}/gentrome.fa -d {{values.output_folder}}/decoys.txt -i {{values.output_folder}} -k {{values.params[\"kmer\"]}} -p {{values.params[\"threads\"]}}"],"input_files":{},"docker_image":"combinelab/salmon","default_asset":"{{values.custom_properties.version}}","name":"salmon_partial_sa_index","version":"0.0.1","output_asset_class_id":21,"input_params":{"threads":{"default":"8","description":"Number of threads to use for parallel computing"},"kmer":{"default":"31","description":"The length of kmer to use to create the indices"}},"input_assets":{"fasta":{"asset_class":"fasta","default":"fasta","description":"fasta asset for genome"},"fasta_txome":{"asset_class":"fasta","default":"fasta_txome","description":"fasta asset for transcriptome"},"ensembl_gtf":{"asset_class":"gtf","default":"ensembl_gtf","description":"GTF file for exonic features extraction"}},"custom_properties":{"version":"salmon --version | awk '{print $2}'"}},{"id":24,"description":"Transcriptome index for salmon, produced with salmon index using selective alignment method. 
Improves quantification accuracy compared to the regular index.","command_templates":["grep '^>' {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} | cut -d ' ' -f 1 > {{values.output_folder}}/decoys.txt","sed -i.bak -e 's/>//g' {{values.output_folder}}/decoys.txt","rm {{values.output_folder}}/decoys.txt.bak","cat {{values.genome_folder}}/{{values.assets[\"fasta_txome\"].seek_keys_dict[\"fasta\"]}} {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} > {{values.output_folder}}/gentrome.fa","salmon index -t {{values.output_folder}}/gentrome.fa -d {{values.output_folder}}/decoys.txt -i {{values.output_folder}} -k {{values.params[\"kmer\"]}} -p {{values.params[\"threads\"]}}","rm {{values.output_folder}}/gentrome.fa {{values.output_folder}}/decoys.txt"],"input_files":{},"docker_image":"combinelab/salmon","default_asset":"{{values.custom_properties.version}}","name":"salmon_sa_index","version":"0.0.1","output_asset_class_id":22,"input_params":{"threads":{"default":"8","description":"Number of threads to use for parallel computing"},"kmer":{"default":"31","description":"The length of kmer to use to create the indices"}},"input_assets":{"fasta":{"asset_class":"fasta","default":"fasta","description":"fasta asset for genome"},"fasta_txome":{"asset_class":"fasta","default":"fasta_txome","description":"fasta asset for transcriptome"}},"custom_properties":{"version":"salmon --version | awk '{print $2}'"}},{"id":25,"description":"Small RNA sequences in the FASTA format. 
A predicted hairpin portion of a miRNA transcript and information on the location and sequence of the mature miRNA sequence","command_templates":["cp {{values.files[\"hairpin\"]}} {{values.output_folder}}/{{values.genome_digest}}_hairpin.fa","cp {{values.files[\"mature\"]}} {{values.output_folder}}/{{values.genome_digest}}_mature.fa"],"input_files":{"hairpin":{"description":"FASTA format sequences of all miRNA hairpins"},"mature":{"description":"FASTA format sequences of all mature miRNA sequences"}},"docker_image":"docker.io/databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"small_rna","version":"0.0.1","output_asset_class_id":23,"input_params":null,"input_assets":null,"custom_properties":{"version":""}},{"id":26,"description":"Genome index for STAR RNA-seq aligner, produced with STAR --runMode genomeGenerate","command_templates":["mkdir -p {{values.output_folder}}","STAR --runThreadN {{values.params[\"threads\"]}} --runMode genomeGenerate --genomeDir {{values.output_folder}} --genomeFastaFiles {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}}"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"star_index","version":"0.0.1","output_asset_class_id":24,"input_params":{"threads":{"default":"8","description":"Number of threads to use for parallel computing"}},"input_assets":{"fasta":{"asset_class":"fasta","default":"fasta","description":"fasta asset for genome"}},"custom_properties":{"version":"STAR --version"}},{"id":27,"description":"Enhanced suffix array index for genomes using gt (GenomeTools) suffixerator program","command_templates":["gt suffixerator -dna -pl -tis -suf -lcp -v -showprogress -memlimit {{values.params[\"memlimit\"]}}GB -db {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} -indexname 
{{values.output_folder}}/{{values.genome_digest}}.sft"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"suffixerator_index","version":"0.0.1","output_asset_class_id":25,"input_params":{"memlimit":{"default":8,"description":"The maximum amount of memory available to be used during index construction."}},"input_assets":{"fasta":{"asset_class":"fasta","default":"fasta","description":"fasta asset for genome"}},"custom_properties":{"version":"gt suffixerator --version | awk 'NR==1{print $4}'"}},{"id":28,"description":"Indexed k-mers for a given enhanced suffix array at a fixed value of k","command_templates":["gt tallymer mkindex -v -counts -pl \\\n-mersize {{values.params[\"mersize\"]}} \\\n-minocc {{values.params[\"minocc\"]}} \\\n-indexname {{values.output_folder}}/{{values.genome_digest}}.tal_{{values.params[\"mersize\"]}} \\\n-esa {{values.genome_folder}}/{{values.assets[\"suffixerator_index\"].seek_keys_dict[\"esa\"]}}\n","gt tallymer search -output qseqnum qpos -strand fp \\\n-tyr {{values.output_folder}}/{{values.genome_digest}}.tal_{{values.params[\"mersize\"]}} \\\n-q {{values.genome_folder}}/{{values.assets[\"fasta\"].seek_keys_dict[\"fasta\"]}} \\\n> {{values.output_folder}}/{{values.genome_digest}}.tal_{{values.params[\"mersize\"]}}.gtTxt\n"],"input_files":{},"docker_image":"databio/refgenie","default_asset":"{{values.custom_properties.version}}","name":"tallymer_index","version":"0.0.1","output_asset_class_id":26,"input_params":{"mersize":{"default":"30","description":"The mer size."},"minocc":{"default":"2","description":"The minimum occurrence number for the mers to index."}},"input_assets":{"suffixerator_index":{"asset_class":"suffixerator_index","default":"suffixerator_index","description":"enhanced suffix array index for genome"},"fasta":{"asset_class":"fasta","default":"fasta","description":"fasta asset for genome"}},"custom_properties":{"version":"gt tallymer --version | awk 
'NR==1{print $4}'"}},{"id":29,"description":"Transcript to gene map file, containing two columns mapping of each transcript present in the reference to the corresponding gene.","command_templates":["grep '^>' {{values.genome_folder}}/{{values.assets[\"salmon_partial_sa_index\"].seek_keys_dict[\"salmon_partial_sa_index\"]}}/gentrome.fa | cut -d ' ' -f 1,7 | tr -s ' ' '\\t' | sed 's/[>'gene_symbol:']//g' > {{values.output_folder}}/{{values.genome_digest}}_txp2gene.tsv"],"input_files":{},"docker_image":"","default_asset":"default","name":"tgMap","version":"0.0.1","output_asset_class_id":27,"input_params":{},"input_assets":{"salmon_partial_sa_index":{"asset_class":"salmon_partial_sa_index","default":"salmon_partial_sa_index","description":"partial salmon index asset"}},"custom_properties":{}}],"pagination":{"offset":0,"limit":100,"total":29}}