# Stop script on any error. set -uex # The default values will be comparing # the Ebola strain from 1972 to one strain observed in 2014. # # Ebola virus Mayinga 1972: AF086833 # Ebola virus Makona 2014: KR105328 # Accession numbers for the two sequences that are to be aligned. SEQ1=AF086833 SEQ2=KR105328 # Download sequences by accession numbers efetch -db nuccore -format fasta -id $SEQ1 > $SEQ1.fa efetch -db nuccore -format fasta -id $SEQ2 > $SEQ2.fa # # Typically you want to know what the scoring matrix is. # # # Get the scoring matrix. # wget -q -nc ftp://ftp.ncbi.nlm.nih.gov/blast/matrices/NUC.4.4 # Global alignments. # Using the default format. echo "Tool:stretcher format:default(markx0) file:alignment.1.txt " stretcher -filter -asequence $SEQ1.fa -bsequence $SEQ2.fa -data NUC.4.4 > alignment.1.txt # # The stretcher has the NUC4.4 matrix built into it under the special # name of EDNAFULL. From now on we will use that. # # Generate alignment in the so called markx2 format. echo "Tool:stretcher format:markx2 file:alignment.2.txt" stretcher -filter -asequence $SEQ1.fa -bsequence $SEQ2.fa -data EDNAFULL -aformat markx2 > alignment.2.txt # Generate alignment in the SAM format. echo "Tool:stretcher format:SAM file:alignment.3.txt" stretcher -filter -asequence $SEQ1.fa -bsequence $SEQ2.fa -data EDNAFULL -aformat sam > alignment.3.txt # # Local alignments. # # Generate a pariwise local alignment with blastn. # It is surprisingly difficult to tell what scoring matrix # blastn used. See blastn --help for information. # # The default output format of blasn. echo "Tool:blastn format:blast file:alignment.4.txt" blastn -subject $SEQ1.fa -query $SEQ2.fa > alignment.4.txt # Format alignments in tabular form. echo "Tool:blastn format:column file:alignment.5.txt" blastn -subject $SEQ1.fa -query $SEQ2.fa -outfmt 7 > alignment.5.txt # # Semi global alignment. # # minimap2 configured to produces SAM format. echo "Tool:minimap format:SAM file:alignment.6.txt" minimap2 -a -x sr --eqx $SEQ1.fa $SEQ2.fa > alignment.6.txt # minimap2 configured to produces PAF with MD tags. echo "Tool:minimap format:PAF (MD) file:alignment.7.txt" minimap2 -c --MD $SEQ1.fa $SEQ2.fa > alignment.7.txt # minimap2 configured to produces OAF with CS tags. echo "Tool:minimap format:PAF (CS) file:alignment.8.txt" minimap2 --cs $SEQ1.fa $SEQ2.fa > alignment.8.txt