#!/usr/bin/env bash
#
# Quantify the Griffith ERCC RNA-seq samples with kallisto, then run a
# DESeq1 differential expression analysis and draw a heatmap.
#
# Requires on PATH: wget, tar, kallisto, GNU parallel, Rscript.
# Diagnostics (stderr) of the long-running steps are appended to log.txt.

# -x: print the commands as they are executed.
# -e / -u: stop on errors and on use of unset variables.
# -o pipefail: a pipeline fails when ANY stage fails, so a failing `paste`
#   is no longer hidden by a succeeding `cut`.
set -euxo pipefail

# The URL the data is located at.
URL=http://data.biostarhandbook.com/rnaseq/projects/griffith/griffith-data.tar.gz

# Obtain and download the data (-nc: skip if the file is already present).
wget -q -nc "$URL"

# Unpack the dataset.
tar zxvf griffith-data.tar.gz 2>> log.txt

# This is the reference genome.
REF=refs/ERCC92.fa

# The name of the index file.
IDX=refs/ERCC92.idx

# Build the kallisto index.
kallisto index -i "$IDX" "$REF" 2>> log.txt

# Generate the root names for the files (UHR_1 ... HBR_3).
# -j 1 runs the jobs one at a time so the output order is deterministic.
parallel -j 1 echo '{1}_{2}' ::: UHR HBR ::: 1 2 3 > names.txt

# Create the directory that will hold the results.
mkdir -p results

# Quantify all samples in parallel; one sample name is read per line of
# names.txt. $IDX is expanded by this shell, {} is substituted by parallel.
parallel "kallisto quant -i $IDX -o results/{} reads/{}_R1.fq reads/{}_R2.fq" \
  < names.txt 2>> log.txt

# Copy each abundance file to a name that carries its sample name.
parallel cp results/{}/abundance.tsv results/{}.abundance.tsv < names.txt

# Create a combined count file: the first column of the first file plus
# column 4 of each of the six pasted files (HBR samples first, then UHR).
# NOTE(review): the column numbers assume 5 columns per abundance.tsv with
# the estimated counts in column 4 - confirm against the kallisto version used.
paste results/H*.tsv results/U*.tsv \
  | cut -f 1,4,9,14,19,24,29 > counts.txt

# Get the script that runs the DESeq1 method.
wget -q -nc http://data.biostarhandbook.com/rnaseq/code/deseq1.r

# Perform the differential expression study (3x3 design); the script reads
# the count table from stdin.
Rscript deseq1.r 3x3 < counts.txt > results.txt 2>> log.txt

# Download the script to draw the heatmaps (requires the gplots package).
wget -q -nc http://data.biostarhandbook.com/rnaseq/code/draw-heatmap.r

# Generate the heatmap from the DESeq1 normalized matrix.
# NOTE(review): norm-matrix-deseq1.txt is presumably written by deseq1.r
# in the step above - confirm.
Rscript draw-heatmap.r < norm-matrix-deseq1.txt \
  > heatmap-norm-matrix-deseq1.pdf 2>> log.txt