# Print the commands as they are executed (-x).
# Stop on errors (-e) and on any use of an unset variable (-u).
set -uex

# Also fail a pipeline when any stage of it fails, not only the last one.
# The option is probed in a subshell first so that a shell without
# pipefail support keeps running instead of aborting right here.
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

# The URL the data is located at.
URL=http://data.biostarhandbook.com/rnaseq/projects/griffith/griffith-data.tar.gz

# Local name of the downloaded archive: the last path component of the URL,
# so the two can never drift apart.
ARCHIVE=${URL##*/}

# Download the data quietly (-q). With -nc an archive that is already
# present is left alone, so re-running the script does not fetch it again.
wget -q -nc "$URL"

# Unpack the dataset. The file listing printed by -v goes to the terminal;
# only tar's error output is appended to log.txt.
tar zxvf "$ARCHIVE" 2>> log.txt

# This is the reference genome.
REF=refs/ERCC92.fa

# The name of the index file.
IDX=refs/ERCC92.idx

# Build the kallisto index from the reference. Paths are quoted so a
# value containing whitespace or glob characters is passed as one argument.
# Diagnostics written to stderr are appended to log.txt.
kallisto index -i "$IDX" "$REF" 2>> log.txt

# Generate the root names for the files: every combination of a condition
# (UHR, HBR) and a replicate number (1, 2, 3), one name per line.
# A single job slot (-j 1) runs the echo commands one after another.
parallel -j 1 echo '{1}_{2}' ::: UHR HBR ::: 1 2 3 > names.txt

# Create the directory that will hold the results.
mkdir -p results

# Quantify all samples in parallel, one kallisto run per name.
# names.txt is fed in by redirection rather than through cat, so a missing
# or unreadable file stops the script instead of being hidden by a pipeline.
# Each sample gets its own output directory results/<name>; stderr is
# appended to log.txt.
parallel "kallisto quant -i $IDX -o results/{} reads/{}_R1.fq reads/{}_R2.fq" < names.txt 2>> log.txt

# Copy each abundance file next to the result directories under a name
# that carries the sample name.
parallel cp 'results/{}/abundance.tsv' 'results/{}.abundance.tsv' < names.txt

# Create a combined count file.
# The per-sample tables are joined side by side (files matching H* first,
# then those matching U*) and only the listed columns are kept.
# NOTE(review): presumably every abundance.tsv has five columns with the
# estimated counts in the fourth, which would make this selection "the
# identifier column plus one count column per sample" - confirm against
# the kallisto output format.
COUNT_COLUMNS=1,4,9,14,19,24,29
paste results/H*.tsv results/U*.tsv \
  | cut -f "$COUNT_COLUMNS" \
  > counts.txt

# Get the script that runs the deseq 1 method (skipped if already present).
wget -q -nc http://data.biostarhandbook.com/rnaseq/code/deseq1.r

# Perform the differential expression study.
# The count table is read from stdin and the argument 3x3 is passed to the
# script; its stdout goes to results.txt and its stderr is appended to
# log.txt.
Rscript deseq1.r 3x3 < counts.txt > results.txt 2>> log.txt

# Download the script to draw the heatmaps (requires the gplots package).
wget -q -nc http://data.biostarhandbook.com/rnaseq/code/draw-heatmap.r

# Generate heatmap from the deseq1 normalized matrix.
# NOTE(review): norm-matrix-deseq1.txt is not created anywhere in this
# script; presumably deseq1.r writes it as a side effect - confirm.
Rscript draw-heatmap.r < norm-matrix-deseq1.txt > heatmap-norm-matrix-deseq1.pdf 2>> log.txt