Trim MetaT files from JGI (eg: NatBWC12O_metat.fastq.gz)
for file in `dir -d *.fastq.gz` ; do
echo "working with file $file"
trim=`echo "$file" | sed 's/.fastq.gz/.trim.fastq.gz/'`
java -jar /usr/share/java/trimmomatic-0.39.jar SE -threads 40 $file $trim ILLUMINACLIP:../TruSeq2-SE.fa:2:40:15 SLIDINGWINDOW:4:20
done
Map HQ Genomes (eg: 3300021003.fa.3.fa) and the trimmed MetaT files
for genome in `dir -d *fa` ; do
for meta in ../MetaT/*.trim.fastq.gz ; do
echo "working with genome $genome and metat $meta"
file=`echo "$meta" | sed 's/.fastq.gz//' | sed 's/..\/MetaT\///'`
echo "$file"
sorted_bam=../../../BHD1/Arianna/Results/$file.$genome.sorted.bam
echo "$sorted_bam"
bwa mem -t 60 $genome $meta | samtools sort -O bam -T tmp -@ 60 > $sorted_bam
echo "now indexing sorted bam $sorted_bam"
samtools index $sorted_bam
done
done
Then filter aligned reads to 3 quality levels 99.9% or mapq of 30 99% or mapq of 20 97% or mapq of 15
for file in `dir -d *.sorted.bam` ; do
echo "working with file $file"
q30=`echo "$file" | sed 's/.sorted.bam/.q30.sorted.bam/'`
q20=`echo "$file" | sed 's/.sorted.bam/.q20.sorted.bam/'`
q15=`echo "$file" | sed 's/.sorted.bam/.q15.sorted.bam/'`
samtools view -q 30 -b $file > $q30
samtools view -q 20 -b $file > $q20
samtools view -q 15 -b $file > $q15
done
Collect total read count and aligned read count:
parallel --tag samtools view -c ::: *.bam > reads.csv

LS0tCnRpdGxlOiAiV29ya2Zsb3cgNS81IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKZWRpdG9yX29wdGlvbnM6IAogIGNodW5rX291dHB1dF90eXBlOiBpbmxpbmUKLS0tCgpUcmltIE1ldGFUIGZpbGVzIGZyb20gSkdJIChlZzogTmF0QldDMTJPX21ldGF0LmZhc3RxLmd6KQpgYGAKZm9yIGZpbGUgaW4gYGRpciAtZCAqLmZhc3RxLmd6YCA7IGRvCiAgZWNobyAid29ya2luZyB3aXRoIGZpbGUgJGZpbGUiCiAgdHJpbT1gZWNobyAiJGZpbGUiIHwgc2VkICdzLy5mYXN0cS5nei8udHJpbS5mYXN0cS5nei8nYApqYXZhIC1qYXIgL3Vzci9zaGFyZS9qYXZhL3RyaW1tb21hdGljLTAuMzkuamFyIFNFIC10aHJlYWRzIDQwICRmaWxlICR0cmltIElMTFVNSU5BQ0xJUDouLi9UcnVTZXEyLVNFLmZhOjI6NDA6MTUgIFNMSURJTkdXSU5ET1c6NDoyMApkb25lCmBgYAoKTWFwIEhRIEdlbm9tZXMgKGVnOiAzMzAwMDIxMDAzLmZhLjMuZmEpIGFuZCB0aGUgdHJpbW1lZCBNZXRhVCBmaWxlcwpgYGAKZm9yIGdlbm9tZSBpbiBgZGlyIC1kICpmYWAgOyBkbwogIGZvciBtZXRhIGluIC4uL01ldGFULyoudHJpbS5mYXN0cS5neiA7IGRvCiAgICBlY2hvICJ3b3JraW5nIHdpdGggZ2Vub21lICRnZW5vbWUgYW5kIG1ldGF0ICRtZXRhIiAKICAgIGZpbGU9YGVjaG8gIiRtZXRhIiB8IHNlZCAncy8uZmFzdHEuZ3ovLycgfCBzZWQgJ3MvLi5cL01ldGFUXC8vLydgCiAgICBlY2hvICIkZmlsZSIKICAgIHNvcnRlZF9iYW09Li4vLi4vLi4vQkhEMS9Bcmlhbm5hL1Jlc3VsdHMvJGZpbGUuJGdlbm9tZS5zb3J0ZWQuYmFtCiAgICBlY2hvICIkc29ydGVkX2JhbSIKICAgIGJ3YSBtZW0gLXQgNjAgJGdlbm9tZSAkbWV0YSB8IHNhbXRvb2xzIHNvcnQgLU8gYmFtIC1UIHRtcCAtQCA2MCA+ICRzb3J0ZWRfYmFtCiAgICBlY2hvICJub3cgaW5kZXhpbmcgc29ydGVkIGJhbSAkc29ydGVkX2JhbSIKICAgIHNhbXRvb2xzIGluZGV4ICRzb3J0ZWRfYmFtCiAgZG9uZQpkb25lCmBgYAoKVGhlbiBmaWx0ZXIgYWxpZ25lZCByZWFkcyB0byAzIHF1YWxpdHkgbGV2ZWxzIAo5OS45JSBvciBtYXBxIG9mIDMwCjk5JSBvciBtYXBxIG9mIDIwCjk3JSBvciBtYXBxIG9mIDE1CgpgYGAKZm9yIGZpbGUgaW4gYGRpciAtZCAqLnNvcnRlZC5iYW1gIDsgZG8KICBlY2hvICJ3b3JraW5nIHdpdGggZmlsZSAkZmlsZSIKICBxMzA9YGVjaG8gIiRmaWxlIiB8IHNlZCAncy8uc29ydGVkLmJhbS8ucTMwLnNvcnRlZC5iYW0vJ2AKICBxMjA9YGVjaG8gIiRmaWxlIiB8IHNlZCAncy8uc29ydGVkLmJhbS8ucTIwLnNvcnRlZC5iYW0vJ2AKICBxMTU9YGVjaG8gIiRmaWxlIiB8IHNlZCAncy8uc29ydGVkLmJhbS8ucTE1LnNvcnRlZC5iYW0vJ2AKICBzYW10b29scyB2aWV3IC1xIDMwIC1iICRmaWxlID4gJHEzMAogIHNhbXRvb2xzIHZpZXcgLXEgMjAgLWIgJGZpbGUgPiAkcTIwCiAgc2FtdG9vbHMgdmlldyAtcSAxNSAtYiAkZmlsZSA+ICRxMTUKZG9uZQpgYGAKCkNvbGxlY3QgdG90YWwgcmVhZCBjb3VudCBhbmQgYWxpZ25lZCByZWFkIGNvdW50OgoKYGBgCnBhcmFsbGVsIC0tdGFnIHNhbXRvb2xzIHZpZXcgLWMgIDo6OiAqLmJhbSA+IHJlYWRzLmNzdgpgYGAKCjxpbWcgc3JjPSJyZXN1bHRzLnBuZyIgLz4K