[go: nahoru, domu]

Skip to content

Commit

Permalink
Sequencing databases for better mem cache
Browse files Browse the repository at this point in the history
  • Loading branch information
shawncal committed Jan 27, 2022
1 parent 10f620d commit 81c65ef
Showing 1 changed file with 31 additions and 29 deletions.
60 changes: 31 additions & 29 deletions input_prep/make_msa.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/bin/bash

# make the script stop when error (non-true exit code) is occured
set -e

# inputs
in_fasta="$1"
out_dir="$2"
Expand All @@ -9,50 +12,49 @@ CPU="$3"
MEM="$4"

# sequence databases
DB="$PIPEDIR/UniRef30_2020_06/UniRef30_2020_06"
MYDB="$PIPEDIR/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"
declare -a DATABASES=( \
"$PIPEDIR/UniRef30_2020_06/UniRef30_2020_06" \
"$PIPEDIR/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt")

# setup hhblits command
HHBLITS="hhblits -o /dev/null -mact 0.35 -maxfilt 100000000 -neffmax 20 -cov 25 -cpu $CPU -nodiff -realign_max 100000000 -maxseq 1000000 -maxmem $MEM -n 4 -d $DB -d $MYDB"
HHBLITS="hhblits -o /dev/null -mact 0.35 -maxfilt 100000000 -neffmax 20 -cov 25 -cpu $CPU -nodiff -realign_max 100000000 -maxseq 1000000 -maxmem $MEM -n 4"
echo $HHBLITS

mkdir -p $out_dir/hhblits
tmp_dir="$out_dir/hhblits"
mkdir -p $tmp_dir
out_prefix="$out_dir/t000_"

# perform iterative searches
prev_a3m="$in_fasta"
for e in 1e-30 1e-10 1e-6 1e-3
for (( i=0; i<${#DATABASES[@]}; i++ ))
do
echo $e
$HHBLITS -i $prev_a3m -oa3m $tmp_dir/t000_.$e.a3m -e $e -v 0
hhfilter -id 90 -cov 75 -i $tmp_dir/t000_.$e.a3m -o $tmp_dir/t000_.$e.id90cov75.a3m
hhfilter -id 90 -cov 50 -i $tmp_dir/t000_.$e.a3m -o $tmp_dir/t000_.$e.id90cov50.a3m
prev_a3m="$tmp_dir/t000_.$e.id90cov50.a3m"
n75=`grep -c "^>" $tmp_dir/t000_.$e.id90cov75.a3m`
n50=`grep -c "^>" $tmp_dir/t000_.$e.id90cov50.a3m`

if ((n75>2000))
then
if [ ! -s ${out_prefix}.msa0.a3m ]
for e in 1e-30 1e-10 1e-6 1e-3
do
echo db:$i e:$e
$HHBLITS -i $prev_a3m -oa3m $tmp_dir/t000_.db$i.$e.a3m -e $e -v 0 -d ${DATABASES[$i]}

hhfilter -id 90 -cov 75 -i $tmp_dir/t000_.db$i.$e.a3m -o $tmp_dir/t000_.db$i.$e.id90cov75.a3m
prev_a3m="$tmp_dir/t000_.db$i.$e.id90cov50.a3m"
n75=`grep -c "^>" $tmp_dir/t000_.db$i.$e.id90cov75.a3m`
echo " -- cov75 results: $n75"
if (($n75>2000))
then
cp $tmp_dir/t000_.$e.id90cov75.a3m ${out_prefix}.msa0.a3m
break
cp $tmp_dir/t000_.db$i.$e.id90cov75.a3m ${out_prefix}.msa0.a3m
break 2
fi
elif ((n50>4000))
then
if [ ! -s ${out_prefix}.msa0.a3m ]

hhfilter -id 90 -cov 50 -i $tmp_dir/t000_.db$i.$e.a3m -o $tmp_dir/t000_.db$i.$e.id90cov50.a3m
n50=`grep -c "^>" $tmp_dir/t000_.db$i.$e.id90cov50.a3m`
echo " -- cov50 results: $n50"
if (($n50>4000))
then
cp $tmp_dir/t000_.$e.id90cov50.a3m ${out_prefix}.msa0.a3m
break
cp $tmp_dir/t000_.db$i.$e.id90cov50.a3m ${out_prefix}.msa0.a3m
break 2
fi
else
continue
fi

done
done

if [ ! -s ${out_prefix}.msa0.a3m ]
then
cp $tmp_dir/t000_.1e-3.id90cov50.a3m ${out_prefix}.msa0.a3m
fi
cp $prev_a3m ${out_prefix}.msa0.a3m
fi

0 comments on commit 81c65ef

Please sign in to comment.