-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit_reads.sh
executable file
·137 lines (88 loc) · 2.28 KB
/
split_reads.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/bin/bash
# Thu Jul 30 10:27:01 2015
# kpalin@merit.ltdk.helsinki.fi
# For debugging
#set -o verbose
# Die on unset variables
set -o nounset
# Die on errors
set -o errexit
# Die if any part of a pipe fails
set -o pipefail
usage() {
echo -e "usage:\n$0 -F splitters.fasta sequence.(fast5|fasta)
Map short sequences in splitters.fasta to reads in sequence.fasta and
generate SAM like output file usable for e.g. split_fasta_by_sam.py
-F splitters.fast(a|5) Split the reads according to alignment to these.
-d Use default lastal arguments, instead of ones for nanopore reads
-h Show this message and exit." >&2
exit 1
}
#PATH=/mnt/cg8/projects/Nanopore/software/tmp/last-756/src:$PATH
function toFasta() {
SEQFILENAME=$1
FASTA=$2
test ! -s ${FASTA} # Check that output file doesn't exist
isHDF=$(file ${SEQFILENAME}|grep -cF 'Hierarchical Data Format' || true)
if [ ${isHDF} -eq 1 ]; then
for READTYPE in 2D fwd rev
do
poretools fasta --type ${READTYPE} ${SEQFILENAME} >${FASTA}
test ! -s ${FASTA} || break
done
else
cp ${SEQFILENAME} ${FASTA}
fi
}
while getopts "F:hdS" flag
do
case "$flag" in
F)
INPUT2=$(readlink -f "$OPTARG")
test -r ${INPUT2}
;;
S)
KEEPSAM=1
;;
d)
DEFAULTLASTAL=1
;;
h|*)
usage
;;
esac
done
shift $((OPTIND-1)); OPTIND=1
trap usage EXIT
FAST5=$1
# Make temporary directory which will be cleaned at script exit
TEMPDIR=$(mktemp --directory )
function _cleanup {
rm -r $TEMPDIR
}
trap _cleanup EXIT
BASE=$(basename ${FAST5} .fast5)
OUT=$(readlink -f ${BASE}.png)
FASTA1=${TEMPDIR}/${BASE}.fasta
MAF=${TEMPDIR}/${BASE}.maf
toFasta ${FAST5} ${FASTA1}
if [ -z ${INPUT2:-} ];
then
FASTA2=${FASTA1}
else
FASTA2=${TEMPDIR}/$(basename ${INPUT2} )_2.fasta
toFasta ${INPUT2} ${FASTA2}
fi
SEQ1name=$(head -1 ${FASTA1}|sed -e 's/[>]\([^ ]\+\).*$/\1/')
SEQ2name=$(head -1 ${FASTA2}|sed -e 's/[>]\([^ ]\+\).*$/\1/')
SAM=$(readlink -f ${SEQ1name}_vs_${SEQ2name}.sam)
cd $TEMPDIR
lastdb ${BASE} ${FASTA1}
if [ "${DEFAULTLASTAL:-0}" -eq 1 ];
then
echo NO HERE
lastal -T 0 ${BASE} ${FASTA2} >${MAF}
else
lastal -l 3 -T 1 -e 20 -a 1 -b 1 -r 1 -q 1 ${BASE} ${FASTA2} >${MAF}
fi
maf-convert sam ${MAF}> ${SAM}