forked from cuidaniel/Fusion_hg38
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis_description_both_v6.py
88 lines (80 loc) · 2.87 KB
/
analysis_description_both_v6.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import subprocess
import os
import sys
# Build the tab-separated "analysis description" table for fusion-calling
# outputs and print it to stdout.
#
# call example:
#   foo.py samples.txt BRCA Yes submission_dir
#
# Arguments:
#   1  path to a file listing one case name per line
#   2  cancer/disease label, copied verbatim into every output row
#   3  "Yes" to also read Missing/samples.txt (relative to the working
#      directory); any other value means there are no "missing" cases
#   4  optional directory under which "Submission/" lives; defaults to the
#      current working directory when omitted or empty

# Catalog that is scanned for FASTQ rows (site-specific absolute path).
BAM_MAP_PATH = "/diskmnt/Projects/Users/dcui/Projects/Fusion_hg38/Data_locations/CPTAC3.catalog/BamMap/katmai.BamMap.dat"

# Cases listed here get a tumor row only (no normal row) in the report.
MISSING_SAMPLES_PATH = "Missing/samples.txt"

REPORT_HEADER = "#Case_Name\tDisease\tOutput_File_Path\tOutput_File_Format\tSample_Name_R1\tFASTQ_R1_UUID\tSample_Name_R2\tFASTQ_R2_UUID"


def resolve_output_root(argv):
    """Return the absolute output directory, always ending in "/".

    Uses ``argv[4]`` when it is present and non-empty, otherwise the current
    working directory.  The original code indexed ``argv[4]`` unconditionally,
    so omitting the argument raised IndexError instead of falling back.
    """
    target = argv[4] if len(argv) > 4 and argv[4] else "."
    return os.path.abspath(target) + "/"


def read_case_names(path, first_column_only=False):
    """Return the case names listed in ``path``, one per line, in file order.

    Each line is stripped of surrounding whitespace.  With
    ``first_column_only`` set, only the text before the first tab is kept.
    Blank lines are skipped so that a trailing newline in the file does not
    produce an empty case name (which would later fail the UUID lookup).
    """
    names = []
    with open(path) as handle:
        for raw in handle:
            name = raw.strip()
            if first_column_only:
                name = name.split("\t")[0]
            if name:
                names.append(name)
    return names


def load_fastq_uuids(bam_map_path, wanted_cases):
    """Collect RNA-Seq FASTQ UUIDs for the wanted cases from a BamMap file.

    A row is used only when one of its tab-separated fields is exactly
    "FASTQ", its second field is a wanted case, and the second dot-separated
    component of its first field (the sample name) is "RNA-Seq".  The value
    stored is the row's tenth field.

    Returns a dict of the form
    ``{case: {"tumor": {"R1": "", "R2": ""}, "normal": {"R1": "", "R2": ""}}}``
    where slots with no matching row keep the empty string.  Cases with no
    matching row at all are absent from the dict.
    """
    # Build the lookup set once instead of concatenating lists per row.
    wanted = set(wanted_cases)
    uuids = {}
    # "with" guarantees the handle is closed even if a malformed row raises.
    with open(bam_map_path) as handle:
        for raw in handle:
            fields = raw.strip().split("\t")
            if "FASTQ" not in fields:
                continue
            case = fields[1]
            if case not in wanted:
                continue
            sample_name = fields[0]
            if sample_name.split(".")[1] != "RNA-Seq":
                continue
            if case not in uuids:
                uuids[case] = {"tumor": {"R1": "", "R2": ""},
                               "normal": {"R1": "", "R2": ""}}
            # Sample names ending in "T" are filed as tumor; every other
            # suffix is filed as normal.
            tissue = "tumor" if sample_name[-1] == "T" else "normal"
            # Any sample name without "R1" in it is treated as the R2 read.
            read = "R1" if "R1" in sample_name else "R2"
            uuids[case][tissue][read] = fields[9]
    return uuids


def format_report_row(case, cancer, out_root, uuids, tissue):
    """Return one tab-joined report row for ``case`` and ``tissue``.

    ``tissue`` is "tumor" or "normal".  Tumor rows use the "_T" file suffix
    and ".T" sample names; normal rows use "_N" and ".A".

    Raises KeyError when ``case`` has no entry in ``uuids``.
    """
    if tissue == "tumor":
        file_suffix, sample_tag = "_T", "T"
    else:
        file_suffix, sample_tag = "_N", "A"
    reads = uuids[case][tissue]
    return "\t".join([
        case,
        cancer,
        out_root + "Submission/" + case + file_suffix + ".Fusions.txt",
        "TSV",
        case + ".RNA-Seq.R1." + sample_tag,
        reads["R1"],
        case + ".RNA-Seq.R2." + sample_tag,
        reads["R2"],
    ])


def iter_report_lines(samples, miss_samples, cancer, out_root, uuids):
    """Yield the report lines in output order, starting with the header.

    Every case in ``samples`` that is not also in ``miss_samples`` yields a
    tumor row followed by a normal row.  Afterwards every case in
    ``miss_samples`` yields a tumor row only.  Lines are yielded lazily so
    that, as before, rows already produced are printed even if a later case
    fails its UUID lookup.
    """
    missing = set(miss_samples)
    yield REPORT_HEADER
    for case in samples:
        if case in missing:
            continue
        yield format_report_row(case, cancer, out_root, uuids, "tumor")
        yield format_report_row(case, cancer, out_root, uuids, "normal")
    for case in miss_samples:
        yield format_report_row(case, cancer, out_root, uuids, "tumor")


def main(argv=None):
    """Parse the command line, load the inputs and print the report."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 4:
        sys.exit("usage: <script> samples.txt CANCER Yes|No [submission_dir]")

    cancer = argv[2]
    out_root = resolve_output_root(argv)

    if argv[3] == "Yes":
        miss_samples = read_case_names(MISSING_SAMPLES_PATH,
                                       first_column_only=True)
    else:
        miss_samples = []
    samples = read_case_names(argv[1])

    uuids = load_fastq_uuids(BAM_MAP_PATH, samples + miss_samples)
    for line in iter_report_lines(samples, miss_samples, cancer, out_root,
                                  uuids):
        print(line)


if __name__ == "__main__":
    main()