-
Notifications
You must be signed in to change notification settings - Fork 5
/
qryclinvar.py
executable file
·52 lines (47 loc) · 1.78 KB
/
qryclinvar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python
""" Query ClinVar data indexed with MongoDB """
from nosqlbiosets.qryutils import Query
class QueryClinVar(Query):
    """Aggregation queries over ClinVar records.

    Both methods build an aggregation pipeline and hand it to
    ``self.aggregate_query``, which is not defined in this class
    (presumably inherited from ``Query`` -- confirm there how and where
    the pipeline is executed).
    """

    def topinterpretationspersubmitter(self, qc):
        """Return the abundance of variant interpretations per submitter.

        Records selected by ``qc`` are unwound on
        ``InterpretedRecord.clinicalAssertion`` and then grouped by the
        pair (interpretation description, submitter name of the
        assertion); the pipeline sorts the groups by descending abundance.

        :param qc: match condition used as the ``$match`` stage
        :return: dict mapping ``(interpretation, submitter)`` tuples to
            the number of grouped entries, filled in the order the
            aggregation yields the groups
        """
        aggq = [
            {"$match": qc},
            {"$unwind": "$InterpretedRecord.clinicalAssertion"},
            {"$group": {
                "_id": {
                    "interpretation": "$InterpretedRecord.Interpretations."
                                      "Interpretation.Description",
                    "submitter": "$InterpretedRecord.clinicalAssertion."
                                 "ClinVarAccession.SubmitterName",
                },
                "abundance": {"$sum": 1}
            }},
            {"$sort": {"abundance": -1}},
            {'$project': {
                "abundance": 1,
                "interpretation": "$_id.interpretation",
                "submitter": "$_id.submitter",
                "_id": 0
            }}
        ]
        # A projected field whose source value is missing is left out of
        # the result row, so read both key parts with .get(): such a group
        # is reported under None instead of aborting with a KeyError.
        return {
            (row.get('interpretation'), row.get('submitter')):
                row['abundance']
            for row in self.aggregate_query(aggq)
        }

    def topinterpretationspergene(self, qc, limit=10):
        """Return the abundance of variant interpretations per gene.

        Records selected by ``qc`` are grouped by the pair (gene symbol,
        interpretation description); the groups are sorted by descending
        abundance and cut off after ``limit`` entries.

        :param qc: match condition used as the ``$match`` stage
        :param limit: value of the ``$limit`` stage, i.e. the maximum
            number of groups kept (default 10)
        :return: the result of ``self.aggregate_query`` for the pipeline,
            passed through unchanged; the pipeline emits groups with an
            ``_id`` holding ``gene`` and ``desc`` plus an ``abundance``
            count
        """
        aggq = [
            {"$match": qc},
            {"$group": {
                "_id": {
                    "gene": '$InterpretedRecord.SimpleAllele.GeneList.Gene.'
                            'Symbol',
                    "desc": "$InterpretedRecord.Interpretations."
                            "Interpretation.Description"
                },
                "abundance": {"$sum": 1}
            }},
            {"$sort": {"abundance": -1}},
            {"$limit": limit}
        ]
        return self.aggregate_query(aggq)