-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
grouped functions into methods under classes
- Loading branch information
1 parent
9308c0d
commit 2e9baa9
Showing
1 changed file
with
63 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,55 +1,65 @@ | ||
#!/usr/bin/env python | ||
|
||
def vcfExtract(vcf):
    """Extract CHROM, POS, REF and ALT from a VCF into a DataFrame.

    Args:
        vcf: Passed straight to ``allel.vcf_to_dataframe`` as its input.

    Returns:
        The DataFrame produced by ``allel.vcf_to_dataframe`` for the four
        requested variant fields, with ``alt_number=1``.
    """
    import allel

    wanted_fields = ['variants/CHROM', 'variants/POS', 'variants/REF', 'variants/ALT']
    return allel.vcf_to_dataframe(vcf, wanted_fields, alt_number=1)
|
||
def vcfDFtoList(vcfdf):
    """Convert a DataFrame into a list of rows, each row itself a list.

    Args:
        vcfdf: Object exposing ``.values`` (e.g. a pandas DataFrame).

    Returns:
        ``vcfdf.values.tolist()`` -- one inner list per row.
    """
    row_array = vcfdf.values
    return row_array.tolist()
|
||
#for SNVs only | ||
def snvList(variantsList):
    """Return the variants whose REF and ALT are both one character long.

    Args:
        variantsList: Iterable of records where index 2 is used as REF and
            index 3 as ALT; both are stringified before measuring length.

    Returns:
        A new list holding the matching records in their original order.
    """
    return [
        record
        for record in variantsList
        # Build the full length pair first so both alleles are always read.
        if (len(str(record[2])), len(str(record[3]))) == (1, 1)
    ]
|
||
#for INDELs only | ||
def indelList(variantsList):
    """Return the variants where REF or ALT is longer than one character.

    Args:
        variantsList: Iterable of records where index 2 is used as REF and
            index 3 as ALT; both are stringified before measuring length.

    Returns:
        A new list holding the matching records in their original order.
    """
    return [
        record
        for record in variantsList
        # "Either allele longer than 1" is the same as "the longer one is > 1".
        if max(len(str(record[2])), len(str(record[3]))) > 1
    ]
|
||
#for both SNVs and INDELs | ||
def snvINDELlists(variantsList):
    """Split variants into SNVs and INDELs in a single pass.

    A record is an INDEL when its REF (index 2) or ALT (index 3) string is
    longer than one character, and an SNV when both are exactly one
    character long. Any other record (an allele that stringifies to an
    empty string) is skipped.

    Args:
        variantsList: Iterable of records where index 2 is used as REF and
            index 3 as ALT; both are stringified before measuring length.

    Returns:
        A ``(snvs, indels)`` tuple of two new lists, each preserving the
        input order.
    """
    snvs = []
    indels = []
    for variant in variantsList:
        ref_len = len(str(variant[2]))
        alt_len = len(str(variant[3]))
        if ref_len > 1 or alt_len > 1:
            indels.append(variant)
        elif ref_len == 1 and alt_len == 1:
            # Explicit SNV test rather than a bare `else`, so a record with
            # an empty allele is not counted as an SNV.
            # NOTE(review): presumably an empty ALT is how the loader
            # represents a missing ALT ('.') -- confirm against the VCF reader.
            snvs.append(variant)
    return snvs, indels
|
||
def variantCalls(truth, query):
    """Compare two variant lists and return their overlap and differences.

    Each record is converted to a tuple so the comparison can be done with
    set operations; duplicate records within one input collapse to one.

    Args:
        truth: Iterable of records forming the reference set.
        query: Iterable of records forming the set under evaluation.

    Returns:
        A ``(TPs, FPs, FNs)`` tuple of sets: records in both inputs, records
        only in ``query``, and records only in ``truth``.
    """
    expected = {tuple(record) for record in truth}
    observed = {tuple(record) for record in query}
    return expected & observed, observed - expected, expected - observed
|
||
class infoExtract:
    """Load the CHROM, POS, REF and ALT fields of a VCF as a list of rows."""

    def __init__(self, vcf):
        # Stored as given; handed to allel.vcf_to_dataframe in alleles().
        self.vcf = vcf

    def alleles(self):
        """Return one ``[CHROM, POS, REF, ALT]``-ordered list per variant.

        Returns:
            ``DataFrame.values.tolist()`` of the frame that
            ``allel.vcf_to_dataframe`` builds with ``alt_number=1``.
        """
        import allel

        wanted_fields = ['variants/CHROM', 'variants/POS', 'variants/REF', 'variants/ALT']
        frame = allel.vcf_to_dataframe(self.vcf, wanted_fields, alt_number=1)
        return frame.values.tolist()
|
||
|
||
class createLists():
    """Split a list of variant records into SNVs and INDELs by allele length.

    Records are indexed positionally: element 2 is used as REF and element 3
    as ALT. Both are stringified before their length is measured.
    """

    def __init__(self, variantsList):
        self.variantsList = variantsList

    @staticmethod
    def _alleleLengths(variant):
        """Return ``(len(REF), len(ALT))`` for one record."""
        return len(str(variant[2])), len(str(variant[3]))

    def snvList(self):
        """Return the records whose REF and ALT are both one character long."""
        return [
            variant
            for variant in self.variantsList
            if self._alleleLengths(variant) == (1, 1)
        ]

    def indelList(self):
        """Return the records where REF or ALT is longer than one character."""
        return [
            variant
            for variant in self.variantsList
            if max(self._alleleLengths(variant)) > 1
        ]

    def snvINDELlists(self):
        """Return ``(snvs, indels)`` built in a single pass.

        Uses the same definitions as ``snvList`` and ``indelList``, so a
        record with an empty REF or ALT string lands in neither list.
        """
        snvs = []
        indels = []
        for variant in self.variantsList:
            ref_len, alt_len = self._alleleLengths(variant)
            if ref_len > 1 or alt_len > 1:
                indels.append(variant)
            elif ref_len == 1 and alt_len == 1:
                # Explicit SNV test rather than a bare `else`, so a record
                # with an empty allele is not counted as an SNV.
                snvs.append(variant)
        return snvs, indels
|
||
class concordance():
    """Compare a truth list of variant records against a query list."""

    def __init__(self, truth, query):
        self.truth = truth
        self.query = query

    def variantCalls(self):
        """Return ``(TPs, FPs, FNs)`` as sets of record tuples.

        TPs are records present in both inputs, FPs are records only in
        ``query``, and FNs are records only in ``truth``. Records are turned
        into tuples so they can be placed in sets.
        """
        expected = {tuple(record) for record in self.truth}
        observed = {tuple(record) for record in self.query}
        return expected & observed, observed - expected, expected - observed