[go: nahoru, domu]

Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
LindoNkambule committed Aug 10, 2019
1 parent 448b2de commit a022abf
Showing 1 changed file with 106 additions and 0 deletions.
106 changes: 106 additions & 0 deletions VCFCompare.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total Truth VCF records: 4358\n",
"Non-reference VCF records: 4358\n",
"Total Query VCF records: 7494\n",
"Non-reference VCF records: 4495\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/anaconda3/lib/python3.6/site-packages/allel/io/vcf_read.py:1741: UserWarning: invalid INFO header: '##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">\\n'\n",
" warnings.warn('invalid INFO header: %r' % header)\n"
]
}
],
"source": [
"#!/usr/bin/env python\n",
"# Load a truth (\"golden\") VCF and a query VCF into DataFrames with scikit-allel,\n",
"# then print the total record count and the count above a QUAL threshold for each.\n",
"import sys\n",
"import pandas as pd\n",
"import allel  # pip install scikit-allel to install this module for analysis of large scale genetic variation data\n",
"import csv\n",
"\n",
"# Fields requested from both VCF files.\n",
"VCF_FIELDS = ['variants/CHROM', 'variants/POS', 'variants/REF', 'variants/ALT', 'QUAL']\n",
"# Position of QUAL inside each per-record list built below.\n",
"# Assumes the DataFrame columns follow the order of VCF_FIELDS -- TODO confirm.\n",
"QUAL_INDEX = 4\n",
"# Minimum QUAL (exclusive) for a record to be counted as passing.\n",
"# NOTE(review): the two call sets use different cut-offs; the rationale is not recorded here.\n",
"TRUTH_MIN_QUAL = 2\n",
"QUERY_MIN_QUAL = 0.003\n",
"\n",
"# Store the information in each VCF file in a DataFrame.\n",
"golden_vcf = allel.vcf_to_dataframe(\"bcftools_variants.vcf\", VCF_FIELDS, alt_number=1)\n",
"query_vcf = allel.vcf_to_dataframe(\"freeebayes_variants.vcf\", VCF_FIELDS, alt_number=1)\n",
"\n",
"# Each variant becomes one inner list, giving a list of lists per file.\n",
"golden_variants_list = golden_vcf.values.tolist()\n",
"query_variants_list = query_vcf.values.tolist()\n",
"\n",
"header = ['TRUTH.TOTAL', 'TP', 'FP', 'FN', 'QUERY.TOTAL', 'Recall', 'Precision']\n",
"# Named summary_rows rather than `list` so the builtin `list` stays usable in later cells.\n",
"summary_rows = []\n",
"\n",
"# Totals\n",
"# NOTE(review): the 'Non-reference' labels below report QUAL-threshold counts;\n",
"# the wording is kept so the saved output still matches.\n",
"Truth_Total = len(golden_variants_list)\n",
"print(\"Total Truth VCF records: \" + str(Truth_Total))\n",
"Truth_Pass_Total = sum(1 for record in golden_variants_list if record[QUAL_INDEX] > TRUTH_MIN_QUAL)\n",
"print(\"Non-reference VCF records: \" + str(Truth_Pass_Total))\n",
"Query_Total = len(query_variants_list)\n",
"print(\"Total Query VCF records: \" + str(Query_Total))\n",
"Query_Pass_Total = sum(1 for record in query_variants_list if record[QUAL_INDEX] > QUERY_MIN_QUAL)\n",
"print(\"Non-reference VCF records: \" + str(Query_Pass_Total))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4\n",
"4\n",
"5\n"
]
}
],
"source": [
"# Scratch check: print the third element of every inner list.\n",
"a = [[1, 3, 4], [2, 4, 4], [3, 4, 5]]\n",
"# The loop variable is `row`, not `list`, so the builtin `list` is not rebound in the kernel.\n",
"for row in a:\n",
"    print(row[2])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit a022abf

Please sign in to comment.