%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Print the first 135 lines of the VCF file: the '##' meta-information
# header followed by the '#CHROM' column-header row (the last line shown).
!head -n 135 Pfeiffer-quartet.vcf
##fileformat=VCFv4.1 ##ApplyRecalibration="analysis_type=ApplyRecalibration input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/tmp/generate_APPLY_RECALIBRATOR.py/d2cd22743d3eea79f59dd21ebb84a0d7/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=/storage/gluster/insilico/data/GenomicsData/Series//ISDB11122GPL11154/apply_recalibrator/d2cd22743d3eea79f59dd21ebb84a0d7/SM.recal.snps.vcf.log help=false input=[(RodBinding name=input source=/tmp/generate_APPLY_RECALIBRATOR.py/d2cd22743d3eea79f59dd21ebb84a0d7/ISDB11122.snps.raw.vcf)] recal_file=(RodBinding name=recal_file source=/tmp/generate_APPLY_RECALIBRATOR.py/d2cd22743d3eea79f59dd21ebb84a0d7/ISDB11122.snps.VarRecal.recal) tranches_file=/tmp/generate_APPLY_RECALIBRATOR.py/d2cd22743d3eea79f59dd21ebb84a0d7/ISDB11122.snps.VarRecal.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 ignore_filter=null mode=SNP filter_mismatching_base_and_quals=false" 
##CombineVariants="analysis_type=CombineVariants input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/tmp/generate_COMBINE_VARIANTS.py/00e90c599e331929a09028693bed91f7/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=/storage/gluster/insilico/data/GenomicsData/Series//ISDB11122GPL11154/combine_variants/00e90c599e331929a09028693bed91f7/ISDB11122.SNPrecal.IndelFiltered.vcf.log help=false variant=[(RodBinding name=variant source=/tmp/generate_COMBINE_VARIANTS.py/00e90c599e331929a09028693bed91f7/ISDB11122.indel.filtered.vcf), (RodBinding name=variant2 source=/tmp/generate_COMBINE_VARIANTS.py/00e90c599e331929a09028693bed91f7/SM.recal.snps.vcf)] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub genotypemergeoption=UNSORTED filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED multipleallelesmergetype=BY_TYPE rod_priority_list=null printComplexMerges=false filteredAreUncalled=false minimalVCF=false setKey=set assumeIdenticalSamples=false 
minimumN=1 suppressCommandLineHeader=false mergeInfoWithMaxAC=false filter_mismatching_base_and_quals=false" ##FILTER=<ID=GATKStandard,Description="QD < 2.0 || ReadPosRankSum < -20.0 || FS > 200.0"> ##FILTER=<ID=LowQual,Description="Low quality"> ##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -5.6494 <= x < 1.6606"> ##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -2277.7485"> ##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -2277.7485 <= x < -5.6494"> ##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> ##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> ##INFO=<ID=ABHet,Number=1,Type=Float,Description="Allele Balance for hets (ref/(ref+alt))"> ##INFO=<ID=ABHom,Number=1,Type=Float,Description="Allele Balance for homs (A/(A+O))"> ##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> ##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> ##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. 
Ref base qualities"> ##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> ##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> ##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?"> ##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions"> ##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval"> ##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> ##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> ##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> ##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"> ##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"> ##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> ##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> ##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. 
Ref read mapping qualities"> ##INFO=<ID=OND,Number=1,Type=Float,Description="Overall non-diploid ratio (alleles/(alleles+non-alleles))"> ##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> ##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)"> ##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)"> ##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"> ##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias"> ##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat"> ##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"> ##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"> ##INFO=<ID=set,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants"> ##SelectVariants="analysis_type=SelectVariants input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/tmp/generate_SELECTEDVARIANTS.py/3aed2f246e8c0249ff8a031bd209dc10/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/tmp/generate_SELECTEDVARIANTS.py/3aed2f246e8c0249ff8a031bd209dc10/ISDB11122.variants.raw.vcf) discordance=(RodBinding name= source=UNBOUND) concordance=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sample_name=[] sample_expressions=null sample_file=null exclude_sample_name=[] exclude_sample_file=[] select_expressions=[] excludeNonVariants=false excludeFiltered=false regenotype=false restrictAllelesTo=ALL keepOriginalAC=false mendelianViolation=false mendelianViolationQualThreshold=0.0 select_random_number=0 select_random_fraction=0.0 remove_fraction_genotypes=0.0 selectTypeToInclude=[SNP] keepIDs=null fullyDecode=false forceGenotypesDecode=false justRead=false filter_mismatching_base_and_quals=false" ##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/storage/gluster/insilico/data/GenomicsData/ISDBM32/ISDBM322016/printreads/880ddadd1566253466a7e6761e854445/ISDBM322016.recal.bam, /storage/gluster/insilico/data/GenomicsData/ISDBM32/ISDBM322018/printreads/c3a712cdc76c431297effdb013de7bd2/ISDBM322018.recal.bam, /storage/gluster/insilico/data/GenomicsData/ISDBM32/ISDBM322017/printreads/b43e7abc909888a6a066fc3da4c305e7/ISDBM322017.recal.bam, /storage/gluster/insilico/data/GenomicsData/ISDBM32/ISDBM322015/printreads/8eb77fc396f7325f7da846402d1a6df9/ISDBM322015.recal.bam] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 
reference_sequence=/tmp/generate_UNIFIEDGENOTYPER.py/dbe01e32e5f0143c1e3d62bfa4a46bd0/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=6 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH p_nonref_model=EXACT pcr_error_rate=1.0E-4 noSLOD=false annotateNDA=false min_base_quality_score=17 max_deletion_fraction=0.05 cap_max_alternate_alleles_for_indels=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 noBandedIndel=false indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false heterozygosity=0.0010 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=3 dbsnp=(RodBinding name=dbsnp source=/tmp/generate_UNIFIEDGENOTYPER.py/dbe01e32e5f0143c1e3d62bfa4a46bd0/dbsnp_135.b37.vcf) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub 
no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub debug_file=null metrics_file=null annotation=[AlleleBalance, DepthOfCoverage, FisherStrand] excludeAnnotation=[] filter_mismatching_base_and_quals=false" ##VariantFiltration="analysis_type=VariantFiltration input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/tmp/generate_VARIANT_FILTRATION.py/af94a8015760f0c3d105ae21bdcee583/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads=null num_io_threads=null num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=/storage/gluster/insilico/data/GenomicsData/Series//ISDB11122GPL11154/variant_filtration/af94a8015760f0c3d105ae21bdcee583/ISDB11122.indel.filtered.vcf.log help=false variant=(RodBinding name=variant source=/tmp/generate_VARIANT_FILTRATION.py/af94a8015760f0c3d105ae21bdcee583/ISDB11122.indels.raw.vcf) mask=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub 
bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub filterExpression=[QD < 2.0 || ReadPosRankSum < -20.0 || FS > 200.0] filterName=[GATKStandard] genotypeFilterExpression=[] genotypeFilterName=[] clusterSize=3 clusterWindowSize=0 maskExtension=0 maskName=Mask missingValuesInExpressionsShouldEvaluateAsFailing=true invalidatePreviousFilters=false filter_mismatching_base_and_quals=false" ##contig=<ID=1,length=249250621,assembly=b37> ##contig=<ID=2,length=243199373,assembly=b37> ##contig=<ID=3,length=198022430,assembly=b37> ##contig=<ID=4,length=191154276,assembly=b37> ##contig=<ID=5,length=180915260,assembly=b37> ##contig=<ID=6,length=171115067,assembly=b37> ##contig=<ID=7,length=159138663,assembly=b37> ##contig=<ID=8,length=146364022,assembly=b37> ##contig=<ID=9,length=141213431,assembly=b37> ##contig=<ID=10,length=135534747,assembly=b37> ##contig=<ID=11,length=135006516,assembly=b37> ##contig=<ID=12,length=133851895,assembly=b37> ##contig=<ID=13,length=115169878,assembly=b37> ##contig=<ID=14,length=107349540,assembly=b37> ##contig=<ID=15,length=102531392,assembly=b37> ##contig=<ID=16,length=90354753,assembly=b37> ##contig=<ID=17,length=81195210,assembly=b37> ##contig=<ID=18,length=78077248,assembly=b37> ##contig=<ID=19,length=59128983,assembly=b37> ##contig=<ID=20,length=63025520,assembly=b37> ##contig=<ID=21,length=48129895,assembly=b37> ##contig=<ID=22,length=51304566,assembly=b37> ##contig=<ID=X,length=155270560,assembly=b37> ##contig=<ID=Y,length=59373566,assembly=b37> ##contig=<ID=MT,length=16569,assembly=b37> ##contig=<ID=GL000207.1,length=4262,assembly=b37> ##contig=<ID=GL000226.1,length=15008,assembly=b37> ##contig=<ID=GL000229.1,length=19913,assembly=b37> ##contig=<ID=GL000231.1,length=27386,assembly=b37> ##contig=<ID=GL000210.1,length=27682,assembly=b37> ##contig=<ID=GL000239.1,length=33824,assembly=b37> ##contig=<ID=GL000235.1,length=34474,assembly=b37> ##contig=<ID=GL000201.1,length=36148,assembly=b37> 
##contig=<ID=GL000247.1,length=36422,assembly=b37> ##contig=<ID=GL000245.1,length=36651,assembly=b37> ##contig=<ID=GL000197.1,length=37175,assembly=b37> ##contig=<ID=GL000203.1,length=37498,assembly=b37> ##contig=<ID=GL000246.1,length=38154,assembly=b37> ##contig=<ID=GL000249.1,length=38502,assembly=b37> ##contig=<ID=GL000196.1,length=38914,assembly=b37> ##contig=<ID=GL000248.1,length=39786,assembly=b37> ##contig=<ID=GL000244.1,length=39929,assembly=b37> ##contig=<ID=GL000238.1,length=39939,assembly=b37> ##contig=<ID=GL000202.1,length=40103,assembly=b37> ##contig=<ID=GL000234.1,length=40531,assembly=b37> ##contig=<ID=GL000232.1,length=40652,assembly=b37> ##contig=<ID=GL000206.1,length=41001,assembly=b37> ##contig=<ID=GL000240.1,length=41933,assembly=b37> ##contig=<ID=GL000236.1,length=41934,assembly=b37> ##contig=<ID=GL000241.1,length=42152,assembly=b37> ##contig=<ID=GL000243.1,length=43341,assembly=b37> ##contig=<ID=GL000242.1,length=43523,assembly=b37> ##contig=<ID=GL000230.1,length=43691,assembly=b37> ##contig=<ID=GL000237.1,length=45867,assembly=b37> ##contig=<ID=GL000233.1,length=45941,assembly=b37> ##contig=<ID=GL000204.1,length=81310,assembly=b37> ##contig=<ID=GL000198.1,length=90085,assembly=b37> ##contig=<ID=GL000208.1,length=92689,assembly=b37> ##contig=<ID=GL000191.1,length=106433,assembly=b37> ##contig=<ID=GL000227.1,length=128374,assembly=b37> ##contig=<ID=GL000228.1,length=129120,assembly=b37> ##contig=<ID=GL000214.1,length=137718,assembly=b37> ##contig=<ID=GL000221.1,length=155397,assembly=b37> ##contig=<ID=GL000209.1,length=159169,assembly=b37> ##contig=<ID=GL000218.1,length=161147,assembly=b37> ##contig=<ID=GL000220.1,length=161802,assembly=b37> ##contig=<ID=GL000213.1,length=164239,assembly=b37> ##contig=<ID=GL000211.1,length=166566,assembly=b37> ##contig=<ID=GL000199.1,length=169874,assembly=b37> ##contig=<ID=GL000217.1,length=172149,assembly=b37> ##contig=<ID=GL000216.1,length=172294,assembly=b37> 
##contig=<ID=GL000215.1,length=172545,assembly=b37> ##contig=<ID=GL000205.1,length=174588,assembly=b37> ##contig=<ID=GL000219.1,length=179198,assembly=b37> ##contig=<ID=GL000224.1,length=179693,assembly=b37> ##contig=<ID=GL000223.1,length=180455,assembly=b37> ##contig=<ID=GL000195.1,length=182896,assembly=b37> ##contig=<ID=GL000212.1,length=186858,assembly=b37> ##contig=<ID=GL000222.1,length=186861,assembly=b37> ##contig=<ID=GL000200.1,length=187035,assembly=b37> ##contig=<ID=GL000193.1,length=189789,assembly=b37> ##contig=<ID=GL000194.1,length=191469,assembly=b37> ##contig=<ID=GL000225.1,length=211173,assembly=b37> ##contig=<ID=GL000192.1,length=547496,assembly=b37> ##reference=file:///tmp/generate_COMBINE_VARIANTS.py/00e90c599e331929a09028693bed91f7/human_g1k_v37.fasta ##source=SelectVariants ##SnpEffVersion="3.3c (build 2013-06-28), by Pablo Cingolani" ##SnpEffCmd="SnpEff -t hg19 /tmp/generate_SNPEFF.py/98b17a7321fce5a67e5dcd5a1c24a311/ISDB11122.SNPrecal.IndelFiltered.vcf " ##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: 'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Transcript_BioType | Gene_Coding | Transcript_ID | Exon | GenotypeNum [ | ERRORS | WARNINGS ] )' "> #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ISDBM322015 ISDBM322016 ISDBM322017 ISDBM322018
데이터 로드하기. 파일 앞부분의 134줄은 VCF 메타 정보(헤더)이므로 건너뛰고, 컬럼 이름(`#CHROM`, `POS`, ...)이 있는 135번째 줄부터 읽어야 함.
# Load the VCF variant table into a DataFrame.
#
# The file begins with meta-information lines, followed by one column-header
# row that starts with '#CHROM'. Rather than hard-coding how many lines to
# skip (134 for this file), locate the '#CHROM' row so the cell keeps working
# if the header length changes.
vcf_path = 'Pfeiffer-quartet.vcf'

# Binary mode: only the line prefix is inspected, so no text decoding of the
# (potentially very long) header lines is needed.
with open(vcf_path, 'rb') as vcf_file:
    for n_meta_lines, raw_line in enumerate(vcf_file):
        if raw_line.startswith(b'#CHROM'):
            break
    else:
        raise ValueError(
            "no '#CHROM' column-header line found in %s" % vcf_path
        )

# n_meta_lines is the number of lines before the '#CHROM' row; skipping them
# makes that row the DataFrame header. '#CHROM' is read as str because it
# mixes numeric contig names ('1', '2', ...) with names such as 'X', 'MT'
# and 'GL000192.1'.
df = pd.read_table(vcf_path, skiprows=n_meta_lines, dtype={'#CHROM': str})
df
#CHROM | POS | ID | REF | ALT | QUAL | FILTER | INFO | FORMAT | ISDBM322015 | ISDBM322016 | ISDBM322017 | ISDBM322018 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 14907 | rs79585140 | A | G | 514.87 | VQSRTrancheSNP99.00to99.90 | ABHet=0.628;ABHom=1.00;AC=3;AF=0.375;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:6,0:6:18:0,18,178 | 0/1:17,10:27:99:208,0,341 | 0/1:14,9:23:99:176,0,258 | 0/1:20,11:31:99:174,0,380 |
1 | 1 | 14930 | rs75454623 | A | G | 780.25 | VQSRTrancheSNP99.00to99.90 | ABHet=0.644;AC=4;AF=0.500;AN=8;BaseQRankSum=-5... | GT:AD:DP:GQ:PL | 0/1:6,1:7:7:7,0,150 | 0/1:16,13:29:99:296,0,378 | 0/1:19,13:32:99:245,0,420 | 0/1:24,15:39:99:274,0,535 |
2 | 1 | 14948 | . | G | A | 47.48 | VQSRTrancheSNP99.90to100.00 | ABHet=0.846;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:10,0:10:24:0,24,228 | 0/0:32,0:32:81:0,81,734 | 0/1:28,6:34:79:79,0,549 | 0/1:41,6:47:8:8,0,850 |
3 | 1 | 15211 | rs144718396 | T | G | 110.28 | VQSRTrancheSNP99.90to100.00 | ABHet=0.359;ABHom=1.00;AC=5;AF=0.625;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:1,1:2:17:17,0,17 | 0/1:3,5:8:8:76,0,8 | 1/1:0,1:1:3:23,3,0 | 0/1:1,4:5:14:36,0,14 |
4 | 1 | 17538 | . | C | A | 151.64 | VQSRTrancheSNP99.00to99.90 | ABHet=0.735;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:2,0:2:6:0,6,80 | 0/1:10,2:13:51:51,0,138 | 0/0:23,1:24:0:0,0,418 | 0/1:14,8:22:99:139,0,243 |
5 | 1 | 63336 | . | C | T | 460.26 | VQSRTrancheSNP99.90to100.00 | ABHet=0.705;ABHom=1.00;AC=2;AF=0.333;AN=6;Base... | GT:AD:DP:GQ:PL | ./. | 0/1:31,15:46:99:340,0,308 | 0/1:25,9:34:99:159,0,394 | 0/0:52,0:52:90:0,90,830 |
6 | 1 | 63735 | . | CCTA | C | 193.23 | PASS | AC=3;AF=0.500;AN=6;BaseQRankSum=-0.960;DP=42;F... | GT:AD:DP:GQ:PL | ./. | 1/1:6,2:4:6:116,6,0 | 0/1:8,4:8:99:128,0,301 | 0/0:18,2:15:30:0,30,666 |
7 | 1 | 69511 | rs75062661 | A | G | 4293.01 | VQSRTrancheSNP99.90to100.00 | ABHom=0.982;AC=8;AF=1.00;AN=8;BaseQRankSum=2.0... | GT:AD:DP:GQ:PL | 1/1:2,171:173:99:2218,228,0 | 1/1:0,33:34:60:508,60,0 | 1/1:0,61:63:93:777,93,0 | 1/1:0,61:61:96:790,96,0 |
8 | 1 | 121009 | rs1851943 | C | T | 35.27 | VQSRTrancheSNP99.00to99.90 | ABHet=0.636;ABHom=0.929;AC=1;AF=0.167;AN=6;Bas... | GT:AD:DP:GQ:PL | ./. | 0/0:11,0:11:33:0,33,436 | 0/0:6,1:7:18:0,18,240 | 0/1:7,4:11:70:70,0,229 |
9 | 1 | 133160 | . | G | A | 88.69 | VQSRTrancheSNP99.00to99.90 | ABHet=0.750;ABHom=0.841;AC=3;AF=0.500;AN=6;Bas... | GT:AD:DP:GQ:PL | 1/1:1,3:4:9:109,9,0 | 0/1:3,1:4:20:20,0,107 | ./. | 0/0:1,0:1:3:0,3,40 |
10 | 1 | 546952 | rs9438487 | T | C | 43.22 | VQSRTrancheSNP99.90to100.00 | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=3;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:49,6,0 | ./. | ./. | 1/1:0,1:1:3:28,3,0 |
11 | 1 | 745347 | . | T | C | 59.16 | VQSRTrancheSNP99.90to100.00 | ABHet=0.702;ABHom=0.938;AC=3;AF=0.375;AN=8;Bas... | GT:AD:DP:GQ:PL | 0/1:3,1:4:13:13,0,60 | 0/0:15,1:16:8:0,8,212 | 0/1:4,4:8:71:75,0,71 | 0/1:13,2:15:13:13,0,203 |
12 | 1 | 745370 | rs146246821 | TA | T | 510.54 | PASS | AC=4;AF=0.500;AN=8;BaseQRankSum=1.905;DB;DP=64... | GT:AD:DP:GQ:PL | 0/1:4,1:5:32:32,0,185 | 0/1:17,4:18:99:132,0,623 | 0/1:9,7:16:99:273,0,395 | 0/1:16,4:19:99:125,0,661 |
13 | 1 | 752566 | rs3094315 | G | A | 118.67 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=4;Dels=0.00... | GT:AD:DP:GQ:PL | ./. | 1/1:0,2:2:6:79,6,0 | 1/1:0,1:1:3:39,3,0 | 1/1:0,1:1:3:36,3,0 |
14 | 1 | 752721 | rs3131972 | A | G | 242.70 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=7;Dels=0.00... | GT:AD:DP:GQ:PL | ./. | 1/1:0,2:2:6:80,6,0 | 1/1:0,4:4:12:159,12,0 | 1/1:0,1:1:3:40,3,0 |
15 | 1 | 753405 | rs61770173 | C | A | 432.40 | VQSRTrancheSNP99.90to100.00 | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=31;Dels=0.0... | GT:AD:DP:GQ:PL | ./. | 1/1:0,14:14:24:221,24,0 | 1/1:0,8:8:12:111,12,0 | 1/1:0,9:9:15:138,15,0 |
16 | 1 | 753474 | rs2073814 | C | G | 271.92 | VQSRTrancheSNP99.00to99.90 | ABHom=0.952;AC=6;AF=1.00;AN=6;DB;DP=15;Dels=0.... | GT:AD:DP:GQ:PL | ./. | 1/1:0,3:3:9:118,9,0 | 1/1:0,6:7:9:83,9,0 | 1/1:0,5:5:9:108,9,0 |
17 | 1 | 758324 | rs3131955 | T | C | 45.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:77,6,0 | ./. | ./. | ./. |
18 | 1 | 780027 | rs2977613 | G | T | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,1:1:3:40,3,0 | ./. | 1/1:0,1:1:3:40,3,0 | ./. |
19 | 1 | 808631 | rs11240779 | G | A | 1853.99 | PASS | ABHom=1.000;AC=8;AF=1.00;AN=8;DB;DP=52;Dels=0.... | GT:AD:DP:GQ:PL | 1/1:0,7:7:21:223,21,0 | 1/1:0,16:16:48:602,48,0 | 1/1:0,13:13:36:457,36,0 | 1/1:0,16:16:48:611,48,0 |
20 | 1 | 808922 | rs6594027 | G | A | 4960.01 | VQSRTrancheSNP99.00to99.90 | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=222;Dels=0.... | GT:AD:DP:GQ:PL | 1/1:0,43:43:96:1199,96,0 | 1/1:0,62:62:99:1304,102,0 | 1/1:0,58:58:93:1169,93,0 | 1/1:0,59:59:99:1288,102,0 |
21 | 1 | 808928 | rs11240780 | C | T | 5605.01 | VQSRTrancheSNP99.00to99.90 | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=234;Dels=0.... | GT:AD:DP:GQ:PL | 1/1:0,48:48:99:1318,111,0 | 1/1:0,61:61:99:1456,114,0 | 1/1:0,63:63:99:1383,111,0 | 1/1:0,62:62:99:1448,114,0 |
22 | 1 | 809171 | . | G | A | 103.55 | VQSRTrancheSNP99.90to100.00 | ABHet=0.800;ABHom=0.936;AC=2;AF=0.250;AN=8;Bas... | GT:AD:DP:GQ:PL | 0/1:35,6:41:36:36,0,520 | 0/1:44,15:59:99:109,0,657 | 0/0:47,5:52:57:0,57,722 | 0/0:31,0:32:78:0,78,634 |
23 | 1 | 809732 | rs147199422 | T | C | 44.02 | VQSRTrancheSNP99.90to100.00 | ABHet=0.865;ABHom=0.932;AC=1;AF=0.125;AN=8;Bas... | GT:AD:DP:GQ:PL | 0/1:64,10:74:80:80,0,2123 | 0/0:58,2:60:99:0,121,2171 | 0/0:42,4:46:17:0,17,1486 | 0/0:54,5:59:29:0,29,1912 |
24 | 1 | 809744 | . | A | G | 40.99 | VQSRTrancheSNP99.90to100.00 | ABHet=0.864;ABHom=0.950;AC=1;AF=0.125;AN=8;Bas... | GT:AD:DP:GQ:PL | 0/1:57,9:67:77:77,0,1851 | 0/0:57,2:60:99:0,115,2057 | 0/0:46,3:49:59:0,59,1686 | 0/0:53,3:56:74:0,74,1828 |
25 | 1 | 812267 | rs7541694 | A | G | 385.67 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=13;Dels=0.0... | GT:AD:DP:GQ:PL | 1/1:0,3:3:9:95,9,0 | 1/1:0,5:5:15:175,15,0 | 1/1:0,2:2:6:55,6,0 | 1/1:0,3:3:9:99,9,0 |
26 | 1 | 812284 | rs7545373 | C | G | 428.75 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=13;Dels=0.0... | GT:AD:DP:GQ:PL | 1/1:0,5:5:15:185,15,0 | 1/1:0,4:4:12:148,12,0 | 1/1:0,2:2:6:55,6,0 | 1/1:0,2:2:6:79,6,0 |
27 | 1 | 823790 | rs143626389 | G | A | 38.82 | PASS | ABHom=1.00;AC=2;AF=0.500;AN=4;BaseQRankSum=0.3... | GT:AD:DP:GQ:PL | ./. | 1/1:0,2:2:6:75,6,0 | ./. | 0/0:3,0:3:9:0,9,99 |
28 | 1 | 834832 | rs4411087 | G | C | 48.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ./. | 1/1:0,2:2:6:80,6,0 | ./. | ./. |
29 | 1 | 849998 | rs13303222 | A | G | 45.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:77,6,0 | ./. | ./. | ./. |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
300006 | GL000225.1 | 198637 | . | C | T | 80.98 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ./. | 1/1:0,3:3:9:113,9,0 | ./. | ./. |
300007 | GL000225.1 | 198643 | . | A | G | 86.98 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ./. | 1/1:0,3:3:9:119,9,0 | ./. | ./. |
300008 | GL000225.1 | 202533 | . | C | T | 46.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=2;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ./. | ./. | ./. | 1/1:0,2:2:6:78,6,0 |
300009 | GL000225.1 | 203673 | . | C | T | 78.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:80,6,0 | 1/1:0,1:1:3:32,3,0 | ./. | ./. |
300010 | GL000192.1 | 99120 | . | C | T | 1132.79 | VQSRTrancheSNP99.90to100.00 | ABHet=0.594;ABHom=1.00;AC=3;AF=0.375;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:37,35:72:99:680,0,597 | 0/1:30,11:41:99:211,0,460 | 0/1:15,13:28:99:285,0,257 | 0/0:45,0:45:99:0,123,1131 |
300011 | GL000192.1 | 99283 | . | T | C | 775.57 | VQSRTrancheSNP99.90to100.00 | ABHet=0.648;ABHom=0.800;AC=5;AF=0.625;AN=8;Bas... | GT:AD:DP:GQ:PL | 0/1:21,12:33:99:143,0,226 | 0/1:13,12:25:99:223,0,167 | 0/1:26,7:33:99:127,0,155 | 1/1:4,16:20:36:328,36,0 |
300012 | GL000192.1 | 99390 | . | C | G | 74.63 | VQSRTrancheSNP99.90to100.00 | ABHet=0.527;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:3,0:3:6:0,6,55 | 0/0:7,0:7:12:0,12,111 | 0/1:5,4:9:13:96,0,13 | 0/1:5,5:10:19:19,0,46 |
300013 | GL000192.1 | 100085 | . | A | G | 128.56 | VQSRTrancheSNP99.00to99.90 | ABHet=0.442;ABHom=1.00;AC=4;AF=0.667;AN=6;Base... | GT:AD:DP:GQ:PL | ./. | 0/1:2,1:3:19:19,0,66 | 0/1:1,4:5:11:120,0,11 | 1/1:0,1:1:3:28,3,0 |
300014 | GL000192.1 | 101228 | . | T | C | 493.56 | VQSRTrancheSNP99.00to99.90 | ABHet=0.378;ABHom=1.00;AC=5;AF=0.625;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:6,3:9:93:93,0,186 | 0/1:3,7:10:75:222,0,75 | 0/1:1,5:6:23:133,0,23 | 1/1:0,3:3:9:90,9,0 |
300015 | GL000192.1 | 101317 | . | A | C | 311.87 | VQSRTrancheSNP99.90to100.00 | ABHet=0.429;ABHom=1.00;AC=3;AF=0.375;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:10,7:17:99:146,0,201 | 0/1:3,7:10:28:166,0,28 | 0/0:7,0:7:18:0,18,166 | 0/1:2,3:5:43:43,0,43 |
300016 | GL000192.1 | 107709 | . | G | A | 149.56 | VQSRTrancheSNP99.00to99.90 | ABHet=0.662;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:24,0:24:45:0,45,511 | 0/0:12,0:12:27:0,27,328 | 0/1:6,5:12:99:124,0,131 | 0/1:14,4:18:67:67,0,313 |
300017 | GL000192.1 | 111042 | . | C | A | 440.55 | VQSRTrancheSNP99.00to99.90 | ABHet=0.460;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:39,0:39:78:0,78,1000 | 0/0:24,0:24:45:0,45,579 | 0/1:9,11:20:45:310,0,45 | 0/1:8,9:17:99:172,0,133 |
300018 | GL000192.1 | 121717 | . | G | A | 286.55 | VQSRTrancheSNP99.90to100.00 | ABHet=0.663;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:45,0:45:51:0,51,470 | 0/0:29,0:29:51:0,51,463 | 0/1:20,7:27:87:87,0,281 | 0/1:17,12:29:99:241,0,209 |
300019 | GL000192.1 | 121977 | . | G | A | 1200.79 | VQSRTrancheSNP99.00to99.90 | ABHet=0.450;ABHom=1.00;AC=3;AF=0.375;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:19,19:38:99:492,0,361 | 0/1:16,15:31:99:394,0,386 | 0/0:28,0:28:84:0,84,918 | 0/1:6,12:18:99:358,0,137 |
300020 | GL000192.1 | 131599 | . | C | G | 360.79 | VQSRTrancheSNP99.90to100.00 | ABHet=0.460;ABHom=1.00;AC=3;AF=0.375;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:3,3:6:65:65,0,65 | 0/1:10,9:19:99:155,0,125 | 0/0:15,0:15:36:0,36,332 | 0/1:6,11:17:75:184,0,75 |
300021 | GL000192.1 | 139953 | . | C | G | 165.56 | VQSRTrancheSNP99.90to100.00 | ABHet=0.451;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:27,0:27:57:0,57,725 | 0/0:18,0:18:30:0,30,363 | 0/1:11,4:15:48:48,0,280 | 0/1:1,5:6:21:159,0,21 |
300022 | GL000192.1 | 139953 | . | CTG | C | 1104.53 | PASS | AC=4;AF=0.500;AN=8;BaseQRankSum=-3.358;DP=66;F... | GT:AD:DP:GQ:PL | 1/1:7,18:20:51:677,51,0 | 0/1:9,6:10:33:204,0,33 | 0/1:8,7:10:19:280,0,19 | 0/0:6,0:6:9:0,9,141 |
300023 | GL000192.1 | 160087 | . | C | T | 157.22 | PASS | ABHet=0.625;ABHom=1.00;AC=6;AF=0.750;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:2,2:4:52:60,0,52 | 0/1:3,1:4:25:25,0,105 | 1/1:0,2:2:6:78,6,0 | 1/1:0,1:1:3:37,3,0 |
300024 | GL000192.1 | 197562 | . | T | C | 158.55 | VQSRTrancheSNP99.90to100.00 | ABHom=1.00;AC=4;AF=1.00;AN=4;DP=7;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ./. | ./. | 1/1:0,4:4:12:111,12,0 | 1/1:0,3:3:9:83,9,0 |
300025 | GL000192.1 | 212151 | . | G | C | 566.79 | VQSRTrancheSNP99.90to100.00 | ABHet=0.498;AC=4;AF=0.500;AN=8;BaseQRankSum=0.... | GT:AD:DP:GQ:PL | 0/1:18,23:41:99:291,0,208 | 0/1:6,12:18:99:253,0,114 | 0/1:7,4:11:62:62,0,90 | 0/1:11,1:12:1:1,0,194 |
300026 | GL000192.1 | 216599 | . | C | A | 223.55 | VQSRTrancheSNP99.90to100.00 | ABHet=0.492;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:46,0:46:99:0,102,941 | 0/0:19,0:19:33:0,33,304 | 0/1:5,6:11:56:139,0,56 | 0/1:9,8:17:99:126,0,130 |
300027 | GL000192.1 | 228788 | . | G | T | 46.77 | VQSRTrancheSNP99.90to100.00 | ABHom=0.500;AC=2;AF=1.00;AN=2;DP=10;Dels=0.00;... | GT:AD:DP:GQ:PL | ./. | 1/1:2,2:4:6:78,6,0 | ./. | ./. |
300028 | GL000192.1 | 229867 | . | C | T | 2232.98 | VQSRTrancheSNP99.90to100.00 | ABHom=0.857;AC=8;AF=1.00;AN=8;DP=111;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:4,25:29:66:609,66,0 | 1/1:2,25:27:57:528,57,0 | 1/1:6,23:29:66:609,66,0 | 1/1:4,22:26:57:526,57,0 |
300029 | GL000192.1 | 272061 | . | C | CT | 125.59 | PASS | AC=3;AF=0.375;AN=8;BaseQRankSum=-0.058;DP=33;F... | GT:AD:DP:GQ:PL | 0/0:5,0:4:9:0,9,93 | 0/1:5,4:7:12:107,0,12 | 0/1:9,2:3:18:53,0,18 | 0/1:6,1:4:17:17,0,69 |
300030 | GL000192.1 | 311575 | . | C | T | 248.50 | PASS | ABHet=0.333;ABHom=1.00;AC=3;AF=0.375;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:2,0:2:3:0,3,39 | 0/1:3,6:9:93:212,0,93 | 1/1:0,2:2:6:80,6,0 | 0/0:5,0:5:15:0,15,181 |
300031 | GL000192.1 | 313293 | . | G | A | 174.79 | VQSRTrancheSNP99.90to100.00 | ABHet=0.640;ABHom=1.00;AC=3;AF=0.375;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:42,16:58:55:55,0,587 | 0/0:34,0:34:42:0,42,447 | 0/1:26,10:36:57:57,0,367 | 0/1:19,21:40:99:106,0,367 |
300032 | GL000192.1 | 313701 | . | T | C | 120.67 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DP=4;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ./. | 1/1:0,1:1:3:40,3,0 | 1/1:0,1:1:3:39,3,0 | 1/1:0,2:2:6:77,6,0 |
300033 | GL000192.1 | 337826 | . | TA | T | 489.51 | PASS | AC=2;AF=0.250;AN=8;BaseQRankSum=-3.725;DP=62;F... | GT:AD:DP:GQ:PL | 0/0:18,0:18:51:0,51,855 | 0/0:13,0:13:39:0,39,695 | 0/1:5,10:15:99:438,0,228 | 0/1:13,3:16:99:102,0,647 |
300034 | GL000192.1 | 394140 | . | A | G | 45.26 | PASS | ABHom=1.00;AC=2;AF=0.500;AN=4;BaseQRankSum=0.7... | GT:AD:DP:GQ:PL | ./. | 0/0:1,0:1:3:0,3,39 | ./. | 1/1:0,2:2:6:80,6,0 |
300035 | 10 | 123256215 | . | T | G | 100.00 | PASS | GENE=FGFR2;INHERITANCE=AD;MIM=101600 | GT:AD:DP:GQ:PL | 0/0:1,0:1:3:0,3,39 | 0/0:1,0:1:3:0,3,39 | 1/0:1,0:1:3:0,3,39 | 0/0:1,0:1:3:0,3,39 |
300036 rows × 13 columns
# Tally how many records carry each distinct ALT string; multi-allelic
# sites show up as comma-separated entries such as "A,T".
df['ALT'].value_counts()
A 74449 T 73785 G 72981 C 72857 TA 509 CT 493 AT 479 GA 424 CA 395 GT 359 AG 256 TG 233 GC 219 TC 157 AC 152 CG 63 A,T 56 CTT 50 CAT 45 TAA 42 C,CT 41 A,C 41 CAA 40 TAC 37 A,G 37 CAG 37 C,T 36 C,G 36 CCT 35 G,T 35 ... ATTC 1 C,CGT 1 CAACA 1 TAAA,TA 1 GTGTT 1 GATAA 1 CAGAGAG,C 1 AGGG 1 GGAGGAA 1 GCCTT 1 G,GTT 1 GGGT 1 CTCAT 1 TACTG 1 CGGT 1 GGTTT 1 CACCT 1 GA,G 1 GGGGCTGGTACACACAGGTCAGCACGGCCAGGTTCCCACTCCCG 1 GT,G 1 G,GAAA 1 CATAT 1 TTGTG,T 1 CTTAT 1 GACAC,G 1 ACCAT 1 CAAA,C 1 ACTAT 1 GTAAA 1 CCTCTCT,C 1 Name: ALT, dtype: int64
# Print column dtypes, non-null counts and memory usage of the full table.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 300036 entries, 0 to 300035 Data columns (total 13 columns): #CHROM 300036 non-null object POS 300036 non-null int64 ID 300036 non-null object REF 300036 non-null object ALT 300036 non-null object QUAL 300036 non-null float64 FILTER 300036 non-null object INFO 300036 non-null object FORMAT 300036 non-null object ISDBM322015 300036 non-null object ISDBM322016 300036 non-null object ISDBM322017 300036 non-null object ISDBM322018 300036 non-null object dtypes: float64(1), int64(1), object(11) memory usage: 29.8+ MB
FILTER가 PASS인 레코드만 취합
# Keep only the records whose FILTER column is exactly 'PASS', then
# summarise the reduced table.
dfc = df.loc[df['FILTER'].eq('PASS')]
dfc.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 268101 entries, 6 to 300035 Data columns (total 13 columns): #CHROM 268101 non-null object POS 268101 non-null int64 ID 268101 non-null object REF 268101 non-null object ALT 268101 non-null object QUAL 268101 non-null float64 FILTER 268101 non-null object INFO 268101 non-null object FORMAT 268101 non-null object ISDBM322015 268101 non-null object ISDBM322016 268101 non-null object ISDBM322017 268101 non-null object ISDBM322018 268101 non-null object dtypes: float64(1), int64(1), object(11) memory usage: 28.6+ MB
# Renumber rows from 0; the previous row labels are kept as an 'index' column.
dfc = dfc.reset_index(drop=False)
sample_ids = ['ISDBM322015', 'ISDBM322016', 'ISDBM322017', 'ISDBM322018']
sample_ids_genotype = ['{} genotype'.format(s) for s in sample_ids]


def call_genotype(row):
    """Spell out each sample's genotype as allele sequences.

    The GT sub-field (everything before the first ':') of every sample
    column is split on '/' or '|', and each allele index is replaced by the
    matching sequence: index 0 is ``row['REF']`` and index ``k >= 1`` is the
    k-th entry of the comma-separated ``row['ALT']``.  A missing allele
    ('.') becomes an empty string, so './.' is rendered as '-'.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing 'REF',
            'ALT' and one string entry per name in ``sample_ids``.

    Returns:
        pd.Series indexed by ``sample_ids_genotype`` whose values are the
        decoded alleles joined with '-' (e.g. 'A-G').

    Raises:
        IndexError: If a genotype refers to an allele index that the
            record's ALT list does not contain.
        ValueError: If an allele index is neither '.' nor an integer.
    """
    # One lookup table for every allele index. The previous dict only
    # resolved index 2 when there were exactly two ALT alleles (and index 3
    # when there were exactly three), silently falling back to the first
    # ALT at sites with more alternates.
    alleles = [row['REF']] + row['ALT'].split(',')

    def decode(sample_field):
        # Parse the GT token instead of fixed character positions so that
        # phased calls and multi-digit allele indices are read correctly.
        gt = sample_field.split(':', 1)[0]
        calls = gt.replace('|', '/').split('/')
        return '-'.join('' if c == '.' else alleles[int(c)] for c in calls)

    return pd.Series(
        [decode(row[s]) for s in sample_ids],
        index=sample_ids_genotype)
# Decode the genotype of every sample, one record (row) at a time, and
# display the resulting table.
genotypes = dfc.apply(call_genotype, axis='columns')
genotypes
ISDBM322015 genotype | ISDBM322016 genotype | ISDBM322017 genotype | ISDBM322018 genotype | |
---|---|---|---|---|
0 | - | C-C | CCTA-C | CCTA-CCTA |
1 | TA-T | TA-T | TA-T | TA-T |
2 | - | A-A | A-A | A-A |
3 | - | G-G | G-G | G-G |
4 | C-C | - | - | - |
5 | T-T | - | T-T | - |
6 | A-A | A-A | A-A | A-A |
7 | G-G | G-G | G-G | G-G |
8 | G-G | G-G | G-G | G-G |
9 | - | A-A | - | G-G |
10 | - | C-C | - | - |
11 | G-G | - | - | - |
12 | G-G | G-G | - | - |
13 | G-G | - | - | G-G |
14 | T-T | C-C | - | - |
15 | A-A | A-A | A-A | A-A |
16 | CCCCT-CCCCT | CCCCT-CCCCT | C-C | CCCCT-CCCCT |
17 | - | - | G-G | G-G |
18 | - | - | T-T | T-T |
19 | - | - | C-C | C-C |
20 | C-C | C-C | - | C-C |
21 | - | T-T | G-G | G-T |
22 | T-T | T-T | - | - |
23 | G-G | G-G | - | - |
24 | C-C | C-G | C-G | C-C |
25 | C-T | C-C | C-C | C-T |
26 | G-C | G-G | G-G | G-C |
27 | A-A | A-A | A-A | A-A |
28 | C-C | C-C | C-C | C-C |
29 | G-G | G-G | G-G | G-G |
... | ... | ... | ... | ... |
268071 | G-G | - | G-G | G-G |
268072 | GT-G | G-G | - | G-G |
268073 | G-G | - | - | A-G |
268074 | G-G | - | - | A-G |
268075 | C-C | - | - | C-C |
268076 | T-T | - | T-T | C-T |
268077 | - | - | T-T | A-T |
268078 | - | - | G-G | A-G |
268079 | G-G | - | - | - |
268080 | C-C | C-C | C-C | - |
268081 | G-G | G-G | G-G | G-G |
268082 | G-G | - | - | - |
268083 | - | C-C | C-C | - |
268084 | T-T | T-T | T-T | T-T |
268085 | G-G | G-G | - | - |
268086 | TC-TC | TC-TC | TC-TC | T-TC |
268087 | C-C | - | - | - |
268088 | G-G | - | - | - |
268089 | - | T-T | - | - |
268090 | - | G-G | - | - |
268091 | - | - | - | T-T |
268092 | T-T | T-T | - | - |
268093 | C-C | CTG-C | CTG-C | CTG-CTG |
268094 | C-T | C-T | T-T | T-T |
268095 | C-C | C-CT | C-CT | C-CT |
268096 | C-C | C-T | T-T | C-C |
268097 | - | C-C | C-C | C-C |
268098 | TA-TA | TA-TA | TA-T | TA-T |
268099 | - | A-A | - | G-G |
268100 | T-T | T-T | G-T | T-T |
268101 rows × 4 columns
# Attach the decoded per-sample genotype columns to the filtered table
# and display the result.
dfc[sample_ids_genotype] = genotypes
dfc
index | #CHROM | POS | ID | REF | ALT | QUAL | FILTER | INFO | FORMAT | ISDBM322015 | ISDBM322016 | ISDBM322017 | ISDBM322018 | ISDBM322015 genotype | ISDBM322016 genotype | ISDBM322017 genotype | ISDBM322018 genotype | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 6 | 1 | 63735 | . | CCTA | C | 193.23 | PASS | AC=3;AF=0.500;AN=6;BaseQRankSum=-0.960;DP=42;F... | GT:AD:DP:GQ:PL | ./. | 1/1:6,2:4:6:116,6,0 | 0/1:8,4:8:99:128,0,301 | 0/0:18,2:15:30:0,30,666 | - | C-C | CCTA-C | CCTA-CCTA |
1 | 12 | 1 | 745370 | rs146246821 | TA | T | 510.54 | PASS | AC=4;AF=0.500;AN=8;BaseQRankSum=1.905;DB;DP=64... | GT:AD:DP:GQ:PL | 0/1:4,1:5:32:32,0,185 | 0/1:17,4:18:99:132,0,623 | 0/1:9,7:16:99:273,0,395 | 0/1:16,4:19:99:125,0,661 | TA-T | TA-T | TA-T | TA-T |
2 | 13 | 1 | 752566 | rs3094315 | G | A | 118.67 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=4;Dels=0.00... | GT:AD:DP:GQ:PL | ./. | 1/1:0,2:2:6:79,6,0 | 1/1:0,1:1:3:39,3,0 | 1/1:0,1:1:3:36,3,0 | - | A-A | A-A | A-A |
3 | 14 | 1 | 752721 | rs3131972 | A | G | 242.70 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=7;Dels=0.00... | GT:AD:DP:GQ:PL | ./. | 1/1:0,2:2:6:80,6,0 | 1/1:0,4:4:12:159,12,0 | 1/1:0,1:1:3:40,3,0 | - | G-G | G-G | G-G |
4 | 17 | 1 | 758324 | rs3131955 | T | C | 45.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:77,6,0 | ./. | ./. | ./. | C-C | - | - | - |
5 | 18 | 1 | 780027 | rs2977613 | G | T | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,1:1:3:40,3,0 | ./. | 1/1:0,1:1:3:40,3,0 | ./. | T-T | - | T-T | - |
6 | 19 | 1 | 808631 | rs11240779 | G | A | 1853.99 | PASS | ABHom=1.000;AC=8;AF=1.00;AN=8;DB;DP=52;Dels=0.... | GT:AD:DP:GQ:PL | 1/1:0,7:7:21:223,21,0 | 1/1:0,16:16:48:602,48,0 | 1/1:0,13:13:36:457,36,0 | 1/1:0,16:16:48:611,48,0 | A-A | A-A | A-A | A-A |
7 | 25 | 1 | 812267 | rs7541694 | A | G | 385.67 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=13;Dels=0.0... | GT:AD:DP:GQ:PL | 1/1:0,3:3:9:95,9,0 | 1/1:0,5:5:15:175,15,0 | 1/1:0,2:2:6:55,6,0 | 1/1:0,3:3:9:99,9,0 | G-G | G-G | G-G | G-G |
8 | 26 | 1 | 812284 | rs7545373 | C | G | 428.75 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=13;Dels=0.0... | GT:AD:DP:GQ:PL | 1/1:0,5:5:15:185,15,0 | 1/1:0,4:4:12:148,12,0 | 1/1:0,2:2:6:55,6,0 | 1/1:0,2:2:6:79,6,0 | G-G | G-G | G-G | G-G |
9 | 27 | 1 | 823790 | rs143626389 | G | A | 38.82 | PASS | ABHom=1.00;AC=2;AF=0.500;AN=4;BaseQRankSum=0.3... | GT:AD:DP:GQ:PL | ./. | 1/1:0,2:2:6:75,6,0 | ./. | 0/0:3,0:3:9:0,9,99 | - | A-A | - | G-G |
10 | 28 | 1 | 834832 | rs4411087 | G | C | 48.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ./. | 1/1:0,2:2:6:80,6,0 | ./. | ./. | - | C-C | - | - |
11 | 29 | 1 | 849998 | rs13303222 | A | G | 45.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:77,6,0 | ./. | ./. | ./. | G-G | - | - | - |
12 | 30 | 1 | 851757 | rs62677860 | A | G | 63.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=4;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:69,6,0 | 1/1:0,2:2:3:28,3,0 | ./. | ./. | G-G | G-G | - | - |
13 | 31 | 1 | 861808 | rs13302982 | A | G | 66.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=3;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:64,6,0 | ./. | ./. | 1/1:0,1:1:3:36,3,0 | G-G | - | - | G-G |
14 | 32 | 1 | 862866 | rs3892970 | C | T | 31.26 | PASS | ABHom=1.00;AC=2;AF=0.500;AN=4;BaseQRankSum=-0.... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:66,6,0 | 0/0:1,0:1:3:0,3,40 | ./. | ./. | T-T | C-C | - | - |
15 | 33 | 1 | 866319 | rs9988021 | G | A | 825.39 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=24;Dels=0.0... | GT:AD:DP:GQ:PL | 1/1:0,11:11:33:427,33,0 | 1/1:0,7:7:18:228,18,0 | 1/1:0,4:4:12:133,12,0 | 1/1:0,2:2:6:76,6,0 | A-A | A-A | A-A | A-A |
16 | 34 | 1 | 866511 | rs60722469 | C | CCCCT | 325.15 | PASS | AC=6;AF=0.750;AN=8;BaseQRankSum=0.747;DB;DP=15... | GT:AD:DP:GQ:PL | 1/1:5,5:10:15:278,15,0 | 1/1:3,0:3:3:32,3,0 | 0/0:1,0:1:3:0,3,65 | 1/1:0,1:1:3:67,3,0 | CCCCT-CCCCT | CCCCT-CCCCT | C-C | CCCCT-CCCCT |
17 | 35 | 1 | 866920 | rs2341361 | A | G | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ./. | ./. | 1/1:0,1:1:3:40,3,0 | 1/1:0,1:1:3:40,3,0 | - | - | G-G | G-G |
18 | 36 | 1 | 867584 | rs2341360 | A | T | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ./. | ./. | 1/1:0,1:1:3:40,3,0 | 1/1:0,1:1:3:40,3,0 | - | - | T-T | T-T |
19 | 37 | 1 | 869323 | rs13303207 | T | C | 69.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=3;Dels=0.00... | GT:AD:DP:GQ:PL | ./. | ./. | 1/1:0,2:2:6:73,6,0 | 1/1:0,1:1:3:30,3,0 | - | - | C-C | C-C |
20 | 40 | 1 | 870903 | rs13303094 | T | C | 183.66 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=6;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:62,6,0 | 1/1:0,2:2:6:79,6,0 | ./. | 1/1:0,2:2:6:79,6,0 | C-C | C-C | - | C-C |
21 | 41 | 1 | 871334 | rs4072383 | G | T | 74.01 | PASS | ABHet=0.500;ABHom=1.00;AC=3;AF=0.500;AN=6;Base... | GT:AD:DP:GQ:PL | ./. | 1/1:0,2:2:6:63,6,0 | 0/0:3,0:3:9:0,9,120 | 0/1:2,2:4:52:52,0,68 | - | T-T | G-G | G-T |
22 | 42 | 1 | 873558 | rs1110052 | G | T | 39.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,1:1:3:39,3,0 | 1/1:0,1:1:3:33,3,0 | ./. | ./. | T-T | T-T | - | - |
23 | 43 | 1 | 876499 | rs4372192 | A | G | 200.89 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=6;Dels=0.00... | GT:AD:DP:GQ:PL | 1/1:0,4:4:12:160,12,0 | 1/1:0,2:2:6:76,6,0 | ./. | ./. | G-G | G-G | - | - |
24 | 44 | 1 | 878784 | rs142929357 | C | G | 125.24 | PASS | ABHet=0.583;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:16,0:16:45:0,45,553 | 0/1:4,4:8:99:136,0,113 | 0/1:2,1:3:30:30,0,57 | 0/0:2,0:2:6:0,6,66 | C-C | C-G | C-G | C-C |
25 | 45 | 1 | 879317 | rs7523549 | C | T | 388.57 | PASS | ABHet=0.493;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:15,8:23:99:243,0,369 | 0/0:7,0:7:21:0,21,252 | 0/0:12,0:12:36:0,36,405 | 0/1:3,6:9:64:187,0,64 | C-T | C-C | C-C | C-T |
26 | 46 | 1 | 879482 | rs149880798 | G | C | 799.55 | PASS | ABHet=0.592;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:27,20:47:99:584,0,841 | 0/0:33,0:33:93:0,93,1155 | 0/0:19,0:19:51:0,51,650 | 0/1:14,9:23:99:257,0,389 | G-C | G-G | G-G | G-C |
27 | 47 | 1 | 879676 | rs6605067 | G | A | 432.72 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=12;Dels=0.0... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:73,6,0 | 1/1:0,3:3:9:120,9,0 | 1/1:0,3:3:9:120,9,0 | 1/1:0,4:4:12:158,12,0 | A-A | A-A | A-A | A-A |
28 | 48 | 1 | 879687 | rs2839 | T | C | 335.22 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=10;Dels=0.0... | GT:AD:DP:GQ:PL | 1/1:0,1:1:3:40,3,0 | 1/1:0,3:3:9:114,9,0 | 1/1:0,2:2:6:80,6,0 | 1/1:0,4:4:12:139,12,0 | C-C | C-C | C-C | C-C |
29 | 49 | 1 | 880238 | rs3748592 | A | G | 516.84 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=14;Dels=0.0... | GT:AD:DP:GQ:PL | 1/1:0,1:1:3:40,3,0 | 1/1:0,5:5:15:199,15,0 | 1/1:0,3:3:9:120,9,0 | 1/1:0,5:5:15:196,15,0 | G-G | G-G | G-G | G-G |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
268071 | 299643 | GL000225.1 | 64237 | . | A | G | 152.14 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DP=5;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:70,6,0 | ./. | 1/1:0,1:1:3:40,3,0 | 1/1:0,2:2:6:78,6,0 | G-G | - | G-G | G-G |
268072 | 299706 | GL000225.1 | 67508 | . | GT | G | 229.23 | PASS | AC=5;AF=0.833;AN=6;BaseQRankSum=-0.742;DP=13;F... | GT:AD:DP:GQ:PL | 0/1:1,6:4:40:136,0,40 | 1/1:1,1:2:3:47,3,0 | ./. | 1/1:0,4:2:6:94,6,0 | GT-G | G-G | - | G-G |
268073 | 299713 | GL000225.1 | 71934 | . | A | G | 62.01 | PASS | ABHet=0.500;ABHom=1.00;AC=3;AF=0.750;AN=4;Base... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:70,6,0 | ./. | ./. | 0/1:1,1:2:28:28,0,33 | G-G | - | - | A-G |
268074 | 299714 | GL000225.1 | 71966 | . | A | G | 138.21 | PASS | ABHet=0.333;ABHom=1.00;AC=3;AF=0.750;AN=4;Base... | GT:AD:DP:GQ:PL | 1/1:0,3:3:9:113,9,0 | ./. | ./. | 0/1:1,2:3:30:62,0,30 | G-G | - | - | A-G |
268075 | 299715 | GL000225.1 | 72003 | . | T | C | 119.76 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DP=5;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,4:4:9:114,9,0 | ./. | ./. | 1/1:0,1:1:3:40,3,0 | C-C | - | - | C-C |
268076 | 299716 | GL000225.1 | 72057 | . | C | T | 75.26 | PASS | ABHet=0.500;ABHom=1.00;AC=5;AF=0.833;AN=6;Base... | GT:AD:DP:GQ:PL | 1/1:0,1:1:3:40,3,0 | ./. | 1/1:0,1:1:3:40,3,0 | 0/1:1,1:2:26:33,0,26 | T-T | - | T-T | C-T |
268077 | 299717 | GL000225.1 | 72073 | . | A | T | 37.25 | PASS | ABHet=0.500;ABHom=1.00;AC=3;AF=0.750;AN=4;Base... | GT:AD:DP:GQ:PL | ./. | ./. | 1/1:0,1:1:3:39,3,0 | 0/1:1,1:2:30:33,0,30 | - | - | T-T | A-T |
268078 | 299718 | GL000225.1 | 72104 | . | A | G | 74.02 | PASS | ABHet=0.500;ABHom=1.00;AC=3;AF=0.750;AN=4;Base... | GT:AD:DP:GQ:PL | ./. | ./. | 1/1:0,2:2:6:77,6,0 | 0/1:1,1:2:24:33,0,24 | - | - | G-G | A-G |
268079 | 299723 | GL000225.1 | 72249 | . | A | G | 37.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=2;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:69,6,0 | ./. | ./. | ./. | G-G | - | - | - |
268080 | 299734 | GL000225.1 | 72555 | . | T | C | 123.67 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DP=4;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,1:1:3:40,3,0 | 1/1:0,2:2:6:79,6,0 | 1/1:0,1:1:3:40,3,0 | ./. | C-C | C-C | C-C | - |
268081 | 299735 | GL000225.1 | 72582 | . | A | G | 202.89 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DP=8;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,4:4:9:111,9,0 | 1/1:0,2:2:6:61,6,0 | 1/1:0,1:1:3:35,3,0 | 1/1:0,1:1:3:33,3,0 | G-G | G-G | G-G | G-G |
268082 | 299778 | GL000225.1 | 78642 | . | A | G | 32.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=2;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:64,6,0 | ./. | ./. | ./. | G-G | - | - | - |
268083 | 299860 | GL000225.1 | 89058 | . | T | C | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ./. | 1/1:0,2:2:3:40,3,0 | 1/1:0,1:1:3:40,3,0 | ./. | - | C-C | C-C | - |
268084 | 299876 | GL000225.1 | 89930 | . | A | T | 139.54 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DP=5;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,1:1:3:39,3,0 | 1/1:0,2:2:6:72,6,0 | 1/1:0,1:1:3:30,3,0 | 1/1:0,1:1:3:35,3,0 | T-T | T-T | T-T | T-T |
268085 | 299909 | GL000225.1 | 96661 | . | A | G | 40.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,1:1:3:40,3,0 | 1/1:0,1:1:3:33,3,0 | ./. | ./. | G-G | G-G | - | - |
268086 | 299927 | GL000225.1 | 114739 | . | T | TC | 679.82 | PASS | AC=7;AF=0.875;AN=8;BaseQRankSum=3.317;DP=23;FS... | GT:AD:DP:GQ:PL | 1/1:0,5:5:15:209,15,0 | 1/1:0,6:6:18:260,18,0 | 1/1:0,4:4:12:158,12,0 | 0/1:5,3:8:99:106,0,182 | TC-TC | TC-TC | TC-TC | T-TC |
268087 | 299992 | GL000225.1 | 143836 | . | T | C | 117.52 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=4;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,4:4:12:150,12,0 | ./. | ./. | ./. | C-C | - | - | - |
268088 | 299993 | GL000225.1 | 143849 | . | A | G | 117.52 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=4;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,4:4:12:150,12,0 | ./. | ./. | ./. | G-G | - | - | - |
268089 | 300006 | GL000225.1 | 198637 | . | C | T | 80.98 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ./. | 1/1:0,3:3:9:113,9,0 | ./. | ./. | - | T-T | - | - |
268090 | 300007 | GL000225.1 | 198643 | . | A | G | 86.98 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ./. | 1/1:0,3:3:9:119,9,0 | ./. | ./. | - | G-G | - | - |
268091 | 300008 | GL000225.1 | 202533 | . | C | T | 46.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=2;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ./. | ./. | ./. | 1/1:0,2:2:6:78,6,0 | - | - | - | T-T |
268092 | 300009 | GL000225.1 | 203673 | . | C | T | 78.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | 1/1:0,2:2:6:80,6,0 | 1/1:0,1:1:3:32,3,0 | ./. | ./. | T-T | T-T | - | - |
268093 | 300022 | GL000192.1 | 139953 | . | CTG | C | 1104.53 | PASS | AC=4;AF=0.500;AN=8;BaseQRankSum=-3.358;DP=66;F... | GT:AD:DP:GQ:PL | 1/1:7,18:20:51:677,51,0 | 0/1:9,6:10:33:204,0,33 | 0/1:8,7:10:19:280,0,19 | 0/0:6,0:6:9:0,9,141 | C-C | CTG-C | CTG-C | CTG-CTG |
268094 | 300023 | GL000192.1 | 160087 | . | C | T | 157.22 | PASS | ABHet=0.625;ABHom=1.00;AC=6;AF=0.750;AN=8;Base... | GT:AD:DP:GQ:PL | 0/1:2,2:4:52:60,0,52 | 0/1:3,1:4:25:25,0,105 | 1/1:0,2:2:6:78,6,0 | 1/1:0,1:1:3:37,3,0 | C-T | C-T | T-T | T-T |
268095 | 300029 | GL000192.1 | 272061 | . | C | CT | 125.59 | PASS | AC=3;AF=0.375;AN=8;BaseQRankSum=-0.058;DP=33;F... | GT:AD:DP:GQ:PL | 0/0:5,0:4:9:0,9,93 | 0/1:5,4:7:12:107,0,12 | 0/1:9,2:3:18:53,0,18 | 0/1:6,1:4:17:17,0,69 | C-C | C-CT | C-CT | C-CT |
268096 | 300030 | GL000192.1 | 311575 | . | C | T | 248.50 | PASS | ABHet=0.333;ABHom=1.00;AC=3;AF=0.375;AN=8;Base... | GT:AD:DP:GQ:PL | 0/0:2,0:2:3:0,3,39 | 0/1:3,6:9:93:212,0,93 | 1/1:0,2:2:6:80,6,0 | 0/0:5,0:5:15:0,15,181 | C-C | C-T | T-T | C-C |
268097 | 300032 | GL000192.1 | 313701 | . | T | C | 120.67 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DP=4;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ./. | 1/1:0,1:1:3:40,3,0 | 1/1:0,1:1:3:39,3,0 | 1/1:0,2:2:6:77,6,0 | - | C-C | C-C | C-C |
268098 | 300033 | GL000192.1 | 337826 | . | TA | T | 489.51 | PASS | AC=2;AF=0.250;AN=8;BaseQRankSum=-3.725;DP=62;F... | GT:AD:DP:GQ:PL | 0/0:18,0:18:51:0,51,855 | 0/0:13,0:13:39:0,39,695 | 0/1:5,10:15:99:438,0,228 | 0/1:13,3:16:99:102,0,647 | TA-TA | TA-TA | TA-T | TA-T |
268099 | 300034 | GL000192.1 | 394140 | . | A | G | 45.26 | PASS | ABHom=1.00;AC=2;AF=0.500;AN=4;BaseQRankSum=0.7... | GT:AD:DP:GQ:PL | ./. | 0/0:1,0:1:3:0,3,39 | ./. | 1/1:0,2:2:6:80,6,0 | - | A-A | - | G-G |
268100 | 300035 | 10 | 123256215 | . | T | G | 100.00 | PASS | GENE=FGFR2;INHERITANCE=AD;MIM=101600 | GT:AD:DP:GQ:PL | 0/0:1,0:1:3:0,3,39 | 0/0:1,0:1:3:0,3,39 | 1/0:1,0:1:3:0,3,39 | 0/0:1,0:1:3:0,3,39 | T-T | T-T | G-T | T-T |
268101 rows × 18 columns
# Column names for the per-sample numeric genotype: "<sample id> num".
sample_ids_number = list(map('{} num'.format, sample_ids))
def get_number(ref, alt):
    """Add up the two allele indices of a genotype call.

    Args:
        ref: First allele index as a string (e.g. '0').
        alt: Second allele index as a string (e.g. '1').

    Returns:
        ``int(ref) + int(alt)``, or ``np.nan`` when either argument contains
        '.' or when ``alt`` contains ','.
    """
    # NOTE(review): this is a sum of indices, so a '1/2' call yields 3 --
    # confirm that is what downstream code expects for multi-allelic sites.
    if ',' not in alt and '.' not in ref and '.' not in alt:
        return int(ref) + int(alt)
    return np.nan
def call_genotype_number(row):
    """Compute the numeric genotype of every sample in one record.

    For each name in ``sample_ids`` the characters at positions 0 and 2 of
    the sample's entry are handed to ``get_number``.

    Args:
        row: Record providing one string entry per name in ``sample_ids``.

    Returns:
        pd.Series indexed by ``sample_ids_number`` holding the
        ``get_number`` result for each sample.
    """
    numbers = []
    for sample in sample_ids:
        if not sample:
            continue
        # Positions 0 and 2 are the two allele characters of a call written
        # like '0/1:...'.
        numbers.append(get_number(row[sample][0], row[sample][2]))
    return pd.Series(numbers, index=sample_ids_number)
# Compute the numeric genotype for every record and store it in the
# "<sample id> num" columns, then display the table.
dfc[sample_ids_number] = dfc.apply(call_genotype_number, axis='columns')
dfc
index | #CHROM | POS | ID | REF | ALT | QUAL | FILTER | INFO | FORMAT | ... | ISDBM322017 | ISDBM322018 | ISDBM322015 genotype | ISDBM322016 genotype | ISDBM322017 genotype | ISDBM322018 genotype | ISDBM322015 num | ISDBM322016 num | ISDBM322017 num | ISDBM322018 num | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 6 | 1 | 63735 | . | CCTA | C | 193.23 | PASS | AC=3;AF=0.500;AN=6;BaseQRankSum=-0.960;DP=42;F... | GT:AD:DP:GQ:PL | ... | 0/1:8,4:8:99:128,0,301 | 0/0:18,2:15:30:0,30,666 | - | C-C | CCTA-C | CCTA-CCTA | NaN | 2.0 | 1.0 | 0.0 |
1 | 12 | 1 | 745370 | rs146246821 | TA | T | 510.54 | PASS | AC=4;AF=0.500;AN=8;BaseQRankSum=1.905;DB;DP=64... | GT:AD:DP:GQ:PL | ... | 0/1:9,7:16:99:273,0,395 | 0/1:16,4:19:99:125,0,661 | TA-T | TA-T | TA-T | TA-T | 1.0 | 1.0 | 1.0 | 1.0 |
2 | 13 | 1 | 752566 | rs3094315 | G | A | 118.67 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=4;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:39,3,0 | 1/1:0,1:1:3:36,3,0 | - | A-A | A-A | A-A | NaN | 2.0 | 2.0 | 2.0 |
3 | 14 | 1 | 752721 | rs3131972 | A | G | 242.70 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=7;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,4:4:12:159,12,0 | 1/1:0,1:1:3:40,3,0 | - | G-G | G-G | G-G | NaN | 2.0 | 2.0 | 2.0 |
4 | 17 | 1 | 758324 | rs3131955 | T | C | 45.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | C-C | - | - | - | 2.0 | NaN | NaN | NaN |
5 | 18 | 1 | 780027 | rs2977613 | G | T | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | ./. | T-T | - | T-T | - | 2.0 | NaN | 2.0 | NaN |
6 | 19 | 1 | 808631 | rs11240779 | G | A | 1853.99 | PASS | ABHom=1.000;AC=8;AF=1.00;AN=8;DB;DP=52;Dels=0.... | GT:AD:DP:GQ:PL | ... | 1/1:0,13:13:36:457,36,0 | 1/1:0,16:16:48:611,48,0 | A-A | A-A | A-A | A-A | 2.0 | 2.0 | 2.0 | 2.0 |
7 | 25 | 1 | 812267 | rs7541694 | A | G | 385.67 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=13;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:55,6,0 | 1/1:0,3:3:9:99,9,0 | G-G | G-G | G-G | G-G | 2.0 | 2.0 | 2.0 | 2.0 |
8 | 26 | 1 | 812284 | rs7545373 | C | G | 428.75 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=13;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:55,6,0 | 1/1:0,2:2:6:79,6,0 | G-G | G-G | G-G | G-G | 2.0 | 2.0 | 2.0 | 2.0 |
9 | 27 | 1 | 823790 | rs143626389 | G | A | 38.82 | PASS | ABHom=1.00;AC=2;AF=0.500;AN=4;BaseQRankSum=0.3... | GT:AD:DP:GQ:PL | ... | ./. | 0/0:3,0:3:9:0,9,99 | - | A-A | - | G-G | NaN | 2.0 | NaN | 0.0 |
10 | 28 | 1 | 834832 | rs4411087 | G | C | 48.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | - | C-C | - | - | NaN | 2.0 | NaN | NaN |
11 | 29 | 1 | 849998 | rs13303222 | A | G | 45.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | G-G | - | - | - | 2.0 | NaN | NaN | NaN |
12 | 30 | 1 | 851757 | rs62677860 | A | G | 63.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=4;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | G-G | G-G | - | - | 2.0 | 2.0 | NaN | NaN |
13 | 31 | 1 | 861808 | rs13302982 | A | G | 66.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=3;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,1:1:3:36,3,0 | G-G | - | - | G-G | 2.0 | NaN | NaN | 2.0 |
14 | 32 | 1 | 862866 | rs3892970 | C | T | 31.26 | PASS | ABHom=1.00;AC=2;AF=0.500;AN=4;BaseQRankSum=-0.... | GT:AD:DP:GQ:PL | ... | ./. | ./. | T-T | C-C | - | - | 2.0 | 0.0 | NaN | NaN |
15 | 33 | 1 | 866319 | rs9988021 | G | A | 825.39 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=24;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,4:4:12:133,12,0 | 1/1:0,2:2:6:76,6,0 | A-A | A-A | A-A | A-A | 2.0 | 2.0 | 2.0 | 2.0 |
16 | 34 | 1 | 866511 | rs60722469 | C | CCCCT | 325.15 | PASS | AC=6;AF=0.750;AN=8;BaseQRankSum=0.747;DB;DP=15... | GT:AD:DP:GQ:PL | ... | 0/0:1,0:1:3:0,3,65 | 1/1:0,1:1:3:67,3,0 | CCCCT-CCCCT | CCCCT-CCCCT | C-C | CCCCT-CCCCT | 2.0 | 2.0 | 0.0 | 2.0 |
17 | 35 | 1 | 866920 | rs2341361 | A | G | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | 1/1:0,1:1:3:40,3,0 | - | - | G-G | G-G | NaN | NaN | 2.0 | 2.0 |
18 | 36 | 1 | 867584 | rs2341360 | A | T | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | 1/1:0,1:1:3:40,3,0 | - | - | T-T | T-T | NaN | NaN | 2.0 | 2.0 |
19 | 37 | 1 | 869323 | rs13303207 | T | C | 69.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=3;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:73,6,0 | 1/1:0,1:1:3:30,3,0 | - | - | C-C | C-C | NaN | NaN | 2.0 | 2.0 |
20 | 40 | 1 | 870903 | rs13303094 | T | C | 183.66 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=6;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,2:2:6:79,6,0 | C-C | C-C | - | C-C | 2.0 | 2.0 | NaN | 2.0 |
21 | 41 | 1 | 871334 | rs4072383 | G | T | 74.01 | PASS | ABHet=0.500;ABHom=1.00;AC=3;AF=0.500;AN=6;Base... | GT:AD:DP:GQ:PL | ... | 0/0:3,0:3:9:0,9,120 | 0/1:2,2:4:52:52,0,68 | - | T-T | G-G | G-T | NaN | 2.0 | 0.0 | 1.0 |
22 | 42 | 1 | 873558 | rs1110052 | G | T | 39.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | T-T | T-T | - | - | 2.0 | 2.0 | NaN | NaN |
23 | 43 | 1 | 876499 | rs4372192 | A | G | 200.89 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=6;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | G-G | G-G | - | - | 2.0 | 2.0 | NaN | NaN |
24 | 44 | 1 | 878784 | rs142929357 | C | G | 125.24 | PASS | ABHet=0.583;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | ... | 0/1:2,1:3:30:30,0,57 | 0/0:2,0:2:6:0,6,66 | C-C | C-G | C-G | C-C | 0.0 | 1.0 | 1.0 | 0.0 |
25 | 45 | 1 | 879317 | rs7523549 | C | T | 388.57 | PASS | ABHet=0.493;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | ... | 0/0:12,0:12:36:0,36,405 | 0/1:3,6:9:64:187,0,64 | C-T | C-C | C-C | C-T | 1.0 | 0.0 | 0.0 | 1.0 |
26 | 46 | 1 | 879482 | rs149880798 | G | C | 799.55 | PASS | ABHet=0.592;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | ... | 0/0:19,0:19:51:0,51,650 | 0/1:14,9:23:99:257,0,389 | G-C | G-G | G-G | G-C | 1.0 | 0.0 | 0.0 | 1.0 |
27 | 47 | 1 | 879676 | rs6605067 | G | A | 432.72 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=12;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,3:3:9:120,9,0 | 1/1:0,4:4:12:158,12,0 | A-A | A-A | A-A | A-A | 2.0 | 2.0 | 2.0 | 2.0 |
28 | 48 | 1 | 879687 | rs2839 | T | C | 335.22 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=10;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:80,6,0 | 1/1:0,4:4:12:139,12,0 | C-C | C-C | C-C | C-C | 2.0 | 2.0 | 2.0 | 2.0 |
29 | 49 | 1 | 880238 | rs3748592 | A | G | 516.84 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=14;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,3:3:9:120,9,0 | 1/1:0,5:5:15:196,15,0 | G-G | G-G | G-G | G-G | 2.0 | 2.0 | 2.0 | 2.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
268071 | 299643 | GL000225.1 | 64237 | . | A | G | 152.14 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DP=5;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | 1/1:0,2:2:6:78,6,0 | G-G | - | G-G | G-G | 2.0 | NaN | 2.0 | 2.0 |
268072 | 299706 | GL000225.1 | 67508 | . | GT | G | 229.23 | PASS | AC=5;AF=0.833;AN=6;BaseQRankSum=-0.742;DP=13;F... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,4:2:6:94,6,0 | GT-G | G-G | - | G-G | 1.0 | 2.0 | NaN | 2.0 |
268073 | 299713 | GL000225.1 | 71934 | . | A | G | 62.01 | PASS | ABHet=0.500;ABHom=1.00;AC=3;AF=0.750;AN=4;Base... | GT:AD:DP:GQ:PL | ... | ./. | 0/1:1,1:2:28:28,0,33 | G-G | - | - | A-G | 2.0 | NaN | NaN | 1.0 |
268074 | 299714 | GL000225.1 | 71966 | . | A | G | 138.21 | PASS | ABHet=0.333;ABHom=1.00;AC=3;AF=0.750;AN=4;Base... | GT:AD:DP:GQ:PL | ... | ./. | 0/1:1,2:3:30:62,0,30 | G-G | - | - | A-G | 2.0 | NaN | NaN | 1.0 |
268075 | 299715 | GL000225.1 | 72003 | . | T | C | 119.76 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DP=5;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,1:1:3:40,3,0 | C-C | - | - | C-C | 2.0 | NaN | NaN | 2.0 |
268076 | 299716 | GL000225.1 | 72057 | . | C | T | 75.26 | PASS | ABHet=0.500;ABHom=1.00;AC=5;AF=0.833;AN=6;Base... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | 0/1:1,1:2:26:33,0,26 | T-T | - | T-T | C-T | 2.0 | NaN | 2.0 | 1.0 |
268077 | 299717 | GL000225.1 | 72073 | . | A | T | 37.25 | PASS | ABHet=0.500;ABHom=1.00;AC=3;AF=0.750;AN=4;Base... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:39,3,0 | 0/1:1,1:2:30:33,0,30 | - | - | T-T | A-T | NaN | NaN | 2.0 | 1.0 |
268078 | 299718 | GL000225.1 | 72104 | . | A | G | 74.02 | PASS | ABHet=0.500;ABHom=1.00;AC=3;AF=0.750;AN=4;Base... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:77,6,0 | 0/1:1,1:2:24:33,0,24 | - | - | G-G | A-G | NaN | NaN | 2.0 | 1.0 |
268079 | 299723 | GL000225.1 | 72249 | . | A | G | 37.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=2;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | ./. | ./. | G-G | - | - | - | 2.0 | NaN | NaN | NaN |
268080 | 299734 | GL000225.1 | 72555 | . | T | C | 123.67 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DP=4;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | ./. | C-C | C-C | C-C | - | 2.0 | 2.0 | 2.0 | NaN |
268081 | 299735 | GL000225.1 | 72582 | . | A | G | 202.89 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DP=8;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:35,3,0 | 1/1:0,1:1:3:33,3,0 | G-G | G-G | G-G | G-G | 2.0 | 2.0 | 2.0 | 2.0 |
268082 | 299778 | GL000225.1 | 78642 | . | A | G | 32.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=2;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | ./. | ./. | G-G | - | - | - | 2.0 | NaN | NaN | NaN |
268083 | 299860 | GL000225.1 | 89058 | . | T | C | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | ./. | - | C-C | C-C | - | NaN | 2.0 | 2.0 | NaN |
268084 | 299876 | GL000225.1 | 89930 | . | A | T | 139.54 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DP=5;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:30,3,0 | 1/1:0,1:1:3:35,3,0 | T-T | T-T | T-T | T-T | 2.0 | 2.0 | 2.0 | 2.0 |
268085 | 299909 | GL000225.1 | 96661 | . | A | G | 40.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DP=2;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | ./. | ./. | G-G | G-G | - | - | 2.0 | 2.0 | NaN | NaN |
268086 | 299927 | GL000225.1 | 114739 | . | T | TC | 679.82 | PASS | AC=7;AF=0.875;AN=8;BaseQRankSum=3.317;DP=23;FS... | GT:AD:DP:GQ:PL | ... | 1/1:0,4:4:12:158,12,0 | 0/1:5,3:8:99:106,0,182 | TC-TC | TC-TC | TC-TC | T-TC | 2.0 | 2.0 | 2.0 | 1.0 |
268087 | 299992 | GL000225.1 | 143836 | . | T | C | 117.52 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=4;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | ./. | ./. | C-C | - | - | - | 2.0 | NaN | NaN | NaN |
268088 | 299993 | GL000225.1 | 143849 | . | A | G | 117.52 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=4;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | ./. | ./. | G-G | - | - | - | 2.0 | NaN | NaN | NaN |
268089 | 300006 | GL000225.1 | 198637 | . | C | T | 80.98 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | ./. | ./. | - | T-T | - | - | NaN | 2.0 | NaN | NaN |
268090 | 300007 | GL000225.1 | 198643 | . | A | G | 86.98 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | ./. | ./. | - | G-G | - | - | NaN | 2.0 | NaN | NaN |
268091 | 300008 | GL000225.1 | 202533 | . | C | T | 46.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DP=2;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,2:2:6:78,6,0 | - | - | - | T-T | NaN | NaN | NaN | 2.0 |
268092 | 300009 | GL000225.1 | 203673 | . | C | T | 78.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DP=3;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | ./. | ./. | T-T | T-T | - | - | 2.0 | 2.0 | NaN | NaN |
268093 | 300022 | GL000192.1 | 139953 | . | CTG | C | 1104.53 | PASS | AC=4;AF=0.500;AN=8;BaseQRankSum=-3.358;DP=66;F... | GT:AD:DP:GQ:PL | ... | 0/1:8,7:10:19:280,0,19 | 0/0:6,0:6:9:0,9,141 | C-C | CTG-C | CTG-C | CTG-CTG | 2.0 | 1.0 | 1.0 | 0.0 |
268094 | 300023 | GL000192.1 | 160087 | . | C | T | 157.22 | PASS | ABHet=0.625;ABHom=1.00;AC=6;AF=0.750;AN=8;Base... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:78,6,0 | 1/1:0,1:1:3:37,3,0 | C-T | C-T | T-T | T-T | 1.0 | 1.0 | 2.0 | 2.0 |
268095 | 300029 | GL000192.1 | 272061 | . | C | CT | 125.59 | PASS | AC=3;AF=0.375;AN=8;BaseQRankSum=-0.058;DP=33;F... | GT:AD:DP:GQ:PL | ... | 0/1:9,2:3:18:53,0,18 | 0/1:6,1:4:17:17,0,69 | C-C | C-CT | C-CT | C-CT | 0.0 | 1.0 | 1.0 | 1.0 |
268096 | 300030 | GL000192.1 | 311575 | . | C | T | 248.50 | PASS | ABHet=0.333;ABHom=1.00;AC=3;AF=0.375;AN=8;Base... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:80,6,0 | 0/0:5,0:5:15:0,15,181 | C-C | C-T | T-T | C-C | 0.0 | 1.0 | 2.0 | 0.0 |
268097 | 300032 | GL000192.1 | 313701 | . | T | C | 120.67 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DP=4;Dels=0.00;FS... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:39,3,0 | 1/1:0,2:2:6:77,6,0 | - | C-C | C-C | C-C | NaN | 2.0 | 2.0 | 2.0 |
268098 | 300033 | GL000192.1 | 337826 | . | TA | T | 489.51 | PASS | AC=2;AF=0.250;AN=8;BaseQRankSum=-3.725;DP=62;F... | GT:AD:DP:GQ:PL | ... | 0/1:5,10:15:99:438,0,228 | 0/1:13,3:16:99:102,0,647 | TA-TA | TA-TA | TA-T | TA-T | 0.0 | 0.0 | 1.0 | 1.0 |
268099 | 300034 | GL000192.1 | 394140 | . | A | G | 45.26 | PASS | ABHom=1.00;AC=2;AF=0.500;AN=4;BaseQRankSum=0.7... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,2:2:6:80,6,0 | - | A-A | - | G-G | NaN | 0.0 | NaN | 2.0 |
268100 | 300035 | 10 | 123256215 | . | T | G | 100.00 | PASS | GENE=FGFR2;INHERITANCE=AD;MIM=101600 | GT:AD:DP:GQ:PL | ... | 1/0:1,0:1:3:0,3,39 | 0/0:1,0:1:3:0,3,39 | T-T | T-T | G-T | T-T | 0.0 | 0.0 | 1.0 | 0.0 |
268101 rows × 22 columns
# Show every variant whose chromosome/contig name does not begin with 'G'.
dfc[~dfc['#CHROM'].str.startswith('G')]
index | #CHROM | POS | ID | REF | ALT | QUAL | FILTER | INFO | FORMAT | ... | ISDBM322017 | ISDBM322018 | ISDBM322015 genotype | ISDBM322016 genotype | ISDBM322017 genotype | ISDBM322018 genotype | ISDBM322015 num | ISDBM322016 num | ISDBM322017 num | ISDBM322018 num | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 6 | 1 | 63735 | . | CCTA | C | 193.23 | PASS | AC=3;AF=0.500;AN=6;BaseQRankSum=-0.960;DP=42;F... | GT:AD:DP:GQ:PL | ... | 0/1:8,4:8:99:128,0,301 | 0/0:18,2:15:30:0,30,666 | - | C-C | CCTA-C | CCTA-CCTA | NaN | 2.0 | 1.0 | 0.0 |
1 | 12 | 1 | 745370 | rs146246821 | TA | T | 510.54 | PASS | AC=4;AF=0.500;AN=8;BaseQRankSum=1.905;DB;DP=64... | GT:AD:DP:GQ:PL | ... | 0/1:9,7:16:99:273,0,395 | 0/1:16,4:19:99:125,0,661 | TA-T | TA-T | TA-T | TA-T | 1.0 | 1.0 | 1.0 | 1.0 |
2 | 13 | 1 | 752566 | rs3094315 | G | A | 118.67 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=4;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:39,3,0 | 1/1:0,1:1:3:36,3,0 | - | A-A | A-A | A-A | NaN | 2.0 | 2.0 | 2.0 |
3 | 14 | 1 | 752721 | rs3131972 | A | G | 242.70 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=7;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,4:4:12:159,12,0 | 1/1:0,1:1:3:40,3,0 | - | G-G | G-G | G-G | NaN | 2.0 | 2.0 | 2.0 |
4 | 17 | 1 | 758324 | rs3131955 | T | C | 45.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | C-C | - | - | - | 2.0 | NaN | NaN | NaN |
5 | 18 | 1 | 780027 | rs2977613 | G | T | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | ./. | T-T | - | T-T | - | 2.0 | NaN | 2.0 | NaN |
6 | 19 | 1 | 808631 | rs11240779 | G | A | 1853.99 | PASS | ABHom=1.000;AC=8;AF=1.00;AN=8;DB;DP=52;Dels=0.... | GT:AD:DP:GQ:PL | ... | 1/1:0,13:13:36:457,36,0 | 1/1:0,16:16:48:611,48,0 | A-A | A-A | A-A | A-A | 2.0 | 2.0 | 2.0 | 2.0 |
7 | 25 | 1 | 812267 | rs7541694 | A | G | 385.67 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=13;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:55,6,0 | 1/1:0,3:3:9:99,9,0 | G-G | G-G | G-G | G-G | 2.0 | 2.0 | 2.0 | 2.0 |
8 | 26 | 1 | 812284 | rs7545373 | C | G | 428.75 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=13;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:55,6,0 | 1/1:0,2:2:6:79,6,0 | G-G | G-G | G-G | G-G | 2.0 | 2.0 | 2.0 | 2.0 |
9 | 27 | 1 | 823790 | rs143626389 | G | A | 38.82 | PASS | ABHom=1.00;AC=2;AF=0.500;AN=4;BaseQRankSum=0.3... | GT:AD:DP:GQ:PL | ... | ./. | 0/0:3,0:3:9:0,9,99 | - | A-A | - | G-G | NaN | 2.0 | NaN | 0.0 |
10 | 28 | 1 | 834832 | rs4411087 | G | C | 48.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | - | C-C | - | - | NaN | 2.0 | NaN | NaN |
11 | 29 | 1 | 849998 | rs13303222 | A | G | 45.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | G-G | - | - | - | 2.0 | NaN | NaN | NaN |
12 | 30 | 1 | 851757 | rs62677860 | A | G | 63.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=4;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | G-G | G-G | - | - | 2.0 | 2.0 | NaN | NaN |
13 | 31 | 1 | 861808 | rs13302982 | A | G | 66.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=3;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,1:1:3:36,3,0 | G-G | - | - | G-G | 2.0 | NaN | NaN | 2.0 |
14 | 32 | 1 | 862866 | rs3892970 | C | T | 31.26 | PASS | ABHom=1.00;AC=2;AF=0.500;AN=4;BaseQRankSum=-0.... | GT:AD:DP:GQ:PL | ... | ./. | ./. | T-T | C-C | - | - | 2.0 | 0.0 | NaN | NaN |
15 | 33 | 1 | 866319 | rs9988021 | G | A | 825.39 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=24;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,4:4:12:133,12,0 | 1/1:0,2:2:6:76,6,0 | A-A | A-A | A-A | A-A | 2.0 | 2.0 | 2.0 | 2.0 |
16 | 34 | 1 | 866511 | rs60722469 | C | CCCCT | 325.15 | PASS | AC=6;AF=0.750;AN=8;BaseQRankSum=0.747;DB;DP=15... | GT:AD:DP:GQ:PL | ... | 0/0:1,0:1:3:0,3,65 | 1/1:0,1:1:3:67,3,0 | CCCCT-CCCCT | CCCCT-CCCCT | C-C | CCCCT-CCCCT | 2.0 | 2.0 | 0.0 | 2.0 |
17 | 35 | 1 | 866920 | rs2341361 | A | G | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | 1/1:0,1:1:3:40,3,0 | - | - | G-G | G-G | NaN | NaN | 2.0 | 2.0 |
18 | 36 | 1 | 867584 | rs2341360 | A | T | 47.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | 1/1:0,1:1:3:40,3,0 | - | - | T-T | T-T | NaN | NaN | 2.0 | 2.0 |
19 | 37 | 1 | 869323 | rs13303207 | T | C | 69.22 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=3;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:73,6,0 | 1/1:0,1:1:3:30,3,0 | - | - | C-C | C-C | NaN | NaN | 2.0 | 2.0 |
20 | 40 | 1 | 870903 | rs13303094 | T | C | 183.66 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=6;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,2:2:6:79,6,0 | C-C | C-C | - | C-C | 2.0 | 2.0 | NaN | 2.0 |
21 | 41 | 1 | 871334 | rs4072383 | G | T | 74.01 | PASS | ABHet=0.500;ABHom=1.00;AC=3;AF=0.500;AN=6;Base... | GT:AD:DP:GQ:PL | ... | 0/0:3,0:3:9:0,9,120 | 0/1:2,2:4:52:52,0,68 | - | T-T | G-G | G-T | NaN | 2.0 | 0.0 | 1.0 |
22 | 42 | 1 | 873558 | rs1110052 | G | T | 39.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | T-T | T-T | - | - | 2.0 | 2.0 | NaN | NaN |
23 | 43 | 1 | 876499 | rs4372192 | A | G | 200.89 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=6;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | G-G | G-G | - | - | 2.0 | 2.0 | NaN | NaN |
24 | 44 | 1 | 878784 | rs142929357 | C | G | 125.24 | PASS | ABHet=0.583;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | ... | 0/1:2,1:3:30:30,0,57 | 0/0:2,0:2:6:0,6,66 | C-C | C-G | C-G | C-C | 0.0 | 1.0 | 1.0 | 0.0 |
25 | 45 | 1 | 879317 | rs7523549 | C | T | 388.57 | PASS | ABHet=0.493;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | ... | 0/0:12,0:12:36:0,36,405 | 0/1:3,6:9:64:187,0,64 | C-T | C-C | C-C | C-T | 1.0 | 0.0 | 0.0 | 1.0 |
26 | 46 | 1 | 879482 | rs149880798 | G | C | 799.55 | PASS | ABHet=0.592;ABHom=1.00;AC=2;AF=0.250;AN=8;Base... | GT:AD:DP:GQ:PL | ... | 0/0:19,0:19:51:0,51,650 | 0/1:14,9:23:99:257,0,389 | G-C | G-G | G-G | G-C | 1.0 | 0.0 | 0.0 | 1.0 |
27 | 47 | 1 | 879676 | rs6605067 | G | A | 432.72 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=12;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,3:3:9:120,9,0 | 1/1:0,4:4:12:158,12,0 | A-A | A-A | A-A | A-A | 2.0 | 2.0 | 2.0 | 2.0 |
28 | 48 | 1 | 879687 | rs2839 | T | C | 335.22 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=10;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:80,6,0 | 1/1:0,4:4:12:139,12,0 | C-C | C-C | C-C | C-C | 2.0 | 2.0 | 2.0 | 2.0 |
29 | 49 | 1 | 880238 | rs3748592 | A | G | 516.84 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=14;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,3:3:9:120,9,0 | 1/1:0,5:5:15:196,15,0 | G-G | G-G | G-G | G-G | 2.0 | 2.0 | 2.0 | 2.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
267146 | 296313 | Y | 59026513 | rs1826575 | A | G | 42.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,2:2:6:74,6,0 | - | - | - | G-G | NaN | NaN | NaN | 2.0 |
267147 | 296320 | Y | 59033110 | rs28628009 | A | T | 36.01 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,1:1:3:39,3,0 | - | T-T | - | T-T | NaN | 2.0 | NaN | 2.0 |
267148 | 296321 | Y | 59033139 | rs55686319 | T | C | 37.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,2:2:6:69,6,0 | - | - | - | C-C | NaN | NaN | NaN | 2.0 |
267149 | 296322 | MT | 73 | rs3087742 | A | G | 327.99 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=10;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,3:3:9:94,9,0 | 1/1:0,2:2:6:79,6,0 | G-G | G-G | G-G | G-G | 2.0 | 2.0 | 2.0 | 2.0 |
267150 | 296323 | MT | 195 | rs2857291 | T | C | 47.77 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=2;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | ./. | - | C-C | - | - | NaN | 2.0 | NaN | NaN |
267151 | 296324 | MT | 263 | rs2853515 | A | G | 229.77 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=8;Dels=0.00... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:35,3,0 | 1/1:0,1:1:3:32,3,0 | G-G | G-G | G-G | G-G | 2.0 | 2.0 | 2.0 | 2.0 |
267152 | 296325 | MT | 709 | rs2853517 | G | A | 346.40 | PASS | ABHom=1.00;AC=4;AF=0.667;AN=6;BaseQRankSum=1.4... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,5:5:15:193,15,0 | A-A | G-G | - | A-A | 2.0 | 0.0 | NaN | 2.0 |
267153 | 296326 | MT | 750 | rs2853518 | A | G | 476.08 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=15;Dels=0.0... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,7:7:21:273,21,0 | G-G | G-G | - | G-G | 2.0 | 2.0 | NaN | 2.0 |
267154 | 296327 | MT | 1243 | rs28358572 | T | C | 422.79 | PASS | ABHom=1.00;AC=6;AF=0.750;AN=8;BaseQRankSum=0.6... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | 1/1:0,5:5:9:119,9,0 | C-C | T-T | C-C | C-C | 2.0 | 0.0 | 2.0 | 2.0 |
267155 | 296329 | MT | 1719 | rs3928305 | G | A | 35.00 | PASS | ABHom=1.00;AC=2;AF=0.250;AN=8;BaseQRankSum=-0.... | GT:AD:DP:GQ:PL | ... | 0/0:1,0:1:3:0,3,32 | 0/0:4,0:4:12:0,12,159 | G-G | A-A | G-G | G-G | 0.0 | 2.0 | 0.0 | 0.0 |
267156 | 296330 | MT | 2706 | rs2854128 | A | G | 300.19 | PASS | ABHom=0.899;AC=6;AF=1.00;AN=6;DB;DP=10;Dels=0.... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,2:3:6:73,6,0 | G-G | G-G | - | G-G | 2.0 | 2.0 | NaN | 2.0 |
267157 | 296331 | MT | 3505 | rs28358585 | A | G | 219.20 | PASS | ABHom=1.00;AC=6;AF=0.750;AN=8;BaseQRankSum=1.6... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:40,3,0 | 1/1:0,2:2:6:67,6,0 | G-G | A-A | G-G | G-G | 2.0 | 0.0 | 2.0 | 2.0 |
267158 | 296333 | MT | 5460 | rs3021088 | G | A | 515.25 | PASS | ABHom=1.00;AC=6;AF=0.750;AN=8;BaseQRankSum=0.1... | GT:AD:DP:GQ:PL | ... | 1/1:0,4:4:12:144,12,0 | 1/1:0,12:12:33:380,33,0 | A-A | G-G | A-A | A-A | 2.0 | 0.0 | 2.0 | 2.0 |
267159 | 296335 | MT | 6371 | rs41366755 | C | T | 116.32 | PASS | ABHom=1.00;AC=2;AF=0.250;AN=8;BaseQRankSum=0.5... | GT:AD:DP:GQ:PL | ... | 0/0:1,0:1:3:0,3,40 | 0/0:8,0:8:24:0,24,297 | C-C | T-T | C-C | C-C | 0.0 | 2.0 | 0.0 | 0.0 |
267160 | 296336 | MT | 8697 | rs28358886 | G | A | 280.28 | PASS | ABHom=1.00;AC=4;AF=1.00;AN=4;DB;DP=8;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,7:7:21:275,21,0 | A-A | - | - | A-A | 2.0 | NaN | NaN | 2.0 |
267161 | 296340 | MT | 11947 | rs28359168 | A | G | 529.29 | PASS | ABHom=1.00;AC=4;AF=0.667;AN=6;BaseQRankSum=-1.... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,7:7:21:243,21,0 | G-G | A-A | - | G-G | 2.0 | 0.0 | NaN | 2.0 |
267162 | 296341 | MT | 12414 | . | T | C | 547.46 | PASS | ABHom=1.00;AC=4;AF=0.667;AN=6;BaseQRankSum=-0.... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,10:10:30:392,30,0 | C-C | T-T | - | C-C | 2.0 | 0.0 | NaN | 2.0 |
267163 | 296342 | MT | 12705 | rs2854122 | C | T | 272.26 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=9;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,2:2:6:80,6,0 | T-T | T-T | - | T-T | 2.0 | 2.0 | NaN | 2.0 |
267164 | 296343 | MT | 13819 | . | T | C | 77.31 | PASS | ABHom=1.00;AC=2;AF=0.250;AN=8;BaseQRankSum=1.4... | GT:AD:DP:GQ:PL | ... | 0/0:4,0:4:12:0,12,159 | 0/0:7,0:7:21:0,21,268 | T-T | C-C | T-T | T-T | 0.0 | 2.0 | 0.0 | 0.0 |
267165 | 296344 | MT | 13966 | rs41535848 | A | G | 188.02 | PASS | ABHom=1.00;AC=2;AF=0.250;AN=8;BaseQRankSum=3.3... | GT:AD:DP:GQ:PL | ... | 0/0:2,0:2:6:0,6,75 | 0/0:10,0:10:27:0,27,316 | A-A | G-G | A-A | A-A | 0.0 | 2.0 | 0.0 | 0.0 |
267166 | 296345 | MT | 14371 | . | T | C | 634.48 | PASS | ABHom=1.00;AC=6;AF=0.750;AN=8;BaseQRankSum=2.3... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:29,3,0 | 1/1:0,9:9:27:336,27,0 | C-C | T-T | C-C | C-C | 2.0 | 0.0 | 2.0 | 2.0 |
267167 | 296346 | MT | 14470 | rs3135030 | T | C | 288.43 | PASS | ABHom=1.00;AC=2;AF=0.333;AN=6;BaseQRankSum=3.4... | GT:AD:DP:GQ:PL | ... | ./. | 0/0:7,0:7:21:0,21,243 | T-T | C-C | - | T-T | 0.0 | 2.0 | NaN | 0.0 |
267168 | 296347 | MT | 14766 | rs3135031 | C | T | 526.56 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=15;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:77,6,0 | 1/1:0,3:3:9:116,9,0 | T-T | T-T | T-T | T-T | 2.0 | 2.0 | 2.0 | 2.0 |
267169 | 296349 | MT | 15326 | rs2853508 | A | G | 557.29 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=16;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:37,3,0 | 1/1:0,9:9:27:333,27,0 | G-G | G-G | G-G | G-G | 2.0 | 2.0 | 2.0 | 2.0 |
267170 | 296350 | MT | 15884 | rs28617642 | G | C | 83.98 | PASS | ABHom=1.00;AC=2;AF=1.00;AN=2;DB;DP=3;Dels=0.00... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,3:3:9:116,9,0 | - | - | - | C-C | NaN | NaN | NaN | 2.0 |
267171 | 296351 | MT | 16223 | rs2853513 | C | T | 396.83 | PASS | ABHom=1.00;AC=6;AF=1.00;AN=6;DB;DP=11;Dels=0.0... | GT:AD:DP:GQ:PL | ... | ./. | 1/1:0,4:4:12:155,12,0 | T-T | T-T | - | T-T | 2.0 | 2.0 | NaN | 2.0 |
267172 | 296352 | MT | 16278 | rs41458645 | C | T | 109.32 | PASS | ABHom=1.00;AC=2;AF=0.250;AN=8;BaseQRankSum=-2.... | GT:AD:DP:GQ:PL | ... | 0/0:1,0:1:3:0,3,40 | 0/0:5,0:5:15:0,15,180 | C-C | T-T | C-C | C-C | 0.0 | 2.0 | 0.0 | 0.0 |
267173 | 296353 | MT | 16290 | rs34524463 | C | T | 583.33 | PASS | ABHom=1.00;AC=6;AF=0.750;AN=8;BaseQRankSum=-0.... | GT:AD:DP:GQ:PL | ... | 1/1:0,1:1:3:39,3,0 | 1/1:0,5:5:15:181,15,0 | T-T | C-C | T-T | T-T | 2.0 | 0.0 | 2.0 | 2.0 |
267174 | 296356 | MT | 16519 | rs3937033 | T | C | 455.12 | PASS | ABHom=1.00;AC=8;AF=1.00;AN=8;DB;DP=13;Dels=0.0... | GT:AD:DP:GQ:PL | ... | 1/1:0,2:2:6:80,6,0 | 1/1:0,3:3:9:116,9,0 | C-C | C-C | C-C | C-C | 2.0 | 2.0 | 2.0 | 2.0 |
268100 | 300035 | 10 | 123256215 | . | T | G | 100.00 | PASS | GENE=FGFR2;INHERITANCE=AD;MIM=101600 | GT:AD:DP:GQ:PL | ... | 1/0:1,0:1:3:0,3,39 | 0/0:1,0:1:3:0,3,39 | T-T | T-T | G-T | T-T | 0.0 | 0.0 | 1.0 | 0.0 |
267176 rows × 22 columns
# Box plot of QUAL grouped by chromosome, skipping contigs named 'G...'.
dfc[~dfc['#CHROM'].str.startswith('G')].boxplot(
    by='#CHROM', column='QUAL', figsize=(15, 5))
<matplotlib.axes._subplots.AxesSubplot at 0x11d838198>
# Bar chart of the number of rows per chromosome, skipping contigs named 'G...'.
dfc.loc[~dfc['#CHROM'].str.startswith('G'), '#CHROM'].value_counts().plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x10891ebe0>
# From here on, keep only rows whose chromosome name does not start with 'G'.
dfc = dfc[~dfc['#CHROM'].str.startswith('G')]
# One '<sample id> snp' label per sample.
sample_ids_snps = list(map('{} snp'.format, sample_ids))
def _call_snp(row, sample_id):
    """Translate one sample's genotype call into a two-allele SNP string.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'REF', 'ALT' and the per-sample call under ``sample_id``.
    sample_id : str
        Key of the sample column holding the call, e.g.
        ``'0/1:8,4:8:99:128,0,301'`` (the genotype is the first
        colon-separated sub-field).

    Returns
    -------
    str or float
        ``'<allele> <allele>'`` (e.g. ``'A G'``) when both alleles are called
        and each is a single character; ``np.nan`` otherwise, i.e. for
        multi-allelic sites, missing calls, non-diploid calls and alleles
        longer than one character.

    Raises
    ------
    KeyError
        If the genotype uses an allele code other than '0', '1' or '.'.
    """
    # A comma in ALT means several alternate alleles; those are not handled.
    if ',' in row['ALT']:
        return np.nan

    allele_for = {
        '0': row['REF'],
        '1': row['ALT'],
        '.': '',
    }

    # Parse the genotype sub-field instead of relying on fixed character
    # positions, so that haploid ('1') or bare-missing ('.') calls cannot be
    # misread as diploid ones.  Both '/' and '|' separators are accepted.
    gt = row[sample_id].split(':', 1)[0]
    codes = gt.replace('|', '/').split('/')
    if len(codes) != 2:
        return np.nan

    genotype = '{} {}'.format(allele_for[codes[0]], allele_for[codes[1]])
    # Exactly three characters means two one-character alleles and a space;
    # anything else is a missing call or a longer (indel) allele.
    if len(genotype) != 3:
        return np.nan
    return genotype
def call_snp(row):
    """Return a Series with the SNP call of every sample for one row.

    Empty sample ids are skipped; the result is labelled with
    ``sample_ids_genotype``.
    """
    calls = []
    for sample in sample_ids:
        if sample:
            calls.append(_call_snp(row, sample))
    return pd.Series(calls, index=sample_ids_genotype)
# Run call_snp on every row of dfc, then display the resulting table.
snps = dfc.apply(call_snp, axis='columns')
snps
ISDBM322015 genotype | ISDBM322016 genotype | ISDBM322017 genotype | ISDBM322018 genotype | |
---|---|---|---|---|
0 | NaN | C C | NaN | NaN |
1 | NaN | NaN | NaN | NaN |
2 | NaN | A A | A A | A A |
3 | NaN | G G | G G | G G |
4 | C C | NaN | NaN | NaN |
5 | T T | NaN | T T | NaN |
6 | A A | A A | A A | A A |
7 | G G | G G | G G | G G |
8 | G G | G G | G G | G G |
9 | NaN | A A | NaN | G G |
10 | NaN | C C | NaN | NaN |
11 | G G | NaN | NaN | NaN |
12 | G G | G G | NaN | NaN |
13 | G G | NaN | NaN | G G |
14 | T T | C C | NaN | NaN |
15 | A A | A A | A A | A A |
16 | NaN | NaN | C C | NaN |
17 | NaN | NaN | G G | G G |
18 | NaN | NaN | T T | T T |
19 | NaN | NaN | C C | C C |
20 | C C | C C | NaN | C C |
21 | NaN | T T | G G | G T |
22 | T T | T T | NaN | NaN |
23 | G G | G G | NaN | NaN |
24 | C C | C G | C G | C C |
25 | C T | C C | C C | C T |
26 | G C | G G | G G | G C |
27 | A A | A A | A A | A A |
28 | C C | C C | C C | C C |
29 | G G | G G | G G | G G |
... | ... | ... | ... | ... |
267146 | NaN | NaN | NaN | G G |
267147 | NaN | T T | NaN | T T |
267148 | NaN | NaN | NaN | C C |
267149 | G G | G G | G G | G G |
267150 | NaN | C C | NaN | NaN |
267151 | G G | G G | G G | G G |
267152 | A A | G G | NaN | A A |
267153 | G G | G G | NaN | G G |
267154 | C C | T T | C C | C C |
267155 | G G | A A | G G | G G |
267156 | G G | G G | NaN | G G |
267157 | G G | A A | G G | G G |
267158 | A A | G G | A A | A A |
267159 | C C | T T | C C | C C |
267160 | A A | NaN | NaN | A A |
267161 | G G | A A | NaN | G G |
267162 | C C | T T | NaN | C C |
267163 | T T | T T | NaN | T T |
267164 | T T | C C | T T | T T |
267165 | A A | G G | A A | A A |
267166 | C C | T T | C C | C C |
267167 | T T | C C | NaN | T T |
267168 | T T | T T | T T | T T |
267169 | G G | G G | G G | G G |
267170 | NaN | NaN | NaN | C C |
267171 | T T | T T | NaN | T T |
267172 | C C | T T | C C | C C |
267173 | T T | C C | T T | T T |
267174 | C C | C C | C C | C C |
268100 | T T | T T | G T | T T |
267176 rows × 4 columns
def make_plink_map(df, path='pfeiffer.map'):
    """Write a PLINK ``.map`` file with one line per row of ``df``.

    Each line has the four columns PLINK expects, in this order:
    chromosome, variant identifier, genetic distance (always written as 0)
    and base-pair position.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns '#CHROM', 'POS' and 'ID'.
    path : str, optional
        Output file name; defaults to 'pfeiffer.map'.

    Rows whose 'ID' is '.' get the synthetic identifier '<chrom>-<pos>'.
    """
    with open(path, 'w') as mapfile:
        for chrom, pos, id_ in zip(df['#CHROM'], df['POS'], df['ID']):
            if id_ == '.':
                # No identifier available: build one from the coordinates.
                id_ = '{}-{}'.format(chrom, pos)
            # The identifier goes in column 2 and the position in column 4.
            mapfile.write('{} {} 0 {}\n'.format(chrom, id_, pos))
# Write the .map file from dfc, then preview the first 100 lines of the result.
make_plink_map(dfc)
!head -n 100 pfeiffer.map
1 63735 0 1-63735 1 745370 0 rs146246821 1 752566 0 rs3094315 1 752721 0 rs3131972 1 758324 0 rs3131955 1 780027 0 rs2977613 1 808631 0 rs11240779 1 812267 0 rs7541694 1 812284 0 rs7545373 1 823790 0 rs143626389 1 834832 0 rs4411087 1 849998 0 rs13303222 1 851757 0 rs62677860 1 861808 0 rs13302982 1 862866 0 rs3892970 1 866319 0 rs9988021 1 866511 0 rs60722469 1 866920 0 rs2341361 1 867584 0 rs2341360 1 869323 0 rs13303207 1 870903 0 rs13303094 1 871334 0 rs4072383 1 873558 0 rs1110052 1 876499 0 rs4372192 1 878784 0 rs142929357 1 879317 0 rs7523549 1 879482 0 rs149880798 1 879676 0 rs6605067 1 879687 0 rs2839 1 880238 0 rs3748592 1 880390 0 rs3748593 1 881627 0 rs2272757 1 883625 0 rs4970378 1 884091 0 rs7522415 1 887560 0 rs3748595 1 887801 0 rs3828047 1 888639 0 rs3748596 1 888659 0 rs3748597 1 889158 0 rs56262069 1 889159 0 rs13302945 1 889638 0 rs13303206 1 892460 0 rs41285802 1 892745 0 rs13303227 1 894573 0 rs13303010 1 895706 0 rs13303327 1 896333 0 rs144174542 1 897325 0 rs4970441 1 897564 0 rs13303229 1 897730 0 rs7549631 1 898323 0 rs6605071 1 900285 0 rs4970435 1 900286 0 rs4970434 1 900505 0 rs28705211 1 902069 0 rs116147894 1 908823 0 rs28687780 1 909073 0 rs3892467 1 909238 0 rs3829740 1 909309 0 rs3829738 1 909555 0 rs2340594 1 909768 0 rs2340593 1 911916 0 rs74045046 1 914876 0 rs13302983 1 916549 0 rs6660139 1 928520 0 rs35002855 1 936848 0 rs149671836 1 941539 0 rs9778087 1 943468 0 rs3121567 1 943687 0 rs2465140 1 948846 0 rs3841266 1 948870 0 rs4615788 1 948921 0 rs15842 1 949235 0 rs2465124 1 949608 0 rs1921 1 949654 0 rs8997 1 949925 0 rs2799070 1 957898 0 rs2799064 1 962210 0 rs3128126 1 963249 0 rs2710870 1 971224 0 rs2799055 1 974662 0 rs2465135 1 977203 0 rs3121552 1 977330 0 rs2799066 1 977570 0 rs2710876 1 980460 0 rs3128097 1 981087 0 rs3128098 1 981931 0 rs2465128 1 982444 0 rs3128099 1 982462 0 rs3128100 1 982513 0 rs3128101 1 982941 0 rs3128102 1 982994 0 rs10267 1 986443 0 rs2710887 1 987200 0 rs9803031 1 988932 0 rs2710871 1 
990806 0 rs2799073 1 1001177 0 rs4970401 1 1001178 0 1-1001178 1 1017587 0 rs3766191 1 1019175 0 rs2298215 1 1021346 0 rs10907177
def make_plink_ped(snps, path='pfeiffer.ped', phenotype=None):
    """Write a PLINK ``.ped`` file with one line per column of ``snps``.

    Parameters
    ----------
    snps : pandas.DataFrame
        One column per sample, one row per variant.  Each cell is a
        two-allele string such as ``'A G'``; missing cells (NaN/None) are
        written as ``'0 0'``.  The sample id is the first
        whitespace-separated token of the column name.
    path : str, optional
        Output file name; defaults to 'pfeiffer.ped'.
    phenotype : optional
        When given, it is written as an extra column right before the
        genotypes.  By default no such column is written.

    Every line starts with ``FAM1 <sample> 0 0 0``.
    NOTE(review): the standard PED layout has six leading columns (family,
    individual, father, mother, sex, phenotype); the default output here has
    five, so PLINK presumably needs to be told a column is absent, or
    ``phenotype`` should be passed -- confirm against how the file is used.
    """
    with open(path, 'w') as pedfile:
        for column in snps.columns:
            genotypes = ' '.join(
                '0 0' if pd.isnull(call) else str(call)
                for call in snps[column]
            )
            fields = ['FAM1', column.split()[0], '0', '0', '0']
            if phenotype is not None:
                fields.append(str(phenotype))
            fields.append(genotypes)
            # One sample per line: the newline keeps samples from running
            # together on a single line.
            pedfile.write(' '.join(fields) + '\n')
# Missing calls are written as '0 0' in the .ped file.
make_plink_ped(snps.fillna('0 0'))