Source code for civicpy.exports.civic_vcf_record

import vcfpy
import requests
import re
from civicpy.civic import GeneVariant

class CivicVcfRecord(vcfpy.Record):
    """A :class:`vcfpy.Record` built from a CIViC gene variant.

    :param civic.GeneVariant variant: A :class:`civic.GeneVariant` object to
        convert to a :class:`civic.exports.CivicVcfRecord` object (inherits
        from vcfpy.Record)
    :param include_status: Statuses of the evidence items and assertions that
        are written to the ``CSQ`` INFO field. ``None`` disables the filter.
    :raises Exception: If ``variant`` is not a ``GeneVariant``, if it is not
        valid for VCF, or if :meth:`vcf_coordinates` yields no coordinates.
    """

    # Ensembl REST server queried for the reference base next to an indel.
    _ENSEMBL_SERVER = "https://grch37.rest.ensembl.org"

    # Matches names made of capital letters, digits and a literal "=",
    # followed by anything (used by sanitized_name()).
    _EQUALS_NAME_RE = re.compile(r"^([A-Z]+)([0-9]+)(=)(.*)$")

    def __init__(self, variant, include_status=("submitted", "accepted")):
        if not isinstance(variant, GeneVariant):
            raise Exception('Variant is not a GeneVariant.')
        self.variant = variant
        if not self.variant.is_valid_for_vcf(emit_warnings=True):
            raise Exception('Variant is not valid for VCF.')

        # Resolve the coordinates exactly once: for insertions and deletions
        # vcf_coordinates() performs an HTTP request.
        coordinates = self.vcf_coordinates()
        if coordinates is None:
            raise Exception("Variant doesn't have any coordinates.")
        start, ref, alt = coordinates

        # A list of pairs (not a set literal) so the INFO keys always come
        # out in the order GN, VT, CSQ.
        info = vcfpy.OrderedDict([
            ('GN', self.variant.gene.name),
            ('VT', self.sanitized_name()),
            ('CSQ', tuple(self.csq(include_status))),
        ])
        super().__init__(
            CHROM=self.variant.coordinates.chromosome,
            POS=start,
            ID=[str(self.variant.id)],
            REF=ref,
            ALT=[self.alt_record(alt)],
            QUAL=None,
            FILTER=['.'],
            INFO=info,
        )

    @staticmethod
    def _is_blank_allele(bases):
        """Return True when ``bases`` is ``None``, ``'-'`` or empty."""
        return bases is None or bases in ('-', '')

    def _fetch_reference_base(self, chromosome, position):
        """Fetch the sequence at ``chromosome:position-position`` from Ensembl.

        :raises requests.HTTPError: If the server answers with an error status.
        """
        ext = "/sequence/region/human/{}:{}-{}".format(chromosome, position, position)
        response = requests.get(
            self._ENSEMBL_SERVER + ext,
            headers={"Content-Type": "text/plain"},
        )
        response.raise_for_status()
        return response.text

    def vcf_coordinates(self):
        """Compute the ``(start, ref, alt)`` triple used for the VCF record.

        For insertions and deletions one reference base is fetched from
        Ensembl and prepended to both alleles; a deletion is additionally
        shifted one position to the left. Other variants use the stored
        start and bases unchanged.

        :return: ``(start, ref, alt)``, or ``None`` when the reference build
            is not ``GRCh37`` or an insertion/deletion has no representative
            transcript.
        """
        coords = self.variant.coordinates
        if coords.reference_build != 'GRCh37':
            return None

        if self.variant.is_insertion:
            if not coords.representative_transcript:
                return None
            start = coords.start
            anchor = self._fetch_reference_base(coords.chromosome, start)
            if self._is_blank_allele(coords.reference_bases):
                ref = anchor
            else:
                ref = "{}{}".format(anchor, coords.reference_bases)
            alt = "{}{}".format(anchor, coords.variant_bases)
        elif self.variant.is_deletion:
            if not coords.representative_transcript:
                return None
            start = coords.start - 1
            anchor = self._fetch_reference_base(coords.chromosome, start)
            ref = "{}{}".format(anchor, coords.reference_bases)
            if self._is_blank_allele(coords.variant_bases):
                alt = anchor
            else:
                alt = "{}{}".format(anchor, coords.variant_bases)
        else:
            start = coords.start
            ref = coords.reference_bases
            alt = coords.variant_bases
        return (start, ref, alt)

    def alt_record(self, alt):
        """Wrap ``alt`` in a :class:`vcfpy.Substitution` of the matching type.

        The type is ``INS`` or ``DEL`` for insertions and deletions; otherwise
        ``SNV`` when ``alt`` has length 1 and ``MNV`` for anything longer.
        """
        if self.variant.is_insertion:
            return vcfpy.Substitution(type_="INS", value=alt)
        if self.variant.is_deletion:
            return vcfpy.Substitution(type_="DEL", value=alt)
        if len(alt) == 1:
            return vcfpy.Substitution(type_="SNV", value=alt)
        return vcfpy.Substitution(type_="MNV", value=alt)

    def csq_alt(self):
        """Return the allele string that starts each ``CSQ`` entry.

        :return: ``"-"`` for deletions, the variant bases otherwise, or
            ``None`` when the reference build is not ``GRCh37`` or an
            insertion/deletion has no representative transcript.
        """
        coords = self.variant.coordinates
        if coords.reference_build != 'GRCh37':
            return None
        if self.variant.is_insertion:
            if not coords.representative_transcript:
                return None
            return coords.variant_bases
        if self.variant.is_deletion:
            if not coords.representative_transcript:
                return None
            return "-"
        return coords.variant_bases

    def _hgvs_for_representative_transcript(self, marker):
        """Return the only HGVS expression containing ``marker`` and the
        representative transcript, or ``''`` if there is none or several."""
        transcript = self.variant.coordinates.representative_transcript
        if not transcript:
            return ''
        matches = [
            expression for expression in self.variant.hgvs_expressions
            if marker in expression and transcript in expression
        ]
        return matches[0] if len(matches) == 1 else ''

    def hgvs_c(self):
        """Return the unique ``:c.`` HGVS expression for the representative
        transcript, or ``''`` when it is missing or ambiguous."""
        return self._hgvs_for_representative_transcript(':c.')

    def hgvs_p(self):
        """Return the unique ``:p.`` HGVS expression for the representative
        transcript, or ``''`` when it is missing or ambiguous."""
        return self._hgvs_for_representative_transcript(':p.')

    def sanitized_name(self):
        """Return the variant name with a ``=`` after the leading
        letters-and-digits prefix replaced by those leading letters.

        Names that do not match that pattern are returned unchanged.
        """
        name = self.variant.name
        match = self._EQUALS_NAME_RE.match(name)
        if match is not None:
            name = "".join([match.group(1), match.group(2), match.group(1), match.group(4)])
        return name

    @staticmethod
    def _evidence_fields(evidence):
        """Build the evidence-specific trailing fields of one ``CSQ`` entry."""
        return [
            "evidence",
            str(evidence.id),
            "https://civicdb.org/links/evidence/{}".format(evidence.id),
            "{} ({})".format(evidence.source.citation_id, evidence.source.source_type),
            str(evidence.variant_origin),
            evidence.status,
            str(evidence.significance or ''),
            str(evidence.evidence_direction or ''),
            evidence.disease.name if evidence.disease is not None else "",
            '&'.join([str(therapy) for therapy in evidence.therapies]),
            str(evidence.therapy_interaction_type or ""),
            '&'.join([
                "{} (HPO ID {})".format(phenotype.name, phenotype.hpo_id)
                for phenotype in evidence.phenotypes
            ]),
            evidence.evidence_level,
            str(evidence.rating),
            # Slots that only assertions fill in.
            "",
            "",
            "",
            "",
            "",
            "",
        ]

    @staticmethod
    def _assertion_fields(assertion):
        """Build the assertion-specific trailing fields of one ``CSQ`` entry."""
        return [
            "assertion",
            str(assertion.id),
            "https://civicdb.org/links/assertion/{}".format(assertion.id),
            "",
            str(assertion.variant_origin),
            assertion.status,
            # Guarded like the evidence fields so a missing value yields an
            # empty slot instead of a TypeError in '|'.join.
            str(assertion.significance or ''),
            str(assertion.assertion_direction or ''),
            str(assertion.disease),
            '&'.join([str(therapy) for therapy in assertion.therapies]),
            str(assertion.therapy_interaction_type or ''),
            # Slots that only evidence items fill in.
            "",
            "",
            "",
            "&".join([acmg_code.code for acmg_code in assertion.acmg_codes]),
            str(assertion.amp_level or ''),
            "&".join([clingen_code.code for clingen_code in assertion.clingen_codes]),
            assertion.format_nccn_guideline(),
            str(assertion.fda_regulatory_approval or ''),
            str(assertion.fda_companion_test or ''),
        ]

    def csq(self, include_status=None):
        """Build the ``CSQ`` entries for this variant.

        One pipe-separated string is produced per evidence item and per
        assertion of every molecular profile of the variant. Each string
        starts with the variant and molecular-profile fields, followed by the
        evidence- or assertion-specific fields.

        :param include_status: If not ``None``, only evidence items and
            assertions whose ``status`` is contained in it are emitted.
        :return: List of ``CSQ`` strings; empty when :meth:`csq_alt` returns
            ``None``.
        """
        csq_alt = self.csq_alt()
        if csq_alt is None:
            return []

        variant = self.variant
        # Variant-level values are identical for every entry: compute once.
        variant_fields = [
            csq_alt,
            '&'.join(variant_type.name for variant_type in variant.variant_types),
            variant.gene.name,
            str(variant.gene.entrez_id),
            'transcript',
            str(variant.coordinates.representative_transcript),
            self.hgvs_c(),
            self.hgvs_p(),
            self.sanitized_name(),
            str(variant.id),
            '&'.join(variant.variant_aliases),
            "https://civicdb.org/links/variants/{}".format(variant.id),
        ]
        variant_xref_fields = [
            '&'.join(variant.hgvs_expressions),
            str(variant.allele_registry_id),
            '&'.join(variant.clinvar_entries),
        ]

        entries = []
        for mp in variant.molecular_profiles:
            # Field order matches the original layout: variant fields,
            # profile identity, variant cross-references, profile score.
            shared_fields = (
                variant_fields
                + [
                    mp.sanitized_name(),
                    str(mp.id),
                    '&'.join(mp.aliases),
                    "https://civicdb.org/links/molecular-profiles/{}".format(mp.id),
                ]
                + variant_xref_fields
                + [str(mp.molecular_profile_score)]
            )
            for evidence in mp.evidence:
                if include_status is not None and evidence.status not in include_status:
                    continue
                entries.append('|'.join(shared_fields + self._evidence_fields(evidence)))
            for assertion in mp.assertions:
                if include_status is not None and assertion.status not in include_status:
                    continue
                entries.append('|'.join(shared_fields + self._assertion_fields(assertion)))
        return entries