From dde6a2992ea8760b6b689ace89729a8876803f02 Mon Sep 17 00:00:00 2001 From: Adam Faulconbridge Date: Thu, 26 Nov 2020 10:14:44 +0000 Subject: [PATCH] remove singleton metaclass --- src/snps/resources.py | 4 +- src/snps/snps.py | 14 +++++-- src/snps/utils.py | 10 ----- tests/io/test_reader.py | 89 +++++++++++++++++++++-------------------- tests/io/test_writer.py | 18 ++++----- 5 files changed, 67 insertions(+), 68 deletions(-) diff --git a/src/snps/resources.py b/src/snps/resources.py index 3fb7b6e6..9236a5ba 100644 --- a/src/snps/resources.py +++ b/src/snps/resources.py @@ -63,12 +63,12 @@ import numpy as np from snps.ensembl import EnsemblRestClient -from snps.utils import create_dir, Singleton +from snps.utils import create_dir logger = logging.getLogger(__name__) -class Resources(metaclass=Singleton): +class Resources: """ Object used to manage resources required by `snps`. """ def __init__(self, resources_dir="resources"): diff --git a/src/snps/snps.py b/src/snps/snps.py index 8a2ed148..b52078f9 100644 --- a/src/snps/snps.py +++ b/src/snps/snps.py @@ -60,7 +60,8 @@ def __init__( only_detect_source=False, assign_par_snps=False, output_dir="output", - resources_dir="resources", + resources_dir=None, + resources_obj=Resources(resources_dir="resources"), deduplicate=True, deduplicate_XY_chrom=True, deduplicate_MT_chrom=True, @@ -81,7 +82,9 @@ def __init__( output_dir : str path to output directory resources_dir : str - name / path of resources directory + name / path of resources directory if resources_obj is None + resources_obj : None or Resources + resources object to use deduplicate : bool deduplicate RSIDs and make SNPs available as `SNPs.duplicate` deduplicate_XY_chrom : bool @@ -110,7 +113,12 @@ def __init__( self._build = 0 self._build_detected = False self._output_dir = output_dir - self._resources = Resources(resources_dir=resources_dir) + if resources_dir: + self._resources = Resources(resources_dir=resources_dir) + elif resources_obj: + self._resources = resources_obj + else: + raise ValueError("One of resources_dir or resources_obj must be defined") self._parallelizer = Parallelizer(parallelize=parallelize, processes=processes) if file: diff --git a/src/snps/utils.py b/src/snps/utils.py index eb71b2b7..ef024daa 100644 --- a/src/snps/utils.py +++ b/src/snps/utils.py @@ -89,16 +89,6 @@ def __call__(self, f, tasks): return map(f, tasks) -class Singleton(type): - # https://stackoverflow.com/a/6798042 - _instances = {} - - def __call__(cls, *args, **kwargs): - if cls not in cls._instances: - cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) - return cls._instances[cls] - - def create_dir(path): """ Create directory specified by `path` if it doesn't already exist. diff --git a/tests/io/test_reader.py b/tests/io/test_reader.py index bf93f660..bc977478 100644 --- a/tests/io/test_reader.py +++ b/tests/io/test_reader.py @@ -36,34 +36,13 @@ from atomicwrites import atomic_write +from snps import SNPs from snps.resources import Resources from snps.utils import gzip_file from tests import BaseSNPsTestCase class TestReader(BaseSNPsTestCase): - @staticmethod - def _setup_gsa_test(resources_dir): - # reset resource if already loaded - r = Resources() - r._resources_dir = resources_dir - r._gsa_resources = {} - - gzip_file( - "tests/resources/gsa_rsid_map.txt", - os.path.join(resources_dir, "gsa_rsid_map.txt.gz"), - ) - gzip_file( - "tests/resources/gsa_chrpos_map.txt", - os.path.join(resources_dir, "gsa_chrpos_map.txt.gz"), - ) - - @staticmethod - def _teardown_gsa_test(): - r = Resources() - r._resources_dir = "resources" - r._gsa_resources = {} - def run_build_detection_test( self, run_parsing_tests_func, @@ -149,20 +128,6 @@ def test_read_ancestry_multi_sep(self): # https://www.ancestry.com self.run_parsing_tests("tests/input/ancestry_multi_sep.txt", "AncestryDNA") - def test_read_codigo46(self): - # https://codigo46.com.mx - with tempfile.TemporaryDirectory() as tmpdir: - self._setup_gsa_test(tmpdir) - self.run_parsing_tests("tests/input/codigo46.txt", "Codigo46") - self._teardown_gsa_test() - - def test_read_tellmeGen(self): - # https://www.tellmegen.com/ - with tempfile.TemporaryDirectory() as tmpdir: - self._setup_gsa_test(tmpdir) - self.run_parsing_tests("tests/input/tellmeGen.txt", "tellmeGen") - self._teardown_gsa_test() - def test_read_DNALand(self): # https://dna.land/ self.run_parsing_tests("tests/input/DNALand.txt", "DNA.Land") @@ -309,13 +274,6 @@ def test_read_myheritage_extra_quotes(self): # https://www.myheritage.com self.run_parsing_tests("tests/input/myheritage_extra_quotes.csv", "MyHeritage") - def test_read_sano(self): - # https://sanogenetics.com - with tempfile.TemporaryDirectory() as tmpdir: - self._setup_gsa_test(tmpdir) - self.run_parsing_tests("tests/input/sano.txt", "Sano") - self._teardown_gsa_test() - def test_read_vcf(self): self.run_parsing_tests_vcf("tests/input/testvcf.vcf") @@ -353,3 +311,48 @@ def test_read_unannotated_vcf(self): self.run_parsing_tests_vcf( "tests/input/unannotated_testvcf.vcf", "vcf", unannotated=True, build=0 ) + + +class TestGSAReader(BaseSNPsTestCase): + resources = None + + def _setup_gsa_test(self, resources_dir): + self.resources = Resources() + self.resources._resources_dir = resources_dir + self.resources._gsa_resources = {} + + gzip_file( + "tests/resources/gsa_rsid_map.txt", + os.path.join(resources_dir, "gsa_rsid_map.txt.gz"), + ) + gzip_file( + "tests/resources/gsa_chrpos_map.txt", + os.path.join(resources_dir, "gsa_chrpos_map.txt.gz"), + ) + + def parse_file(self, file, rsids=()): + return SNPs(file, rsids=rsids, resources_dir=None, resources_obj=self.resources) + + def parse_bytes(self, file, rsids=()): + with open(file, "rb") as f: + return SNPs( + f.read(), rsids=rsids, resources_dir=None, resources_obj=self.resources + ) + + def test_read_codigo46(self): + # https://codigo46.com.mx + with tempfile.TemporaryDirectory() as tmpdir: + self._setup_gsa_test(tmpdir) + self.run_parsing_tests("tests/input/codigo46.txt", "Codigo46") + + def test_read_tellmeGen(self): + # https://www.tellmegen.com/ + with tempfile.TemporaryDirectory() as tmpdir: + self._setup_gsa_test(tmpdir) + self.run_parsing_tests("tests/input/tellmeGen.txt", "tellmeGen") + + def test_read_sano(self): + # https://sanogenetics.com + with tempfile.TemporaryDirectory() as tmpdir: + self._setup_gsa_test(tmpdir) + self.run_parsing_tests("tests/input/sano.txt", "Sano") diff --git a/tests/io/test_writer.py b/tests/io/test_writer.py index 8c4b19e1..de2827c7 100644 --- a/tests/io/test_writer.py +++ b/tests/io/test_writer.py @@ -82,11 +82,11 @@ def test_save_snps_csv_filename(self): self.run_parsing_tests("output/generic.csv", "generic") def test_save_snps_vcf(self): - s = SNPs("tests/input/testvcf.vcf") - r = Resources() r._reference_sequences["GRCh37"] = {} + s = SNPs("tests/input/testvcf.vcf", resources_obj=r) + with tempfile.TemporaryDirectory() as tmpdir: dest = os.path.join(tmpdir, "generic.fa.gz") gzip_file("tests/input/generic.fa", dest) @@ -100,11 +100,11 @@ def test_save_snps_vcf(self): self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf") def test_save_snps_vcf_false_positive_build(self): - snps = SNPs("tests/input/testvcf.vcf") - r = Resources() r._reference_sequences["GRCh37"] = {} + snps = SNPs("tests/input/testvcf.vcf", resources_obj=r) + with tempfile.TemporaryDirectory() as tmpdir: dest = os.path.join(tmpdir, "generic.fa.gz") gzip_file("tests/input/generic.fa", dest) @@ -130,11 +130,11 @@ def test_save_snps_vcf_false_positive_build(self): self.run_parsing_tests_vcf(output) def test_save_snps_vcf_discrepant_pos(self): - s = SNPs("tests/input/testvcf.vcf") - r = Resources() r._reference_sequences["GRCh37"] = {} + s = SNPs("tests/input/testvcf.vcf", resources_obj=r) + with tempfile.TemporaryDirectory() as tmpdir: dest = os.path.join(tmpdir, "generic.fa.gz") gzip_file("tests/input/generic.fa", dest) @@ -164,13 +164,11 @@ def test_save_snps_vcf_discrepant_pos(self): self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf", snps_df=expected) def test_save_snps_vcf_phased(self): - # read phased data - s = SNPs("tests/input/testvcf_phased.vcf") - - # setup resource to use test FASTA reference sequence r = Resources() r._reference_sequences["GRCh37"] = {} + s = SNPs("tests/input/testvcf_phased.vcf", resources_obj=r) + with tempfile.TemporaryDirectory() as tmpdir: dest = os.path.join(tmpdir, "generic.fa.gz") gzip_file("tests/input/generic.fa", dest)