Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove singleton metaclass #116

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/snps/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@
import numpy as np

from snps.ensembl import EnsemblRestClient
from snps.utils import create_dir, Singleton
from snps.utils import create_dir

logger = logging.getLogger(__name__)


class Resources(metaclass=Singleton):
class Resources:
""" Object used to manage resources required by `snps`. """

def __init__(self, resources_dir="resources"):
Expand Down
14 changes: 11 additions & 3 deletions src/snps/snps.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ def __init__(
only_detect_source=False,
assign_par_snps=False,
output_dir="output",
resources_dir="resources",
resources_dir=None,
resources_obj=Resources(resources_dir="resources"),
deduplicate=True,
deduplicate_XY_chrom=True,
deduplicate_MT_chrom=True,
Expand All @@ -81,7 +82,9 @@ def __init__(
output_dir : str
path to output directory
resources_dir : str
name / path of resources directory
name / path of resources directory if resources_obj is None
resources_obj : None or Resources
resources object to use
deduplicate : bool
deduplicate RSIDs and make SNPs available as `SNPs.duplicate`
deduplicate_XY_chrom : bool
Expand Down Expand Up @@ -110,7 +113,12 @@ def __init__(
self._build = 0
self._build_detected = False
self._output_dir = output_dir
self._resources = Resources(resources_dir=resources_dir)
if resources_dir:
self._resources = Resources(resources_dir=resources_dir)
elif resources_obj:
self._resources = resources_obj
else:
raise ValueError("One of resources_dir or resources_obj must be defined")
self._parallelizer = Parallelizer(parallelize=parallelize, processes=processes)

if file:
Expand Down
10 changes: 0 additions & 10 deletions src/snps/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,6 @@ def __call__(self, f, tasks):
return map(f, tasks)


class Singleton(type):
    """ Metaclass that gives each class using it at most one instance.

    The first call to a class whose metaclass is `Singleton` constructs the
    instance normally and stores it; every later call returns that stored
    instance. Arguments passed on later calls are ignored, since no new
    object is constructed.
    """

    # https://stackoverflow.com/a/6798042
    # Registry shared by all classes using this metaclass: maps class -> its sole instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """ Return the stored instance of `cls`, constructing it on first use.

        Parameters
        ----------
        *args, **kwargs
            forwarded to the normal construction of `cls` on the first call only

        Returns
        -------
        object
            the single instance of `cls`
        """
        registry = cls._instances
        if cls not in registry:
            registry[cls] = super().__call__(*args, **kwargs)
        return registry[cls]


def create_dir(path):
""" Create directory specified by `path` if it doesn't already exist.

Expand Down
89 changes: 46 additions & 43 deletions tests/io/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,34 +36,13 @@

from atomicwrites import atomic_write

from snps import SNPs
from snps.resources import Resources
from snps.utils import gzip_file
from tests import BaseSNPsTestCase


class TestReader(BaseSNPsTestCase):
@staticmethod
def _setup_gsa_test(resources_dir):
    """ Redirect the `Resources` object to `resources_dir` and stage GSA maps there.

    Parameters
    ----------
    resources_dir : str
        directory that receives gzipped copies of the test GSA map files
    """
    # reset resource if already loaded
    shared = Resources()
    shared._resources_dir = resources_dir
    shared._gsa_resources = {}

    # (uncompressed test fixture, name of the gzipped copy written to resources_dir)
    gsa_maps = (
        ("tests/resources/gsa_rsid_map.txt", "gsa_rsid_map.txt.gz"),
        ("tests/resources/gsa_chrpos_map.txt", "gsa_chrpos_map.txt.gz"),
    )
    for fixture_path, gz_name in gsa_maps:
        gzip_file(fixture_path, os.path.join(resources_dir, gz_name))

@staticmethod
def _teardown_gsa_test():
    """ Point the `Resources` object back at "resources" and clear its GSA cache. """
    shared = Resources()
    # Applied in the same order as the setup helper: directory first, then cache.
    for attr_name, reset_value in (
        ("_resources_dir", "resources"),
        ("_gsa_resources", {}),
    ):
        setattr(shared, attr_name, reset_value)

def run_build_detection_test(
self,
run_parsing_tests_func,
Expand Down Expand Up @@ -149,20 +128,6 @@ def test_read_ancestry_multi_sep(self):
# https://www.ancestry.com
self.run_parsing_tests("tests/input/ancestry_multi_sep.txt", "AncestryDNA")

def test_read_codigo46(self):
    """ Parse the Codigo46 sample with GSA maps staged in a temporary directory. """
    # https://codigo46.com.mx
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            self._setup_gsa_test(tmpdir)
            self.run_parsing_tests("tests/input/codigo46.txt", "Codigo46")
        finally:
            # Always undo the redirection, even when setup or an assertion
            # fails; otherwise the Resources object keeps pointing at a
            # temp directory that is deleted on exit and later tests break.
            self._teardown_gsa_test()

def test_read_tellmeGen(self):
    """ Parse the tellmeGen sample with GSA maps staged in a temporary directory. """
    # https://www.tellmegen.com/
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            self._setup_gsa_test(tmpdir)
            self.run_parsing_tests("tests/input/tellmeGen.txt", "tellmeGen")
        finally:
            # Always undo the redirection, even when setup or an assertion
            # fails; otherwise the Resources object keeps pointing at a
            # temp directory that is deleted on exit and later tests break.
            self._teardown_gsa_test()

def test_read_DNALand(self):
# https://dna.land/
self.run_parsing_tests("tests/input/DNALand.txt", "DNA.Land")
Expand Down Expand Up @@ -309,13 +274,6 @@ def test_read_myheritage_extra_quotes(self):
# https://www.myheritage.com
self.run_parsing_tests("tests/input/myheritage_extra_quotes.csv", "MyHeritage")

def test_read_sano(self):
    """ Parse the Sano sample with GSA maps staged in a temporary directory. """
    # https://sanogenetics.com
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            self._setup_gsa_test(tmpdir)
            self.run_parsing_tests("tests/input/sano.txt", "Sano")
        finally:
            # Always undo the redirection, even when setup or an assertion
            # fails; otherwise the Resources object keeps pointing at a
            # temp directory that is deleted on exit and later tests break.
            self._teardown_gsa_test()

def test_read_vcf(self):
self.run_parsing_tests_vcf("tests/input/testvcf.vcf")

Expand Down Expand Up @@ -353,3 +311,48 @@ def test_read_unannotated_vcf(self):
self.run_parsing_tests_vcf(
"tests/input/unannotated_testvcf.vcf", "vcf", unannotated=True, build=0
)


class TestGSAReader(BaseSNPsTestCase):
    """ Reader tests for sources that need the GSA map resources.

    Each test builds its own `Resources` object, redirected to a temporary
    directory, and passes it to every `SNPs` created by the parse helpers.
    """

    # Set by `_setup_gsa_test`; passed as `resources_obj` by the parse helpers.
    resources = None

    def _setup_gsa_test(self, resources_dir):
        """ Create this test's `Resources` and stage the GSA maps in `resources_dir`.

        Parameters
        ----------
        resources_dir : str
            directory that receives gzipped copies of the test GSA map files
        """
        res = self.resources = Resources()
        res._resources_dir = resources_dir
        res._gsa_resources = {}

        # (uncompressed test fixture, name of the gzipped copy written to resources_dir)
        gsa_maps = (
            ("tests/resources/gsa_rsid_map.txt", "gsa_rsid_map.txt.gz"),
            ("tests/resources/gsa_chrpos_map.txt", "gsa_chrpos_map.txt.gz"),
        )
        for fixture_path, gz_name in gsa_maps:
            gzip_file(fixture_path, os.path.join(resources_dir, gz_name))

    # NOTE(review): parse_file / parse_bytes presumably override helpers that
    # BaseSNPsTestCase.run_parsing_tests calls - confirm against the base class.
    def parse_file(self, file, rsids=()):
        """ Build `SNPs` from a file path using this test's `Resources` object. """
        return SNPs(file, rsids=rsids, resources_dir=None, resources_obj=self.resources)

    def parse_bytes(self, file, rsids=()):
        """ Build `SNPs` from the raw bytes of `file` using this test's `Resources` object. """
        with open(file, "rb") as handle:
            raw = handle.read()
        return SNPs(raw, rsids=rsids, resources_dir=None, resources_obj=self.resources)

    def _run_gsa_parsing_test(self, file, source):
        """ Stage the GSA maps in a fresh temp directory, then run the parsing tests. """
        with tempfile.TemporaryDirectory() as tmpdir:
            self._setup_gsa_test(tmpdir)
            self.run_parsing_tests(file, source)

    def test_read_codigo46(self):
        # https://codigo46.com.mx
        self._run_gsa_parsing_test("tests/input/codigo46.txt", "Codigo46")

    def test_read_tellmeGen(self):
        # https://www.tellmegen.com/
        self._run_gsa_parsing_test("tests/input/tellmeGen.txt", "tellmeGen")

    def test_read_sano(self):
        # https://sanogenetics.com
        self._run_gsa_parsing_test("tests/input/sano.txt", "Sano")
18 changes: 8 additions & 10 deletions tests/io/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,11 @@ def test_save_snps_csv_filename(self):
self.run_parsing_tests("output/generic.csv", "generic")

def test_save_snps_vcf(self):
s = SNPs("tests/input/testvcf.vcf")

r = Resources()
r._reference_sequences["GRCh37"] = {}

s = SNPs("tests/input/testvcf.vcf", resources_obj=r)

with tempfile.TemporaryDirectory() as tmpdir:
dest = os.path.join(tmpdir, "generic.fa.gz")
gzip_file("tests/input/generic.fa", dest)
Expand All @@ -100,11 +100,11 @@ def test_save_snps_vcf(self):
self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf")

def test_save_snps_vcf_false_positive_build(self):
snps = SNPs("tests/input/testvcf.vcf")

r = Resources()
r._reference_sequences["GRCh37"] = {}

snps = SNPs("tests/input/testvcf.vcf", resources_obj=r)

with tempfile.TemporaryDirectory() as tmpdir:
dest = os.path.join(tmpdir, "generic.fa.gz")
gzip_file("tests/input/generic.fa", dest)
Expand All @@ -130,11 +130,11 @@ def test_save_snps_vcf_false_positive_build(self):
self.run_parsing_tests_vcf(output)

def test_save_snps_vcf_discrepant_pos(self):
s = SNPs("tests/input/testvcf.vcf")

r = Resources()
r._reference_sequences["GRCh37"] = {}

s = SNPs("tests/input/testvcf.vcf", resources_obj=r)

with tempfile.TemporaryDirectory() as tmpdir:
dest = os.path.join(tmpdir, "generic.fa.gz")
gzip_file("tests/input/generic.fa", dest)
Expand Down Expand Up @@ -164,13 +164,11 @@ def test_save_snps_vcf_discrepant_pos(self):
self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf", snps_df=expected)

def test_save_snps_vcf_phased(self):
# read phased data
s = SNPs("tests/input/testvcf_phased.vcf")

# setup resource to use test FASTA reference sequence
r = Resources()
r._reference_sequences["GRCh37"] = {}

s = SNPs("tests/input/testvcf_phased.vcf", resources_obj=r)

with tempfile.TemporaryDirectory() as tmpdir:
dest = os.path.join(tmpdir, "generic.fa.gz")
gzip_file("tests/input/generic.fa", dest)
Expand Down