Skip to content

Commit

Permalink
Merge pull request #58 from databio/dev
Browse files Browse the repository at this point in the history
Release 0.7.0
  • Loading branch information
khoroshevskyi authored Oct 7, 2024
2 parents cc97dbe + c99382d commit 827509d
Show file tree
Hide file tree
Showing 11 changed files with 269 additions and 56 deletions.
2 changes: 1 addition & 1 deletion bbconf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.6.1"
__version__ = "0.7.0"
1 change: 0 additions & 1 deletion bbconf/bbagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def bedset(self) -> BedAgentBedSet:
def objects(self) -> BBObjects:
return self._objects

@cached_property
def get_stats(self) -> StatsReturn:
"""
Get statistics for a bed file
Expand Down
2 changes: 1 addition & 1 deletion bbconf/config_parser/bedbaseconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import yacman
import zarr
from botocore.exceptions import BotoCoreError, EndpointConnectionError
from geniml.region2vec import Region2VecExModel
from geniml.region2vec.main import Region2VecExModel
from geniml.search import BED2BEDSearchInterface, QdrantBackend, Text2BEDSearchInterface
from geniml.search.query2vec import BED2Vec, Text2Vec
from pephubclient import PEPHubClient
Expand Down
109 changes: 108 additions & 1 deletion bbconf/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,16 @@
from typing import List, Optional

import pandas as pd
from sqlalchemy import TIMESTAMP, BigInteger, ForeignKey, Result, Select, event, select
from sqlalchemy import (
TIMESTAMP,
BigInteger,
ForeignKey,
Result,
Select,
event,
select,
UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSON
from sqlalchemy.engine import URL, Engine, create_engine
from sqlalchemy.event import listens_for
Expand Down Expand Up @@ -99,6 +108,10 @@ class Bed(Base):
"BedFileBedSetRelation", back_populates="bedfile", cascade="all, delete-orphan"
)

annotations: Mapped["BedMetadata"] = relationship(
back_populates="bed", cascade="all, delete-orphan", lazy="joined"
)

stats: Mapped["BedStats"] = relationship(
back_populates="bed", cascade="all, delete-orphan"
)
Expand All @@ -114,6 +127,74 @@ class Bed(Base):
)
license_mapping: Mapped["License"] = relationship("License", back_populates="bed")

ref_classifier: Mapped["GenomeRefStats"] = relationship(
"GenomeRefStats", back_populates="bed", cascade="all, delete-orphan"
)


class BedMetadata(Base):
    """
    ORM model for the ``bed_metadata`` table.

    Holds descriptive annotation columns (organism, cell type, assay, ...)
    for a bed record. The primary key ``id`` is at the same time a foreign
    key to ``bed.id`` declared with ``ondelete="CASCADE"``, and ``bed`` is
    the relationship paired with ``Bed.annotations``.
    """

    __tablename__ = "bed_metadata"

    # --- organism ---
    # NOTE(review): unlike every other annotation column in this class,
    # species_name does not pass nullable=True -- confirm whether it is
    # really meant to be required.
    species_name: Mapped[str] = mapped_column(
        comment="Organism name",
        default=None,
    )
    species_id: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Organism taxon id",
    )

    # --- sample description ---
    genotype: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Genotype of the sample",
    )
    phenotype: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Phenotype of the sample",
    )

    # --- biological source ---
    cell_type: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Specific kind of cell with distinct characteristics found in an organism. e.g. Neurons, Hepatocytes, Adipocytes",
    )
    cell_line: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Population of cells derived from a single cell and cultured in the lab for extended use, e.g. HeLa, HepG2, k562",
    )
    tissue: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Tissue type",
    )

    # --- experiment description ---
    library_source: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Library source (e.g. genomic, transcriptomic)",
    )
    assay: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Experimental protocol (e.g. ChIP-seq)",
    )
    antibody: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Antibody used in the assay",
    )
    target: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Target of the assay (e.g. H3K4me3)",
    )
    treatment: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Treatment of the sample (e.g. drug treatment)",
    )

    # --- external identifiers ---
    global_sample_id: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Global sample identifier. e.g. GSM000",
    )
    global_experiment_id: Mapped[str] = mapped_column(
        nullable=True,
        default=None,
        comment="Global experiment identifier. e.g. GSE000",
    )

    # Primary key doubles as the foreign key to the owning bed row.
    id: Mapped[str] = mapped_column(
        ForeignKey("bed.id", ondelete="CASCADE"),
        index=True,
        primary_key=True,
    )

    bed: Mapped["Bed"] = relationship("Bed", back_populates="annotations")


class BedStats(Base):
__tablename__ = "bed_stats"
Expand Down Expand Up @@ -283,6 +364,32 @@ class License(Base):
bed: Mapped[List["Bed"]] = relationship("Bed", back_populates="license_mapping")


class GenomeRefStats(Base):
    """
    ORM model for the ``genome_ref_stats`` table.

    Each row ties one bed record (``bed_id``) to one ``compared_genome``
    and stores the values recorded for that pair. The unique constraint
    permits at most one row per (``bed_id``, ``compared_genome``)
    combination. ``bed`` is the relationship paired with
    ``Bed.ref_classifier``.
    """

    __tablename__ = "genome_ref_stats"
    __table_args__ = (UniqueConstraint("bed_id", "compared_genome"),)

    id: Mapped[int] = mapped_column(index=True, primary_key=True)

    # Rows reference their bed via a foreign key declared with
    # ondelete="CASCADE".
    bed_id: Mapped[str] = mapped_column(
        ForeignKey("bed.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    provided_genome: Mapped[str]
    compared_genome: Mapped[str] = mapped_column(
        comment="Compared Genome",
        nullable=False,
    )

    # The three float columns are nullable; the two integer columns are not.
    xs: Mapped[float] = mapped_column(default=None, nullable=True)
    oobr: Mapped[float] = mapped_column(default=None, nullable=True)
    sequence_fit: Mapped[float] = mapped_column(default=None, nullable=True)
    assigned_points: Mapped[int] = mapped_column(nullable=False)
    tier_ranking: Mapped[int] = mapped_column(nullable=False)

    bed: Mapped["Bed"] = relationship("Bed", back_populates="ref_classifier")


@listens_for(Universes, "after_insert")
@listens_for(Universes, "after_update")
def add_bed_universe(mapper, connection, target):
Expand Down
63 changes: 62 additions & 1 deletion bbconf/models/bed_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,54 @@ class BedPEPHub(BaseModel):
model_config = ConfigDict(extra="allow", populate_by_name=True)


class StandardMeta(BaseModel):
    """
    Standardized Bed file metadata
    """

    # Every field defaults to an empty string. Two fields carry an alias
    # (species_name <- "organism", assay <- "exp_protocol"); with
    # populate_by_name=True they can be populated by either name.
    # Unknown input keys are dropped (extra="ignore").

    # --- organism / sample ---
    species_name: str = Field(
        alias="organism",
        default="",
        description="Name of species. e.g. Homo sapiens.",
    )
    species_id: str = ""
    genotype: str = Field(default="", description="Genotype of the sample")
    phenotype: str = Field(default="", description="Phenotype of the sample")
    description: Optional[str] = ""

    # --- biological source ---
    cell_type: str = Field(
        default="",
        description="specific kind of cell with distinct characteristics found in an organism. e.g. Neurons, Hepatocytes, Adipocytes",
    )
    cell_line: str = Field(
        default="",
        description="population of cells derived from a single cell and cultured in the lab for extended use, e.g. HeLa, HepG2, k562",
    )
    tissue: str = Field(default="", description="Tissue type")

    # --- experiment ---
    library_source: str = Field(
        default="",
        description="Library source (e.g. genomic, transcriptomic)",
    )
    assay: str = Field(
        alias="exp_protocol",
        default="",
        description="Experimental protocol (e.g. ChIP-seq)",
    )
    antibody: str = Field(default="", description="Antibody used in the assay")
    target: str = Field(
        default="",
        description="Target of the assay (e.g. H3K4me3)",
    )
    treatment: str = Field(
        default="",
        description="Treatment of the sample (e.g. drug treatment)",
    )

    # --- external identifiers ---
    # excluded in training
    global_sample_id: str = Field(
        default="",
        description="Global sample identifier. e.g. GSM000",
    )
    # excluded in training
    global_experiment_id: str = Field(
        default="",
        description="Global experiment identifier. e.g. GSE000",
    )

    model_config = ConfigDict(
        extra="ignore",
        populate_by_name=True,
    )


class BedPEPHubRestrict(BedPEPHub):

model_config = ConfigDict(extra="ignore")
Expand All @@ -111,6 +159,7 @@ class BedMetadataBasic(BedClassification):
last_update_date: Optional[datetime.datetime] = None
is_universe: Optional[bool] = False
license_id: Optional[str] = DEFAULT_LICENSE
annotation: Optional[StandardMeta] = None


class UniverseMetadata(BaseModel):
Expand All @@ -124,7 +173,7 @@ class BedSetMinimal(BaseModel):
description: Union[str, None] = None


class BedMetadata(BedMetadataBasic):
class BedMetadataAll(BedMetadataBasic):
stats: Union[BedStatsModel, None] = None
plots: Union[BedPlots, None] = None
files: Union[BedFiles, None] = None
Expand Down Expand Up @@ -171,3 +220,15 @@ class TokenizedPathResponse(BaseModel):
universe_id: str
file_path: str
endpoint_url: str


class RefGenValidModel(BaseModel):
    # Pydantic model whose field names match the non-key columns of the
    # GenomeRefStats ORM table (everything except id and bed_id).
    # Unknown input keys are rejected (extra="forbid").

    # Genome pair; both values are required.
    provided_genome: str
    compared_genome: str

    # NOTE(review): xs is a plain float defaulting to 0.0, whereas the
    # genome_ref_stats.xs column is nullable -- confirm that a NULL xs
    # never has to be loaded into this model.
    xs: float = 0.0
    oobr: Optional[float] = None
    sequence_fit: Optional[float] = None

    # Required integer fields.
    assigned_points: int
    tier_ranking: int

    model_config = ConfigDict(extra="forbid")
Loading

0 comments on commit 827509d

Please sign in to comment.