Module `phc.easy.summary.counts`

Expand source code

import inspect
from typing import Optional

import pandas as pd
from funcy import first, iffy, lmapcat, rpartial
from phc.easy.abstract.paging_api_item import PagingApiItem, PagingApiOptions
from phc.easy.auth import Auth
from phc.easy.util.frame import combine_first, drop
from toolz import pipe


class NoOptions(PagingApiOptions):
    """Options model that declares no fields beyond PagingApiOptions."""


class SummaryCounts(PagingApiItem):
    """Paging API item for the ``analytics/summary/{project_id}`` endpoint.

    The response is flattened into one item per count entry and then
    reshaped into a frame with shared ``code``/``count``/``display`` columns.
    """

    @staticmethod
    def resource_path():
        """Return the URL path template of the summary endpoint.

        The ``{project_id}`` placeholder is returned unformatted.
        """
        path_template = "analytics/summary/{project_id}"
        return path_template

    @staticmethod
    def response_to_items(data):
        """Flatten a summary response into a list of tagged entries.

        ``data`` is normalized with ``pd.json_normalize`` and only the first
        resulting record is used (an empty mapping when there is none). For
        every key of that record whose value is a list, one dict is produced
        per list entry, with the key stored under ``"summary"``. Keys whose
        value is not a list are skipped.
        """
        records = pd.json_normalize(data).to_dict("records")
        flattened = first(records) or {}
        return [
            {"summary": key, **entry}
            for key, entries in flattened.items()
            if isinstance(entries, list)
            for entry in entries
        ]

    @staticmethod
    def execute_args() -> dict:
        """Return extra execution keyword arguments.

        ``ignore_cache`` is always set to ``True``.
        """
        return {"ignore_cache": True}

    @staticmethod
    def params_class():
        """Return ``NoOptions``, the options class used for the parameters."""
        options_model = NoOptions
        return options_model

    @staticmethod
    def transform_results(
        data_frame: pd.DataFrame, include_demographics: bool, **expand_args
    ):
        """Reshape a batch of summary items into a uniform counts frame.

        Steps, in order:

        1. ``combine_first`` (helper from ``phc.easy.util.frame``) is applied
           three times to fold alternative source columns into ``code``,
           ``count`` and ``display``.
        2. The literal text ``.counts`` is removed from ``summary`` values.
        3. The folded source columns are removed with ``drop``.
        4. Rows are sorted by ``summary`` and ``count``, both descending.
        5. When ``include_demographics`` is exactly ``False``, rows whose
           ``summary`` contains ``demographic`` are removed.

        Steps that need the ``summary`` (or ``count``) column are skipped
        when that column is absent. The returned frame has a fresh index.
        ``expand_args`` is accepted but not used here.
        """
        code_sources = ["code", "index", "demographic_value"]
        count_sources = [
            "code_count",
            "count",
            "sequence_type_count",
            "test_type_count",
            "variant_count",
        ]
        display_sources = ["display", "sequence_type"]
        folded_columns = [
            "index",
            "sequence_type_count",
            "sequence_type",
            "code_count",
            "demographic_value",
            "test_type_count",
            "variant_count",
        ]

        frame = combine_first(data_frame, code_sources, "code")
        frame = combine_first(frame, count_sources, "count")
        frame = combine_first(frame, display_sources, "display")

        if "summary" in frame.columns:
            frame = frame.assign(
                summary=frame.summary.str.replace(".counts", "", regex=False)
            )

        frame = drop(frame, folded_columns)

        if "summary" in frame.columns and "count" in frame.columns:
            frame = frame.sort_values(["summary", "count"], ascending=False)

        if include_demographics is False and "summary" in frame.columns:
            frame = frame[~frame.summary.str.contains("demographic")]

        return frame.reset_index(drop=True)

    @classmethod
    def get_data_frame(
        cls,
        include_demographics: bool = False,
        all_results: bool = True,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for summary counts across clinical and omics data

        NOTE: By default, demographic data is excluded since it is not
        technically counts of entities. If demographics-only data is desired,
        use this:

        >>> from phc.easy.summary.item_counts import SummaryItemCounts
        >>> SummaryItemCounts.get_data_frame(summary="demographics")

        ## Parameters

        include_demographics: When left as `False`, rows whose summary name
        contains "demographic" are removed by `transform_results`.

        Execution: `phc.easy.query.Query.execute_paging_api`
        """

        # NOTE: include_demographics gets passed through to transform_results
        # since explicitly declared there.
        # NOTE(review): the helper below receives this call's frame and
        # locals(); presumably it collects the declared arguments from them,
        # so keep new local variables out of the way of this call -- confirm
        # against _get_current_args.

        df = super().get_data_frame(
            **kw_args, **cls._get_current_args(inspect.currentframe(), locals())
        )

        return df

Classes

class NoOptions (**data: Any)

Usage docs: https://docs.pydantic.dev/2.10/concepts/models/

A base class for creating Pydantic models.

Attributes

__class_vars__: The names of the class variables defined on the model.
__private_attributes__: Metadata about the private attributes of the model.
__signature__: The synthesized __init__ [Signature][inspect.Signature] of the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom __init__ function.
__pydantic_decorators__: Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [RootModel][pydantic.root_model.RootModel].
__pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_fields__: A dictionary of field names and their corresponding [FieldInfo][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [ComputedFieldInfo][pydantic.fields.ComputedFieldInfo] objects.
__pydantic_extra__: A dictionary containing extra values, if [extra][pydantic.config.ConfigDict.extra] is set to 'allow'.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.

Create a new model by parsing and validating input data from keyword arguments.

Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Expand source code

class NoOptions(PagingApiOptions):
    """Options model that declares no fields beyond PagingApiOptions."""

Ancestors

PagingApiOptions
pydantic.main.BaseModel

Class variables

var model_config

Methods

def model_dump(self): Inherited from: PagingApiOptions.model_dump

Usage docs: https://docs.pydantic.dev/2.10/concepts/serialization/#modelmodel_dump …

class SummaryCounts

Expand source code

class SummaryCounts(PagingApiItem):
    """Paging API item for the ``analytics/summary/{project_id}`` endpoint.

    The response is flattened into one item per count entry and then
    reshaped into a frame with shared ``code``/``count``/``display`` columns.
    """

    @staticmethod
    def resource_path():
        """Return the URL path template of the summary endpoint.

        The ``{project_id}`` placeholder is returned unformatted.
        """
        path_template = "analytics/summary/{project_id}"
        return path_template

    @staticmethod
    def response_to_items(data):
        """Flatten a summary response into a list of tagged entries.

        ``data`` is normalized with ``pd.json_normalize`` and only the first
        resulting record is used (an empty mapping when there is none). For
        every key of that record whose value is a list, one dict is produced
        per list entry, with the key stored under ``"summary"``. Keys whose
        value is not a list are skipped.
        """
        records = pd.json_normalize(data).to_dict("records")
        flattened = first(records) or {}
        return [
            {"summary": key, **entry}
            for key, entries in flattened.items()
            if isinstance(entries, list)
            for entry in entries
        ]

    @staticmethod
    def execute_args() -> dict:
        """Return extra execution keyword arguments.

        ``ignore_cache`` is always set to ``True``.
        """
        return {"ignore_cache": True}

    @staticmethod
    def params_class():
        """Return ``NoOptions``, the options class used for the parameters."""
        options_model = NoOptions
        return options_model

    @staticmethod
    def transform_results(
        data_frame: pd.DataFrame, include_demographics: bool, **expand_args
    ):
        """Reshape a batch of summary items into a uniform counts frame.

        Steps, in order:

        1. ``combine_first`` (helper from ``phc.easy.util.frame``) is applied
           three times to fold alternative source columns into ``code``,
           ``count`` and ``display``.
        2. The literal text ``.counts`` is removed from ``summary`` values.
        3. The folded source columns are removed with ``drop``.
        4. Rows are sorted by ``summary`` and ``count``, both descending.
        5. When ``include_demographics`` is exactly ``False``, rows whose
           ``summary`` contains ``demographic`` are removed.

        Steps that need the ``summary`` (or ``count``) column are skipped
        when that column is absent. The returned frame has a fresh index.
        ``expand_args`` is accepted but not used here.
        """
        code_sources = ["code", "index", "demographic_value"]
        count_sources = [
            "code_count",
            "count",
            "sequence_type_count",
            "test_type_count",
            "variant_count",
        ]
        display_sources = ["display", "sequence_type"]
        folded_columns = [
            "index",
            "sequence_type_count",
            "sequence_type",
            "code_count",
            "demographic_value",
            "test_type_count",
            "variant_count",
        ]

        frame = combine_first(data_frame, code_sources, "code")
        frame = combine_first(frame, count_sources, "count")
        frame = combine_first(frame, display_sources, "display")

        if "summary" in frame.columns:
            frame = frame.assign(
                summary=frame.summary.str.replace(".counts", "", regex=False)
            )

        frame = drop(frame, folded_columns)

        if "summary" in frame.columns and "count" in frame.columns:
            frame = frame.sort_values(["summary", "count"], ascending=False)

        if include_demographics is False and "summary" in frame.columns:
            frame = frame[~frame.summary.str.contains("demographic")]

        return frame.reset_index(drop=True)

    @classmethod
    def get_data_frame(
        cls,
        include_demographics: bool = False,
        all_results: bool = True,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for summary counts across clinical and omics data

        NOTE: By default, demographic data is excluded since it is not
        technically counts of entities. If demographics-only data is desired,
        use this:

        >>> from phc.easy.summary.item_counts import SummaryItemCounts
        >>> SummaryItemCounts.get_data_frame(summary="demographics")

        ## Parameters

        include_demographics: When left as `False`, rows whose summary name
        contains "demographic" are removed by `transform_results`.

        Execution: `phc.easy.query.Query.execute_paging_api`
        """

        # NOTE: include_demographics gets passed through to transform_results
        # since explicitly declared there.
        # NOTE(review): the helper below receives this call's frame and
        # locals(); presumably it collects the declared arguments from them,
        # so keep new local variables out of the way of this call -- confirm
        # against _get_current_args.

        df = super().get_data_frame(
            **kw_args, **cls._get_current_args(inspect.currentframe(), locals())
        )

        return df

Ancestors

PagingApiItem

Static methods

def execute_args() ‑> dict

Expand source code

@staticmethod
def execute_args() -> dict:
    """Return extra execution keyword arguments.

    ``ignore_cache`` is always set to ``True``.
    """
    return {"ignore_cache": True}

def get_data_frame(include_demographics: bool = False, all_results: bool = True, auth_args: Auth = <phc.easy.auth.Auth object>, max_pages: Optional[int] = None, page_size: Optional[int] = None, log: bool = False, **kw_args)

Execute a request for summary counts across clinical and omics data

NOTE: By default, demographic data is excluded since it is not technically counts of entities. If demographics-only data is desired, use this:

>>> from phc.easy.summary.item_counts import SummaryItemCounts
>>> SummaryItemCounts.get_data_frame(summary="demographics")

Parameters

Execution: Query.execute_paging_api()

Expand source code

@classmethod
def get_data_frame(
    cls,
    include_demographics: bool = False,
    all_results: bool = True,
    auth_args: Auth = Auth.shared(),
    max_pages: Optional[int] = None,
    page_size: Optional[int] = None,
    log: bool = False,
    **kw_args,
):
    """Execute a request for summary counts across clinical and omics data

    NOTE: By default, demographic data is excluded since it is not
    technically counts of entities. If demographics-only data is desired,
    use this:

    >>> from phc.easy.summary.item_counts import SummaryItemCounts
    >>> SummaryItemCounts.get_data_frame(summary="demographics")

    ## Parameters

    include_demographics: When left as `False`, rows whose summary name
    contains "demographic" are removed by `transform_results`.

    Execution: `phc.easy.query.Query.execute_paging_api`
    """

    # NOTE: include_demographics gets passed through to transform_results
    # since explicitly declared there.
    # NOTE(review): the helper below receives this call's frame and
    # locals(); presumably it collects the declared arguments from them,
    # so keep new local variables out of the way of this call -- confirm
    # against _get_current_args.

    df = super().get_data_frame(
        **kw_args, **cls._get_current_args(inspect.currentframe(), locals())
    )

    return df

def params_class()

Inherited from: PagingApiItem.params_class

Returns a pydantic type that validates and transforms the params with dict()

Expand source code

@staticmethod
def params_class():
    """Return ``NoOptions``, the options class used for the parameters."""
    options_model = NoOptions
    return options_model

def process_params(params: dict) ‑> dict

Inherited from: PagingApiItem.process_params

Validates and transforms the API query parameters

def resource_path()

Inherited from: PagingApiItem.resource_path

Returns the API url name for retrieval

Expand source code

@staticmethod
def resource_path():
    """Return the URL path template of the summary endpoint.

    The ``{project_id}`` placeholder is returned unformatted.
    """
    path_template = "analytics/summary/{project_id}"
    return path_template

def response_to_items(data)

Expand source code

@staticmethod
def response_to_items(data):
    """Flatten a summary response into a list of tagged entries.

    ``data`` is normalized with ``pd.json_normalize`` and only the first
    resulting record is used (an empty mapping when there is none). For
    every key of that record whose value is a list, one dict is produced
    per list entry, with the key stored under ``"summary"``. Keys whose
    value is not a list are skipped.
    """
    records = pd.json_normalize(data).to_dict("records")
    flattened = first(records) or {}
    return [
        {"summary": key, **entry}
        for key, entries in flattened.items()
        if isinstance(entries, list)
        for entry in entries
    ]

def transform_results(data_frame: pandas.core.frame.DataFrame, include_demographics: bool, **expand_args)

Inherited from: PagingApiItem.transform_results

Transform data frame batch

Expand source code

@staticmethod
def transform_results(
    data_frame: pd.DataFrame, include_demographics: bool, **expand_args
):
    """Reshape a batch of summary items into a uniform counts frame.

    Steps, in order:

    1. ``combine_first`` (helper from ``phc.easy.util.frame``) is applied
       three times to fold alternative source columns into ``code``,
       ``count`` and ``display``.
    2. The literal text ``.counts`` is removed from ``summary`` values.
    3. The folded source columns are removed with ``drop``.
    4. Rows are sorted by ``summary`` and ``count``, both descending.
    5. When ``include_demographics`` is exactly ``False``, rows whose
       ``summary`` contains ``demographic`` are removed.

    Steps that need the ``summary`` (or ``count``) column are skipped
    when that column is absent. The returned frame has a fresh index.
    ``expand_args`` is accepted but not used here.
    """
    code_sources = ["code", "index", "demographic_value"]
    count_sources = [
        "code_count",
        "count",
        "sequence_type_count",
        "test_type_count",
        "variant_count",
    ]
    display_sources = ["display", "sequence_type"]
    folded_columns = [
        "index",
        "sequence_type_count",
        "sequence_type",
        "code_count",
        "demographic_value",
        "test_type_count",
        "variant_count",
    ]

    frame = combine_first(data_frame, code_sources, "code")
    frame = combine_first(frame, count_sources, "count")
    frame = combine_first(frame, display_sources, "display")

    if "summary" in frame.columns:
        frame = frame.assign(
            summary=frame.summary.str.replace(".counts", "", regex=False)
        )

    frame = drop(frame, folded_columns)

    if "summary" in frame.columns and "count" in frame.columns:
        frame = frame.sort_values(["summary", "count"], ascending=False)

    if include_demographics is False and "summary" in frame.columns:
        frame = frame[~frame.summary.str.contains("demographic")]

    return frame.reset_index(drop=True)