Module phc.easy.summary.counts

Expand source code
import inspect
from typing import Optional

import pandas as pd
from funcy import first, iffy, lmapcat, rpartial
from phc.easy.abstract.paging_api_item import PagingApiItem, PagingApiOptions
from phc.easy.auth import Auth
from phc.easy.util.frame import combine_first, drop
from toolz import pipe


class NoOptions(PagingApiOptions):
    """Options model that adds no fields of its own to PagingApiOptions.

    Returned by ``SummaryCounts.params_class``.
    """


class SummaryCounts(PagingApiItem):
    """Paging API item for the project-level analytics summary counts.

    The response is flattened to one row per listed entry (tagged with the
    name of the list it came from) and then tidied by `transform_results`.
    """

    @staticmethod
    def resource_path():
        """Return the API path template; ``{project_id}`` is left unfilled."""
        return "analytics/summary/{project_id}"

    @staticmethod
    def response_to_items(data):
        """Flatten a summary response into a list of row dictionaries.

        The response is normalized with `pd.json_normalize` and only its
        first record is used. Every field of that record holding a list
        contributes one dictionary per list entry, with the field name stored
        under ``"summary"``; fields that are not lists are ignored.
        """
        records = pd.json_normalize(data).to_dict("records")
        squashed = records[0] if records else {}

        items = []
        for name, entries in squashed.items():
            if not isinstance(entries, list):
                continue
            # Keys of the entry itself come last, so they win over "summary".
            items.extend({"summary": name, **entry} for entry in entries)
        return items

    @staticmethod
    def execute_args() -> dict:
        """Return the fixed execution arguments: ``ignore_cache=True``."""
        return {"ignore_cache": True}

    @staticmethod
    def params_class():
        """Return the options model used for this item (`NoOptions`)."""
        return NoOptions

    @staticmethod
    def transform_results(
        data_frame: pd.DataFrame, include_demographics: bool, **expand_args
    ):
        """Tidy a batch of flattened summary rows.

        ## Parameters

        data_frame: rows as produced by `response_to_items`.

        include_demographics: when exactly ``False``, rows whose summary
        contains "demographic" are removed.

        expand_args: accepted but not used here.

        Returns the resulting frame with a fresh 0..n-1 index.
        """
        code_sources = ["code", "index", "demographic_value"]
        count_sources = [
            "code_count",
            "count",
            "sequence_type_count",
            "test_type_count",
            "variant_count",
        ]
        display_sources = ["display", "sequence_type"]
        obsolete_columns = [
            "index",
            "sequence_type_count",
            "sequence_type",
            "code_count",
            "demographic_value",
            "test_type_count",
            "variant_count",
        ]

        # NOTE(review): combine_first presumably coalesces the listed columns
        # into the named target column - confirm in phc.easy.util.frame.
        frame = combine_first(data_frame, code_sources, "code")
        frame = combine_first(frame, count_sources, "count")
        frame = combine_first(frame, display_sources, "display")

        if "summary" in frame.columns:
            # Literal (non-regex) removal of the ".counts" text.
            frame = frame.assign(
                summary=frame.summary.str.replace(".counts", "", regex=False)
            )

        frame = drop(frame, obsolete_columns)

        if "summary" in frame.columns and "count" in frame.columns:
            # Both keys are sorted in descending order.
            frame = frame.sort_values(["summary", "count"], ascending=False)

        # Identity check on purpose: only a literal False triggers the filter.
        if include_demographics is False and "summary" in frame.columns:
            frame = frame[~frame.summary.str.contains("demographic")]

        return frame.reset_index(drop=True)

    @classmethod
    def get_data_frame(
        cls,
        include_demographics: bool = False,
        all_results: bool = True,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for summary counts across clinical and omics data

        NOTE: By default, demographic data is excluded since it does not
        technically represent counts of entities. If demographics-only data
        is desired, use this:

        >>> from phc.easy.summary.item_counts import SummaryItemCounts
        >>> SummaryItemCounts.get_data_frame(summary="demographics")

        ## Parameters

        Execution: `phc.easy.query.Query.execute_paging_api`
        """

        # NOTE: include_demographics gets passed through to transform_results
        # since explicitly declared there.

        # The current frame and locals() are handed to the inherited
        # _get_current_args helper, so this statement is kept exactly as is.
        # NOTE(review): confirm how the helper uses them before introducing
        # any local variable above this call.
        df = super().get_data_frame(
            **kw_args, **cls._get_current_args(inspect.currentframe(), locals())
        )

        return df

Classes

class NoOptions (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

Expand source code
class NoOptions(PagingApiOptions):
    """Options model that adds no fields of its own to PagingApiOptions.

    Returned by ``SummaryCounts.params_class``.
    """

Ancestors

Methods

def dict(self)

Inherited from: PagingApiOptions.dict

Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.

class SummaryCounts
Expand source code
class SummaryCounts(PagingApiItem):
    """Paging API item for the project-level analytics summary counts.

    The response is flattened to one row per listed entry (tagged with the
    name of the list it came from) and then tidied by `transform_results`.
    """

    @staticmethod
    def resource_path():
        """Return the API path template; ``{project_id}`` is left unfilled."""
        return "analytics/summary/{project_id}"

    @staticmethod
    def response_to_items(data):
        """Flatten a summary response into a list of row dictionaries.

        The response is normalized with `pd.json_normalize` and only its
        first record is used. Every field of that record holding a list
        contributes one dictionary per list entry, with the field name stored
        under ``"summary"``; fields that are not lists are ignored.
        """
        records = pd.json_normalize(data).to_dict("records")
        squashed = records[0] if records else {}

        items = []
        for name, entries in squashed.items():
            if not isinstance(entries, list):
                continue
            # Keys of the entry itself come last, so they win over "summary".
            items.extend({"summary": name, **entry} for entry in entries)
        return items

    @staticmethod
    def execute_args() -> dict:
        """Return the fixed execution arguments: ``ignore_cache=True``."""
        return {"ignore_cache": True}

    @staticmethod
    def params_class():
        """Return the options model used for this item (`NoOptions`)."""
        return NoOptions

    @staticmethod
    def transform_results(
        data_frame: pd.DataFrame, include_demographics: bool, **expand_args
    ):
        """Tidy a batch of flattened summary rows.

        ## Parameters

        data_frame: rows as produced by `response_to_items`.

        include_demographics: when exactly ``False``, rows whose summary
        contains "demographic" are removed.

        expand_args: accepted but not used here.

        Returns the resulting frame with a fresh 0..n-1 index.
        """
        code_sources = ["code", "index", "demographic_value"]
        count_sources = [
            "code_count",
            "count",
            "sequence_type_count",
            "test_type_count",
            "variant_count",
        ]
        display_sources = ["display", "sequence_type"]
        obsolete_columns = [
            "index",
            "sequence_type_count",
            "sequence_type",
            "code_count",
            "demographic_value",
            "test_type_count",
            "variant_count",
        ]

        # NOTE(review): combine_first presumably coalesces the listed columns
        # into the named target column - confirm in phc.easy.util.frame.
        frame = combine_first(data_frame, code_sources, "code")
        frame = combine_first(frame, count_sources, "count")
        frame = combine_first(frame, display_sources, "display")

        if "summary" in frame.columns:
            # Literal (non-regex) removal of the ".counts" text.
            frame = frame.assign(
                summary=frame.summary.str.replace(".counts", "", regex=False)
            )

        frame = drop(frame, obsolete_columns)

        if "summary" in frame.columns and "count" in frame.columns:
            # Both keys are sorted in descending order.
            frame = frame.sort_values(["summary", "count"], ascending=False)

        # Identity check on purpose: only a literal False triggers the filter.
        if include_demographics is False and "summary" in frame.columns:
            frame = frame[~frame.summary.str.contains("demographic")]

        return frame.reset_index(drop=True)

    @classmethod
    def get_data_frame(
        cls,
        include_demographics: bool = False,
        all_results: bool = True,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for summary counts across clinical and omics data

        NOTE: By default, demographic data is excluded since it does not
        technically represent counts of entities. If demographics-only data
        is desired, use this:

        >>> from phc.easy.summary.item_counts import SummaryItemCounts
        >>> SummaryItemCounts.get_data_frame(summary="demographics")

        ## Parameters

        Execution: `phc.easy.query.Query.execute_paging_api`
        """

        # NOTE: include_demographics gets passed through to transform_results
        # since explicitly declared there.

        # The current frame and locals() are handed to the inherited
        # _get_current_args helper, so this statement is kept exactly as is.
        # NOTE(review): confirm how the helper uses them before introducing
        # any local variable above this call.
        df = super().get_data_frame(
            **kw_args, **cls._get_current_args(inspect.currentframe(), locals())
        )

        return df

Ancestors

Static methods

def execute_args() ‑> dict
Expand source code
@staticmethod
def execute_args() -> dict:
    """Return the fixed execution arguments: ``ignore_cache=True``."""
    return {"ignore_cache": True}
def get_data_frame(include_demographics: bool = False, all_results: bool = True, auth_args: Auth = <phc.easy.auth.Auth object>, max_pages: Optional[int] = None, page_size: Optional[int] = None, log: bool = False, **kw_args)

Execute a request for summary counts across clinical and omics data

NOTE: By default, demographic data is excluded since it does not technically represent counts of entities. If demographics-only data is desired, use this:

>>> from phc.easy.summary.item_counts import SummaryItemCounts
>>> SummaryItemCounts.get_data_frame(summary="demographics")

Parameters

Execution: Query.execute_paging_api()

Expand source code
@classmethod
def get_data_frame(
    cls,
    include_demographics: bool = False,
    all_results: bool = True,
    auth_args: Auth = Auth.shared(),
    max_pages: Optional[int] = None,
    page_size: Optional[int] = None,
    log: bool = False,
    **kw_args,
):
    """Execute a request for summary counts across clinical and omics data

    NOTE: By default, demographic data is excluded since it does not
    technically represent counts of entities. If demographics-only data is
    desired, use this:

    >>> from phc.easy.summary.item_counts import SummaryItemCounts
    >>> SummaryItemCounts.get_data_frame(summary="demographics")

    ## Parameters

    Execution: `phc.easy.query.Query.execute_paging_api`
    """

    # NOTE: include_demographics gets passed through to transform_results
    # since explicitly declared there.

    # The current frame and locals() are handed to the inherited
    # _get_current_args helper, and its result is splatted next to kw_args
    # into the parent implementation.
    # NOTE(review): presumably the helper rebuilds this method's named
    # arguments from them - confirm before introducing any local variable
    # above this call, since it would show up in locals().
    df = super().get_data_frame(
        **kw_args, **cls._get_current_args(inspect.currentframe(), locals())
    )

    return df
def params_class()

Inherited from: PagingApiItem.params_class

Returns a pydantic type that validates and transforms the params with dict()

Expand source code
@staticmethod
def params_class():
    """Return the options model used for this item (`NoOptions`)."""
    return NoOptions
def process_params(params: dict) ‑> dict

Inherited from: PagingApiItem.process_params

Validates and transforms the API query parameters

def resource_path()

Inherited from: PagingApiItem.resource_path

Returns the API url name for retrieval

Expand source code
@staticmethod
def resource_path():
    """Return the API path template; ``{project_id}`` is left unfilled."""
    return "analytics/summary/{project_id}"
def response_to_items(data)
Expand source code
@staticmethod
def response_to_items(data):
    """Flatten a summary response into a list of row dictionaries.

    The response is normalized with `pd.json_normalize` and only its first
    record is used. Every field of that record holding a list contributes
    one dictionary per list entry, with the field name stored under
    ``"summary"``; fields that are not lists are ignored.
    """
    records = pd.json_normalize(data).to_dict("records")
    squashed = records[0] if records else {}

    items = []
    for name, entries in squashed.items():
        if not isinstance(entries, list):
            continue
        # Keys of the entry itself come last, so they win over "summary".
        items.extend({"summary": name, **entry} for entry in entries)
    return items
def transform_results(data_frame: pandas.core.frame.DataFrame, include_demographics: bool, **expand_args)

Inherited from: PagingApiItem.transform_results

Transform data frame batch

Expand source code
@staticmethod
def transform_results(
    data_frame: pd.DataFrame, include_demographics: bool, **expand_args
):
    """Tidy a batch of flattened summary rows.

    ## Parameters

    data_frame: rows carrying a ``summary`` column plus per-summary columns.

    include_demographics: when exactly ``False``, rows whose summary
    contains "demographic" are removed.

    expand_args: accepted but not used here.

    Returns the resulting frame with a fresh 0..n-1 index.
    """
    code_sources = ["code", "index", "demographic_value"]
    count_sources = [
        "code_count",
        "count",
        "sequence_type_count",
        "test_type_count",
        "variant_count",
    ]
    display_sources = ["display", "sequence_type"]
    obsolete_columns = [
        "index",
        "sequence_type_count",
        "sequence_type",
        "code_count",
        "demographic_value",
        "test_type_count",
        "variant_count",
    ]

    # NOTE(review): combine_first presumably coalesces the listed columns
    # into the named target column - confirm in phc.easy.util.frame.
    frame = combine_first(data_frame, code_sources, "code")
    frame = combine_first(frame, count_sources, "count")
    frame = combine_first(frame, display_sources, "display")

    if "summary" in frame.columns:
        # Literal (non-regex) removal of the ".counts" text.
        frame = frame.assign(
            summary=frame.summary.str.replace(".counts", "", regex=False)
        )

    frame = drop(frame, obsolete_columns)

    if "summary" in frame.columns and "count" in frame.columns:
        # Both keys are sorted in descending order.
        frame = frame.sort_values(["summary", "count"], ascending=False)

    # Identity check on purpose: only a literal False triggers the filter.
    if include_demographics is False and "summary" in frame.columns:
        frame = frame[~frame.summary.str.contains("demographic")]

    return frame.reset_index(drop=True)