Module phc.easy.summary.counts
Expand source code
import inspect
from typing import Optional
import pandas as pd
from funcy import first, iffy, lmapcat, rpartial
from phc.easy.abstract.paging_api_item import PagingApiItem, PagingApiOptions
from phc.easy.auth import Auth
from phc.easy.util.frame import combine_first, drop
from toolz import pipe
class NoOptions(PagingApiOptions):
    """Parameter model that declares no fields of its own.

    Everything it does comes from ``PagingApiOptions``; it is the class
    returned by ``SummaryCounts.params_class``.
    """
class SummaryCounts(PagingApiItem):
    """Paging API item for the ``analytics/summary/{project_id}`` endpoint.

    List-valued sections of the response become rows tagged with a
    ``summary`` name, and the per-section columns are folded into shared
    ``code``, ``count`` and ``display`` columns.
    """

    @staticmethod
    def resource_path():
        """Return the API path template for the summary endpoint."""
        return "analytics/summary/{project_id}"

    @staticmethod
    def response_to_items(data):
        """Flatten a summary response into a list of row dictionaries.

        The response is normalized with ``pd.json_normalize`` and only the
        first resulting record is used (an empty dict when there is none).
        Each key of that record whose value is a list contributes one row per
        list element, with the key added under ``"summary"``. Keys whose
        value is not a list contribute nothing.
        """
        records = pd.json_normalize(data).to_dict("records")
        flattened = first(records) or {}

        def rows_for(section):
            entries = flattened[section]
            if not isinstance(entries, list):
                return []
            return [{"summary": section, **entry} for entry in entries]

        return lmapcat(rows_for, flattened.keys())

    @staticmethod
    def execute_args() -> dict:
        """Return extra execution arguments: caching is always bypassed."""
        return {"ignore_cache": True}

    @staticmethod
    def params_class():
        """Return the parameter model class for this endpoint."""
        return NoOptions

    @staticmethod
    def transform_results(
        data_frame: pd.DataFrame, include_demographics: bool, **expand_args
    ):
        """Normalize a batch of summary rows into shared columns.

        Steps, applied in order:

        1. ``combine_first`` builds ``code``, ``count`` and ``display`` from
           their per-section source columns (presumably by coalescing them;
           see ``phc.easy.util.frame.combine_first``).
        2. The literal text ``".counts"`` is removed from ``summary`` values.
        3. The source-only columns are dropped.
        4. Rows are sorted by ``summary`` then ``count``, both descending.
        5. When ``include_demographics`` is exactly ``False``, rows whose
           ``summary`` contains ``"demographic"`` are removed.

        Steps that need ``summary`` (or ``count``) are skipped when the column
        is absent. The index is reset before returning. ``expand_args`` is
        accepted but not used here.
        """
        code_sources = ["code", "index", "demographic_value"]
        count_sources = [
            "code_count",
            "count",
            "sequence_type_count",
            "test_type_count",
            "variant_count",
        ]
        display_sources = ["display", "sequence_type"]
        redundant_columns = [
            "index",
            "sequence_type_count",
            "sequence_type",
            "code_count",
            "demographic_value",
            "test_type_count",
            "variant_count",
        ]

        def has_summary(df):
            return "summary" in df.columns

        def strip_counts_suffix(df):
            cleaned = df["summary"].str.replace(".counts", "", regex=False)
            return df.assign(summary=cleaned)

        def is_sortable(df):
            return "summary" in df.columns and "count" in df.columns

        def sort_descending(df):
            return df.sort_values(["summary", "count"], ascending=False)

        def should_hide_demographics(df):
            return include_demographics is False and "summary" in df.columns

        def without_demographics(df):
            is_demographic = df["summary"].str.contains("demographic")
            return df[~is_demographic]

        steps = (
            rpartial(combine_first, code_sources, "code"),
            rpartial(combine_first, count_sources, "count"),
            rpartial(combine_first, display_sources, "display"),
            iffy(has_summary, strip_counts_suffix),
            rpartial(drop, redundant_columns),
            iffy(is_sortable, sort_descending),
            iffy(should_hide_demographics, without_demographics),
        )
        return pipe(data_frame, *steps).reset_index(drop=True)

    @classmethod
    def get_data_frame(
        cls,
        include_demographics: bool = False,
        all_results: bool = True,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for summary counts across clinical and omics data

        NOTE: By default, demographic data is excluded since it is not
        technically counts of entities. If demographics-only data is desired,
        use this:

        >>> from phc.easy.summary.item_counts import SummaryItemCounts
        >>> SummaryItemCounts.get_data_frame(summary="demographics")

        ## Parameters

        Execution: `phc.easy.query.Query.execute_paging_api`
        """
        # NOTE: include_demographics gets passed through to transform_results
        # since explicitly declared there.
        # NOTE(review): the call reads this frame and locals(), so adding
        # local variables above it could change what is forwarded - confirm
        # against _get_current_args before restructuring.
        df = super().get_data_frame(
            **kw_args, **cls._get_current_args(inspect.currentframe(), locals())
        )
        return df
Classes
class NoOptions (**data: Any)
-
Create a new model by parsing and validating input data from keyword arguments.
Raises ValidationError if the input data cannot be parsed to form a valid model.
Expand source code
class NoOptions(PagingApiOptions):
    """Parameter model that declares no fields of its own.

    Everything it does comes from ``PagingApiOptions``.
    """
Ancestors
- PagingApiOptions
- pydantic.main.BaseModel
- pydantic.utils.Representation
Methods
def dict(self)
-
Inherited from:
PagingApiOptions
.dict
Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
class SummaryCounts
-
Expand source code
class SummaryCounts(PagingApiItem):
    """Paging API item for the ``analytics/summary/{project_id}`` endpoint.

    List-valued sections of the response become rows tagged with a
    ``summary`` name, and the per-section columns are folded into shared
    ``code``, ``count`` and ``display`` columns.
    """

    @staticmethod
    def resource_path():
        """Return the API path template for the summary endpoint."""
        return "analytics/summary/{project_id}"

    @staticmethod
    def response_to_items(data):
        """Flatten a summary response into a list of row dictionaries.

        The response is normalized with ``pd.json_normalize`` and only the
        first resulting record is used (an empty dict when there is none).
        Each key of that record whose value is a list contributes one row per
        list element, with the key added under ``"summary"``. Keys whose
        value is not a list contribute nothing.
        """
        records = pd.json_normalize(data).to_dict("records")
        flattened = first(records) or {}

        def rows_for(section):
            entries = flattened[section]
            if not isinstance(entries, list):
                return []
            return [{"summary": section, **entry} for entry in entries]

        return lmapcat(rows_for, flattened.keys())

    @staticmethod
    def execute_args() -> dict:
        """Return extra execution arguments: caching is always bypassed."""
        return {"ignore_cache": True}

    @staticmethod
    def params_class():
        """Return the parameter model class for this endpoint."""
        return NoOptions

    @staticmethod
    def transform_results(
        data_frame: pd.DataFrame, include_demographics: bool, **expand_args
    ):
        """Normalize a batch of summary rows into shared columns.

        Steps, applied in order:

        1. ``combine_first`` builds ``code``, ``count`` and ``display`` from
           their per-section source columns (presumably by coalescing them;
           see ``phc.easy.util.frame.combine_first``).
        2. The literal text ``".counts"`` is removed from ``summary`` values.
        3. The source-only columns are dropped.
        4. Rows are sorted by ``summary`` then ``count``, both descending.
        5. When ``include_demographics`` is exactly ``False``, rows whose
           ``summary`` contains ``"demographic"`` are removed.

        Steps that need ``summary`` (or ``count``) are skipped when the column
        is absent. The index is reset before returning. ``expand_args`` is
        accepted but not used here.
        """
        code_sources = ["code", "index", "demographic_value"]
        count_sources = [
            "code_count",
            "count",
            "sequence_type_count",
            "test_type_count",
            "variant_count",
        ]
        display_sources = ["display", "sequence_type"]
        redundant_columns = [
            "index",
            "sequence_type_count",
            "sequence_type",
            "code_count",
            "demographic_value",
            "test_type_count",
            "variant_count",
        ]

        def has_summary(df):
            return "summary" in df.columns

        def strip_counts_suffix(df):
            cleaned = df["summary"].str.replace(".counts", "", regex=False)
            return df.assign(summary=cleaned)

        def is_sortable(df):
            return "summary" in df.columns and "count" in df.columns

        def sort_descending(df):
            return df.sort_values(["summary", "count"], ascending=False)

        def should_hide_demographics(df):
            return include_demographics is False and "summary" in df.columns

        def without_demographics(df):
            is_demographic = df["summary"].str.contains("demographic")
            return df[~is_demographic]

        steps = (
            rpartial(combine_first, code_sources, "code"),
            rpartial(combine_first, count_sources, "count"),
            rpartial(combine_first, display_sources, "display"),
            iffy(has_summary, strip_counts_suffix),
            rpartial(drop, redundant_columns),
            iffy(is_sortable, sort_descending),
            iffy(should_hide_demographics, without_demographics),
        )
        return pipe(data_frame, *steps).reset_index(drop=True)

    @classmethod
    def get_data_frame(
        cls,
        include_demographics: bool = False,
        all_results: bool = True,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for summary counts across clinical and omics data

        NOTE: By default, demographic data is excluded since it is not
        technically counts of entities. If demographics-only data is desired,
        use this:

        >>> from phc.easy.summary.item_counts import SummaryItemCounts
        >>> SummaryItemCounts.get_data_frame(summary="demographics")

        ## Parameters

        Execution: `phc.easy.query.Query.execute_paging_api`
        """
        # NOTE: include_demographics gets passed through to transform_results
        # since explicitly declared there.
        # NOTE(review): the call reads this frame and locals(), so adding
        # local variables above it could change what is forwarded - confirm
        # against _get_current_args before restructuring.
        df = super().get_data_frame(
            **kw_args, **cls._get_current_args(inspect.currentframe(), locals())
        )
        return df
Ancestors
Static methods
def execute_args() ‑> dict
-
Expand source code
@staticmethod
def execute_args() -> dict:
    """Return extra execution arguments: caching is always bypassed.

    The snippet was collapsed onto a single line (decorator, ``def`` and body
    together), which is not valid Python; it is restored to its normal layout
    here and uses a dict literal.
    """
    return {"ignore_cache": True}
def get_data_frame(include_demographics: bool = False, all_results: bool = True, auth_args: Auth = <phc.easy.auth.Auth object>, max_pages: Optional[int] = None, page_size: Optional[int] = None, log: bool = False, **kw_args)
-
Execute a request for summary counts across clinical and omics data
NOTE: By default, demographic data is excluded since it is not technically counts of entities. If demographics-only data is desired, use this:
>>> from phc.easy.summary.item_counts import SummaryItemCounts
>>> SummaryItemCounts.get_data_frame(summary="demographics")
Parameters
Execution:
Query.execute_paging_api()
Expand source code
@classmethod
def get_data_frame(
    cls,
    include_demographics: bool = False,
    all_results: bool = True,
    auth_args: Auth = Auth.shared(),
    max_pages: Optional[int] = None,
    page_size: Optional[int] = None,
    log: bool = False,
    **kw_args,
):
    """Execute a request for summary counts across clinical and omics data

    NOTE: By default, demographic data is excluded since it is not
    technically counts of entities. If demographics-only data is desired,
    use this:

    >>> from phc.easy.summary.item_counts import SummaryItemCounts
    >>> SummaryItemCounts.get_data_frame(summary="demographics")

    ## Parameters

    Execution: `phc.easy.query.Query.execute_paging_api`
    """
    # NOTE: include_demographics gets passed through to transform_results
    # since explicitly declared there.
    # NOTE(review): the call reads this frame and locals(), so adding local
    # variables above it could change what is forwarded - confirm against
    # _get_current_args before restructuring.
    df = super().get_data_frame(
        **kw_args, **cls._get_current_args(inspect.currentframe(), locals())
    )
    return df
def params_class()
-
Inherited from:
PagingApiItem
.params_class
Returns a pydantic type that validates and transforms the params with dict()
Expand source code
@staticmethod
def params_class():
    """Return the parameter model class for this endpoint (``NoOptions``)."""
    return NoOptions
def process_params(params: dict) ‑> dict
-
Inherited from:
PagingApiItem
.process_params
Validates and transforms the API query parameters
def resource_path()
-
Inherited from:
PagingApiItem
.resource_path
Returns the API url name for retrieval
Expand source code
@staticmethod
def resource_path():
    """Return the API path template for the summary endpoint.

    The ``{project_id}`` placeholder is returned verbatim; this function does
    no substitution. The snippet was collapsed onto a single line, which is
    not valid Python; it is restored to its normal layout here.
    """
    return "analytics/summary/{project_id}"
def response_to_items(data)
-
Expand source code
@staticmethod
def response_to_items(data):
    """Flatten a summary response into a list of row dictionaries.

    The response is normalized with ``pd.json_normalize`` and only the first
    resulting record is used (an empty dict when there is none). Each key of
    that record whose value is a list contributes one row per list element,
    with the key added under ``"summary"``. Keys whose value is not a list
    contribute nothing.
    """
    records = pd.json_normalize(data).to_dict("records")
    flattened = first(records) or {}

    def rows_for(section):
        entries = flattened[section]
        if not isinstance(entries, list):
            return []
        return [{"summary": section, **entry} for entry in entries]

    return lmapcat(rows_for, flattened.keys())
def transform_results(data_frame: pandas.core.frame.DataFrame, include_demographics: bool, **expand_args)
-
Inherited from:
PagingApiItem
.transform_results
Transform data frame batch
Expand source code
@staticmethod
def transform_results(
    data_frame: pd.DataFrame, include_demographics: bool, **expand_args
):
    """Normalize a batch of summary rows into shared columns.

    Steps, applied in order:

    1. ``combine_first`` builds ``code``, ``count`` and ``display`` from
       their per-section source columns (presumably by coalescing them; see
       ``phc.easy.util.frame.combine_first``).
    2. The literal text ``".counts"`` is removed from ``summary`` values.
    3. The source-only columns are dropped.
    4. Rows are sorted by ``summary`` then ``count``, both descending.
    5. When ``include_demographics`` is exactly ``False``, rows whose
       ``summary`` contains ``"demographic"`` are removed.

    Steps that need ``summary`` (or ``count``) are skipped when the column is
    absent. The index is reset before returning. ``expand_args`` is accepted
    but not used here.
    """
    code_sources = ["code", "index", "demographic_value"]
    count_sources = [
        "code_count",
        "count",
        "sequence_type_count",
        "test_type_count",
        "variant_count",
    ]
    display_sources = ["display", "sequence_type"]
    redundant_columns = [
        "index",
        "sequence_type_count",
        "sequence_type",
        "code_count",
        "demographic_value",
        "test_type_count",
        "variant_count",
    ]

    def has_summary(df):
        return "summary" in df.columns

    def strip_counts_suffix(df):
        cleaned = df["summary"].str.replace(".counts", "", regex=False)
        return df.assign(summary=cleaned)

    def is_sortable(df):
        return "summary" in df.columns and "count" in df.columns

    def sort_descending(df):
        return df.sort_values(["summary", "count"], ascending=False)

    def should_hide_demographics(df):
        return include_demographics is False and "summary" in df.columns

    def without_demographics(df):
        is_demographic = df["summary"].str.contains("demographic")
        return df[~is_demographic]

    steps = (
        rpartial(combine_first, code_sources, "code"),
        rpartial(combine_first, count_sources, "count"),
        rpartial(combine_first, display_sources, "display"),
        iffy(has_summary, strip_counts_suffix),
        rpartial(drop, redundant_columns),
        iffy(is_sortable, sort_descending),
        iffy(should_hide_demographics, without_demographics),
    )
    return pipe(data_frame, *steps).reset_index(drop=True)