Module phc.easy.summary.counts
Expand source code
import inspect
from typing import Optional
import pandas as pd
from funcy import first, iffy, lmapcat, rpartial
from phc.easy.abstract.paging_api_item import PagingApiItem, PagingApiOptions
from phc.easy.auth import Auth
from phc.easy.util.frame import combine_first, drop
from toolz import pipe
class NoOptions(PagingApiOptions):
    """Parameter model that declares no fields of its own.

    Used by endpoints (such as summary counts) that accept no query options.
    """
class SummaryCounts(PagingApiItem):
    """Fetch summary counts across clinical and omics data for a project.

    Pages the analytics summary endpoint and flattens its nested response
    into one row per counted entity, tagged with the summary it came from.
    """

    @staticmethod
    def resource_path():
        """Return the API url name for retrieval."""
        return "analytics/summary/{project_id}"

    @staticmethod
    def response_to_items(data):
        """Flatten the nested summary payload into a list of row dicts.

        The response normalizes to a single record whose values are lists of
        count entries; each entry gets tagged with the summary key it came
        from. Non-list values in the record are skipped.
        """
        records = pd.json_normalize(data).to_dict("records")
        flattened = first(records) or {}

        items = []
        for summary_key, entries in flattened.items():
            if isinstance(entries, list):
                items.extend(
                    {"summary": summary_key, **entry} for entry in entries
                )
        return items

    @staticmethod
    def execute_args() -> dict:
        """Extra execution arguments: always bypass the cache."""
        return {"ignore_cache": True}

    @staticmethod
    def params_class():
        """Summary counts take no query parameters."""
        return NoOptions

    @staticmethod
    def transform_results(
        data_frame: pd.DataFrame, include_demographics: bool, **expand_args
    ):
        """Consolidate overlapping columns and tidy the combined frame.

        Columns that mean the same thing across different summaries are
        coalesced into `code`, `count`, and `display`; the source-specific
        columns are then dropped, rows are sorted, and demographic rows are
        filtered out unless `include_demographics` is set.
        """
        frame = combine_first(
            data_frame, ["code", "index", "demographic_value"], "code"
        )
        frame = combine_first(
            frame,
            [
                "code_count",
                "count",
                "sequence_type_count",
                "test_type_count",
                "variant_count",
            ],
            "count",
        )
        frame = combine_first(frame, ["display", "sequence_type"], "display")

        if "summary" in frame.columns:
            # Strip the ".counts" suffix from summary names (plain substring,
            # not a regex)
            frame = frame.assign(
                summary=frame.summary.str.replace(".counts", "", regex=False)
            )

        frame = drop(
            frame,
            [
                "index",
                "sequence_type_count",
                "sequence_type",
                "code_count",
                "demographic_value",
                "test_type_count",
                "variant_count",
            ],
        )

        if "summary" in frame.columns and "count" in frame.columns:
            frame = frame.sort_values(["summary", "count"], ascending=False)

        if include_demographics is False and "summary" in frame.columns:
            frame = frame[~frame.summary.str.contains("demographic")]

        return frame.reset_index(drop=True)

    @classmethod
    def get_data_frame(
        cls,
        include_demographics: bool = False,
        all_results: bool = True,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for summary counts across clinical and omics data

        NOTE: By default, demographic data is excluded since it is not
        technically counts of entities. If demographics-only data is desired,
        use this:

        >>> from phc.easy.summary.item_counts import SummaryItemCounts
        >>> SummaryItemCounts.get_data_frame(summary="demographics")

        ## Parameters

        Execution: `phc.easy.query.Query.execute_paging_api`
        """
        # NOTE: include_demographics is forwarded to transform_results because
        # it is explicitly declared in that method's signature. locals() here
        # contains only this method's parameters at the moment of the call.
        return super().get_data_frame(
            **kw_args,
            **cls._get_current_args(inspect.currentframe(), locals()),
        )
Classes
class NoOptions (**data: Any)-
Usage docs: https://docs.pydantic.dev/2.10/concepts/models/
A base class for creating Pydantic models.
Attributes
__class_vars__- The names of the class variables defined on the model.
__private_attributes__- Metadata about the private attributes of the model.
__signature__ - The synthesized `inspect.Signature` of the model's `__init__`.
__pydantic_complete__ - Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__- The core schema of the model.
__pydantic_custom_init__ - Whether the model has a custom `__init__` function.
__pydantic_decorators__ - Metadata containing the decorators defined on the model. This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__ - Metadata for generic models; contains data used for a similar purpose to `__args__`, `__origin__`, and `__parameters__` in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__- Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__- The name of the post-init method for the model, if defined.
__pydantic_root_model__ - Whether the model is a [`RootModel`][pydantic.root_model.RootModel].
__pydantic_serializer__ - The pydantic-core `SchemaSerializer` used to dump instances of the model.
__pydantic_validator__ - The pydantic-core `SchemaValidator` used to validate instances of the model.
__pydantic_fields__ - A dictionary of field names and their corresponding [`FieldInfo`][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__ - A dictionary of computed field names and their corresponding [`ComputedFieldInfo`][pydantic.fields.ComputedFieldInfo] objects.
__pydantic_extra__ - A dictionary containing extra values, if [`extra`][pydantic.config.ConfigDict.extra] is set to `'allow'`.
__pydantic_fields_set__ - The names of fields explicitly set during instantiation.
__pydantic_private__- Values of private attributes set on the model instance.
Create a new model by parsing and validating input data from keyword arguments.
Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.
`self` is explicitly positional-only to allow `self` as a field name.
Expand source code
class NoOptions(PagingApiOptions):
    pass
Ancestors
- PagingApiOptions
- pydantic.main.BaseModel
Class variables
var model_config
Methods
def model_dump(self)-
Inherited from:
PagingApiOptions.model_dump
class SummaryCounts-
Expand source code
class SummaryCounts(PagingApiItem): @staticmethod def resource_path(): return "analytics/summary/{project_id}" @staticmethod def response_to_items(data): squashed = first(pd.json_normalize(data).to_dict("records")) or {} return lmapcat( lambda k: ( [{"summary": k, **v} for v in squashed[k]] if isinstance(squashed[k], list) else [] ), squashed.keys(), ) @staticmethod def execute_args() -> dict: return dict(ignore_cache=True) @staticmethod def params_class(): return NoOptions @staticmethod def transform_results( data_frame: pd.DataFrame, include_demographics: bool, **expand_args ): return pipe( data_frame, rpartial( combine_first, ["code", "index", "demographic_value"], "code" ), rpartial( combine_first, [ "code_count", "count", "sequence_type_count", "test_type_count", "variant_count", ], "count", ), rpartial(combine_first, ["display", "sequence_type"], "display"), iffy( lambda df: "summary" in df.columns, lambda df: df.assign( summary=df.summary.str.replace(".counts", "", regex=False) ), ), rpartial( drop, [ "index", "sequence_type_count", "sequence_type", "code_count", "demographic_value", "test_type_count", "variant_count", ], ), iffy( lambda df: "summary" in df.columns and "count" in df.columns, lambda df: df.sort_values( ["summary", "count"], ascending=False ), ), iffy( lambda df: include_demographics is False and "summary" in df.columns, lambda df: df[~df.summary.str.contains("demographic")], ), ).reset_index(drop=True) @classmethod def get_data_frame( cls, include_demographics: bool = False, all_results: bool = True, auth_args: Auth = Auth.shared(), max_pages: Optional[int] = None, page_size: Optional[int] = None, log: bool = False, **kw_args, ): """Execute a request for summary counts across clinical and omics data NOTE: By default, demographic data is excluded since it is not technically counts of entities. 
If demographics-only data is desired, use this: >>> from phc.easy.summary.item_counts import SummaryItemCounts >>> SummaryItemCounts.get_data_frame(summary="demographics") ## Parameters Execution: `phc.easy.query.Query.execute_paging_api` """ # NOTE: include_demographics gets passed through to transform_results # since explicitly declared there. df = super().get_data_frame( **kw_args, **cls._get_current_args(inspect.currentframe(), locals()) ) return dfAncestors
Static methods
def execute_args() ‑> dict-
Expand source code
@staticmethod def execute_args() -> dict: return dict(ignore_cache=True) def get_data_frame(include_demographics: bool = False, all_results: bool = True, auth_args: Auth = <phc.easy.auth.Auth object>, max_pages: Optional[int] = None, page_size: Optional[int] = None, log: bool = False, **kw_args)-
Execute a request for summary counts across clinical and omics data
NOTE: By default, demographic data is excluded since it is not technically counts of entities. If demographics-only data is desired, use this:
>>> from phc.easy.summary.item_counts import SummaryItemCounts >>> SummaryItemCounts.get_data_frame(summary="demographics")Parameters
Execution:
Query.execute_paging_api()Expand source code
@classmethod def get_data_frame( cls, include_demographics: bool = False, all_results: bool = True, auth_args: Auth = Auth.shared(), max_pages: Optional[int] = None, page_size: Optional[int] = None, log: bool = False, **kw_args, ): """Execute a request for summary counts across clinical and omics data NOTE: By default, demographic data is excluded since it is not technically counts of entities. If demographics-only data is desired, use this: >>> from phc.easy.summary.item_counts import SummaryItemCounts >>> SummaryItemCounts.get_data_frame(summary="demographics") ## Parameters Execution: `phc.easy.query.Query.execute_paging_api` """ # NOTE: include_demographics gets passed through to transform_results # since explicitly declared there. df = super().get_data_frame( **kw_args, **cls._get_current_args(inspect.currentframe(), locals()) ) return df def params_class()-
Inherited from:
PagingApiItem.params_classReturns a pydantic type that validates and transforms the params with dict()
Expand source code
@staticmethod def params_class(): return NoOptions def process_params(params: dict) ‑> dict-
Inherited from:
PagingApiItem.process_paramsValidates and transforms the API query parameters
def resource_path()-
Inherited from:
PagingApiItem.resource_pathReturns the API url name for retrieval
Expand source code
@staticmethod def resource_path(): return "analytics/summary/{project_id}" def response_to_items(data)-
Expand source code
@staticmethod def response_to_items(data): squashed = first(pd.json_normalize(data).to_dict("records")) or {} return lmapcat( lambda k: ( [{"summary": k, **v} for v in squashed[k]] if isinstance(squashed[k], list) else [] ), squashed.keys(), ) def transform_results(data_frame: pandas.core.frame.DataFrame, include_demographics: bool, **expand_args)-
Inherited from:
PagingApiItem.transform_resultsTransform data frame batch
Expand source code
@staticmethod def transform_results( data_frame: pd.DataFrame, include_demographics: bool, **expand_args ): return pipe( data_frame, rpartial( combine_first, ["code", "index", "demographic_value"], "code" ), rpartial( combine_first, [ "code_count", "count", "sequence_type_count", "test_type_count", "variant_count", ], "count", ), rpartial(combine_first, ["display", "sequence_type"], "display"), iffy( lambda df: "summary" in df.columns, lambda df: df.assign( summary=df.summary.str.replace(".counts", "", regex=False) ), ), rpartial( drop, [ "index", "sequence_type_count", "sequence_type", "code_count", "demographic_value", "test_type_count", "variant_count", ], ), iffy( lambda df: "summary" in df.columns and "count" in df.columns, lambda df: df.sort_values( ["summary", "count"], ascending=False ), ), iffy( lambda df: include_demographics is False and "summary" in df.columns, lambda df: df[~df.summary.str.contains("demographic")], ), ).reset_index(drop=True)