Module phc.easy.summary.counts
Expand source code
import inspect
from typing import Optional
import pandas as pd
from funcy import first, iffy, lmapcat, rpartial
from phc.easy.abstract.paging_api_item import PagingApiItem, PagingApiOptions
from phc.easy.auth import Auth
from phc.easy.util.frame import combine_first, drop
from toolz import pipe
class NoOptions(PagingApiOptions):
    """Empty options model: this endpoint accepts no query parameters."""
class SummaryCounts(PagingApiItem):
    """Fetch and tidy entity counts from the project summary analytics API."""

    @staticmethod
    def resource_path():
        # URL template; project_id is filled in by the paging machinery.
        return "analytics/summary/{project_id}"

    @staticmethod
    def response_to_items(data):
        """Flatten the API response into one row per counted item.

        The response is normalized to a single record whose list-valued keys
        are exploded into rows tagged with the originating summary name;
        non-list values are ignored.
        """
        records = pd.json_normalize(data).to_dict("records")
        squashed = (records[0] if records else None) or {}
        return [
            {"summary": key, **item}
            for key, value in squashed.items()
            if isinstance(value, list)
            for item in value
        ]

    @staticmethod
    def execute_args() -> dict:
        # Summary counts change over time, so never serve a cached result.
        return dict(ignore_cache=True)

    @staticmethod
    def params_class():
        return NoOptions

    @staticmethod
    def transform_results(
        data_frame: pd.DataFrame, include_demographics: bool, **expand_args
    ):
        """Coalesce the per-summary column variants into code/count/display,
        drop the redundant source columns, and (optionally) filter out
        demographic rows."""
        frame = data_frame

        # Each summary type reports its identifier/count/label under a
        # different column; merge the variants into canonical columns.
        frame = combine_first(
            frame, ["code", "index", "demographic_value"], "code"
        )
        frame = combine_first(
            frame,
            [
                "code_count",
                "count",
                "sequence_type_count",
                "test_type_count",
                "variant_count",
            ],
            "count",
        )
        frame = combine_first(frame, ["display", "sequence_type"], "display")

        # Trim the literal ".counts" suffix from summary names.
        if "summary" in frame.columns:
            frame = frame.assign(
                summary=frame.summary.str.replace(".counts", "", regex=False)
            )

        # The canonical columns now carry the data; remove the originals.
        frame = drop(
            frame,
            [
                "index",
                "sequence_type_count",
                "sequence_type",
                "code_count",
                "demographic_value",
                "test_type_count",
                "variant_count",
            ],
        )

        if ("summary" in frame.columns) and ("count" in frame.columns):
            frame = frame.sort_values(["summary", "count"], ascending=False)

        # Demographic rows are not entity counts; exclude them by default.
        if include_demographics is False and "summary" in frame.columns:
            frame = frame[~frame.summary.str.contains("demographic")]

        return frame.reset_index(drop=True)

    @classmethod
    def get_data_frame(
        cls,
        include_demographics: bool = False,
        all_results: bool = True,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for summary counts across clinical and omics data

        NOTE: By default, demographic data is excluded since it is not
        technically counts of entities. If demographics-only data is desired,
        use this:

        >>> from phc.easy.summary.item_counts import SummaryItemCounts
        >>> SummaryItemCounts.get_data_frame(summary="demographics")

        ## Parameters

        Execution: `phc.easy.query.Query.execute_paging_api`
        """
        # NOTE: include_demographics flows through to transform_results
        # because it is explicitly declared in that method's signature.
        return super().get_data_frame(
            **kw_args, **cls._get_current_args(inspect.currentframe(), locals())
        )
Classes
class NoOptions (**data: Any)
-
Usage docs: https://docs.pydantic.dev/2.10/concepts/models/
A base class for creating Pydantic models.
Attributes
__class_vars__
- The names of the class variables defined on the model.
__private_attributes__
- Metadata about the private attributes of the model.
__signature__
- The synthesized
__init__
[Signature
][inspect.Signature] of the model. __pydantic_complete__
- Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__
- The core schema of the model.
__pydantic_custom_init__
- Whether the model has a custom
__init__
function. __pydantic_decorators__
- Metadata containing the decorators defined on the model.
This replaces
Model.__validators__
and `Model.__root_validators__`
from Pydantic V1. __pydantic_generic_metadata__
- Metadata for generic models; contains data used for a similar purpose to args, origin, parameters in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__
- Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__
- The name of the post-init method for the model, if defined.
__pydantic_root_model__
- Whether the model is a [
RootModel
][pydantic.root_model.RootModel]. __pydantic_serializer__
- The
pydantic-core
SchemaSerializer
used to dump instances of the model. __pydantic_validator__
- The
pydantic-core
SchemaValidator
used to validate instances of the model. __pydantic_fields__
- A dictionary of field names and their corresponding [
FieldInfo
][pydantic.fields.FieldInfo] objects. __pydantic_computed_fields__
- A dictionary of computed field names and their corresponding [
ComputedFieldInfo
][pydantic.fields.ComputedFieldInfo] objects. __pydantic_extra__
- A dictionary containing extra values, if [
extra
][pydantic.config.ConfigDict.extra] is set to `'allow'`
. __pydantic_fields_set__
- The names of fields explicitly set during instantiation.
__pydantic_private__
- Values of private attributes set on the model instance.
Create a new model by parsing and validating input data from keyword arguments.
Raises [
ValidationError
][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model. `self`
is explicitly positional-only to allow `self`
as a field name. Expand source code
class NoOptions(PagingApiOptions): pass
Ancestors
- PagingApiOptions
- pydantic.main.BaseModel
Class variables
var model_config
Methods
def model_dump(self)
-
Inherited from:
PagingApiOptions
.model_dump
class SummaryCounts
-
Expand source code
class SummaryCounts(PagingApiItem): @staticmethod def resource_path(): return "analytics/summary/{project_id}" @staticmethod def response_to_items(data): squashed = first(pd.json_normalize(data).to_dict("records")) or {} return lmapcat( lambda k: ( [{"summary": k, **v} for v in squashed[k]] if isinstance(squashed[k], list) else [] ), squashed.keys(), ) @staticmethod def execute_args() -> dict: return dict(ignore_cache=True) @staticmethod def params_class(): return NoOptions @staticmethod def transform_results( data_frame: pd.DataFrame, include_demographics: bool, **expand_args ): return pipe( data_frame, rpartial( combine_first, ["code", "index", "demographic_value"], "code" ), rpartial( combine_first, [ "code_count", "count", "sequence_type_count", "test_type_count", "variant_count", ], "count", ), rpartial(combine_first, ["display", "sequence_type"], "display"), iffy( lambda df: "summary" in df.columns, lambda df: df.assign( summary=df.summary.str.replace(".counts", "", regex=False) ), ), rpartial( drop, [ "index", "sequence_type_count", "sequence_type", "code_count", "demographic_value", "test_type_count", "variant_count", ], ), iffy( lambda df: "summary" in df.columns and "count" in df.columns, lambda df: df.sort_values( ["summary", "count"], ascending=False ), ), iffy( lambda df: include_demographics is False and "summary" in df.columns, lambda df: df[~df.summary.str.contains("demographic")], ), ).reset_index(drop=True) @classmethod def get_data_frame( cls, include_demographics: bool = False, all_results: bool = True, auth_args: Auth = Auth.shared(), max_pages: Optional[int] = None, page_size: Optional[int] = None, log: bool = False, **kw_args, ): """Execute a request for summary counts across clinical and omics data NOTE: By default, demographic data is excluded since it is not technically counts of entities. 
If demographics-only data is desired, use this: >>> from phc.easy.summary.item_counts import SummaryItemCounts >>> SummaryItemCounts.get_data_frame(summary="demographics") ## Parameters Execution: `phc.easy.query.Query.execute_paging_api` """ # NOTE: include_demographics gets passed through to transform_results # since explicitly declared there. df = super().get_data_frame( **kw_args, **cls._get_current_args(inspect.currentframe(), locals()) ) return df
Ancestors
Static methods
def execute_args() ‑> dict
-
Expand source code
@staticmethod def execute_args() -> dict: return dict(ignore_cache=True)
def get_data_frame(include_demographics: bool = False, all_results: bool = True, auth_args: Auth = <phc.easy.auth.Auth object>, max_pages: Optional[int] = None, page_size: Optional[int] = None, log: bool = False, **kw_args)
-
Execute a request for summary counts across clinical and omics data
NOTE: By default, demographic data is excluded since it is not technically counts of entities. If demographics-only data is desired, use this:
>>> from phc.easy.summary.item_counts import SummaryItemCounts >>> SummaryItemCounts.get_data_frame(summary="demographics")
Parameters
Execution:
Query.execute_paging_api()
Expand source code
@classmethod def get_data_frame( cls, include_demographics: bool = False, all_results: bool = True, auth_args: Auth = Auth.shared(), max_pages: Optional[int] = None, page_size: Optional[int] = None, log: bool = False, **kw_args, ): """Execute a request for summary counts across clinical and omics data NOTE: By default, demographic data is excluded since it is not technically counts of entities. If demographics-only data is desired, use this: >>> from phc.easy.summary.item_counts import SummaryItemCounts >>> SummaryItemCounts.get_data_frame(summary="demographics") ## Parameters Execution: `phc.easy.query.Query.execute_paging_api` """ # NOTE: include_demographics gets passed through to transform_results # since explicitly declared there. df = super().get_data_frame( **kw_args, **cls._get_current_args(inspect.currentframe(), locals()) ) return df
def params_class()
-
Inherited from:
PagingApiItem
.params_class
Returns a pydantic type that validates and transforms the params with dict()
Expand source code
@staticmethod def params_class(): return NoOptions
def process_params(params: dict) ‑> dict
-
Inherited from:
PagingApiItem
.process_params
Validates and transforms the API query parameters
def resource_path()
-
Inherited from:
PagingApiItem
.resource_path
Returns the API url name for retrieval
Expand source code
@staticmethod def resource_path(): return "analytics/summary/{project_id}"
def response_to_items(data)
-
Expand source code
@staticmethod def response_to_items(data): squashed = first(pd.json_normalize(data).to_dict("records")) or {} return lmapcat( lambda k: ( [{"summary": k, **v} for v in squashed[k]] if isinstance(squashed[k], list) else [] ), squashed.keys(), )
def transform_results(data_frame: pandas.core.frame.DataFrame, include_demographics: bool, **expand_args)
-
Inherited from:
PagingApiItem
.transform_results
Transform data frame batch
Expand source code
@staticmethod def transform_results( data_frame: pd.DataFrame, include_demographics: bool, **expand_args ): return pipe( data_frame, rpartial( combine_first, ["code", "index", "demographic_value"], "code" ), rpartial( combine_first, [ "code_count", "count", "sequence_type_count", "test_type_count", "variant_count", ], "count", ), rpartial(combine_first, ["display", "sequence_type"], "display"), iffy( lambda df: "summary" in df.columns, lambda df: df.assign( summary=df.summary.str.replace(".counts", "", regex=False) ), ), rpartial( drop, [ "index", "sequence_type_count", "sequence_type", "code_count", "demographic_value", "test_type_count", "variant_count", ], ), iffy( lambda df: "summary" in df.columns and "count" in df.columns, lambda df: df.sort_values( ["summary", "count"], ascending=False ), ), iffy( lambda df: include_demographics is False and "summary" in df.columns, lambda df: df[~df.summary.str.contains("demographic")], ), ).reset_index(drop=True)