Module phc.easy.util.batch

Expand source code
import pandas as pd
from functools import reduce, partial
from typing import List, Any, Callable, TypeVar
from funcy import chunks, identity
from phc.easy.util import tqdm


def chunk(n: int, seq: list):
    """Split ``seq`` into pieces of length ``n`` using ``funcy.chunks``.

    The step passed to ``funcy.chunks`` equals the piece length, so each
    piece starts where the previous one ended.

    Args:
        n: Piece length, also used as the step between pieces.
        seq: The sequence to split.

    Returns:
        Whatever ``funcy.chunks(n, n, seq)`` returns.
    """
    size = n
    step = n  # Step == size: pieces neither overlap nor skip elements
    return chunks(size, step, seq)


def batch_get_frame(
    ids: List[str],
    max_batch_size: int,
    map_t: Callable[[List[str], int], pd.DataFrame],
) -> pd.DataFrame:
    """Build a single data frame by applying ``map_t`` to ``ids`` in batches.

    ``ids`` is split with ``chunk(max_batch_size, ids)`` and ``map_t`` is
    called once per batch, in order. When there is more than one batch and
    ``tqdm`` is available (not ``None``), the batches are wrapped in a
    progress bar labelled "Batch".

    Args:
        ids: Identifiers to process. When empty, an empty data frame is
            returned and ``map_t`` is never called.
        max_batch_size: Batch size handed to ``chunk``; must be at least 1.
        map_t: Called as ``map_t(batch, count)``, where ``count`` is the sum
            of ``len(result)`` over the results returned for all earlier
            batches (0 for the first batch). Must return a data frame.

    Returns:
        The per-batch results concatenated into one data frame with a fresh
        0..n-1 index.

    Raises:
        ValueError: If ``ids`` is non-empty and ``max_batch_size`` is less
            than 1.
    """
    if len(ids) == 0:
        return pd.DataFrame()

    # Fail early with a clear message instead of an obscure error from the
    # chunking helper or from concatenating zero frames.
    if max_batch_size < 1:
        raise ValueError(
            f"max_batch_size must be at least 1, got {max_batch_size!r}"
        )

    batches = list(chunk(max_batch_size, ids))

    # A progress bar is only useful when there is more than one batch.
    if len(batches) > 1 and tqdm is not None:
        batches = tqdm(batches, desc="Batch")

    frames = []
    row_count = 0
    for batch in batches:
        # Hand the running row total to the callback alongside the batch.
        frame = map_t(batch, row_count)
        row_count += len(frame)
        frames.append(frame)

    # ignore_index=True already yields a clean 0..n-1 index, so no further
    # reset_index call is needed.
    return pd.concat(frames, ignore_index=True)

Functions

def batch_get_frame(ids: List[str], max_batch_size: int, map_t: Callable[[List[str], int], pandas.core.frame.DataFrame])
Expand source code
def batch_get_frame(
    ids: List[str],
    max_batch_size: int,
    map_t: Callable[[List[str], int], pd.DataFrame],
) -> pd.DataFrame:
    """Build a single data frame by applying ``map_t`` to ``ids`` in batches.

    ``ids`` is split with ``chunk(max_batch_size, ids)`` and ``map_t`` is
    called once per batch, in order. When there is more than one batch and
    ``tqdm`` is available (not ``None``), the batches are wrapped in a
    progress bar labelled "Batch".

    Args:
        ids: Identifiers to process. When empty, an empty data frame is
            returned and ``map_t`` is never called.
        max_batch_size: Batch size handed to ``chunk``; must be at least 1.
        map_t: Called as ``map_t(batch, count)``, where ``count`` is the sum
            of ``len(result)`` over the results returned for all earlier
            batches (0 for the first batch). Must return a data frame.

    Returns:
        The per-batch results concatenated into one data frame with a fresh
        0..n-1 index.

    Raises:
        ValueError: If ``ids`` is non-empty and ``max_batch_size`` is less
            than 1.
    """
    if len(ids) == 0:
        return pd.DataFrame()

    # Fail early with a clear message instead of an obscure error from the
    # chunking helper or from concatenating zero frames.
    if max_batch_size < 1:
        raise ValueError(
            f"max_batch_size must be at least 1, got {max_batch_size!r}"
        )

    batches = list(chunk(max_batch_size, ids))

    # A progress bar is only useful when there is more than one batch.
    if len(batches) > 1 and tqdm is not None:
        batches = tqdm(batches, desc="Batch")

    frames = []
    row_count = 0
    for batch in batches:
        # Hand the running row total to the callback alongside the batch.
        frame = map_t(batch, row_count)
        row_count += len(frame)
        frames.append(frame)

    # ignore_index=True already yields a clean 0..n-1 index, so no further
    # reset_index call is needed.
    return pd.concat(frames, ignore_index=True)
def chunk(n: int, seq: list)
Expand source code
def chunk(n: int, seq: list):
    """Split ``seq`` into pieces of length ``n`` using ``funcy.chunks``.

    The step passed to ``funcy.chunks`` equals the piece length, so each
    piece starts where the previous one ended.

    Args:
        n: Piece length, also used as the step between pieces.
        seq: The sequence to split.

    Returns:
        Whatever ``funcy.chunks(n, n, seq)`` returns.
    """
    size = n
    step = n  # Step == size: pieces neither overlap nor skip elements
    return chunks(size, step, seq)