# Source code for leiap.progress

"""
This file contains functions to generate progress reports
"""


from leiap.io import *
import datetime as _datetime


#######################################################################################################################


def get_measuring_progress(year, measure_col='Weight'):
    """Get a quick estimate of the amount of measuring left to do.

    Parameters
    ----------
    year : str or int
        The specific year of interest
    measure_col : str
        Column to use to judge whether an artifact is complete or not.
        A row counts as measured when its value in this column is not missing.

    Returns
    -------
    (dt, not_done) : tuple of datetime and int
        dt is the current datetime; not_done is the number of sherds remaining
        to measure (rows whose `measure_col` value is missing).

    Notes
    -----
    1. The function also prints a human-readable summary statement.
    2. The calculation is based on the number of artifacts that have been
       classified but not yet had a weight recorded. This is very much an
       imperfect measure, but it helps to give a rough guide to progress.
    3. If no artifacts are returned for the year, the remaining percentage is
       reported as 0.0 rather than raising ``ZeroDivisionError``.
    """
    artifacts_df = get_artifacts(sections=['metrics'], years=[year])
    dt = _datetime.datetime.now()

    # Build the "has a measurement" mask once and derive both counts from it.
    measured = artifacts_df[measure_col].notna()
    done = int(measured.sum())
    not_done = len(artifacts_df) - done
    total = done + not_done

    # Guard against an empty result set (nothing loaded for this year).
    pct_remain = not_done / total * 100 if total else 0.0

    print(f'At {dt}, there are {round(pct_remain, 1)}% remaining ({not_done}/{total}) to be measured')
    return dt, not_done
#######################################################################################################################
def get_classification_progress(year, bags_col='NumBags'):
    """Get a rough estimate of the number of bags left to classify.

    Parameters
    ----------
    year : str or int
        The specific year of interest
    bags_col : str
        Column containing the count of bags per point

    Returns
    -------
    (dt, bags_remain) : tuple of datetime and int
        dt is the current datetime; bags_remain is the number of bags remaining
        to classify.

    Notes
    -----
    1. The function also prints a human-readable summary statement.
    2. The calculation is based on the number of points recorded as having a
       bag. This is very much an imperfect measure, but it helps to give a
       rough guide to progress. Some reasons why the count might not reach 0:

       - Problem bags
       - Points that were accidentally assigned with bags = 1 during GPS data
         upload.
       - Points that should have been changed to -1 bags (i.e., all artifacts
         discarded during preliminary sort) but were not, for whatever reason,
         changed.
    3. If no point has a positive bag count, the remaining percentage is
       reported as 0.0 rather than raising ``ZeroDivisionError``.
    """
    points_df = get_points(years=[year])
    artifacts_df = get_artifacts(years=[year], discards=True)

    # Points whose bag count is positive are the ones expected to be classified.
    pts_w_bags = points_df[points_df[bags_col] > 0].shape[0]
    # One group per SurveyPointId present in the artifact records.
    pts_classified = artifacts_df.groupby(['SurveyPointId']).size().shape[0]

    bags_remain = pts_w_bags - pts_classified
    # Guard against a year with no bagged points at all.
    pct_remain = bags_remain / pts_w_bags * 100 if pts_w_bags else 0.0

    dt = _datetime.datetime.now()
    print(f'At {dt} there are {round(pct_remain, 1)}% of bags remaining ({bags_remain}/{pts_w_bags}) to be classified.')
    return dt, bags_remain
#######################################################################################################################