Source code for leiap.fieldschool

"""
These functions take care of some admin for the field team
"""


import pandas as _pd
import numpy as _np
import datetime as _datetime


#######################################################################################################################


[docs]def assign_chores(csv_file, names, start_col, end_col, start_date, end_date, out_file='chore_schedule.xlsx'):
    """Create equitable schedule of chore duties
    
    Parameters
    ----------
    csv_file : str
        File path to CSV file where names and dates are stored
    names : str
        CSV column with crew member names
    start_col, end_col : str
        CSV columns with crew member start dates and end dates, respectively
    start_date, end_date : str
        First and last days, respectively, when chores are needed
    out_file : str
        File path or name for the output Excel
    
    Returns
    -------
    crew_dates : pandas DataFrame
        DataFrame containing some info about how jobs were assigned. Useful for deciding if the results are fair. 
        
    Example
    -------
    >> assign_chores('all_students.csv',
                     names='student',
                     start_col='arrive', end_col='depart',
                     start_date='2017-06-27', end_date='2017-07-25')
    """
    
    # read in CSV file
    crew_dates = _pd.read_csv(csv_file)

    # convert date columns to date type
    crew_dates[start_col] = _pd.to_datetime(crew_dates[start_col])
    crew_dates[end_col] = _pd.to_datetime(crew_dates[end_col])
    
    crew_dates['workdays'] = 0
    crew_dates.set_index(names, inplace=True)

    workdays = []  # list to store all the days when chores are required

    # function to loop through days
    def daterange(start, end):
        for n in range(int((end - start).days)+1):
            yield start + _datetime.timedelta(n)

    # make program start and end dates into pandas Timestamps
    start_date = _pd.Timestamp(start_date)    
    end_date = _pd.Timestamp(end_date)

    for single_day in daterange(start_date, end_date):  # loop through days
        if single_day.dayofweek in [6, 0, 1, 2, 3]:  # if it is a workday
            workdays.append(single_day)  # add it to the list
            # find all present on that day
            crew_present = crew_dates[(crew_dates[start_col] < single_day) & (crew_dates[end_col] > single_day)]
            for person in crew_present.index:
                crew_dates.loc[person, 'workdays'] = crew_dates.loc[person, 'workdays']+1  # +1 to their workday count

    total_person_days = crew_dates['workdays'].sum()
    total_workdays = len(workdays)
    total_jobs = total_workdays * 6  # total number of chore slots to be filled

    # calculate each person's fraction of person days
    #     multiply this fraction by total jobs and round up for max jobs
    #     calculate estimated breakfasts (max jobs x 0.33), lunches (max jobs x 0.33), and dinners (max jobs x 0.33)
    #     initialize counts for each type of job

    crew_dates['day_fraction'] = crew_dates['workdays'] / total_person_days
    crew_dates['max_jobs'] = _np.ceil(crew_dates['day_fraction'] * total_jobs)
    crew_dates['breakfast_ct'] = 0
    crew_dates['breakfast_max'] = crew_dates['max_jobs'] * (1/3)
    crew_dates['lunch_ct'] = 0
    crew_dates['lunch_max'] = crew_dates['max_jobs'] * (1/3)
    crew_dates['dinner_ct'] = 0
    crew_dates['dinner_max'] = crew_dates['max_jobs'] * (1/3)

    # create dataframe with jobs as index
    job_list = ['breakfast1', 'breakfast2', 
                'lunch1', 'lunch2',
                'dinner1', 'dinner2']
    chores = _pd.DataFrame(index=job_list)

    # go day by day
    # find subset of people who are present that day
    # for each meal
    #     1. find out who has not reached their quota for that job type yet
    #     2. while number of assigned people is less than 2, 
    #            find those with the lowest job counts among those people
    #     3. randomly select from those people
    #     4. assign them to meal that day
    #     5. remove them from the possible selections for that day
    #     6. [repeat]

    for day in workdays:
        present_df = crew_dates[(crew_dates[start_col] <= day) & (crew_dates[end_col] >= day)]
        if len(present_df.index) < len(job_list):
            print("Failed! Not enough people present on "+str(day))
        day_list = []

        bf_count = 0
        while bf_count < 2:
            bf_df = present_df[(present_df['breakfast_ct'] < present_df['breakfast_max'])]
            min_ct = bf_df['breakfast_ct'].min()
            pick_df = bf_df[bf_df['breakfast_ct'] == min_ct]
            row = pick_df.sample(n=1, replace=True)
            person = row.index[0]
            if person not in day_list:
                day_list.append(person)
                crew_dates.loc[person, 'breakfast_ct'] += 1
                present_df.drop(person, inplace=True)
                bf_count += 1

        lun_count = 0
        while lun_count < 2:
            lun_df = present_df[(present_df['lunch_ct'] < present_df['lunch_max'])]
            min_ct = lun_df['lunch_ct'].min()
            pick_df = lun_df[lun_df['lunch_ct'] == min_ct]
            row = pick_df.sample(n=1, replace=True)
            person = row.index[0]
            if person not in day_list:
                day_list.append(person)
                crew_dates.loc[person, 'lunch_ct'] += 1
                present_df.drop(person, inplace=True)
                lun_count += 1

        din_count = 0
        while din_count < 2:
            din_df = present_df[(present_df['dinner_ct'] < present_df['dinner_max'])]
            min_ct = din_df['dinner_ct'].min()
            pick_df = din_df[din_df['dinner_ct'] == min_ct]
            row = pick_df.sample(n=1, replace=True)
            person = row.index[0]
            if person not in day_list:
                day_list.append(person)
                crew_dates.loc[person, 'dinner_ct'] += 1
                present_df.drop(person, inplace=True)
                din_count += 1

        roster = _pd.Series(day_list)  # convert list to pandas Series
        chores[day.date()] = roster.values  # add column to chores dataframe

    chores = chores.T  # get dates as rows, jobs as columns
    
    # save to Excel
    chores_writer = _pd.ExcelWriter(out_file, engine='xlsxwriter')
    chores.to_excel(chores_writer, sheet_name='chores')            
    chores_writer.save()
    
    # calculate a couple quick stats to evaluate how even the chores are distributed
    # a negative 'max-assigned' value indicates the person has more jobs than expected
    # a positive 'max-assigned' value indicates the person has fewer jobs than expected
    crew_dates['assigned_jobs'] = crew_dates['breakfast_ct']+crew_dates['lunch_ct']+crew_dates['dinner_ct']
    crew_dates['max-assigned'] = crew_dates['max_jobs']-crew_dates['assigned_jobs']
    return crew_dates

#######################################################################################################################