"""
These functions take care of some admin for the field team
"""
import pandas as _pd
import numpy as _np
import datetime as _datetime
#######################################################################################################################
[docs]def assign_chores(csv_file, names, start_col, end_col, start_date, end_date, out_file='chore_schedule.xlsx'):
"""Create equitable schedule of chore duties
Parameters
----------
csv_file : str
File path to CSV file where names and dates are stored
names : str
CSV column with crew member names
start_col, end_col : str
CSV columns with crew member start dates and end dates, respectively
start_date, end_date : str
First and last days, respectively, when chores are needed
out_file : str
File path or name for the output Excel
Returns
-------
crew_dates : pandas DataFrame
DataFrame containing some info about how jobs were assigned. Useful for deciding if the results are fair.
Example
-------
>> assign_chores('all_students.csv',
names='student',
start_col='arrive', end_col='depart',
start_date='2017-06-27', end_date='2017-07-25')
"""
# read in CSV file
crew_dates = _pd.read_csv(csv_file)
# convert date columns to date type
crew_dates[start_col] = _pd.to_datetime(crew_dates[start_col])
crew_dates[end_col] = _pd.to_datetime(crew_dates[end_col])
crew_dates['workdays'] = 0
crew_dates.set_index(names, inplace=True)
workdays = [] # list to store all the days when chores are required
# function to loop through days
def daterange(start, end):
for n in range(int((end - start).days)+1):
yield start + _datetime.timedelta(n)
# make program start and end dates into pandas Timestamps
start_date = _pd.Timestamp(start_date)
end_date = _pd.Timestamp(end_date)
for single_day in daterange(start_date, end_date): # loop through days
if single_day.dayofweek in [6, 0, 1, 2, 3]: # if it is a workday
workdays.append(single_day) # add it to the list
# find all present on that day
crew_present = crew_dates[(crew_dates[start_col] < single_day) & (crew_dates[end_col] > single_day)]
for person in crew_present.index:
crew_dates.loc[person, 'workdays'] = crew_dates.loc[person, 'workdays']+1 # +1 to their workday count
total_person_days = crew_dates['workdays'].sum()
total_workdays = len(workdays)
total_jobs = total_workdays * 6 # total number of chore slots to be filled
# calculate each person's fraction of person days
# multiply this fraction by total jobs and round up for max jobs
# calculate estimated breakfasts (max jobs x 0.33), lunches (max jobs x 0.33), and dinners (max jobs x 0.33)
# initialize counts for each type of job
crew_dates['day_fraction'] = crew_dates['workdays'] / total_person_days
crew_dates['max_jobs'] = _np.ceil(crew_dates['day_fraction'] * total_jobs)
crew_dates['breakfast_ct'] = 0
crew_dates['breakfast_max'] = crew_dates['max_jobs'] * (1/3)
crew_dates['lunch_ct'] = 0
crew_dates['lunch_max'] = crew_dates['max_jobs'] * (1/3)
crew_dates['dinner_ct'] = 0
crew_dates['dinner_max'] = crew_dates['max_jobs'] * (1/3)
# create dataframe with jobs as index
job_list = ['breakfast1', 'breakfast2',
'lunch1', 'lunch2',
'dinner1', 'dinner2']
chores = _pd.DataFrame(index=job_list)
# go day by day
# find subset of people who are present that day
# for each meal
# 1. find out who has not reached their quota for that job type yet
# 2. while number of assigned people is less than 2,
# find those with the lowest job counts among those people
# 3. randomly select from those people
# 4. assign them to meal that day
# 5. remove them from the possible selections for that day
# 6. [repeat]
for day in workdays:
present_df = crew_dates[(crew_dates[start_col] <= day) & (crew_dates[end_col] >= day)]
if len(present_df.index) < len(job_list):
print("Failed! Not enough people present on "+str(day))
day_list = []
bf_count = 0
while bf_count < 2:
bf_df = present_df[(present_df['breakfast_ct'] < present_df['breakfast_max'])]
min_ct = bf_df['breakfast_ct'].min()
pick_df = bf_df[bf_df['breakfast_ct'] == min_ct]
row = pick_df.sample(n=1, replace=True)
person = row.index[0]
if person not in day_list:
day_list.append(person)
crew_dates.loc[person, 'breakfast_ct'] += 1
present_df.drop(person, inplace=True)
bf_count += 1
lun_count = 0
while lun_count < 2:
lun_df = present_df[(present_df['lunch_ct'] < present_df['lunch_max'])]
min_ct = lun_df['lunch_ct'].min()
pick_df = lun_df[lun_df['lunch_ct'] == min_ct]
row = pick_df.sample(n=1, replace=True)
person = row.index[0]
if person not in day_list:
day_list.append(person)
crew_dates.loc[person, 'lunch_ct'] += 1
present_df.drop(person, inplace=True)
lun_count += 1
din_count = 0
while din_count < 2:
din_df = present_df[(present_df['dinner_ct'] < present_df['dinner_max'])]
min_ct = din_df['dinner_ct'].min()
pick_df = din_df[din_df['dinner_ct'] == min_ct]
row = pick_df.sample(n=1, replace=True)
person = row.index[0]
if person not in day_list:
day_list.append(person)
crew_dates.loc[person, 'dinner_ct'] += 1
present_df.drop(person, inplace=True)
din_count += 1
roster = _pd.Series(day_list) # convert list to pandas Series
chores[day.date()] = roster.values # add column to chores dataframe
chores = chores.T # get dates as rows, jobs as columns
# save to Excel
chores_writer = _pd.ExcelWriter(out_file, engine='xlsxwriter')
chores.to_excel(chores_writer, sheet_name='chores')
chores_writer.save()
# calculate a couple quick stats to evaluate how even the chores are distributed
# a negative 'max-assigned' value indicates the person has more jobs than expected
# a positive 'max-assigned' value indicates the person has fewer jobs than expected
crew_dates['assigned_jobs'] = crew_dates['breakfast_ct']+crew_dates['lunch_ct']+crew_dates['dinner_ct']
crew_dates['max-assigned'] = crew_dates['max_jobs']-crew_dates['assigned_jobs']
return crew_dates
#######################################################################################################################