The conversion I'm trying to make is:

• If nb_day is not integer:
• if notes contains 'am':
• split line: 1 with full days, modifying date_end = previous date_end - 1 1 with 0.5 day, modifying date_begin = date_end
• if notes contacts 'pm':
• split line: 1 with 0.5 day, modifying date_end = date_begin 1 with full days, modifying date_begin = previous date_begin + 1
In [1]:
import datetime
import petl.interactive as etl
print etl.__version__

0.26

In [2]:
data = """user_id,date_begin,date_end,nb_days,notes,projet_id
user1,2014-07-31,2014-08-07,5.5,5 days + am,cp
user2,2014-07-31,2014-08-07,5.5,5 days + pm,cp
user3,2014-07-31,2014-08-06,5,5 days,cp
"""

In [3]:
day = datetime.timedelta(days=1)

def split_partial_days(row):
if isinstance(row.nb_days, float):
# split out partial days into separate row
if 'am' in row.notes:
# full days
yield (row.user_id,
row.date_begin,
row.date_end - day,
int(row.nb_days),
row.notes.split('+')[0].strip(),
row.projet_id)
# partial days
yield (row.user_id,
row.date_end,
row.date_end,
row.nb_days - int(row.nb_days),
row.notes.split('+')[1].strip(),
row.projet_id)
if 'pm' in row.notes:
# partial days
yield (row.user_id,
row.date_begin,
row.date_begin,
row.nb_days - int(row.nb_days),
row.notes.split('+')[1].strip(),
row.projet_id)
# full days
yield (row.user_id,
row.date_begin + day,
row.date_end,
int(row.nb_days),
row.notes.split('+')[0].strip(),
row.projet_id)
else:
# do nothing
yield row

tbl = (etl
.fromcsv(etl.StringSource(data))
.convert(('date_begin', 'date_end'), etl.dateparser('%Y-%m-%d'))
.convert('nb_days', etl.parsenumber)
.rowmapmany(split_partial_days, fields=['user_id', 'date_begin', 'date_end', 'nb_days', 'notes', 'projet_id'])
)
tbl

Out[3]:
user_id date_begin date_end nb_days notes projet_id
user1 2014-07-31 2014-08-06 5 5 days cp
user1 2014-08-07 2014-08-07 0.5 am cp
user2 2014-07-31 2014-07-31 0.5 pm cp
user2 2014-08-01 2014-08-07 5 5 days cp
user3 2014-07-31 2014-08-06 5 5 days cp
In [3]: