Resolving conflicts

There is currently no built-in support for resolving conflicts within a table using petl; however, this notebook gives an example of a workaround strategy.

In [1]:
# Show the Python version this notebook was executed with, so the
# outputs below can be tied to a specific interpreter.
import sys
sys.version_info
Out[1]:
sys.version_info(major=3, minor=4, micro=2, releaselevel='final', serial=0)
In [2]:
# Show the petl version this notebook was executed with; every later
# cell refers to the library through the `etl` alias.
import petl as etl
etl.__version__
Out[2]:
'1.0.6'
In [3]:
# Master records as whitespace-aligned text. A lone '.' stands for a
# missing value, and the age of id 2 is simply absent.
data_master = b"""id  name              value            age
 1  Tressa           1203              42
 2  Phil                23997            
 3  Darius                .                 78
 4  Delinda          96501            64
 5  Adelina          96508             50
"""

# Read the text one line per row, then break each line into columns on
# runs of whitespace.
master_lines = etl.fromtext(etl.MemorySource(data_master))
master_columns = etl.split(master_lines, 'lines', r'\s+')

# Drop the first row so that the "id name value age" line serves as the
# header, then turn the '.' placeholders into None.
master_clean = etl.replaceall(etl.skip(master_columns, 1), '.', None)

# Keep a second copy of the master's age under its own field name, so
# the original value is still available after a merge.
tbl_master = etl.addfield(master_clean, 'master_age', lambda rec: rec.age)
tbl_master
Out[3]:
id name value age master_age
1 Tressa 1203 42 42
2 Phil 23997 None None
3 Darius None 78 78
4 Delinda 96501 64 64
5 Adelina 96508 50 50
In [4]:
# Second source covering only ids 2 and 3, in the same whitespace-aligned
# layout. A lone '.' again stands for a missing value.
data_other = b"""id  name                value            age
 2  Phil                 .                    53
 3  Darius           5000             76
"""

# Read the text one line per row, then break each line into columns on
# runs of whitespace.
other_lines = etl.fromtext(etl.MemorySource(data_other))
other_columns = etl.split(other_lines, 'lines', r'\s+')

# Drop the first row so that the "id name value age" line serves as the
# header, then turn the '.' placeholders into None.
tbl_other = etl.replaceall(etl.skip(other_columns, 1), '.', None)
tbl_other
Out[4]:
id name value age
2 Phil None 53
3 Darius 5000 76
In [5]:
# Merge the two tables on 'id'. As the output shows, a value missing in
# one table is filled from the other (Phil's age, Darius's value), while
# two different non-missing values for the same field produce a Conflict
# object (Darius's age: '76' vs '78').
tbl_merge = etl.merge(tbl_master, tbl_other, key='id')
tbl_merge
Out[5]:
id name value age master_age
1 Tressa 1203 42 42
2 Phil 23997 53 None
3 Darius 5000 Conflict({'76', '78'}) 78
4 Delinda 96501 64 64
5 Adelina 96508 50 50
In [6]:
def prefer_master_age(value, rec):
    """Return the master's age when the merged age is a Conflict.

    `value` is the merged 'age' cell and `rec` is the whole row; any
    non-conflicting value is passed through unchanged.
    """
    if isinstance(value, etl.Conflict):
        return rec.master_age
    return value


# Resolve conflicting ages in favour of the master table, then drop the
# helper column that carried the master's value through the merge.
ages_resolved = etl.convert(tbl_merge, 'age', prefer_master_age, pass_row=True)
tbl_merge_resolved = etl.cutout(ages_resolved, 'master_age')
tbl_merge_resolved
Out[6]:
id name value age
1 Tressa 1203 42
2 Phil 23997 53
3 Darius 5000 78
4 Delinda 96501 64
5 Adelina 96508 50
In [ ]: