By Paulo Scardine - http://goo.gl/Ke1P0p
No site StackOverflow, um usuário de R perguntou como implementar o pipe-operator do pacote dplyr (`%>%`), onde `x %>% f(y)` é equivalente a `f(x, y)`. Adicionalmente, ele gostaria de usar uma sintaxe parecida com a do pacote Pipe do cheese shop:
df = df | select('one') | rename(one='new_one')
No pacote Pipe esta sintaxe é chamada de "infix notation", e é equivalente a:
df = rename(select(df, 'one'), one='new_one')
import pandas as pd
# Sample frame used by every example below: two float columns, five rows.
df = pd.DataFrame({
    'one': [1.0, 2.0, 3.0, 4.0, 4.0],
    'two': [4.0, 3.0, 2.0, 1.0, 3.0],
})
def select(df, *args):
    """Return ``df`` indexed by the list of labels given in ``args``.

    The labels are passed to ``df[...]`` as a single list, in the order
    they were supplied.
    """
    wanted = list(args)
    return df[wanted]
def rename(df, **kwargs):
    """Rename columns of ``df``; each keyword maps ``old_name=new_name``.

    The renames are applied one keyword at a time, in keyword order, each
    on the result of the previous one. ``df`` itself is only rebound
    locally; the value returned is whatever ``df.rename`` produced last
    (or ``df`` unchanged when no keywords are given).
    """
    renamed = df
    for old_label, new_label in kwargs.items():
        # Both sides go through '%s' formatting, so the new label is
        # always turned into its string form.
        mapping = {'%s' % old_label: '%s' % new_label}
        renamed = renamed.rename(columns=mapping)
    return renamed
df
one | two | |
---|---|---|
0 | 1 | 4 |
1 | 2 | 3 |
2 | 3 | 2 |
3 | 4 | 1 |
4 | 4 | 3 |
select(df, 'one')
one | |
---|---|
0 | 1 |
1 | 2 |
2 | 3 |
3 | 4 |
4 | 4 |
rename(select(df, 'one'), one='other')
other | |
---|---|
0 | 1 |
1 | 2 |
2 | 3 |
3 | 4 |
4 | 4 |
Para cada operador em Python existe um ou mais métodos mágicos `__dunder__`, um para a operação normal e um para a operação "à direita". Por exemplo, para implementar o operador `+`, você precisa sobrecarregar o método `__add__`.
class Idem(object):
    """Toy operand: adding anything to it yields that thing times two.

    Only the left-hand form (``idem + x``) is implemented here; no
    reflected method is defined, so ``x + idem`` is not handled by this
    class.
    """

    def __add__(self, other):
        """Return ``other * 2``, ignoring ``self`` entirely."""
        doubled = other * 2
        return doubled
idem = Idem()
idem + 5
10
5 + idem
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-9-e85f72d6ce93> in <module>() ----> 1 5 + idem TypeError: unsupported operand type(s) for +: 'int' and 'Idem'
class Idem(object):
    """Toy operand: adding anything to it, on either side, doubles it."""

    def __add__(self, other):
        """Handle ``idem + other`` by returning ``other * 2``."""
        doubled = other * 2
        return doubled

    def __radd__(self, other):
        """Handle ``other + idem`` by delegating to ``__add__``."""
        result = self.__add__(other)
        return result
idem = Idem()
5 + idem
10
Somar `datetime.date` com `datetime.time`:
import datetime
data = datetime.date.today()
hora = datetime.time(19)
data
datetime.date(2015, 11, 23)
hora
datetime.time(19, 0)
datetime.datetime.now()
datetime.datetime(2015, 11, 23, 20, 24, 15, 858188)
data + hora
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-15-8c7c29d2106d> in <module>() ----> 1 data + hora TypeError: unsupported operand type(s) for +: 'datetime.date' and 'datetime.time'
class SmartDate(datetime.date):
    """A ``datetime.date`` that can also be added to a ``datetime.time``."""

    def __add__(self, other):
        """Combine with a time, or fall back to the normal date addition.

        When ``other`` is a ``datetime.time`` the result is
        ``datetime.datetime.combine(self, other)``; any other operand is
        handed to the parent class ``__add__``.
        """
        if not isinstance(other, datetime.time):
            return super(SmartDate, self).__add__(other)
        return datetime.datetime.combine(self, other)
data = SmartDate(*data.timetuple()[:3])
data + hora
datetime.datetime(2015, 11, 23, 19, 0)
def pipe(original):
    """Decorator that makes ``original`` usable as ``value >> original(...)``.

    The decorated name becomes a class. Calling it captures the extra
    positional and keyword arguments; shifting a value into the resulting
    instance (``value >> instance``) evaluates
    ``original(value, *args, **kwargs)`` and returns the result.

    Args:
        original: callable whose first parameter receives the piped value.

    Returns:
        The ``PipeInto`` class bound to ``original``.
    """
    class PipeInto(object):
        # Class-level entry kept so ``PipeInto.data['function']`` still
        # resolves; it is never mutated.
        data = {'function': original}

        def __init__(self, *args, **kwargs):
            # Build a fresh dict per instance. Writing into the shared
            # class-level dict would make every stage created from the
            # same function overwrite the arguments of the previous ones.
            self.data = {
                'function': original,
                'args': args,
                'kwargs': kwargs,
            }

        def __rrshift__(self, other):
            # Reflected ``>>``: invoked for ``other >> self`` when the
            # left operand does not handle the shift itself.
            return self.data['function'](
                other,
                *self.data['args'],
                **self.data['kwargs']
            )

    return PipeInto
@pipe
def select(df, *args):
    """Pipeable column selection: ``df >> select('a', 'b')``.

    Indexes ``df`` with the list of labels given in ``args``, in the order
    supplied.
    """
    # The labels arrive in ``args``; the previous body referenced an
    # undefined name and raised NameError as soon as the pipe was used.
    return df[list(args)]
@pipe
def rename(df, **kwargs):
    """Pipeable column rename: ``df >> rename(old_name='new_name')``.

    Each keyword is applied as its own ``df.rename(columns=...)`` call, in
    keyword order, on the result of the previous one.
    """
    current = df
    for old_label, new_label in kwargs.items():
        # '%s' formatting turns the new label into its string form.
        mapping = {'%s' % old_label: '%s' % new_label}
        current = current.rename(columns=mapping)
    return current
df >> select('two', 'one')
two | one | |
---|---|---|
0 | 4 | 1 |
1 | 3 | 2 |
2 | 2 | 3 |
3 | 1 | 4 |
4 | 3 | 4 |
df
one | two | |
---|---|---|
0 | 1 | 4 |
1 | 2 | 3 |
2 | 3 | 2 |
3 | 4 | 1 |
4 | 4 | 3 |
df >> select('one') >> rename(one='first')
first | |
---|---|
0 | 1 |
1 | 2 |
2 | 3 |
3 | 4 |
4 | 4 |
16 << 1
32