%pylab inline
import pandas as pd
Populating the interactive namespace from numpy and matplotlib
/home/walrus/.virtualenvs/py/local/lib/python2.7/site-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0. .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))
# Read the Excel workbook at this relative path into `data` via pandas.
# NOTE(review): the path suggests this is the 2015 budget-proposal (PLOA)
# dataset -- confirm against the data source.
data = pd.read_excel('../Orcamento/2015/Proposta/PLOA467BaseDadosQuadroDetalhadoDaAcao.xls')
def get_data_info(data):
    """Summarise every integer or floating-point column of a DataFrame.

    Columns are selected by the ``kind`` code of their dtype, so any width
    of signed integer, unsigned integer or float qualifies (not only
    ``int64``/``float64``). Boolean, object, datetime and other
    non-numeric columns are skipped.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose columns should be summarised.

    Returns
    -------
    dict
        Maps each selected column label to a dict with two entries:
        ``'distinct_values'`` -- number of distinct non-null values, and
        ``'range'`` -- a ``(min, max)`` tuple of the column's values.
    """
    # dtype.kind codes: 'i' signed int, 'u' unsigned int, 'f' float.
    numeric_kinds = ('i', 'u', 'f')

    data_info = {}
    for c in data.columns:
        column = data[c]
        if column.dtype.kind not in numeric_kinds:
            continue
        data_info[c] = {
            # nunique() ignores nulls, the same count len(value_counts()) gives.
            'distinct_values': column.nunique(),
            'range': (column.min(), column.max()),
        }
    return data_info
# Build the per-column summary for the loaded table.
data_info = get_data_info(data)
# Bare expression: as the last line of a notebook cell this displays the dict.
data_info
{u'ANO_EX': {'distinct_values': 1, 'range': (2015, 2015)}, u'COD_DA': {'distinct_values': 73, 'range': (1, 100)}, u'COD_DISTRITO': {'distinct_values': 68, 'range': (0, 99)}, u'COD_EMP': {'distinct_values': 14, 'range': (1, 91)}, u'COD_META': {'distinct_values': 321, 'range': (1, 999)}, u'COD_REGIAO': {'distinct_values': 7, 'range': (0, 9)}, u'COD_SUBPREFEITURA': {'distinct_values': 34, 'range': (0, 99)}, u'ORGAO': {'distinct_values': 86, 'range': (1, 99)}, u'PA': {'distinct_values': 313, 'range': (4, 9133)}, u'UNIDADE': {'distinct_values': 24, 'range': (10, 70)}, u'VALOR_DA': {'distinct_values': 1071, 'range': (1000.0, 5488238392.0)}}