#!/usr/bin/env python
# coding: utf-8
"""Download the UCI abalone data file, label its columns and explore it.

Exported from a Jupyter notebook: the ``# In[n]:`` markers are the original
cell boundaries, and the bare expressions (``abalone.head(4)`` etc.) only
display a result when run cell-by-cell in a notebook/IPython session.
"""

# In[1]:

# Importing libraries
import csv
import os

import pandas as pd
import pycurl


# In[2]:

# To get the location of the current working directory
os.getcwd()


# In[4]:

# To change the working directory; every relative path below
# ('abalone.csv') is resolved against this folder.
os.chdir('C:\\Anaconda\\abalone')
os.getcwd()


# In[5]:

# Use pycurl to get a data file over https and write it to a csv file
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'

c = pycurl.Curl()
try:
    c.setopt(c.URL, url)
    # Binary mode: pycurl passes raw bytes to the write callback, which a
    # text-mode file rejects on Python 3; it also keeps the downloaded
    # line endings untouched on Windows.
    with open('abalone.csv', 'wb') as s:
        c.setopt(c.WRITEFUNCTION, s.write)
        c.perform()
finally:
    # Release the curl handle even if the transfer fails.
    c.close()


# In[6]:

# The downloaded file has no header row, so the column names are supplied
# here; letting read_csv infer a header would turn the first observation
# into column labels and silently drop it from the data.
column_names = ['Sex', 'Length', 'Diameter', 'Height', 'Whole weight',
                'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings']
abalone = pd.read_csv('abalone.csv', header=None, names=column_names)
abalone


# In[14]:

# To write the labelled data back to the csv file. index=False keeps the
# automatic row numbers from being saved as an extra unnamed column.
abalone.to_csv('abalone.csv', index=False)


# In[15]:

# To get 4 top-most observations
abalone.head(4)


# In[16]:

# To get 4 bottom-most observations
abalone.tail(4)


# In[17]:

# To get basic statistics for all numeric variables
abalone.describe()


# In[18]:

# To get covariance. 'Sex' holds strings, so only the numeric columns are
# passed on; recent pandas versions raise on non-numeric columns here
# instead of skipping them.
numeric_abalone = abalone.select_dtypes(include='number')
numeric_abalone.cov()


# In[19]:

# To get pairwise-correlation coefficients for all numeric variables
numeric_abalone.corr()


# In[20]:

# To get unique values of 'Rings' column
abalone['Rings'].unique()


# In[21]:

# To subset - have only 'Length', 'Diameter' and 'Height' in dataset abalone1
abalone1 = abalone[['Length', 'Diameter', 'Height']]


# In[22]:

# Inspect abalone1 by checking head and tail
abalone1.head(3)


# In[23]:

abalone1.tail(3)