#Importing libraries
import pandas as pd
import os as os
import pycurl
import csv
# Show the current working directory. The quoted string on the following line
# appears to be the output recorded when this notebook was originally run.
os.getcwd()
'C:\\Anaconda'
# Switch to the project folder so that the relative file name 'abalone.csv'
# used further down resolves inside it.
# NOTE(review): this absolute Windows path is machine-specific; adjust it
# before running elsewhere.
os.chdir('C:\\Anaconda\\abalone')
# Confirm that the working directory changed (recorded output follows).
os.getcwd()
'C:\\Anaconda\\abalone'
# Use pycurl to fetch the raw abalone data file over HTTPS and save it locally
# as 'abalone.csv', then load it into a DataFrame.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'
c = pycurl.Curl()
c.setopt(c.URL, url)
try:
    # Binary mode: on Python 3 pycurl hands the response body to the write
    # callback as bytes, which a text-mode file would reject. It also avoids
    # newline translation on Windows.
    with open('abalone.csv', 'wb') as s:
        c.setopt(c.WRITEFUNCTION, s.write)
        # The transfer must run while the file is still open.
        c.perform()
finally:
    # Always release the curl handle, even if the transfer fails.
    c.close()
# The downloaded file has no header row, so header=None keeps the first record
# as data instead of turning it into column names. The columns are numbered
# 0..8 until names are assigned.
# NOTE: the output recorded below was produced without header=None, i.e. with
# the first record used as the header (hence 4176 rows rather than 4177).
abalone = pd.read_csv('abalone.csv', header=None)
abalone
M | 0.455 | 0.365 | 0.095 | 0.514 | 0.2245 | 0.101 | 0.15 | 15 | |
---|---|---|---|---|---|---|---|---|---|
0 | M | 0.350 | 0.265 | 0.090 | 0.2255 | 0.0995 | 0.0485 | 0.0700 | 7 |
1 | F | 0.530 | 0.420 | 0.135 | 0.6770 | 0.2565 | 0.1415 | 0.2100 | 9 |
2 | M | 0.440 | 0.365 | 0.125 | 0.5160 | 0.2155 | 0.1140 | 0.1550 | 10 |
3 | I | 0.330 | 0.255 | 0.080 | 0.2050 | 0.0895 | 0.0395 | 0.0550 | 7 |
4 | I | 0.425 | 0.300 | 0.095 | 0.3515 | 0.1410 | 0.0775 | 0.1200 | 8 |
5 | F | 0.530 | 0.415 | 0.150 | 0.7775 | 0.2370 | 0.1415 | 0.3300 | 20 |
6 | F | 0.545 | 0.425 | 0.125 | 0.7680 | 0.2940 | 0.1495 | 0.2600 | 16 |
7 | M | 0.475 | 0.370 | 0.125 | 0.5095 | 0.2165 | 0.1125 | 0.1650 | 9 |
8 | F | 0.550 | 0.440 | 0.150 | 0.8945 | 0.3145 | 0.1510 | 0.3200 | 19 |
9 | F | 0.525 | 0.380 | 0.140 | 0.6065 | 0.1940 | 0.1475 | 0.2100 | 14 |
10 | M | 0.430 | 0.350 | 0.110 | 0.4060 | 0.1675 | 0.0810 | 0.1350 | 10 |
11 | M | 0.490 | 0.380 | 0.135 | 0.5415 | 0.2175 | 0.0950 | 0.1900 | 11 |
12 | F | 0.535 | 0.405 | 0.145 | 0.6845 | 0.2725 | 0.1710 | 0.2050 | 10 |
13 | F | 0.470 | 0.355 | 0.100 | 0.4755 | 0.1675 | 0.0805 | 0.1850 | 10 |
14 | M | 0.500 | 0.400 | 0.130 | 0.6645 | 0.2580 | 0.1330 | 0.2400 | 12 |
15 | I | 0.355 | 0.280 | 0.085 | 0.2905 | 0.0950 | 0.0395 | 0.1150 | 7 |
16 | F | 0.440 | 0.340 | 0.100 | 0.4510 | 0.1880 | 0.0870 | 0.1300 | 10 |
17 | M | 0.365 | 0.295 | 0.080 | 0.2555 | 0.0970 | 0.0430 | 0.1000 | 7 |
18 | M | 0.450 | 0.320 | 0.100 | 0.3810 | 0.1705 | 0.0750 | 0.1150 | 9 |
19 | M | 0.355 | 0.280 | 0.095 | 0.2455 | 0.0955 | 0.0620 | 0.0750 | 11 |
20 | I | 0.380 | 0.275 | 0.100 | 0.2255 | 0.0800 | 0.0490 | 0.0850 | 10 |
21 | F | 0.565 | 0.440 | 0.155 | 0.9395 | 0.4275 | 0.2140 | 0.2700 | 12 |
22 | F | 0.550 | 0.415 | 0.135 | 0.7635 | 0.3180 | 0.2100 | 0.2000 | 9 |
23 | F | 0.615 | 0.480 | 0.165 | 1.1615 | 0.5130 | 0.3010 | 0.3050 | 10 |
24 | F | 0.560 | 0.440 | 0.140 | 0.9285 | 0.3825 | 0.1880 | 0.3000 | 11 |
25 | F | 0.580 | 0.450 | 0.185 | 0.9955 | 0.3945 | 0.2720 | 0.2850 | 11 |
26 | M | 0.590 | 0.445 | 0.140 | 0.9310 | 0.3560 | 0.2340 | 0.2800 | 12 |
27 | M | 0.605 | 0.475 | 0.180 | 0.9365 | 0.3940 | 0.2190 | 0.2950 | 15 |
28 | M | 0.575 | 0.425 | 0.140 | 0.8635 | 0.3930 | 0.2270 | 0.2000 | 11 |
29 | M | 0.580 | 0.470 | 0.165 | 0.9975 | 0.3935 | 0.2420 | 0.3300 | 10 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4146 | M | 0.695 | 0.550 | 0.195 | 1.6645 | 0.7270 | 0.3600 | 0.4450 | 11 |
4147 | M | 0.770 | 0.605 | 0.175 | 2.0505 | 0.8005 | 0.5260 | 0.3550 | 11 |
4148 | I | 0.280 | 0.215 | 0.070 | 0.1240 | 0.0630 | 0.0215 | 0.0300 | 6 |
4149 | I | 0.330 | 0.230 | 0.080 | 0.1400 | 0.0565 | 0.0365 | 0.0460 | 7 |
4150 | I | 0.350 | 0.250 | 0.075 | 0.1695 | 0.0835 | 0.0355 | 0.0410 | 6 |
4151 | I | 0.370 | 0.280 | 0.090 | 0.2180 | 0.0995 | 0.0545 | 0.0615 | 7 |
4152 | I | 0.430 | 0.315 | 0.115 | 0.3840 | 0.1885 | 0.0715 | 0.1100 | 8 |
4153 | I | 0.435 | 0.330 | 0.095 | 0.3930 | 0.2190 | 0.0750 | 0.0885 | 6 |
4154 | I | 0.440 | 0.350 | 0.110 | 0.3805 | 0.1575 | 0.0895 | 0.1150 | 6 |
4155 | M | 0.475 | 0.370 | 0.110 | 0.4895 | 0.2185 | 0.1070 | 0.1460 | 8 |
4156 | M | 0.475 | 0.360 | 0.140 | 0.5135 | 0.2410 | 0.1045 | 0.1550 | 8 |
4157 | I | 0.480 | 0.355 | 0.110 | 0.4495 | 0.2010 | 0.0890 | 0.1400 | 8 |
4158 | F | 0.560 | 0.440 | 0.135 | 0.8025 | 0.3500 | 0.1615 | 0.2590 | 9 |
4159 | F | 0.585 | 0.475 | 0.165 | 1.0530 | 0.4580 | 0.2170 | 0.3000 | 11 |
4160 | F | 0.585 | 0.455 | 0.170 | 0.9945 | 0.4255 | 0.2630 | 0.2845 | 11 |
4161 | M | 0.385 | 0.255 | 0.100 | 0.3175 | 0.1370 | 0.0680 | 0.0920 | 8 |
4162 | I | 0.390 | 0.310 | 0.085 | 0.3440 | 0.1810 | 0.0695 | 0.0790 | 7 |
4163 | I | 0.390 | 0.290 | 0.100 | 0.2845 | 0.1255 | 0.0635 | 0.0810 | 7 |
4164 | I | 0.405 | 0.300 | 0.085 | 0.3035 | 0.1500 | 0.0505 | 0.0880 | 7 |
4165 | I | 0.475 | 0.365 | 0.115 | 0.4990 | 0.2320 | 0.0885 | 0.1560 | 10 |
4166 | M | 0.500 | 0.380 | 0.125 | 0.5770 | 0.2690 | 0.1265 | 0.1535 | 9 |
4167 | F | 0.515 | 0.400 | 0.125 | 0.6150 | 0.2865 | 0.1230 | 0.1765 | 8 |
4168 | M | 0.520 | 0.385 | 0.165 | 0.7910 | 0.3750 | 0.1800 | 0.1815 | 10 |
4169 | M | 0.550 | 0.430 | 0.130 | 0.8395 | 0.3155 | 0.1955 | 0.2405 | 10 |
4170 | M | 0.560 | 0.430 | 0.155 | 0.8675 | 0.4000 | 0.1720 | 0.2290 | 8 |
4171 | F | 0.565 | 0.450 | 0.165 | 0.8870 | 0.3700 | 0.2390 | 0.2490 | 11 |
4172 | M | 0.590 | 0.440 | 0.135 | 0.9660 | 0.4390 | 0.2145 | 0.2605 | 10 |
4173 | M | 0.600 | 0.475 | 0.205 | 1.1760 | 0.5255 | 0.2875 | 0.3080 | 9 |
4174 | F | 0.625 | 0.485 | 0.150 | 1.0945 | 0.5310 | 0.2610 | 0.2960 | 10 |
4175 | M | 0.710 | 0.555 | 0.195 | 1.9485 | 0.9455 | 0.3765 | 0.4950 | 12 |
4176 rows × 9 columns
# Give the nine columns descriptive names (in file order).
abalone.columns = ['Sex', 'Length','Diameter','Height','Whole weight','Shucked weight','Viscera weight','Shell weight','Rings']
# Write the labelled data back to disk. index=False keeps the automatically
# generated row numbers out of the file; otherwise they are written as an
# extra unnamed first column that reappears as 'Unnamed: 0' when the file is
# read again.
abalone.to_csv('abalone.csv', index=False)
# Show the first 4 observations.
abalone.head(4)
Sex | Length | Diameter | Height | Whole weight | Shucked weight | Viscera weight | Shell weight | Rings | |
---|---|---|---|---|---|---|---|---|---|
0 | M | 0.35 | 0.265 | 0.090 | 0.2255 | 0.0995 | 0.0485 | 0.070 | 7 |
1 | F | 0.53 | 0.420 | 0.135 | 0.6770 | 0.2565 | 0.1415 | 0.210 | 9 |
2 | M | 0.44 | 0.365 | 0.125 | 0.5160 | 0.2155 | 0.1140 | 0.155 | 10 |
3 | I | 0.33 | 0.255 | 0.080 | 0.2050 | 0.0895 | 0.0395 | 0.055 | 7 |
# Show the last 4 observations.
abalone.tail(n=4)
Sex | Length | Diameter | Height | Whole weight | Shucked weight | Viscera weight | Shell weight | Rings | |
---|---|---|---|---|---|---|---|---|---|
4172 | M | 0.590 | 0.440 | 0.135 | 0.9660 | 0.4390 | 0.2145 | 0.2605 | 10 |
4173 | M | 0.600 | 0.475 | 0.205 | 1.1760 | 0.5255 | 0.2875 | 0.3080 | 9 |
4174 | F | 0.625 | 0.485 | 0.150 | 1.0945 | 0.5310 | 0.2610 | 0.2960 | 10 |
4175 | M | 0.710 | 0.555 | 0.195 | 1.9485 | 0.9455 | 0.3765 | 0.4950 | 12 |
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the non-numeric 'Sex' column does not appear in the result.
abalone.describe()
Length | Diameter | Height | Whole weight | Shucked weight | Viscera weight | Shell weight | Rings | |
---|---|---|---|---|---|---|---|---|
count | 4176.000000 | 4176.000000 | 4176.000000 | 4176.000000 | 4176.00000 | 4176.000000 | 4176.000000 | 4176.000000 |
mean | 0.524009 | 0.407892 | 0.139527 | 0.828818 | 0.35940 | 0.180613 | 0.238852 | 9.932471 |
std | 0.120103 | 0.099250 | 0.041826 | 0.490424 | 0.22198 | 0.109620 | 0.139213 | 3.223601 |
min | 0.075000 | 0.055000 | 0.000000 | 0.002000 | 0.00100 | 0.000500 | 0.001500 | 1.000000 |
25% | 0.450000 | 0.350000 | 0.115000 | 0.441500 | 0.18600 | 0.093375 | 0.130000 | 8.000000 |
50% | 0.545000 | 0.425000 | 0.140000 | 0.799750 | 0.33600 | 0.171000 | 0.234000 | 9.000000 |
75% | 0.615000 | 0.480000 | 0.165000 | 1.153250 | 0.50200 | 0.253000 | 0.329000 | 11.000000 |
max | 0.815000 | 0.650000 | 1.130000 | 2.825500 | 1.48800 | 0.760000 | 1.005000 | 29.000000 |
# Pairwise covariance of the numeric columns.
# 'Sex' holds strings, so the numeric columns are selected explicitly: recent
# pandas versions no longer drop non-numeric columns silently in cov() and
# raise instead. select_dtypes gives the same result on old and new versions.
abalone.select_dtypes(include=['number']).cov()
Length | Diameter | Height | Whole weight | Shucked weight | Viscera weight | Shell weight | Rings | |
---|---|---|---|---|---|---|---|---|
Length | 0.014425 | 0.011763 | 0.004157 | 0.054499 | 0.023938 | 0.011889 | 0.015009 | 0.215697 |
Diameter | 0.011763 | 0.009850 | 0.003461 | 0.045046 | 0.019678 | 0.009789 | 0.012509 | 0.183968 |
Height | 0.004157 | 0.003461 | 0.001749 | 0.016804 | 0.007195 | 0.003660 | 0.004759 | 0.075251 |
Whole weight | 0.054499 | 0.045046 | 0.016804 | 0.240515 | 0.105533 | 0.051953 | 0.065225 | 0.854995 |
Shucked weight | 0.023938 | 0.019678 | 0.007195 | 0.105533 | 0.049275 | 0.022678 | 0.027275 | 0.301440 |
Viscera weight | 0.011889 | 0.009789 | 0.003660 | 0.051953 | 0.022678 | 0.012017 | 0.013851 | 0.178196 |
Shell weight | 0.015009 | 0.012509 | 0.004759 | 0.065225 | 0.027275 | 0.013851 | 0.019380 | 0.281839 |
Rings | 0.215697 | 0.183968 | 0.075251 | 0.854995 | 0.301440 | 0.178196 | 0.281839 | 10.391606 |
# Pairwise correlation coefficients for the numeric columns.
# 'Sex' holds strings, so the numeric columns are selected explicitly: recent
# pandas versions no longer drop non-numeric columns silently in corr() and
# raise instead. select_dtypes gives the same result on old and new versions.
abalone.select_dtypes(include=['number']).corr()
Length | Diameter | Height | Whole weight | Shucked weight | Viscera weight | Shell weight | Rings | |
---|---|---|---|---|---|---|---|---|
Length | 1.000000 | 0.986813 | 0.827552 | 0.925255 | 0.897905 | 0.903010 | 0.897697 | 0.557123 |
Diameter | 0.986813 | 1.000000 | 0.833705 | 0.925452 | 0.893159 | 0.899726 | 0.905328 | 0.575005 |
Height | 0.827552 | 0.833705 | 1.000000 | 0.819209 | 0.774957 | 0.798293 | 0.817326 | 0.558109 |
Whole weight | 0.925255 | 0.925452 | 0.819209 | 1.000000 | 0.969403 | 0.966372 | 0.955351 | 0.540818 |
Shucked weight | 0.897905 | 0.893159 | 0.774957 | 0.969403 | 1.000000 | 0.931956 | 0.882606 | 0.421256 |
Viscera weight | 0.903010 | 0.899726 | 0.798293 | 0.966372 | 0.931956 | 1.000000 | 0.907647 | 0.504274 |
Shell weight | 0.897697 | 0.905328 | 0.817326 | 0.955351 | 0.882606 | 0.907647 | 1.000000 | 0.628031 |
Rings | 0.557123 | 0.575005 | 0.558109 | 0.540818 | 0.421256 | 0.504274 | 0.628031 | 1.000000 |
# Distinct values of the 'Rings' column, in order of first appearance.
rings = abalone['Rings']
rings.unique()
array([ 7, 9, 10, 8, 20, 16, 19, 14, 11, 12, 15, 18, 13, 5, 4, 6, 21, 17, 22, 1, 3, 26, 23, 29, 2, 27, 25, 24], dtype=int64)
# Build abalone1 holding only the three size measurements.
size_columns = ['Length', 'Diameter', 'Height']
abalone1 = abalone[size_columns]
# Check the subset by looking at its first 3 rows.
abalone1.head(n=3)
Length | Diameter | Height | |
---|---|---|---|
0 | 0.35 | 0.265 | 0.090 |
1 | 0.53 | 0.420 | 0.135 |
2 | 0.44 | 0.365 | 0.125 |
# Check the subset by looking at its last 3 rows.
abalone1.tail(n=3)
Length | Diameter | Height | |
---|---|---|---|
4173 | 0.600 | 0.475 | 0.205 |
4174 | 0.625 | 0.485 | 0.150 |
4175 | 0.710 | 0.555 | 0.195 |