A description of the data set is available at http://www.transtats.bts.gov/OT_Delay/OT_DelayCause1.asp?pn=1
%matplotlib inline
import numpy as np
import pandas as pd
# Read the airline delay-causes CSV into a DataFrame; the path is relative
# to the directory the notebook is run from.
airline_data_frame = pd.read_csv(
    '../../data/AirlineOntime/289478468_62014_5927_airline_delay_causes.csv'
)
# Preview the first ten rows.
airline_data_frame.head(10)
year | month | carrier | carrier_name | airport | airport_name | arr_flights | arr_del15 | carrier_ct | weather_ct | ... | late_aircraft_ct | arr_cancelled | arr_diverted | arr_delay | carrier_delay | weather_delay | nas_delay | security_delay | late_aircraft_delay | Unnamed: 21 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2003 | 6 | AA | American Airlines Inc. | ABQ | Albuquerque, NM: Albuquerque International Sun... | 307 | 56 | 14.68 | 10.79 | ... | 9.96 | 1 | 1 | 2530 | 510 | 621 | 676 | 25 | 698 | NaN |
1 | 2003 | 6 | AA | American Airlines Inc. | ANC | Anchorage, AK: Ted Stevens Anchorage Internati... | 90 | 27 | 7.09 | 2.00 | ... | 7.16 | 0 | 0 | 1390 | 271 | 83 | 581 | 0 | 455 | NaN |
2 | 2003 | 6 | AA | American Airlines Inc. | ATL | Atlanta, GA: Hartsfield-Jackson Atlanta Intern... | 752 | 186 | 33.99 | 27.82 | ... | 17.53 | 5 | 0 | 8314 | 1367 | 1722 | 3817 | 139 | 1269 | NaN |
3 | 2003 | 6 | AA | American Airlines Inc. | AUS | Austin, TX: Austin - Bergstrom International | 842 | 174 | 60.24 | 20.54 | ... | 40.75 | 9 | 1 | 8344 | 3040 | 1032 | 1835 | 115 | 2322 | NaN |
4 | 2003 | 6 | AA | American Airlines Inc. | BDL | Hartford, CT: Bradley International | 383 | 55 | 14.90 | 8.91 | ... | 16.61 | 0 | 0 | 3137 | 815 | 574 | 555 | 0 | 1193 | NaN |
5 | 2003 | 6 | AA | American Airlines Inc. | BHM | Birmingham, AL: Birmingham-Shuttlesworth Inter... | 89 | 12 | 2.79 | 2.19 | ... | 3.82 | 0 | 0 | 673 | 94 | 90 | 50 | 35 | 404 | NaN |
6 | 2003 | 6 | AA | American Airlines Inc. | BNA | Nashville, TN: Nashville International | 445 | 82 | 25.44 | 11.98 | ... | 26.65 | 2 | 0 | 4663 | 1217 | 913 | 800 | 1 | 1732 | NaN |
7 | 2003 | 6 | AA | American Airlines Inc. | BOS | Boston, MA: Logan International | 1266 | 225 | 69.43 | 23.66 | ... | 45.73 | 7 | 0 | 12139 | 4201 | 1783 | 3067 | 45 | 3043 | NaN |
8 | 2003 | 6 | AA | American Airlines Inc. | BUR | Burbank, CA: Bob Hope | 119 | 27 | 7.49 | 4.65 | ... | 7.04 | 1 | 0 | 1187 | 326 | 191 | 285 | 0 | 385 | NaN |
9 | 2003 | 6 | AA | American Airlines Inc. | BWI | Baltimore, MD: Baltimore/Washington Internatio... | 593 | 101 | 17.56 | 20.49 | ... | 24.69 | 1 | 1 | 5698 | 1058 | 1332 | 1708 | 0 | 1600 | NaN |
10 rows × 22 columns
# Column totals over every row: all arrivals, and the arrivals counted in
# `arr_del15` (reported below as delayed by at least 15 minutes).
n_arrived_flights, n_delayed_flights = airline_data_frame[['arr_flights', 'arr_del15']].sum()
# float() forces true division even if both totals come back as integers
# (Python 2 would otherwise floor the ratio to 0); int() truncates the
# percentage to a whole number for display.
percent_delayed = int(float(n_delayed_flights) / n_arrived_flights * 100)
# print(...) with a single argument parses on both Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("About %d%% of flights are delay by at least 15 minutes." % percent_delayed)
About 20% of flights are delay by at least 15 minutes.
# Total arrivals and delayed arrivals per (carrier code, carrier name) pair.
# Grouping on both keys keeps a code that appears under several names as
# separate rows.
carrier_arrived_vs_delay = (
    airline_data_frame
    .groupby(['carrier', 'carrier_name'])[['arr_flights', 'arr_del15']]
    .sum()
)
# Preview the first ten groups.
carrier_arrived_vs_delay.head(10)
arr_flights | arr_del15 | ||
---|---|---|---|
carrier | carrier_name | ||
9E | Endeavor Air Inc. | 119950 | 17163 |
Pinnacle Airlines Inc. | 1222147 | 220024 | |
AA | American Airlines Inc. | 6650779 | 1454411 |
AQ | Aloha Airlines Inc. | 89547 | 6374 |
AS | Alaska Airlines Inc. | 1691562 | 306527 |
B6 | JetBlue Airways | 1991036 | 465940 |
CO | Continental Air Lines Inc. | 2447164 | 537252 |
DH | Atlantic Coast Airlines | 374247 | 79078 |
Independence Air | 196436 | 38823 | |
DL | Delta Air Lines Inc. | 6924551 | 1296691 |
# Fraction of each carrier's arrivals that were delayed (arr_del15 / arr_flights).
carrier_arrived_vs_delay['ratio'] = (
    carrier_arrived_vs_delay['arr_del15'] / carrier_arrived_vs_delay['arr_flights']
)
# Show the ten carriers with the highest delay ratio. DataFrame.sort() was
# deprecated in pandas 0.17 and removed in 0.20; sort_values() is the
# replacement and likewise returns a new, sorted frame.
carrier_arrived_vs_delay.sort_values('ratio', ascending=False).head(10)
arr_flights | arr_del15 | ratio | ||
---|---|---|---|---|
carrier | carrier_name | |||
MQ | Envoy Air | 102515 | 25980 | 0.253426 |
EV | Atlantic Southeast Airlines | 2526762 | 618201 | 0.244661 |
B6 | JetBlue Airways | 1991036 | 465940 | 0.234019 |
EV | ExpressJet Airlines Inc. | 1839993 | 425655 | 0.231335 |
OH | Comair Inc. | 1765828 | 394146 | 0.223207 |
NW | Northwest Airlines Inc. | 2767549 | 610029 | 0.220422 |
CO | Continental Air Lines Inc. | 2447164 | 537252 | 0.219541 |
AA | American Airlines Inc. | 6650779 | 1454411 | 0.218683 |
XE | ExpressJet Airlines Inc. | 2145095 | 468323 | 0.218323 |
MQ | American Eagle Airlines Inc. | 5184465 | 1129165 | 0.217798 |