%pylab inline
Populating the interactive namespace from numpy and matplotlib
import pandas as pd
from sqlalchemy import create_engine
cnx = ################################
query = """SELECT * FROM salaries LIMIT 10"""
pd.read_sql_query(query, cnx).head()
yearid | teamid | lgid | playerid | salary | |
---|---|---|---|---|---|
0 | 1985 | ATL | NL | barkele01 | 870000 |
1 | 1985 | ATL | NL | bedrost01 | 550000 |
2 | 1985 | ATL | NL | benedbr01 | 545000 |
3 | 1985 | ATL | NL | campri01 | 633333 |
4 | 1985 | ATL | NL | ceronri01 | 625000 |
def show_columns(table, con):
from pandas import read_sql_table
return read_sql_table(table, con).columns
show_columns('salaries',cnx)
Index([u'yearid', u'teamid', u'lgid', u'playerid', u'salary'], dtype='object')
query = """select playerid, salary
from salaries
where yearid = 1985 and salary > 500000 """
pd.read_sql_query(query, cnx).head()
playerid | salary | |
---|---|---|
0 | barkele01 | 870000 |
1 | bedrost01 | 550000 |
2 | benedbr01 | 545000 |
3 | campri01 | 633333 |
4 | ceronri01 | 625000 |
query = """select yearid, teamid
from teams
where rank = 1
order by yearid """
pd.read_sql_query(query, cnx).head()
yearid | teamid | |
---|---|---|
0 | 1871 | PH1 |
1 | 1872 | BS1 |
2 | 1873 | BS1 |
3 | 1874 | BS1 |
4 | 1875 | BS1 |
query = """select count(schoolid)
from schools
where schoolstate = 'CT' """
pd.read_sql_query(query, cnx).head()
count | |
---|---|
0 | 15 |
query = """select schoolstate, count(schoolid)
from schools
group by schoolstate"""
pd.read_sql_query(query, cnx).head()
schoolstate | count | |
---|---|---|
0 | PA | 72 |
1 | AZ | 14 |
2 | FL | 58 |
3 | LA | 17 |
4 | NM | 7 |
query = """select yearid, teamid, sum(salary)
from salaries
group by yearid,teamid"""
pd.read_sql_query(query, cnx).head()
yearid | teamid | sum | |
---|---|---|---|
0 | 1985 | LAN | 10967917 |
1 | 1997 | SLN | 45456667 |
2 | 2008 | ATL | 102365683 |
3 | 2014 | DET | 152855500 |
4 | 2007 | CHN | 99670332 |
query = """select salaries.salary
from salaries inner join fielding on salaries.playerid = fielding.playerid
where salaries.yearid = 2012 and fielding.pos = 'SS'
"""
pd.read_sql_query(query, cnx).head()
salary | |
---|---|
0 | 1900000 |
1 | 1900000 |
2 | 1900000 |
3 | 1900000 |
4 | 1900000 |
query = """select playerid, min(yearid) as first_year, max(yearid) as last_year
from salaries
group by playerid
"""
pd.read_sql_query(query, cnx).head()
playerid | first_year | last_year | |
---|---|---|---|
0 | putkolu01 | 2014 | 2014 |
1 | bookech01 | 2006 | 2006 |
2 | wisede01 | 2000 | 2013 |
3 | contrjo01 | 2003 | 2012 |
4 | myersro02 | 1997 | 1998 |
query = """select playerid, count(distinct gameid) allstar from allstarfull
group by playerid
order by allstar desc
limit 1
"""
pd.read_sql_query(query, cnx).head()
playerid | allstar | |
---|---|---|
0 | aaronha01 | 25 |
query = """select schoolid, count(distinct playerid)
from collegeplaying
group by schoolid
order by count(distinct playerid) desc
"""
pd.read_sql_query(query, cnx).head()
schoolid | count | |
---|---|---|
0 | texas | 107 |
1 | usc | 105 |
2 | arizonast | 101 |
3 | stanford | 86 |
4 | michigan | 76 |
query = """select a.schoolid, b.salary
from collegeplaying a
inner join
(select playerid, salary, min(yearid)
from salaries
group by playerid, salary) b
on a.playerid = b.playerid
order by b.salary desc
"""
pd.read_sql_query(query, cnx).head(10)
schoolid | salary | |
---|---|---|
0 | msmerid | 25000000 |
1 | swmost | 25000000 |
2 | arkansas | 25000000 |
3 | swmost | 25000000 |
4 | msmerid | 25000000 |
5 | swmost | 25000000 |
6 | longbeach | 23428571 |
7 | longbeach | 23428571 |
8 | longbeach | 23428571 |
9 | gatech | 23125000 |
query = """select teamid, salary
from salaries
where(
select count(*) from salaries as s
where s.teamid = salaries.teamid and salaries.salary < s.salary) <=5 and yearid=2014
"""
pd.read_sql_query(query, cnx)
teamid | salary | |
---|---|---|
0 | BAL | 13000000 |
1 | BAL | 15000000 |
2 | CHA | 15000000 |
3 | CLE | 13500000 |
4 | CLE | 15000000 |
5 | DET | 22000000 |
6 | KCA | 13500000 |
7 | LAA | 23000000 |
8 | LAN | 21000000 |
9 | LAN | 26000000 |
10 | LAN | 21000000 |
11 | MIL | 16000000 |
12 | MIN | 23000000 |
13 | NYM | 9000000 |
14 | NYM | 3625000 |
15 | NYM | 13000000 |
16 | NYM | 5700000 |
17 | NYM | 5000000 |
18 | NYM | 3700000 |
19 | NYN | 20000000 |
20 | PHI | 22500000 |
21 | PHI | 25000000 |
22 | PHI | 25000000 |
23 | PIT | 13000000 |
24 | SDN | 10525000 |
25 | SEA | 24000000 |
26 | SEA | 22857000 |
27 | SFG | 11000000 |
28 | SFG | 17000000 |
29 | SFG | 10250000 |
30 | SFG | 14500000 |
31 | SFG | 16000000 |
32 | SFG | 12500000 |
33 | SFN | 20000000 |
34 | SLN | 17000000 |
35 | SLN | 19500000 |
36 | TBA | 9000000 |
37 | TBA | 14000000 |
38 | TEX | 17000000 |
39 | TEX | 24000000 |
40 | TOR | 16000000 |
41 | WAS | 20000000 |
42 | WAS | 14000000 |
query = """
select b.teamid, b.yearid, count(playerid) from salaries inner join
(select * from
(select teamid, yearid,
(AVG(salary) over (PARTITION BY yearid, teamid) + STDDEV(salary) over (PARTITION BY yearid, teamid)) threshold
from salaries) a
group by a.teamid, a.yearid, a.threshold) b
on salaries.teamid = b.teamid and salaries.yearid = b.yearid
where salaries.salary>b.threshold
group by b.teamid, b.yearid
"""
pd.read_sql_query(query, cnx).head()
teamid | yearid | count | |
---|---|---|---|
0 | SLN | 2009 | 3 |
1 | KCA | 1995 | 5 |
2 | ANA | 1999 | 6 |
3 | SEA | 2007 | 4 |
4 | PHI | 1991 | 6 |
query = """
select teamid, sum(cast(w AS numeric))/sum(cast(g AS numeric)) win_percentage from teams
group by teamid
"""
pd.read_sql_query(query, cnx)
teamid | win_percentage | |
---|---|---|
0 | NYA | 0.566587 |
1 | MLA | 0.345324 |
2 | SL1 | 0.210526 |
3 | BFN | 0.478659 |
4 | PTP | 0.468750 |
5 | HOU | 0.486883 |
6 | BLU | 0.547170 |
7 | CAL | 0.482404 |
8 | COL | 0.468589 |
9 | TBA | 0.462209 |
10 | KCN | 0.238095 |
11 | NY3 | 0.368421 |
12 | CL6 | 0.478469 |
13 | CHA | 0.501799 |
14 | MIL | 0.472575 |
15 | WS6 | 0.178571 |
16 | KCA | 0.482297 |
17 | HR1 | 0.503597 |
18 | KCF | 0.482085 |
19 | BR3 | 0.523627 |
20 | PHN | 0.233333 |
21 | WAS | 0.430915 |
22 | MLU | 0.666667 |
23 | CHN | 0.506994 |
24 | LAA | 0.535509 |
25 | BRF | 0.474194 |
26 | CHF | 0.554487 |
27 | LS2 | 0.465531 |
28 | IN1 | 0.380952 |
29 | CN2 | 0.573668 |
... | ... | ... |
119 | SEA | 0.467761 |
120 | MIA | 0.427984 |
121 | PRO | 0.604138 |
122 | BRP | 0.571429 |
123 | LS3 | 0.374107 |
124 | TRO | 0.518519 |
125 | IN3 | 0.366834 |
126 | SL2 | 0.557143 |
127 | WOR | 0.357143 |
128 | RC2 | 0.473684 |
129 | FW1 | 0.368421 |
130 | ELI | 0.086957 |
131 | PHA | 0.473152 |
132 | CL4 | 0.481409 |
133 | BR4 | 0.260000 |
134 | SE1 | 0.392638 |
135 | PHI | 0.470214 |
136 | LS1 | 0.500000 |
137 | WS7 | 0.190476 |
138 | CIN | 0.501420 |
139 | MON | 0.483164 |
140 | KC1 | 0.402427 |
141 | DTN | 0.484091 |
142 | WS2 | 0.417372 |
143 | OAK | 0.522047 |
144 | NH1 | 0.148936 |
145 | SL5 | 0.333333 |
146 | WS8 | 0.317121 |
147 | BLF | 0.417197 |
148 | SDN | 0.463701 |
149 rows × 2 columns
query = """ select a.teamid, a.yearid, a.spend_rank, teams.rank
from teams inner join
(select teamid, yearid, sum(salary) spend,
rank() over (PARTITION BY yearid ORDER BY sum(salary) DESC) spend_rank
from salaries
group by teamid, yearid) a
on a.teamid=teams.teamid and a.yearid=teams.yearid
"""
pd.read_sql_query(query, cnx)
teamid | yearid | spend_rank | rank | |
---|---|---|---|---|
0 | ATL | 1985 | 1 | 5 |
1 | CAL | 1985 | 2 | 2 |
2 | NYA | 1985 | 3 | 2 |
3 | CHN | 1985 | 4 | 4 |
4 | SLN | 1985 | 5 | 1 |
5 | BAL | 1985 | 6 | 4 |
6 | ML4 | 1985 | 7 | 6 |
7 | SDN | 1985 | 8 | 3 |
8 | LAN | 1985 | 9 | 1 |
9 | BOS | 1985 | 10 | 5 |
10 | NYN | 1985 | 11 | 2 |
11 | DET | 1985 | 12 | 3 |
12 | PHI | 1985 | 13 | 5 |
13 | HOU | 1985 | 14 | 3 |
14 | CHA | 1985 | 15 | 3 |
15 | MON | 1985 | 16 | 3 |
16 | KCA | 1985 | 17 | 1 |
17 | PIT | 1985 | 18 | 6 |
18 | OAK | 1985 | 19 | 4 |
19 | TOR | 1985 | 20 | 1 |
20 | CIN | 1985 | 21 | 2 |
21 | SFN | 1985 | 22 | 6 |
22 | TEX | 1985 | 23 | 7 |
23 | CLE | 1985 | 24 | 7 |
24 | MIN | 1985 | 25 | 4 |
25 | SEA | 1985 | 26 | 6 |
26 | NYA | 1986 | 1 | 2 |
27 | CHN | 1986 | 2 | 5 |
28 | ATL | 1986 | 3 | 6 |
29 | NYN | 1986 | 4 | 1 |
... | ... | ... | ... | ... |
828 | LAN | 2014 | 1 | 1 |
829 | NYA | 2014 | 2 | 2 |
830 | PHI | 2014 | 3 | 5 |
831 | DET | 2014 | 4 | 1 |
832 | BOS | 2014 | 6 | 5 |
833 | WAS | 2014 | 7 | 1 |
834 | LAA | 2014 | 8 | 1 |
835 | SLN | 2014 | 9 | 1 |
836 | TEX | 2014 | 10 | 5 |
837 | TOR | 2014 | 11 | 3 |
838 | CIN | 2014 | 12 | 4 |
839 | BAL | 2014 | 13 | 1 |
840 | MIL | 2014 | 14 | 3 |
841 | ARI | 2014 | 15 | 5 |
842 | ATL | 2014 | 16 | 2 |
843 | COL | 2014 | 17 | 4 |
844 | SEA | 2014 | 18 | 3 |
845 | MIN | 2014 | 19 | 5 |
846 | CLE | 2014 | 20 | 3 |
847 | CHA | 2014 | 21 | 4 |
848 | PIT | 2014 | 22 | 2 |
849 | SDN | 2014 | 23 | 3 |
850 | KCA | 2014 | 24 | 2 |
851 | TBA | 2014 | 25 | 4 |
852 | OAK | 2014 | 26 | 2 |
853 | CHN | 2014 | 27 | 5 |
854 | MIA | 2014 | 29 | 4 |
855 | HOU | 2014 | 30 | 4 |
856 | NYN | 2014 | 31 | 3 |
857 | SFN | 2014 | 32 | 2 |
858 rows × 4 columns