#This is a code cell - we can write programme fragments in code cells and then execute them, one line at a time
#I'm going to load in the pandas programming library, and call it pd.
#Naming it pd is a convention, and, as you will see, makes working with the pandas tools more convenient, in typing terms!
import pandas as pd

#In a code cell, lines starting with a # are comments that are not executed as code

#pd.read_excel() loads an Excel file into a data table, given the location and name of the file
#The pd. prefix tells the programme that read_excel lives in the pandas library we imported under the name pd
#skiprows sets how many of the leading rows in the file should be skipped
#The = after skiprows assigns it the value 7, so the first 7 rows of the file are ignored
#A variable is like a named container we can set to a particular value (and change the value of, if required)
df = pd.read_excel(
    'data/HSCA Active Locations September.xlsx',
    skiprows=7,
)

#Variable assignments have the form:
# variable_name_on_the_left = value_the_variable_takes_on_the_right

#Note that the output of the pd.read_excel() command was itself assigned to a variable, df.
#If the last line of a code cell is just the name of a variable, its contents will be displayed
#In the following case, .head(3) after the variable name limits the display to the first 3 rows
df.head(3)

#Select every row (:) of one named column with .loc, then apply an operation to that column - in this case "sum"
df.loc[:, 'Care homes beds'].sum()

#The same column selection, this time applying "mean" to get the average value
df.loc[:, 'Care homes beds'].mean()

#Number of locations - .shape holds (number of rows, number of columns), so .shape[0] is the row count of the data table
df.shape[0]

#Create a new variable, called careHomes, that just contains rows where the "Care home?" column value is Y
#The comparison inside the [] marks each row as True or False, and .loc keeps only the True rows
careHomes = df.loc[df['Care home?'] == "Y"]
#.shape[0] is the number of rows, that is, how many locations are care homes
careHomes.shape[0]

#Now find the mean number of care home beds, using only the locations that are care homes
careHomes.loc[:, 'Care homes beds'].mean()