#!/usr/bin/env python # coding: utf-8 #
# #
# # Exploratory Computing with Python
# *Developed by Mark Bakker*

# ## Notebook 3: `for` loops and `if/else` statements

# As we will again be using `numpy` and `matplotlib`, we start by importing them.

# In[1]:


# IPython magic kept from the notebook export; `get_ipython` is provided by
# IPython/Jupyter, so this script is meant to be run in such a session.
get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import matplotlib.pyplot as plt


# ### The `for` loop
# Loops are used to execute a command repeatedly. The syntax for a loop is as follows

# In[2]:


for i in [0, 1, 2, 3, 4]:
    print('Hello world, the value of i is', i)


# In the code above, the variable `i` loops through the five values in the list
# `[0, 1, 2, 3, 4]`. The first time through, the value of `i` is equal to `0`, the
# second time through, its value is `1`, and so on till the last time when its value
# is `4`. Note the syntax of a `for` loop: At the end of the `for` statement you need
# to put a colon (`:`) and after that you need to indent. It doesn't matter how many
# spaces you indent, as long as you keep using the same number of spaces for the entire
# `for` loop. Jupyter Notebooks automatically indent 4 spaces, which is considered good
# Python style, so use that. You can have as many lines of code inside the `for` loop
# as you want. To end the `for` loop, simply stop indenting.

# In[3]:


for x in [0, 1, 2, 3]:
    xsquared = x ** 2
    print('x, xsquare', x, xsquared)
# No longer indented, so this line runs once, after the loop has finished.
print('We are done with the loop')


# The list of values to loop through can be anything. It doesn't even have to be
# numbers. The `for` loop simply goes through all the values in the list one by one:

# In[4]:


for data in [20, 'mark', np.sqrt(10)]:
    print('the value of data is:', data)


# It is, of course, rather inconvenient to have to specify a list to loop through when
# the list is very long. For example, if you want to do something 100 times, you don't
# want to type a list of values from 0 up to 100. But Python has a convenient function
# for that called `range`. You can loop through a `range` just like you can loop
# through a list.
# To loop 10 times, starting with the value `0`:

# In[5]:


for i in range(10):
    print('the value of i is:', i)


# A `range` can be converted to a list with the `list` function (but we will not use
# that option very often). You can call `range` with just one argument, in which case
# it will generate a range from 0 up to but not including the specified number. Note
# that `range(10)` produces 10 numbers from 0 up to and including 9. You can optionally
# give a starting value and a step, similar to the `np.arange` function.

# In[6]:


print('a range with 10 values:', list(range(10)))
print('a range from 10 till 20', list(range(10, 20)))
print('a range from 10 till 20 with steps of 2:', list(range(10, 20, 2)))


# A loop can be used to fill an array. Let's compute $y=\cos(x)$ where $x$ is an array
# that varies from 0 to $2\pi$ with 100 points. We already know, of course, that this
# can be done with the statement `y = np.cos(x)`. Sometimes this is not possible,
# however, and we need to fill an array with a loop. First we have to create the array
# `y` (for example filled with zeros using the `zeros_like` function) and then fill it
# with the correct values by looping through all values of `x`, so that the index goes
# from `0` to the length of the `x` array. The counter in the loop (the variable `i` in
# the code below) is used as the index of the array that is filled.

# In[7]:


x = np.linspace(0, 2 * np.pi, 100)
y = np.zeros_like(x)  # similar to zeros(shape(x))
for i in range(len(x)):
    y[i] = np.cos(x[i])
plt.plot(x, y);


# Loops are very useful constructs in a programming script. Whenever you need to do a
# computation multiple times you should automatically think: *loop!*.

# ### Exercise 1. First `for` loop
# Create a list with the names of the months. Create a second list with the number of
# days in each month (for a regular year).
# Create a `for` loop that prints:
#
# `The number of days in MONTH is XX days`
#
# where, of course, you print the correct name of the month for `MONTH` and the correct
# number of days for `XX`. Use f-strings.

# In[ ]:


# Answer for Exercise 1

# ### The `if` statement
# An `if` statement lets you perform a task only when the outcome of the `if` statement
# is true. For example

# In[8]:


data = 4
print('starting value:', data)
if data < 6:
    print('changing data in the first if-statement')
    data = data + 2
print('value after the first if-statement:', data)
if data > 20:
    print('changing data in the second if-statement')
    data = 200
# data hasn't changed as data is not larger than 20
print('value after the second if-statement:', data)


# Note the syntax of the `if` statement: It starts with `if` followed by a statement
# that is either `True` or `False` and then a colon. After the colon, you need to
# indent and the entire indented code block (in this case 2 lines of code) is executed
# if the statement is `True`. The `if` statement is completed when you stop indenting.
# Recall from Notebook 2 that you can use larger than `>`, larger than or equal `>=`,
# equal `==`, smaller than or equal `<=`, smaller than `<` or not equal `!=`.

# ### The `if`/`else` statement
# The `if` statement may be followed by an `else` statement, which is executed when the
# condition after `if` is `False`. For example

# In[9]:


a = 4
if a < 3:
    print('a is smaller than 3')
else:
    print('a is not smaller than 3')


# You can even extend the `else` by adding one or more conditions with the `elif`
# command which is short for 'else if'

# In[10]:


a = 4
if a < 4:
    print('a is smaller than 4')
elif a > 4:
    print('a is larger than 4')
else:
    print('a is equal to 4')


# Rather than specifying the value of a variable at the top of the code cell, you can
# ask the user to enter a value and store that value in the variable using the `input`
# function.
# The `input` function returns a string that can be converted into a number with the
# `float` function. Run the code cell below and test that it works when the entered
# value is larger than 4, smaller than 4, or equal to 4.

# In[11]:


for i in range(3):  # do this 3 times
    a = float(input('Enter a value: '))
    if a < 4:
        print('the entered value is smaller than 4')
    elif a > 4:
        print('the entered value is larger than 4')
    else:
        print('the entered value is equal to 4')


# ### Exercise 2. Combination of `for` loop with `if` statement
# Consider the function
#
# $\begin{split}
# y &= \cos(x) \qquad \text{for} \qquad x < 0 \\
# y &= \exp(-x) \qquad \text{for} \qquad x \ge 0 \\
# \end{split}$
#
# Compute $y$ for $x$ going from $-2\pi$ to $2\pi$ with 100 points and make a graph.
# Make sure the limits of the $x$-axis are from $-2\pi$ to $2\pi$.

# In[ ]:


# Answer for Exercise 2

# ### Exercise 3. Load and loop through temperature data
# Load the temperature data for Holland from the data file `holland_temperature.dat`.
# Loop through all monthly temperatures and print a message that includes the month
# number and states whether the monthly average temperature is above or below
# 10 degrees.

# In[ ]:


# Answer for Exercise 3

# ### Looping and summation
# One application of a loop is to compute the sum of all the values in an array.
# Consider, for example, the array `data` with 8 values. We will compute the sum of all
# values in `data`. We first define a variable `datasum` and assign it the initial
# value 0. Next, we loop through all the values in `data` and add each value to
# `datasum`:

# In[12]:


data = np.array([1, 3, 2, 5, 7, 3, 4, 2])
datasum = 0
for i in range(len(data)):
    datasum = datasum + data[i]
    # Printed inside the loop so the intermediate sums are visible.
    print('i, datasum: ', i, datasum)
print('total sum of data: ', datasum)


# Note that the statement
#
# `datasum = datasum + data[i]`
#
# means that `data[i]` is added to the current value of `datasum` and that the result
# is assigned to `datasum`.
# There is actually a shorter syntax for the same statement:
#
# `datasum += data[i]`
#
# The `+=` command means: add whatever is on the right side of the `+=` sign to
# whatever is on the left side. You can use whichever syntax you are most comfortable
# with (although `+=` is considered to be better and in some cases more efficient).

# ### Exercise 4. Running total
# For the data of the previous example, compute the running total and store it in an
# array using a loop. Hence, the result should be an array with the same length as
# `data` where item `i` is the sum of all values in the array `data` up to and
# including `data[i]`. Print both the array `data` and the array with the running total
# to the screen. Finally, check your answer by using the `cumsum` function of `numpy`,
# which should give the same answer as your loop.

# In[ ]:


# Answer for Exercise 4

# ### Finding the maximum value the hard way
# Next, let's find the maximum in the array `data` and the index of the maximum value.
# For illustration purposes, we will do this the hard way by using a loop and an if
# statement. First, we create a variable `maxvalue` that contains the maximum value and
# set it initially to a very small (large negative) number, and we create a variable
# `maxindex` that is the index of the maximum value and is initially set to `None`.
# Next we loop through all values in `data` and update the `maxvalue` and `maxindex`
# every time we find a larger value than the current `maxvalue`

# In[13]:


# `data` is the array defined in the summation example (In[12]).
maxvalue = -1e8  # starting value that any entry of `data` exceeds
maxindex = None
for i in range(len(data)):
    if data[i] > maxvalue:
        maxvalue = data[i]
        maxindex = i
print('the maximum value is ', maxvalue)
print('the index of the maximum value is ', maxindex)


# For this example, it is easy to check whether these numbers are correct by looking at
# the `data` array, but that becomes more difficult when the `data` array is large.
# There are, of course, functions available in the `numpy` package to find the maximum
# value and the index of the maximum value: `np.max` returns the maximum value of an
# array, and `np.argmax` returns the index of the maximum of the array. There are
# similar functions for the minimum value.

# In[14]:


print('the maximum value is ', np.max(data))
print('the index of the maximum value is ', np.argmax(data))


# ### Exercise 5. Month closest to 15 degrees
# Find the month in which the average monthly temperature in Holland is closest to 15
# degrees (use the data from Exercise 3). Apply the approach described above by looping
# through all values and use an `if` statement inside the loop. You may also want to
# use the `abs` function to compute the absolute value of a number. Check your answer
# by using a few `numpy` methods that don't require a loop and an `if` statement.

# In[ ]:


# Answer for Exercise 5

# ### Nested loops
# It is also possible to have loops inside loops. These are called nested loops. For
# example, consider the array `data` with 3 rows and 4 columns shown below. We want to
# compute the sum of the values in each row (so we sum the columns) and we are going to
# do this using a double loop. First, we make an array of zeros called `rowtotal` of
# length 3 (one value for each row of the array `data`). Next, we loop through each
# row. For each row inside the loop, we start another loop that goes through all the
# columns and adds the value to the array `rowtotal` for that row.

# In[15]:


data = np.array([[1, 2, 3, 5],
                 [4, 8, 6, 4],
                 [3, 5, 4, 6]])
rowtotal = np.zeros(3)
for irow in range(3):
    for jcol in range(4):
        rowtotal[irow] += data[irow, jcol]
        #longer alternative:
        #rowtotal[irow] = rowtotal[irow] + data[irow, jcol]
print(rowtotal)


# After running the code above, first make sure that the answer is correct. Next, note
# that it is important to set the values of `rowtotal` to 0 before starting the loops,
# as we add to these values to compute the sum of each row.
# In the code, we use two loops, so we indented twice.
#
# `numpy` has a `sum` function that can compute the sum of an entire array, or the sum
# along one of the axes (for example along the rows or columns) by specifying the
# `axis` keyword.

# In[16]:


print('sum of entire array:', np.sum(data))
print('sum rows (axis=0):', np.sum(data, axis=0))
print('sum columns (axis=1):', np.sum(data, axis=1))


# ### `break` and `while`
# A common task is to find the position of a value in a sorted table (e.g., a list or
# array).
# For example, determine between which two numbers the number 6 falls in the ordered
# sequence `[1, 4, 5, 8, 9]`.
# I know, it is between `5` and `8`, but what if the list is long?
# To find the position in the list, we need to loop through the list and break out of
# the loop once we have found the position. For this, Python has the command `break`.

# In[17]:


x = [1, 4, 5, 8, 9]
a = 6
for i in range(len(x)):
    if a < x[i]:
        break
# After the loop, `i` still holds the index at which the loop was left.
print('a is between', x[i-1], 'and', x[i])


# There is another way to code this using a `while` loop as shown below

# In[18]:


x = [1, 4, 5, 8, 9]
a = 6
i = 0
while a >= x[i]:
    i = i + 1
print('a is between', x[i-1], 'and', x[i])


# In the `while` loop, the comparison is done at the beginning of the loop, while the
# counter (in this case `i`) is updated inside the loop. Either a loop with a `break`
# or a `while` loop with a counter works fine, but `while` loops may be tricky in some
# cases, as they can result in infinite loops when you have an error in your code. Once
# you are in an infinite loop (one that never stops), click on the [Kernel] menu item
# at the top of the window and select [Interrupt Kernel] or [Restart Kernel].
# Restarting the kernel will end your Python session and start a new one. When you
# print something to the screen in your `while` loop, it may not be possible to break
# out of the loop and you may need to end your Jupyter session (and potentially lose
# some of your work).
# Because of these problems with errors in `while` loops, it is recommended to use a
# loop with a break rather than a while loop when possible.

# ### Exercise 6. Oil data
# The file `oil_price.dat` contains the monthly oil price since 1985. The file contains
# three columns: year, month, price in Euros (from the European bank website). Make a
# plot of the oil price (put numbers along the horizontal axis; we will learn how to do
# dates in another notebook) and determine the month and year the oil price first rose
# above 40 euros, above 60 euros, and above 80 euros. You need to write to the screen
# something like `The oil price exceeds 40 euros for the first time in month xx of year
# yyyy` where `xx` and `yyyy` are the correct month and year. Use a double loop. Can
# you modify the code such that it doesn't print the number of the month but the name
# of the month?

# In[ ]:


# Answer for Exercise 6

# ### Strings
# Strings are a very versatile data type, and we can easily spend an entire Notebook on
# strings.
# We will only make limited use of strings, so we keep treatment here to a minimum. We
# already used strings to specify names along axes of a graph, or in the legend of a
# graph. Strings are like arrays or lists in that every character in a string has an
# index. Each character or range of characters can be accessed, but they cannot be
# changed (so they are more like tuples than arrays or lists). A string also has a
# length.

# In[19]:


text1 = 'Goodmorning everybody'
print(len(text1))
print(text1[0])
print(text1[0:12])


# When you add two strings, they are put back to back, just like lists. When you want
# to combine text with a variable, you first need to change the variable to a string
# and then add the two strings:

# In[20]:


text1 = 'Goodmorning everybody'
newtext = text1 + ' in the class'  # Adding two strings
print(newtext)
a = 7
mes = 'The magic number is ' + str(a)  # Combining strings and numbers
print(mes)


# Comparisons work on strings just like they work on numbers.
# The comparison starts with the first character in a string and only goes to the next
# character when the first characters of both strings are equal. The letter 'a' is
# smaller than 'b', 'b' is smaller than 'c', etc. But be careful, in the order of
# things, the upper case characters are smaller than all lower case characters! So 'A'
# is smaller than 'a', but also smaller than 'm' or any other lower case character.
# Make sure you understand the following statements

# In[21]:


print('delft' < 'eindhoven')  # True as 'd' is smaller than 'e'
print('dalft' < 'delft')      # True as 'a' is smaller than 'e'
print('delft' == 'Delft')     # False as lower and upper case are not equal
print('Delft' < 'delft')      # True as 'D' is smaller than 'd'
print('delft' > 'Eindhoven')  # Also True, as 'd' is larger than 'E' or any other upper case character


# Strings can be converted to upper or lower case

# In[22]:


TU = 'TU Delft'
print(TU)
print(TU.lower())
print(TU.upper())


# A string consisting of multiple words can be converted into a list of words using
# `split`

# In[23]:


sentence = 'This is a sentence containing a number of words'
print('This is the sentence:', sentence)
wordlist = sentence.split()
print('This is the split sentence:', wordlist)
print('All words may be printed seperately:')
for word in wordlist:
    print(word)


# ### Exercise 7. Find the position of your name in a list
# Use a loop to figure out between which two names your name falls in the list
# `['Aaldrich', 'Babette', 'Chris', 'Franka', 'Joe', 'Louisa', 'Pierre', 'Simone', 'Tarek', 'Yvonne', 'Zu']`
# and print the result to the screen.
# In[ ]:


# Answer for Exercise 7

# ### Answers to the exercises

# Answer to Exercise 1

# In[24]:


months = ['January', 'February', 'March', 'April',
          'May', 'June', 'July', 'August', 'September',
          'October', 'November', 'December']
days = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
for i in range(len(months)):
    # The exercise asks for the sentence to end with the word "days".
    print(f'The number of days in {months[i]} is {days[i]} days')


# Back to Exercise 1
#
# Answer to Exercise 2

# In[25]:


x = np.linspace(-2 * np.pi, 2 * np.pi, 100)
y = np.zeros_like(x)
# Loop over the actual length of x rather than a hard-coded 100.
for i in range(len(x)):
    if x[i] < 0:
        y[i] = np.cos(x[i])
    else:
        y[i] = np.exp(-x[i])
plt.plot(x, y)
plt.xlim(-2 * np.pi, 2 * np.pi);


# Back to Exercise 2
#
# Answer to Exercise 3

# In[26]:


temperature = np.loadtxt('holland_temperature.dat')
for i in range(len(temperature)):
    # i is a zero-based index, so the month number is i + 1
    if temperature[i] < 10:
        print('average monthly temperature in month ', i + 1, ' is less than 10 degrees')
    else:
        print('average monthly temperature in month ', i + 1, ' is more than 10 degrees')


# Back to Exercise 3
#
# Answer to Exercise 4

# In[27]:


data = np.array([1, 3, 2, 5, 7, 3, 4, 2])
runningtotal = np.zeros_like(data)
runningtotal[0] = data[0]
# Start at 1: every entry is the previous running total plus the current value.
for i in range(1, len(data)):
    runningtotal[i] = runningtotal[i-1] + data[i]
print('data values:', data)
print('running total:', runningtotal)
print('running total with numpy:', np.cumsum(data))


# Back to Exercise 4
#
# Answer to Exercise 5

# In[28]:


temperature = np.loadtxt('holland_temperature.dat')
print(temperature)
monthindex = -1
tdiff = 100.0  # starting difference, replaced as soon as a smaller one is found
for i in range(len(temperature)):
    if abs(temperature[i] - 15) < tdiff:
        monthindex = i
        tdiff = abs(temperature[i] - 15)
print('Number of month closest to 15 degrees, temp: ', monthindex + 1, temperature[monthindex])
print('Alternative method:')
altmin = np.argmin(abs(temperature - 15))
print('Number of month closest to 15 degrees, temp: ', altmin + 1, temperature[altmin])


# Back to Exercise 5
#
# Answer to Exercise 6

# In[29]:


# NOTE(review): the exercise text refers to `oil_price.dat`, while this answer loads
# `oil_price_monthly.dat` -- confirm which file name is correct.
oilprice = np.loadtxt('oil_price_monthly.dat', delimiter=',')
plt.plot(oilprice[:,2], 'b-')
nrow, ncol = oilprice.shape
months = ['January', 'February', 'March', 'April',
          'May', 'June', 'July', 'August', 'September',
          'October', 'November', 'December']
for price in [40, 60, 80]:
    for i in range(nrow):
        if oilprice[i, 2] > price:
            # Assumes the month column holds calendar month numbers 1-12 (TODO confirm
            # against the data file); subtract 1 to get the zero-based list index.
            print(f'The oil price exceeds {price} euros for the first time in',
                  f'{months[int(oilprice[i, 1]) - 1]} of {oilprice[i, 0]:.0f}')
            break  # only the first month above this price is reported


# Back to Exercise 6
#
# Answer to Exercise 7

# In[30]:


x = ['Aaldrich', 'Babette', 'Chris', 'Franka', 'Joe', 'Louisa', 'Pierre', 'Simone', 'Tarek', 'Yvonne', 'Zu']
myname = 'Guido'
for i in range(len(x)):
    if myname < x[i]:
        break
print(myname, 'is between', x[i-1], 'and', x[i])


# Back to Exercise 7