import re  # the regex module in the Python standard library

# Strings to be searched for matching regex patterns.
str1 = "Aardvarks belong to the Captain"
str2 = "Albert's famous equation, E = mc^2."
str3 = "Located at 455 Serra Mall."
str4 = "Beware of the shape-shifters!"
test_strings = [str1, str2, str3, str4]  # a list of the strings above

# re.search returns the first match only (or None when nothing matches).
for test_string in test_strings:
    print('The test string is "' + test_string + '"')
    match = re.search(r'[A-Z]', test_string)
    if match:
        print('The first possible match is: ' + match.group())
    else:
        print('no match.')

# re.findall returns every non-overlapping match as a list.
for string in test_strings:
    print(re.findall(r'[A-Z]', string))

# Character classes: explicit sets, negated sets, ranges and literal hyphens.
patterns = [
    re.compile(r'[ABC]'),
    re.compile(r'[^ABC]'),
    re.compile(r'[ABC^]'),
    re.compile(r'[0123456789]'),
    re.compile(r'[0-9]'),
    re.compile(r'[0-4]'),
    re.compile(r'[A-Z]'),
    re.compile(r'[A-Za-z]'),
    re.compile(r'[A-Za-z0-9]'),
    re.compile(r'[-a-z]'),
    re.compile(r'[- a-z]'),
]


def find_match(pattern, string):
    """Return the text of the first match of ``pattern`` in ``string``.

    ``pattern`` may be a compiled pattern or a pattern string; it is passed
    straight to ``re.search``.  When nothing matches, the literal string
    'no match.' is returned instead, so the result can always be
    concatenated into a message.
    """
    match = re.search(pattern, string)
    if match:
        return match.group()
    return 'no match.'


for test_string in test_strings:
    matches = [find_match(pattern, test_string) for pattern in patterns]
    # Pair each pattern with its own result instead of looking the position
    # up again with list.index() on every iteration.
    for pattern, first_match in zip(patterns, matches):
        print('The first potential match for "' + pattern.pattern
              + '" in "' + test_string + '" is: ' + first_match)

# Position of the negated class in ``patterns``; the lookup relies on the
# freshly compiled pattern comparing equal to the one stored in the list.
print(patterns.index(re.compile(r'[^ABC]')))

# The wildcard plus the shorthand and escape sequences.
patterns2 = [
    re.compile(r'.'),
    re.compile(r'\w'),
    re.compile(r'\W'),
    re.compile(r'\d'),
    re.compile(r'\D'),
    re.compile(r'\n'),
    re.compile(r'\r'),
    re.compile(r'\t'),
    re.compile(r'\f'),
    re.compile(r'\s'),
]

test_strings.append('Aardvarks belong to the Captain, capt_hook')

for test_string in test_strings:
    matches = [find_match(pattern, test_string) for pattern in patterns2]
    for pattern, first_match in zip(patterns2, matches):
        print('The first potential match for "' + pattern.pattern
              + '" in "' + test_string + '" is: ' + first_match)

test_strings2 = [
    "The Aardvarks belong to the Captain.",
    "Bitter butter won't make the batter better.",
    "Hark, the pitter patter of little feet!",
]

# Multi-character sequences, the ^ and $ anchors, and wildcards in context.
# NOTE: r'\w[aeiou]tter' appears twice in the original list; both entries
# are kept so the printed output is unchanged.
patterns3 = [
    re.compile(r'Aa'),
    re.compile(r'[Aa][Aa]'),
    re.compile(r'[aeiou][aeiou]'),
    re.compile(r'[AaEeIiOoUu][aeiou]'),
    re.compile(r'[Tt]he'),
    re.compile(r'^[Tt]he'),
    re.compile(r'n.'),
    re.compile(r'n.$'),
    re.compile(r'\W\w'),
    re.compile(r'\w[aeiou]tter'),
    re.compile(r'\w[aeiou]tter'),
    re.compile(r'..tt..'),
]

for test_string in test_strings2:
    matches = [find_match(pattern, test_string) for pattern in patterns3]
    for pattern, first_match in zip(patterns3, matches):
        print('The first potential match for "' + pattern.pattern
              + '" in "' + test_string + '" is: ' + first_match)


def find_all_matches(pattern, string):
    """Return every non-overlapping match of ``pattern`` in ``string``.

    The result is whatever ``re.findall`` produces (a list of strings, or of
    tuples when the pattern has several groups).  ``None`` is returned
    instead of an empty list when there is no match at all.
    """
    matches = re.findall(pattern, string)
    return matches or None


for test_string in test_strings2:
    matches = [find_all_matches(pattern, test_string) for pattern in patterns3]
    for pattern, found in zip(patterns3, matches):
        if found:
            print('All potential matches for "' + pattern.pattern
                  + '" in "' + test_string + '" is/are: ' + ', '.join(found))
        else:
            print('There were no matches for "' + pattern.pattern
                  + '" in "' + test_string + '".')

test_strings3 = [
    'Now Mr. N said, "Nooooooo!"',
    'Then she told him he had to be quiet.',
]

# Quantifiers (*, +, ?, {n}) and alternation inside a group.
patterns4 = [
    re.compile(r'No*'),
    re.compile(r'No+'),
    re.compile(r'No?'),
    re.compile(r'No{7}'),
    re.compile(r's?he'),
    re.compile(r'(she|he)'),
]

for test_string in test_strings3:
    matches = [find_all_matches(pattern, test_string) for pattern in patterns4]
    for pattern, found in zip(patterns4, matches):
        if found:
            print('All potential matches for "' + pattern.pattern
                  + '" in "' + test_string + '" is/are: ' + ', '.join(found))
        else:
            print('There were no matches for "' + pattern.pattern
                  + '" in "' + test_string + '".')

test_strings4 = [
    'The benefit is being held for Mr. Kite and Mr. Henderson.',
    'Tickets cost $5.00 for adults, $3.50 for children.',
    'Over 9000 attendees are expected, up from 900 attendees last year.',
    'Over 9,000 attendees are expected, up from 900 attendees last year.',
]

# Capturing groups: the parenthesised part of each pattern is extracted.
patterns5 = [
    re.compile(r'Mr\. (\w+)'),
    re.compile(r'\$(\d+\.\d\d)'),
    re.compile(r'(\d+) attendees'),
    re.compile(r'((\d+,)*\d+) attendees'),
]

# simple example
matches = re.search(patterns5[3], test_strings4[3])
print('Group 0: ' + matches.group(0))
print('Group 1: ' + matches.group(1))
print('Group 2: ' + matches.group(2))
# The pattern defines only two groups, so asking for a third one raises
# IndexError:
# print('Group 3: ' + matches.group(3))

for test_string in test_strings4:
    for pattern in patterns5:
        for result in re.finditer(pattern, test_string):
            # Group 0 is the whole match; 1..pattern.groups are the captures.
            for i in range(pattern.groups + 1):
                # str() is needed because a group that did not participate
                # in the match is reported as None.
                print('In "' + test_string + '", ' + 'given pattern "'
                      + pattern.pattern + '", the group ' + str(i)
                      + ' match is ' + str(result.group(i)))

matches = re.findall(patterns5[3], test_strings4[3])
# Bare expressions: they only echo a value in an interactive session and do
# nothing when the file is run as a script.
matches
matches[0][0]

phone_strings = [
    'Call Empire Carpets at 588-2300',
    'Does Jenny live at 867 5309?',
    'You can reach Mr. Plow at 636-555-3226',
]