In [1]:

import timeit
import re
import random

In [2]:

# build a test data set: NUM_PATTERNS distinct needles of the form
# '0000magicword', '0001magicword', ... (zero-padded to four digits)
NUM_PATTERNS = 3000
# range() instead of xrange(): same list on Python 2, and also runs on Python 3
strings_to_match = ['%04dmagicword' % i for i in range(NUM_PATTERNS)]

In [3]:

# Sanity check: show the first few needles and the total count.
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(strings_to_match[:10])
print(len(strings_to_match))

['0000magicword', '0001magicword', '0002magicword', '0003magicword', '0004magicword', '0005magicword', '0006magicword', '0007magicword', '0008magicword', '0009magicword']
3000

In [4]:

# build a test "file": each dictionary line gets one randomly chosen needle
# spliced in at a random offset, so every generated line contains a needle
WORDS_PATH = '/usr/share/dict/words'
MAX_LINES = 1000000  # cap on the number of lines kept for the benchmark

random.seed(42)  # fixed seed keeps the generated data reproducible
my_file = []
with open(WORDS_PATH) as fh:
    for line in fh:
        # Stop once the cap is reached instead of transforming the whole
        # dictionary and slicing the surplus away afterwards.
        if len(my_file) >= MAX_LINES:
            break
        # NOTE: `line` still carries its trailing newline and randint() is
        # inclusive on both ends, so the needle may land after the "\n".
        split_point = random.randint(0, len(line))
        my_file.append(line[:split_point] + random.choice(strings_to_match) + line[split_point:])

In [5]:

# Sanity check: show the first few generated lines and the total count.
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(my_file[:10])
print(len(my_file))

['a0075magicword\n', '0669magicwordA\n', 'aa2030magicword\n', 'AA\n0260magicword', 'aa0089magicworda\n', 'A1516magicwordachen\n', '0596magicwordAachenem\n', 'Aachen1634magicwordie\n', 'Aa1767magicwordchenowi\n', 'Aachenu0019magicword\n']
1000000

In [6]:

def naive_match(lines=None, needles=None):
    """Check that every line contains at least one of the needle strings.

    Baseline implementation: a plain ``in`` substring test of each needle
    against each line.

    Args:
        lines: iterable of strings to scan. Defaults to the notebook-level
            ``my_file``.
        needles: collection of substrings to look for. Defaults to the
            notebook-level ``strings_to_match``. It is walked once per line,
            so it must be re-iterable (list, tuple, set, ...).

    Returns:
        True if every line contains at least one needle (vacuously True when
        there are no lines); False as soon as a line contains none of them.
    """
    # Fall back to the notebook globals so the existing zero-argument call
    # keeps working unchanged.
    if lines is None:
        lines = my_file
    if needles is None:
        needles = strings_to_match

    # all()/any() short-circuit exactly like the explicit loop with an early
    # return: scanning stops at the first line without a needle.
    return all(any(needle in line for needle in needles) for line in lines)

In [7]:

%%timeit
# Time the baseline: substring test of the needles against each line.
naive_match()

1 loops, best of 3: 1min 46s per loop

In [8]:

def re_match(lines=None, needles=None):
    """Check that every line contains at least one needle, using one regex.

    All needles are escaped and joined into a single alternation, so each
    line is searched once instead of once per needle.

    Args:
        lines: iterable of strings to scan. Defaults to the notebook-level
            ``my_file``.
        needles: iterable of literal substrings to look for. Defaults to the
            notebook-level ``strings_to_match``.

    Returns:
        True if every line contains at least one needle (vacuously True when
        there are no lines); False as soon as a line contains none of them.
    """
    # Fall back to the notebook globals so the existing zero-argument call
    # keeps working unchanged.
    if lines is None:
        lines = my_file
    if needles is None:
        needles = strings_to_match

    # re.escape() makes every needle match literally, whatever it contains.
    alternatives = [re.escape(item) for item in needles]
    if not alternatives:
        # An empty alternation would compile to a pattern that matches at
        # every position and accept every line. With nothing to look for,
        # only an empty input can succeed -- the same answer naive_match gives.
        return not any(True for _ in lines)

    # building regular expression to match; no capturing group is needed
    # because only the presence of a match is tested
    expression = re.compile('|'.join(alternatives))

    # perform matching; all() stops at the first line without a match
    return all(expression.search(line) for line in lines)

In [9]:

%%timeit
# Time the regex variant; re_match builds its pattern inside the function,
# so that setup is part of the timed work.
re_match()

1 loops, best of 3: 9.97 s per loop