Site: http://pyparsing.wikispaces.com/
Common information: http://pyparsing.wikispaces.com/HowToUsePyparsing
from pyparsing import *
from string import *
StringStart
StringEnd
LineStart
LineEnd
setParseAction
addParseAction
def action(s, loc, toks):
    """Demo parse action: print the callback arguments and wrap the tokens.

    Args:
        s: The full string being parsed.
        loc: Integer offset into ``s`` reported by the parser for this match.
        toks: The tokens matched by the expression the action is attached to.

    Returns:
        A new list holding ``'<'``, every item of ``toks``, then ``'>'``.
    """
    # Show the type and value of each argument the parser passes in.
    print('s', type(s), s)
    print('loc', type(loc), loc)
    print('toks', type(toks), toks)
    return ['<'] + list(toks) + ['>']
# Grammar for "[<digits>-<digits>]"; both brackets are suppressed from the
# results. setParseAction is called on the second Word(digits) only, so only
# the upper bound is passed through action().
pattern = Suppress('[') + Word(digits) + '-' + Word(digits).setParseAction(action) + Suppress(']')
# parseString matches from the very start of the input.
pattern.parseString('[0-9]')
s <class 'str'> [0-9] loc <class 'int'> 3 toks <class 'pyparsing.ParseResults'> ['9']
(['0', '-', '<', '9', '>'], {})
# >>> pattern.parseString('test[0-9]test')
# ParseException: Expected "[" (at char 0), (line:1, col:1)
# searchString scans the whole input and collects every match, so the
# surrounding 'test' text does not cause a failure here.
pattern.searchString('test[0-9]test')
s <class 'str'> test[0-9]test loc <class 'int'> 7 toks <class 'pyparsing.ParseResults'> ['9']
([(['0', '-', '<', '9', '>'], {})], {})
# Two occurrences in the input produce two result groups.
pattern.searchString('test[0-9]test[0-9]')
s <class 'str'> test[0-9]test[0-9] loc <class 'int'> 7 toks <class 'pyparsing.ParseResults'> ['9'] s <class 'str'> test[0-9]test[0-9] loc <class 'int'> 16 toks <class 'pyparsing.ParseResults'> ['9']
([(['0', '-', '<', '9', '>'], {}), (['0', '-', '<', '9', '>'], {})], {})
# scanString yields one (tokens, start, end) triple per match; list() collects them.
list(pattern.scanString('test[0-9]test'))
s <class 'str'> test[0-9]test loc <class 'int'> 7 toks <class 'pyparsing.ParseResults'> ['9']
[((['0', '-', '<', '9', '>'], {}), 4, 9)]
# With two occurrences, one (tokens, start, end) triple is produced per match.
list(pattern.scanString('test[0-9]test[0-9]'))
s <class 'str'> test[0-9]test[0-9] loc <class 'int'> 7 toks <class 'pyparsing.ParseResults'> ['9'] s <class 'str'> test[0-9]test[0-9] loc <class 'int'> 16 toks <class 'pyparsing.ParseResults'> ['9']
[((['0', '-', '<', '9', '>'], {}), 4, 9), ((['0', '-', '<', '9', '>'], {}), 13, 18)]
# transformString replaces each match with its resulting tokens and keeps
# the unmatched text as is.
pattern.transformString('test[0-9]test[0-9]')
s <class 'str'> test[0-9]test[0-9] loc <class 'int'> 7 toks <class 'pyparsing.ParseResults'> ['9'] s <class 'str'> test[0-9]test[0-9] loc <class 'int'> 16 toks <class 'pyparsing.ParseResults'> ['9']
'test0-<9>test0-<9>'
Goal: build an iterator that expands a template of the form:
text_block[numeric_block][numeric_block]text_block...
For example:
>>> te\[\]st\\\\[0-9]test[0-9]
te[]st\\0test0
te[]st\\0test1
te[]st\\0test2
...
te[]st\\9test7
te[]st\\9test8
te[]st\\9test9
# Text block: everything up to the next '[' or the end of the string.
text = SkipTo(StringEnd() | '[')
# Numeric block: "[<digits>-<digits>]" with brackets and dash suppressed,
# leaving just the two bounds as tokens.
numeric = Suppress('[') + Word(digits) + Suppress('-') + Word(digits) + Suppress(']')
# A template is any sequence of text and numeric blocks.
pattern = ZeroOrMore(text | numeric)
def skip_empty(toks):
    """Parse action that rejects a match whose first token is empty.

    Args:
        toks: The tokens matched by the expression; only ``toks[0]`` is
            inspected.

    Raises:
        ParseException: If ``toks[0]`` is falsy (for example an empty string).
    """
    if not toks[0]:
        raise ParseException('must be not empty')
# Make text fail on a zero-length match.
# NOTE(review): presumably this lets `text | numeric` fall through to
# numeric when the input is positioned at '[' - confirm.
text.addParseAction(skip_empty)
SkipTo:({StringEnd | "["})
# Template without escapes: text and numeric blocks alternate.
pattern.parseString('test[0-9]test[0-9]')
(['test', '0', '9', 'test', '0', '9'], {})
# Escaped brackets are not handled yet: the text block stops at the first
# '[' even though a backslash precedes it. Backslashes are written out
# explicitly because '\[' and '\]' are invalid escape sequences in a
# non-raw literal; the string value is unchanged.
pattern.parseString('te\\[\\]st\\\\[0-9]test[0-9]')
(['te\\'], {})
# Escape sequences recognised inside text blocks, each mapped to the literal
# character it stands for: backslash-backslash, backslash-'[', backslash-']'.
escape = (
    Literal('\\\\').addParseAction(replaceWith('\\'))
    | Literal('\\[').addParseAction(replaceWith('['))
    | Literal('\\]').addParseAction(replaceWith(']'))
)
# Rebuild the text block so SkipTo ignores escape sequences while looking
# for the terminating '[': an escaped bracket no longer ends the block.
text = SkipTo(StringEnd() | '[', ignore=escape).setParseAction(skip_empty)
pattern = ZeroOrMore(text | numeric)
# Backslashes are written out explicitly ('\[' and '\]' are invalid escape
# sequences in a non-raw literal); the string value is unchanged.
pattern.parseString('te\\[\\]st\\\\[0-9]test[0-9]')
(['te\\[\\]st\\\\', '0', '9', 'test', '0', '9'], {})
def unescape(toks):
    """Parse action that resolves escape sequences in every token.

    Args:
        toks: Iterable of matched string tokens.

    Returns:
        A list with each token passed through ``escape.transformString``.
    """
    return [escape.transformString(item) for item in toks]
# Added on top of the existing skip_empty action: the matched text has its
# escape sequences turned into the literal characters.
text.addParseAction(unescape)
SkipTo:({StringEnd | "["})
# The text token now comes back unescaped. Backslashes are written out
# explicitly ('\[' and '\]' are invalid escape sequences in a non-raw
# literal); the string value is unchanged.
pattern.parseString('te\\[\\]st\\\\[0-9]test[0-9]')
(['te[]st\\', '0', '9', 'test', '0', '9'], {})
def text_block(toks):
    """Parse action that wraps the matched tokens in a single iterator.

    Args:
        toks: The tokens matched for a text block.

    Returns:
        A one-element list whose item is ``iter(toks)``.
    """
    return [iter(toks)]
def numeric_block(toks):
    """Parse action that expands a pair of bounds into the numbers between them.

    Args:
        toks: Exactly two items, the lower and upper bound, each convertible
            with ``int()``.

    Returns:
        A one-element list whose item is an iterator over the decimal strings
        of every integer from the lower bound to the upper bound inclusive.
        It is empty when the upper bound is below the lower bound.

    Raises:
        ValueError: If ``toks`` does not hold exactly two items, or a bound
            is not a valid integer.
    """
    from_value, to_value = toks
    # + 1 makes the upper bound inclusive.
    return [map(str, range(int(from_value), int(to_value) + 1))]
# After these actions every parsed block is a single iterable of candidate
# strings: one string for a text block, the expanded range for a numeric one.
text.addParseAction(text_block)
numeric.addParseAction(numeric_block)
{Suppress:("[") W:(0123...) Suppress:("-") W:(0123...) Suppress:("]")}
# The tokens are now iterator/map objects rather than strings. Backslashes
# are written out explicitly ('\[' and '\]' are invalid escape sequences in
# a non-raw literal); the string value is unchanged.
pattern.parseString('te\\[\\]st\\\\[0-9]test[0-9]')
([<list_iterator object at 0x7f35380c7fd0>, <map object at 0x7f35381252b0>, <list_iterator object at 0x7f3538125278>, <map object at 0x7f3538100668>], {})
from itertools import product
def iterator(string):
    """Return a generator over every expansion of the template ``string``.

    The template is parsed with the module-level ``pattern``. Each parsed
    block is an iterable of candidate strings, and one output string is
    produced per element of their Cartesian product.

    Args:
        string: The template text to parse.

    Returns:
        A generator of strings, each the concatenation of one candidate
        from every block, in ``itertools.product`` order.
    """
    blocks = pattern.parseString(string)
    return (''.join(items) for items in product(*blocks))
# Two [0-9] ranges give 10 * 10 combinations. Backslashes are written out
# explicitly ('\[' and '\]' are invalid escape sequences in a non-raw
# literal); the string value is unchanged.
len(list(iterator('te\\[\\]st\\\\[0-9]test[0-9]')))
100
# First three expansions. Backslashes are written out explicitly ('\[' and
# '\]' are invalid escape sequences in a non-raw literal); same string value.
list(iterator('te\\[\\]st\\\\[0-9]test[0-9]'))[:3]
['te[]st\\0test0', 'te[]st\\0test1', 'te[]st\\0test2']
# Last three expansions. Backslashes are written out explicitly ('\[' and
# '\]' are invalid escape sequences in a non-raw literal); same string value.
list(iterator('te\\[\\]st\\\\[0-9]test[0-9]'))[-3:]
['te[]st\\9test7', 'te[]st\\9test8', 'te[]st\\9test9']
pavel.tyslyatsky@gmail.com