https://stackoverflow.com/a/67346603/1421907
Here are the two solutions from the Stack Overflow post linked above.
The first one is based on the standard-library `re` module; the second one is based on the third-party `regex` module.
import re
import regex
import ast
# Sample input: each line holds a label, an optional parenthesised list of
# integers, a colon and a decimal value, framed by "*" characters.
txt = r"""* DCH : 0.80000000 *
* PYR : 100.00000000 *
* Bond ( 1, 0) : 0.80000000 *
* Angle ( 1, 0, 2) : 100.00000000 *
"""

# Group 1: the label (\w+).
# Group 2: optional parenthesised list of 2 to 4 integers, e.g. "( 1, 0, 2)".
# Group 3: the decimal value.
# The dot in group 3 is escaped so only a literal "." is accepted as the
# decimal separator.  The whitespace around the optional list and the colon is
# matched with \s* so that label-only lines separated by a single space (where
# there is only one whitespace run between the label and ":") still match.
p = re.compile(r"\s+(\w+)\s*(\((?:\s*(?:\d+),?){2,4}\))?\s*:\s*(\d+\.\d+)")

for line in txt.splitlines():
    m = p.search(line)
    if m is None:
        # Not a "label : value" line; skip it instead of failing on None.
        continue
    value = float(m.group(3))
    # Use the integer tuple when the parenthesised list is present,
    # otherwise fall back to the plain label.
    coord = ast.literal_eval(m.group(2)) if m.group(2) else m.group(1)
    print(f"{coord} {value}")
DCH 0.8 PYR 100.0 (1, 0) 0.8 (1, 0, 2) 100.0
%%timeit
# Benchmark body for the ``re``-based parser: the same parsing steps as the
# demonstration loop, without printing.
for line in txt.splitlines():
    m = p.search(line)
    if m is None:
        # Skip lines the pattern does not recognise instead of failing on None.
        continue
    value = float(m.group(3))
    # Integer tuple when the parenthesised list is present, else the label.
    coord = ast.literal_eval(m.group(2)) if m.group(2) else m.group(1)
23.4 µs ± 667 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# Same grammar as the ``re`` pattern, but the parentheses are a non-capturing
# group and each integer inside them is captured by group 2.  The ``regex``
# module keeps every repetition of a group, retrievable with ``captures``.
# The dot in group 3 is escaped so only a literal "." is accepted, and the
# whitespace around the optional list and the colon is matched with \s* so
# that label-only lines separated by a single space still match.
pre = regex.compile(r'\s+(\w+)\s*(?:\((?:\s*(\d+),?){2,4}\))?\s*:\s*(\d+\.\d+)')

for line in txt.splitlines():
    m = pre.search(line)
    if m is None:
        # Not a "label : value" line; skip it instead of failing on None.
        continue
    value = float(m.group(3))
    # All integers captured by group 2; empty when no list was present.
    indices = m.captures(2)
    coord = tuple(map(int, indices)) if indices else m.group(1)
    print(f"{coord} {value}")
DCH 0.8 PYR 100.0 (1, 0) 0.8 (1, 0, 2) 100.0
%%timeit
# Benchmark body for the ``regex``-based parser: the same parsing steps as the
# demonstration loop, without printing.
for line in txt.splitlines():
    m = pre.search(line)
    if m is None:
        # Skip lines the pattern does not recognise instead of failing on None.
        continue
    value = float(m.group(3))
    # All integers captured by group 2; empty when no list was present.
    indices = m.captures(2)
    coord = tuple(map(int, indices)) if indices else m.group(1)
12.4 µs ± 46.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)