This is a work-in-progress for some future cohpy meeting.
Lastly, using names to refer to the parts of a string that match a regular expression is fantastic! It makes the code much more readable than using meaningless numerical indexes.
This was new to a Python expert who now loves it, although he still prefers to eschew regular expressions because they are hard to read.
Imagine the following contrived example, where I want to get the hash of a git object from its filepath.
# Example path of a git object file; the 40 hex digits of the hash are split
# into a 2-character directory name and a 38-character file name.
s = '.git/objects/8e/28241360c472576e8caa944253d4af368d9081'
import re
# re.VERBOSE makes the whitespace and the "# ..." comments inside the pattern
# insignificant. Groups are numbered by the order of their opening
# parentheses: 1 = "directory/filename", 2 = directory, 3 = filename.
git_pattern = re.compile(r'''
.*/ # anything and a slash
(
([0-9a-fA-F]{2}) # 2 hexadecimal digits
/ # separated by a slash
([0-9a-fA-F]{38}) # 38 hexadecimal digits
)$''', flags=re.VERBOSE)
# match() anchors at the start of the string, so the leading ".*/" has to
# consume everything before the hash.
m = git_pattern.match(s)
m
<_sre.SRE_Match object; span=(0, 54), match='.git/objects/8e/28241360c472576e8caa944253d4af368>
m.group(0)
'.git/objects/8e/28241360c472576e8caa944253d4af368d9081'
m.group(1)
'8e/28241360c472576e8caa944253d4af368d9081'
m.group(2)
'8e'
m.group(3)
'28241360c472576e8caa944253d4af368d9081'
help(re.compile)
Help on function compile in module re: compile(pattern, flags=0) Compile a regular expression pattern, returning a pattern object.
# Same example path as before, matched this time with named groups.
s = '.git/objects/8e/28241360c472576e8caa944253d4af368d9081'
import re
# (?P<name>...) gives a group a name while keeping its number, so each part
# can be fetched either by position or by name. The space after each
# "<name>" is ignored because the pattern is compiled with re.VERBOSE.
git_pattern = re.compile(r'''
.*/ # anything and a slash
(?P<hash_with_slash>
(?P<hash_directory> [0-9a-fA-F]{2}) # 2 hexadecimal digits
/ # separated by a slash
(?P<hash_filename> [0-9a-fA-F]{38}) # 38 hexadecimal digits
)$''', flags=re.VERBOSE)
# match() anchors at the start of the string, so the leading ".*/" has to
# consume everything before the hash.
m = git_pattern.match(s)
m
<_sre.SRE_Match object; span=(0, 54), match='.git/objects/8e/28241360c472576e8caa944253d4af368>
m.group(2)
'8e'
m.group('hash_directory')
'8e'
m.group(3)
'28241360c472576e8caa944253d4af368d9081'
m.group('hash_filename')
'28241360c472576e8caa944253d4af368d9081'
# Rebuild the full 40-character hash by concatenating the two named groups.
# NOTE(review): the name `hash` shadows the built-in hash() from here on.
hash = m.group('hash_directory') + m.group('hash_filename')
hash
'8e28241360c472576e8caa944253d4af368d9081'
# Same result: m.group is called once per group name and the pieces are joined.
hash = ''.join(map(m.group, ('hash_directory', 'hash_filename')))
hash
'8e28241360c472576e8caa944253d4af368d9081'
# Same result again, this time building each group name from the shared
# 'hash_' prefix. The lambda's parameter `s` is local to the lambda and does
# not disturb the module-level `s`.
hash = ''.join(map(lambda s: m.group('hash_%s' % s), ('directory', 'filename')))
hash
'8e28241360c472576e8caa944253d4af368d9081'
m.group(1)
'8e/28241360c472576e8caa944253d4af368d9081'
m.group('hash_with_slash')
'8e/28241360c472576e8caa944253d4af368d9081'
# Drop the '/' separator from the combined group to get the bare hash;
# str.replace states that intent directly instead of filtering the string
# one character at a time.
hash = m.group('hash_with_slash').replace('/', '')
hash
'8e28241360c472576e8caa944253d4af368d9081'
s
'.git/objects/8e/28241360c472576e8caa944253d4af368d9081'
# A path with no directory component in front of the hash: the pattern
# requires at least one '/' before the two-digit directory (the ".*/"
# prefix), so match() finds nothing here and returns None.
s = '8e/28241360c472576e8caa944253d4af368d9081'
m = git_pattern.match(s)
m
repr(m)
'None'