#!/usr/bin/env python
# coding: utf-8

# This is a work-in-progress for some future cohpy meeting.

# Lastly, using names to refer to parts of a string that match a regular
# expression is fantastic! It makes the code much more readable than using
# meaningless numerical indexes.
#
# This was new to a Python expert who now loves it,
# although he still prefers to eschew regular expressions
# because they are hard to read.
#
# Imagine the following contrived example,
# where I want to get the hash of a git object from its filepath.

# In[1]:

s = '.git/objects/8e/28241360c472576e8caa944253d4af368d9081'

import re

# First attempt: plain (numbered) groups only.
git_pattern = re.compile(r'''
    .*/                     # anything and a slash
    (
        ([0-9a-fA-F]{2})    # 2 hexadecimal digits
        /                   # separated by a slash
        ([0-9a-fA-F]{38})   # 38 hexadecimal digits
    )$''', flags=re.VERBOSE)

# In[2]:

m = git_pattern.match(s)
m

# In[3]:

# Group 0 is the entire matched text.
m.group(0)

# In[4]:

# Groups are numbered by the position of their opening parenthesis,
# so group 1 is the outer group and groups 2 and 3 are nested inside it.
m.group(1)

# In[5]:

m.group(2)

# In[6]:

m.group(3)

# In[7]:

help(re.compile)

# In[8]:

s = '.git/objects/8e/28241360c472576e8caa944253d4af368d9081'

import re

# Second attempt: the same pattern, but every group is given a name with
# the (?P<name>...) syntax.  The groups keep their numbers as well.
git_pattern = re.compile(r'''
    .*/                                         # anything and a slash
    (?P<hash_with_slash>
        (?P<hash_directory>[0-9a-fA-F]{2})      # 2 hexadecimal digits
        /                                       # separated by a slash
        (?P<hash_filename>[0-9a-fA-F]{38})      # 38 hexadecimal digits
    )$''', flags=re.VERBOSE)

# In[9]:

m = git_pattern.match(s)
m

# In[10]:

m.group(2)

# In[11]:

# Same substring as m.group(2), but the name says what it is.
m.group('hash_directory')

# In[12]:

m.group(3)

# In[13]:

# Same substring as m.group(3).
m.group('hash_filename')

# In[14]:

# NOTE: the name `hash` shadows the builtin hash() for the rest of the
# script; it is kept because other cells may refer to it.
hash = m.group('hash_directory') + m.group('hash_filename')
hash

# In[15]:

hash = ''.join(map(m.group, ('hash_directory', 'hash_filename')))
hash

# In[16]:

hash = ''.join(map(lambda part: m.group('hash_%s' % part), ('directory', 'filename')))
hash

# In[17]:

m.group(1)

# In[18]:

# Same substring as m.group(1).
m.group('hash_with_slash')

# In[19]:

# Yet another way to build the hash: drop the slash from the outer group.
hash = ''.join(c for c in m.group('hash_with_slash') if c != '/')
hash

# In[20]:

s

# In[21]:

# Here no slash precedes the two-digit directory, so the leading `.*/`
# has nothing to match and match() returns None.
s = '8e/28241360c472576e8caa944253d4af368d9081'
m = git_pattern.match(s)
m

# In[22]:

repr(m)