Python Module RE

" RE for Regular Expression! "

Posted by Ethan on July 1, 2016

RE

>>> str1 = ' ShenZhen University '

>>> str1.find('University')
10
>>> str1.find('11')
-1
>>> help(str1.find)
find(...)
S.find(sub [,start [,end]]) -> int

Return the lowest index in S where substring sub is found,
such that sub is contained within S[start:end].  Optional
arguments start and end are interpreted as in slice notation.  
   
   
>>> str1.startswith('Zhen')
False
>>> str1.startswith(' Shen')
True
>>> import re
>>> pa = re.compile(r'University')


>>> type(pa)
<type '_sre.SRE_Pattern'>
>>> dir(pa)
['__class__', '__copy__', '__deepcopy__', '__delattr__', '__doc__', '__format__', '__getattribute__', '__hash__', '__init__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'findall', 'finditer', 'flags', 'groupindex', 'groups', 'match', 'pattern', 'scanner', 'search', 'split', 'sub', 'subn']


>>> help(pa.match) 
match(...)
match(string[, pos[, endpos]]) --> match object or None.
Matches zero or more characters at the beginning of the string
(END)


>>> pa = re.compile('University')
>>> pa.match(str1)
>>> pa = re.compile(' Shen')
>>> pa.match(str1)
<_sre.SRE_Match object at 0x107f13f38>


>>> ma = pa.match(str1)
>>> ma.group()
' Shen'
>>> dir(ma)
['__class__', '__copy__', '__deepcopy__', '__delattr__', '__doc__', '__format__', '__getattribute__', '__hash__', '__init__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'end', 'endpos', 'expand', 'group', 'groupdict', 'groups', 'lastgroup', 'lastindex', 'pos', 're', 'regs', 'span', 'start', 'string']
	
>>> pa = re.compile(' shen', re.I)
>>> ma = pa.match(str1)
>>> ma.group()
' Shen'


>>> ma = re.match(r'hello', 'hello SZU, blah blah')
>>> ma.group()
'hello'
>>> ma.groups()
()
>>> ma = re.match(r'(hello)', 'hello SZU, blah blah')
>>> ma.group()
'hello'
>>> ma.groups()
('hello',)		 

>>> help(re.match)	
match(pattern, string, flags=0)
    Try to apply the pattern at the start of the string, returning
    a match object, or None if no match was found.
(END)





# Match a Python variable name (it must start with '_' or a letter)
>>> ma = re.match(r'[_a-zA-Z]+[_\w]*', '_str1')
>>> ma
<_sre.SRE_Match object at 0x107e1eb28>
>>> ma.group()
'_str1'
>>> ma = re.match(r'[_a-zA-Z]+[_\w]*', '123_wer')
>>> ma
>>>

re.search and re.findall

# re.search and re.findall
>>> str2 = 'SZU University course = 999'
>>> str2.find('999')
24
>>> 
>>> info = re.search(r'\d+', str2)
>>> info.group()
'999'
>>> str3 = 'c++ = 100, java = 90, python = 78'
>>> info = re.search(r'\d+', str3)
>>> info.group()
'100'
>>> info = re.findall(r'\d+', str3)
>>> type(info)
<type 'list'>
>>> info
['100', '90', '78']
>>> sum([int(x) for x in info])
268
>>> 

re.sub and re.split

# re.sub and re.split
>>> def add1(match):
...     val = match.group()
...     num = int(val)+1
...     return str(num)
... 
>>> str3 = 'SZ University course = 999'
>>> re.sub(r'\d+', add1, str3)
'SZ University course = 1000'

>>> help(re.sub)
sub(pattern, repl, string, count=0, flags=0)
    Return the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string by the
    replacement repl.  repl can be either a string or a callable;
    if a string, backslash escapes in it are processed.  If it is
    a callable, it's passed the match object and must return
    a replacement string to be used.
(END)

>>> str4 = 'progLanguage:C C++ Python Java Php'
>>> re.split(r':|', str4)
['progLanguage', 'C C++ Python Java Php']
>>> re.split(r' :| ', str4)
['progLanguage:C', 'C++', 'Python', 'Java', 'Php']

>>> help(re.split)
split(pattern, string, maxsplit=0, flags=0)
    Split the source string by the occurrences of the pattern,
    returning a list containing the resulting substrings.
(END)