Monday, October 3, 2011

Python Regular Expression

#!/usr/bin/python
#Author: Balasubramaniam Natarajan
#Demonstrate how to use Regular Expression
import re

def search(searchstr):
    """Print ``searchstr`` framed by two rows of asterisks.

    The script calls this before each group of regex demonstrations so the
    output shows which subject string the following matches were run on.

    Args:
        searchstr: The value to display; it is formatted with ``%s``.
    """
    banner = "*********************************"
    print(banner)
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.  The two spaces after
    # "=" reproduce what `print "searchstr = ", searchstr` emitted (the
    # literal's trailing space plus the separator print inserted).
    print("searchstr =  %s" % (searchstr,))
    print(banner)

# --- Subject 1: a single-line string ---
searchstr = "This is a test string"
search(searchstr)

# '^' anchors at the start of the string, so this matches only the leading "T".
re1 = re.compile('^T')
match1 = re1.match(searchstr)
# Single-argument print(...) with %-formatting gives the same output on
# Python 2 and Python 3; the two spaces after "=" keep the original layout.
print("Match for '^T' =  %s" % (match1.group(),))

# '.*' consumes the whole subject; re.DOTALL makes no visible difference
# here because the subject contains no newline.
re1 = re.compile('.*', re.DOTALL)
match1 = re1.match(searchstr)
print("Match for '.*' =  %s" % (match1.group(),))

# --- Subject 2: a two-line string ---
searchstr = "this is first line\nthis is second line"
search(searchstr)

# With re.DOTALL, '.' also matches "\n", so '.*' spans both lines.
re1 = re.compile('.*', re.DOTALL)
match1 = re1.match(searchstr)
# Single-argument print(...) with %-formatting gives the same output on
# Python 2 and Python 3; the two spaces after "=" keep the original layout.
print("Match for '.*',re.DOTALL =  %s" % (match1.group(),))

# match() only tries to match at the beginning of the string, and the subject
# does not start with "line", so the result is None (printed as-is, no .group()).
re1 = re.compile('line$')
match1 = re1.match(searchstr)
print("Match for 'line$' =  %s" % (match1,))

# A successful match is a match object; printing it without .group() shows
# the object's repr rather than the matched text.
re1 = re.compile('[a-z]')
match1 = re1.match(searchstr)
print("Match for '[a-z]' =  %s" % (match1,))

# Repeating metacharacters: * (zero or more), + (one or more), ? (zero or one)

re1 = re.compile('[a-z]+')
match1 = re1.match(searchstr)
print("Match for '[a-z]+' =  %s" % (match1.group(),))

# Without re.DOTALL, '.' stops at the newline, so only the first line matches.
re1 = re.compile('[a-z]+.*')
match1 = re1.match(searchstr)
print("Match for '[a-z]+.*' =  %s" % (match1.group(),))

# Python itself would interpret backslashes in an ordinary string literal; to
# avoid that we prefix the pattern with a lowercase r ("raw string").
# \s matches a single whitespace character (space, tab, newline, ...).
re1 = re.compile(r'[a-z]+\s')
match1 = re1.match(searchstr)
# The label is a raw string too, so the backslash in "\s" is printed literally
# and is not treated as an (invalid) escape sequence.
print(r"Match for '[a-z]+\s' =  %s" % (match1.group(),))

# Here we want the next word as well
re1 = re.compile(r'[a-z]+\s[a-z]+')
match1 = re1.match(searchstr)
print(r"Match for r'[a-z]+\s[a-z]+' =  %s" % (match1.group(),))

# Four lowercase words separated by single whitespace characters.
re1 = re.compile(r'[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+')
match1 = re1.match(searchstr)
print(r"Match for r'[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+' =  %s" % (match1.group(),))

# --- Subject 3: mixed-case string ---
searchstr = "This is first LINE"
search(searchstr)

# re.IGNORECASE lets the lowercase class [a-z] match "This" and "LINE" too.
re1 = re.compile(r'[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+', re.IGNORECASE)
match1 = re1.match(searchstr)
# Raw-string label keeps "\s" literal; single-argument print(...) gives the
# same output on Python 2 and Python 3.
print(r"Match for r'[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+',re.IGNORECASE =  %s" % (match1.group(),))

# --- Subject 4: words followed by a number ---
searchstr = "this is line number 987"
search(searchstr)

# \w+ matches a run of word characters and \d+ a run of digits, so the
# pattern covers two lowercase words, two more words, then the number.
re1 = re.compile(r'[a-z]+\s[a-z]+\s\w+\s\w+\s\d+')
match1 = re1.match(searchstr)
# Raw-string label keeps the backslashes literal; single-argument print(...)
# gives the same output on Python 2 and Python 3.
print(r"Match for r'[a-z]+\s[a-z]+\s\w+\s\w+\s\d+' =  %s" % (match1.group(),))

#END

OUTPUT

bala@bala-laptop:~/python$ python RE.py
*********************************
searchstr =  This is a test string
*********************************
Match for '^T' =  T
Match for '.*' =  This is a test string
*********************************
searchstr =  this is first line
this is second line
*********************************
Match for '.*',re.DOTALL =  this is first line
this is second line
Match for 'line$' =  None
Match for '[a-z]' =  <_sre.SRE_Match object at 0x7fa5ecf69238>
Match for '[a-z]+' =  this
Match for '[a-z]+.*' =  this is first line
Match for '[a-z]+\s' =  this
Match for r'[a-z]+\s[a-z]+' =  this is
Match for r'[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+' =  this is first line
*********************************
searchstr =  This is first LINE
*********************************
Match for r'[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+',re.IGNORECASE =  This is first LINE
*********************************
searchstr =  this is line number 987
*********************************
Match for r'[a-z]+\s[a-z]+\s\w+\s\w+\s\d+' =  this is line number 987
bala@bala-laptop:~/python$ clear

bala@bala-laptop:~/python$ python 30RE.py
*********************************
searchstr =  This is a test string
*********************************
Match for '^T' =  T
Match for '.*' =  This is a test string
*********************************
searchstr =  this is first line
this is second line
*********************************
Match for '.*',re.DOTALL =  this is first line
this is second line
Match for 'line$' =  None
Match for '[a-z]' =  <_sre.SRE_Match object at 0x7f646e2c0238>
Match for '[a-z]+' =  this
Match for '[a-z]+.*' =  this is first line
Match for '[a-z]+\s' =  this
Match for r'[a-z]+\s[a-z]+' =  this is
Match for r'[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+' =  this is first line
*********************************
searchstr =  This is first LINE
*********************************
Match for r'[a-z]+\s[a-z]+\s[a-z]+\s[a-z]+',re.IGNORECASE =  This is first LINE
*********************************
searchstr =  this is line number 987
*********************************
Match for r'[a-z]+\s[a-z]+\s\w+\s\w+\s\d+' =  this is line number 987
bala@bala-laptop:~/python$

No comments:

Post a Comment