Wednesday, March 9, 2022

Regex and Pandas Dataframe in Python Code Sample

Code 1

(.env) [boris@fedora34server NUMPY]$ cat samleRegex3.py

import pandas as pd

import re as re

# Sample data: one row per person, holding the name, the date of birth as a
# slash-separated string, and the age.
sample_columns = {
    'Name': ['Akash', 'Shyam', 'Ayush', 'Diksha', 'Radhika'],
    'date_of_birth': [
        '12/21/1998',
        '15/12/1998',
        '06/11/2000',
        '05/10/1998',
        '13/12/2010',
    ],
    'Age': [21, 12, 20, 21, 10],
}
df = pd.DataFrame(sample_columns)

# Show the frame before any derived column is added.
print("Printing the original dataframe")
print(df)

# Matches mm/dd/yyyy: month 01-12, day 01-31 and a four-digit year. The word
# boundaries stop a match from starting or ending inside a longer digit run.
_DATE_PATTERN = re.compile(
    r'\b(1[0-2]|0[1-9])/(3[01]|[12][0-9]|0[1-9])/([0-9]{4})\b')


def checking_valid_dates(dt):
    """Return every valid ``mm/dd/yyyy`` date found in *dt*.

    Args:
        dt: Text to scan. Anything that is not a ``str`` (for example
            ``None`` or a NaN from a column with missing values) is treated
            as containing no dates.

    Returns:
        A list of ``(month, day, year)`` string tuples, in order of
        appearance. The list is empty when nothing matches. Matches that
        pass the pattern but are not real calendar dates (such as
        ``02/30/2000``, or a year ``0000``) are left out.
    """
    from datetime import date  # local import keeps this block self-contained

    if not isinstance(dt, str):
        return []

    valid = []
    for month, day, year in _DATE_PATTERN.findall(dt):
        # The pattern only checks digit ranges; let the calendar decide
        # whether this day actually exists in this month and year.
        try:
            date(int(year), int(month), int(day))
        except ValueError:
            continue
        valid.append((month, day, year))
    return valid

# Add a column holding, for each row, the list of (month, day, year) tuples
# that checking_valid_dates finds in the date_of_birth string; rows whose
# value does not match get an empty list.
df['valid_date_of_birth'] = df['date_of_birth'].apply(checking_valid_dates)

# The dates being matched are slash-separated, so the message says mm/dd/yyyy.
print("\nPrinting the data frame Valid dates in the format: mm/dd/yyyy:")
print(df)

 

No comments:

Post a Comment