Data Formatting
General Info
Useful Snippets

9.0.12 Apply a function to process text

Example:

###############################################################################
# Import Libraries
import pandas as pd
import unicodedata
import sys

# NLP Libraries
import nltk
from nltk.corpus      import stopwords
nltk.download('stopwords')

from nltk.tokenize    import word_tokenize
from nltk.tokenize    import sent_tokenize
nltk.download('punkt')

from nltk.stem.porter import PorterStemmer


###############################################################################
# test cases
# Sample input strings for trying out text-processing functions by hand.
# Every line rebinds the same name `text`, so when the script runs top to
# bottom only the last assignment remains in effect; comment out the others
# to exercise a different case.
# Case 1: string ending in a trailing space.
text = "I have some text with spaces at the end "
# Case 2: three sentences separated by double spaces.
text = "Here is some text.  It has three sentances.  Can you parse this text into a list with three elements?"
# Case 3: string ending in a run of repeated punctuation.
text = 'Does this text still have punctuation?!?!?!?!?'
###############################################################################


# Build the example dataset: a one-column DataFrame whose 'text_string'
# column holds five sample sentences
DF_Text = pd.DataFrame(
    {
        'text_string': [
            "this is a string of text.",
            "My Home is my castle",
            "chicken Fillet is closed on Sundays",
            "Legend Dairy... suit up!",
            "please feed me.  I'm hungry!!",
        ],
    }
)

# Create a text processing function
def count_total_characters(text):
    """Count the characters in ``text`` after converting it to a string.

    The argument is passed through ``str()`` first, so a non-string value
    is measured by the length of its string form.
    """
    return len(str(text))

# Run the text processing function on every value of 'text_string' and
# store the per-row results in a new column
DF_Text['New_Column'] = (
    DF_Text['text_string']
    .apply(count_total_characters)
)