9.0.12 Apply a function to process text
Example:
###############################################################################
# Import Libraries
import pandas as pd
import unicodedata
import sys
# NLP Libraries
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
nltk.download('punkt')
from nltk.stem.porter import PorterStemmer
###############################################################################
# test cases
# NOTE: every assignment below rebinds the same name `text`, so once all three
# lines have run only the last string is kept. Comment out the ones you do not
# want when trying a case.
# Case 1: string with trailing whitespace.
text = "I have some text with spaces at the end "
# Case 2: three sentences, for splitting into a list of sentences.
text = "Here is some text. It has three sentances. Can you parse this text into a list with three elements?"
# Case 3: string ending in repeated punctuation marks.
text = 'Does this text still have punctuation?!?!?!?!?'
###############################################################################
# Example dataset: one column, 'text_string', holding five short sample texts.
DF_Text = pd.DataFrame(
    {
        'text_string': [
            "this is a string of text.",
            "My Home is my castle",
            "chicken Fillet is closed on Sundays",
            "Legend Dairy... suit up!",
            "please feed me. I'm hungry!!",
        ],
    }
)
# Create a text processing function
def count_total_characters(text: object) -> int:
    """Return the total number of characters in ``text``.

    The input is converted with ``str()`` before it is measured, so
    non-string values (numbers, ``None``, ...) are counted by the length of
    their string representation instead of raising ``TypeError``.

    Args:
        text: The value to measure; anything ``str()`` accepts.

    Returns:
        The length of ``str(text)``. Every character counts, including
        whitespace and punctuation.
    """
    # Coerce first: ``len`` is not defined for ints, floats or None.
    return len(str(text))
# apply the text processing function to create a new column
# `.apply` calls count_total_characters once for each value in 'text_string';
# the returned counts are stored in the new column 'New_Column'.
DF_Text['New_Column'] = DF_Text['text_string'].apply(count_total_characters)