You can download this code by clicking the button below.
This code is now available for download.
This code uses spaCy's TextCategorizer component to train a text classifier. It takes in texts and corresponding labels as input and trains the classifier over a given number of iterations.
Technology Stack : spaCy (TextCategorizer pipeline component, spacy.util.minibatch)
Code Type : Function
Code Difficulty : Intermediate
import spacy
from spacy.pipeline import TextCategorizer
from spacy.util import minibatch
def train_text_categorizer(texts, labels, n_iter=100):
    """
    Train a text categorizer using spaCy's TextCategorizer component.

    Loads the 'en_core_web_sm' pipeline, appends a fresh 'textcat'
    component, registers every distinct label, and runs ``n_iter`` passes
    over the data in minibatches of 8 using the spaCy v2-style training API
    (``create_pipe`` / ``begin_training`` / ``update`` with raw texts).

    Args:
        texts: Iterable of raw text strings.
        labels: Iterable of label strings, one per text, in the same order.
        n_iter: Number of passes over the training data.

    Returns:
        The trained 'textcat' pipeline component.

    Raises:
        ValueError: If ``texts`` is empty or ``texts`` and ``labels`` differ
            in length.
    """
    # Materialise first so generators are accepted and lengths can be checked
    # before any model is loaded.
    texts = list(texts)
    labels = list(labels)
    if len(texts) != len(labels):
        raise ValueError(
            "texts and labels must have the same length "
            "(got %d texts and %d labels)" % (len(texts), len(labels))
        )
    if not texts:
        raise ValueError("at least one training example is required")

    # Distinct labels in first-seen order (dict preserves insertion order).
    label_names = list(dict.fromkeys(labels))

    # spaCy expects gold annotations as {"cats": {label: bool}} per text,
    # not as a bare label string.
    train_data = [
        (text, {"cats": {name: name == label for name in label_names}})
        for text, label in zip(texts, labels)
    ]

    nlp = spacy.load('en_core_web_sm')
    textcat = nlp.create_pipe('textcat')
    nlp.add_pipe(textcat, last=True)

    # Add labels to the text categorizer (the API takes one label at a time).
    for name in label_names:
        textcat.add_label(name)

    # Train only the text categorizer: the other pipes are disabled so that
    # begin_training() does not re-initialise the pretrained components.
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'textcat']
    with nlp.disable_pipes(*other_pipes):
        optimizer = nlp.begin_training()
        for _ in range(n_iter):
            losses = {}
            for batch in minibatch(train_data, size=8):
                # Each batch is a list of (text, annotation) pairs; split it
                # into the two parallel sequences that nlp.update() expects.
                batch_texts, batch_annotations = zip(*batch)
                nlp.update(
                    batch_texts,
                    batch_annotations,
                    sgd=optimizer,
                    drop=0.2,
                    losses=losses,
                )
    return textcat
# Example usage: each review is written next to its sentiment label, then the
# pairs are split into the two parallel lists the trainer takes.
texts, labels = (
    list(column)
    for column in zip(
        ("This is a good product", "positive"),
        ("I hate this product", "negative"),
        ("The quality is great", "positive"),
        ("It's not what I expected", "negative"),
    )
)
textcat = train_text_categorizer(texts=texts, labels=labels)