You can download this code by clicking the button below.
This code is now available for download.
This function adds a text-categorizer component to an existing spaCy pipeline, trains it on the given texts and their corresponding labels so that it can predict the category of a text, and returns the trained component.
Technology Stack : spaCy
Code Type : Text classifier creation
Code Difficulty : Intermediate
import spacy
from spacy.language import Language
from spacy.pipeline import TextCategorizer
from spacy.tokens import Span
from spacy.training import Example
from spacy.util import minibatch
def create_text_categorizer(texts, labels, nlp, threshold=0.5, *,
                            n_iter=20, batch_size=8, drop=0.5):
    """
    Add a single-label text categorizer to ``nlp`` and train it.

    Written against the spaCy v3 API (string factory names, ``Example``
    objects, ``select_pipes``).

    Args:
        texts: Iterable of raw training strings.
        labels: Iterable of category names, one per entry in ``texts``.
            spaCy expects label names to be strings (and, presumably, at
            least two distinct labels for the exclusive ``textcat``
            component -- verify against the installed spaCy version).
        nlp: The spaCy ``Language`` pipeline the component is added to.
            It is modified in place.
        threshold: Score cutoff stored in the component's config.
        n_iter: Number of passes over the training data.
        batch_size: Number of examples per update step.
        drop: Dropout rate used during training.

    Returns:
        The trained text categorizer component (also reachable through
        ``nlp``).

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length or are empty.
    """
    # Materialize first so generators can be measured and reused.
    texts = list(texts)
    labels = list(labels)
    if len(texts) != len(labels):
        raise ValueError(
            "texts and labels must have the same length "
            "(got {} and {})".format(len(texts), len(labels))
        )
    if not texts:
        raise ValueError("at least one training example is required")

    # Distinct labels in first-seen order; dict.fromkeys avoids comparing
    # labels with each other.
    unique_labels = list(dict.fromkeys(labels))

    # Create the component through its registered factory name and hand the
    # threshold over via the component config.
    text_categorizer = nlp.add_pipe("textcat", config={"threshold": threshold})

    # Each example carries a score for every known category: 1.0 for the
    # gold label and 0.0 for all the others.
    examples = [
        Example.from_dict(
            nlp.make_doc(text),
            {"cats": {name: float(name == label) for name in unique_labels}},
        )
        for text, label in zip(texts, labels)
    ]

    # Initialize only the new component (labels are collected from the
    # examples) so that any other components in ``nlp`` keep their weights.
    text_categorizer.initialize(lambda: examples, nlp=nlp)
    optimizer = nlp.create_optimizer()

    losses = {}
    # Restrict updates to the categorizer; other pipes stay untouched.
    with nlp.select_pipes(enable=[text_categorizer.name]):
        for _ in range(n_iter):
            for batch in minibatch(examples, size=batch_size):
                nlp.update(batch, drop=drop, sgd=optimizer, losses=losses)

    return text_categorizer