You can download this code by clicking the button below.
This code is now available for download.
This function builds a machine learning pipeline that combines CountVectorizer for text vectorization with a randomly chosen classifier (either LogisticRegression or DecisionTreeClassifier) and fits it on the given texts and labels. It then uses the eli5 library to produce a table of the classifier's feature weights (feature importances) and prints that table formatted as HTML.
Technology Stack : eli5, scikit-learn, CountVectorizer, LogisticRegression, DecisionTreeClassifier
Code Type : Machine learning
Code Difficulty : Intermediate
import random

import eli5
# NOTE(review): eli5 does not appear to ship an `eli5.formatters.table` module,
# so this import likely raises ImportError - confirm and drop it if unused.
from eli5.formatters import table
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
def random_eli5_function(texts, labels):
    """Fit a randomly chosen text classifier and print its eli5 weights as HTML.

    A two-step pipeline is built: ``CountVectorizer`` converts the raw texts
    into token counts, and a classifier picked at random (``LogisticRegression``
    or ``DecisionTreeClassifier``) is trained on those counts.

    Args:
        texts: Sequence of raw text documents.
        labels: Class labels, one per document in ``texts``.

    Returns:
        The HTML markup that was printed.
    """
    # The estimators come from scikit-learn: eli5 only explains fitted models
    # and provides no classifier classes of its own.
    classifier = random.choice([LogisticRegression(), DecisionTreeClassifier()])
    pipeline = Pipeline([
        ("vect", CountVectorizer()),
        ("clf", classifier),
    ])

    # Fit the vectorizer and the classifier together on the raw documents.
    pipeline.fit(texts, labels)

    # explain_weights yields an Explanation object that format_as_html can
    # render; show_weights needs IPython and returns a notebook display
    # object, which is not suitable for printing from a plain script.
    explanation = eli5.explain_weights(
        pipeline.named_steps["clf"],
        vec=pipeline.named_steps["vect"],
    )
    html = eli5.format_as_html(explanation)
    print(html)
    return html
# Example invocation: three short documents with binary class labels.
texts = [
    "This is a sample text.",
    "Another sample text.",
    "Yet another text sample.",
]
labels = [0, 1, 0]

# Run the demo function on the sample documents and their labels.
random_eli5_function(texts, labels)