Randomly Generated Stopwords from NLTK

  • Share this:

Code introduction


This function selects a language from the NLTK stopwords corpus and returns 10 randomly sampled stopwords for that language.


Technology Stack : NLTK library, word_tokenize, stopwords, random.sample

Code Type : Custom function

Code Difficulty : Intermediate


                
                    
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

def random_stopwords(language):
    """
    Return up to 10 randomly chosen stopwords from an NLTK stopword list.

    Args:
        language: Name of an NLTK stopword list (e.g. ``'english'``). If it
            is ``None`` or does not name a list in the installed stopwords
            corpus, a language is picked at random from a built-in list of
            candidates instead.

    Returns:
        A list of at most 10 distinct stopwords for the chosen language.

    Raises:
        LookupError: If the stopwords corpus is missing and could not be
            downloaded.
        ValueError: If none of the candidate languages is present in the
            installed stopwords corpus.
    """
    # Fallback candidates; not every name here is guaranteed to ship with the
    # installed corpus, so they are filtered against fileids() below.
    candidate_languages = ['english', 'spanish', 'french', 'german', 'italian', 'dutch', 'chinese', 'japanese', 'korean']
    sample_size = 10

    # fileids() itself raises LookupError when the corpus is not installed,
    # so the download has to be triggered from the exception rather than
    # from a membership test on its result.
    try:
        available = set(stopwords.fileids())
    except LookupError:
        nltk.download('stopwords')
        available = set(stopwords.fileids())

    if isinstance(language, str) and language in available:
        selected_language = language
    else:
        usable = [name for name in candidate_languages if name in available]
        if not usable:
            raise ValueError("no candidate language is available in the NLTK stopwords corpus")
        selected_language = random.choice(usable)

    # De-duplicate, then sort: random.sample() needs a sequence (sets are
    # rejected since Python 3.11) and a stable order keeps results
    # reproducible under random.seed().
    words = sorted(set(stopwords.words(selected_language)))

    # Clamp the sample size so a short stopword list cannot raise ValueError.
    return random.sample(words, k=min(sample_size, len(words)))