The complete, self-contained code is shown below.
This code defines a random text generator function based on a Recurrent Neural Network (RNN). The function trains a character-level model on a given corpus string and then uses the model to generate a character sequence of a specified length.
Technology Stack : PyTorch (torch, torch.nn)
Code Type : Function definition
Code Difficulty : Intermediate
def random_word_generator(length, corpus):
import torch
import torch.nn as nn
import numpy as np
from collections import Counter
import random
# Define the model
class RNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(RNN, self).__init__()
self.hidden_size = hidden_size
self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, x):
h0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
out, _ = self.rnn(x, h0)
out = self.fc(out)
return out
# Prepare data
input_size = len(corpus)
hidden_size = 128
output_size = len(corpus)
model = RNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
# Train the model
for _ in range(1000):
for i in range(1, len(corpus) - 1):
input = torch.tensor([corpus[i - 1], corpus[i]]).unsqueeze(0)
target = torch.tensor([corpus[i + 1]]).unsqueeze(0)
optimizer.zero_grad()
output = model(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()
# Generate a random word
def generate_word(model, length, corpus):
input = torch.tensor([corpus[-1]]).unsqueeze(0)
current_word = corpus[-1]
for _ in range(length - 1):
output = model(input)
_, predicted = torch.max(output, 1)
next_char = corpus[predicted.item()]
current_word += next_char
input = torch.tensor([corpus[i], next_char]).unsqueeze(0)
return current_word
return generate_word
# Example usage:
# corpus = "hello world this is a test corpus"
# generator = random_word_generator(10, corpus)
# print(generator())