Translation with a Pre-trained Fairseq TransformerModel

Code introduction


This function loads a pre-trained WMT'19 English-to-German TransformerModel through the Fairseq hub interface and returns a closure that translates input text into German.


Technology Stack : Fairseq, TransformerModel (Moses tokenizer and fastBPE via the Fairseq hub interface)

Code Type : Function

Code Difficulty :


                
                    
def random_select_from_fairseq():
    from fairseq.models.transformer import TransformerModel

    # Load the pre-trained WMT'19 English-German transformer once.
    # The Fairseq hub interface downloads the checkpoint on first use and
    # bundles the Moses tokenizer and fastBPE codes the model was trained with.
    model = TransformerModel.from_pretrained(
        'transformer.wmt19.en-de',
        checkpoint_file='model1.pt',
        tokenizer='moses',
        bpe='fastbpe',
    )
    model.eval()  # disable dropout for inference

    def model_predict(input_text):
        # Tokenize the input text and map it to a tensor of token IDs
        tokens = model.encode(input_text)

        # Generate translation hypotheses with beam search
        hypotheses = model.generate(tokens, beam=5)

        # Decode the highest-scoring hypothesis back to detokenized text
        decoded_text = model.decode(hypotheses[0]['tokens'])

        return decoded_text

    return model_predict
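

Below is a short usage sketch. It assumes fairseq, sacremoses, and fastBPE are installed and that the WMT'19 English-German checkpoint can be downloaded on the first call; the sample sentence is illustrative only.

# Usage sketch: build the translator once, then reuse it for each sentence.
# Assumes fairseq, sacremoses, and fastBPE are installed and the checkpoint
# download succeeds on the first call.
translate = random_select_from_fairseq()

print(translate("Machine learning is fun!"))  # prints the model's German translation

Because the model is loaded in the outer function, repeated calls to the returned closure reuse the same checkpoint instead of reloading it for every sentence.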