Fairseq-Based Machine Translation

  • Share this:

Code introduction


This function uses the Fairseq library to perform machine translation. It takes an input sentence along with the source and target languages, and returns the translated sentence.


Technology Stack : Fairseq, PyTorch

Code Type : Machine translation

Code Difficulty : Intermediate


                
                    
def translate_sentence(input_sentence, source_lang, target_lang,
                       model_path='path_to_pretrained_model'):
    """Translate a sentence with a pretrained Fairseq model.

    The checkpoint is loaded through Fairseq's hub interface, which is
    expected to handle tokenization, generation and detokenization of raw
    text itself, so no manual tensor encoding/decoding is done here.

    Args:
        input_sentence: Raw text to translate.
        source_lang: Source language, forwarded to Fairseq as the
            ``source_lang`` override when the checkpoint is loaded.
        target_lang: Target language, forwarded as ``target_lang``.
        model_path: Directory containing the pretrained checkpoint.
            Defaults to the placeholder path; point it at a real model
            directory before use.

    Returns:
        The translated sentence as a string, or ``''`` when
        ``input_sentence`` is empty or whitespace-only.
    """
    # Nothing to translate: return early so the (expensive) model load and
    # the heavy imports below are skipped entirely.
    if not input_sentence.strip():
        return ''

    import torch
    from fairseq.models import BaseFairseqModel

    # NOTE(review): relies on from_pretrained's defaults for the checkpoint
    # file name and dictionary location inside model_path, and on any
    # tokenizer/BPE settings stored with the checkpoint — confirm against
    # the actual model directory.
    # The model is loaded on every call; callers translating many sentences
    # may want to load it once and reuse it.
    translator = BaseFairseqModel.from_pretrained(
        model_path,
        source_lang=source_lang,
        target_lang=target_lang,
    )
    translator.eval()  # inference mode: disables dropout

    with torch.no_grad():
        return translator.translate(input_sentence)