import torch
from fastai2.text.all import *
from transformers import AutoModelWithLMHead, AutoTokenizer
from fastai_transformers_utils.generated_lm import GeneratedLM, GenerateArgs
# all_slow
Used on Huggingface's Model¶
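Generate text from a pretrained distilgpt2 with GeneratedLM: load the model and tokenizer, build a batch of prompts, sample continuations, and decode them back to strings.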
# load pretrained model and vocab
lm = AutoModelWithLMHead.from_pretrained('distilgpt2')
lm.eval()
tokenizer = AutoTokenizer.from_pretrained('distilgpt2')
num_returns = 3
sentence = 'The dog'
tgt = torch.tensor([tokenizer.encode(sentence)] * num_returns)
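tgt stacks num_returns copies of the encoded prompt into a single batch, so each row is sampled into its own continuation.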
generate_args = GenerateArgs(
    max_length=20,
    do_sample=True,
    num_beams=5,
    temperature=1.5,
    top_k=50,
    top_p=1.0,
    repetition_penalty=1.0,
    length_penalty=1.0,
)
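These mirror the sampling arguments of Huggingface's generate: temperature rescales the next-token logits, top_k and top_p restrict sampling to the most likely candidates, and repetition_penalty and length_penalty discourage repeated tokens and bias the length of the output.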
# GeneratedLM takes the model, the vocab size, the pad token id, the eos token
# ids, and a final flag that is True here for the Huggingface model and False
# below for the AWD_LSTM (see fastai_transformers_utils.generated_lm)
generated_lm = GeneratedLM(lm, tokenizer.vocab_size, lm.config.pad_token_id, [lm.config.eos_token_ids], True)
numeric_result = generated_lm.generate(tgt, generate_args)
for i in range(num_returns):
    result = tokenizer.decode(list(numeric_result[i]), skip_special_tokens=True)
    print(result)
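For repeatable output you can turn sampling off. A minimal sketch, assuming the GenerateArgs fields omitted here fall back to sensible defaults (check fastai_transformers_utils.generated_lm for the exact signature):
# Greedy decoding sketch: a single beam with do_sample=False should be
# deterministic, mirroring the semantics of Huggingface's generate()
greedy_args = GenerateArgs(max_length=20, do_sample=False, num_beams=1)
greedy_result = generated_lm.generate(tgt[:1], greedy_args)
print(tokenizer.decode(list(greedy_result[0]), skip_special_tokens=True))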
Used on fastai2's AWD_LSTM¶
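The same GeneratedLM API drives a fastai2 AWD_LSTM language model pretrained on WikiText-103; only the tokenization pipeline and the constructor arguments change.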
# load pretrained model and vocab
path = untar_data(URLs.WT103_FWD)
vocab = list(path.glob('*.pkl'))[0].load()
model_weights = torch.load(list(path.glob('*.pth'))[0], map_location=lambda storage, loc: storage)
model = get_language_model(AWD_LSTM, len(vocab))
load_ignore_keys(model, model_weights)  # copy the pretrained weights into the model, ignoring state-dict key names
# Tokenize and Numericalize
tokenizer = Tokenizer(SpacyTokenizer())
numericalizer = Numericalize(vocab=vocab)
pipe = Pipeline([tokenizer, numericalizer], True)
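The Pipeline chains the two transforms: pipe(sentence) tokenizes with SpaCy and then maps tokens to ids, while pipe.decode reverses both steps.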
num_returns = 2
sentence = 'The dog'
tgt = torch.stack([pipe(sentence)] * num_returns, dim=0)
tgt
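As before, tgt holds num_returns identical rows of token ids, one per sample to generate.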
# Generate and Decode
generate_args = GenerateArgs(
    max_length=20,
    do_sample=True,
    num_beams=5,
    temperature=1.5,
    top_k=50,
    top_p=1.0,
    repetition_penalty=1.0,
    length_penalty=1.0,
)
# Same wrapper for the fastai model: the pad token id comes from the AWD_LSTM
# config, [3] is the eos token id list, and the final flag is False here
generated_lm = GeneratedLM(model, len(vocab), awd_lstm_lm_config['pad_token'], [3], False)
numeric_result = generated_lm.generate(tgt, generate_args)
for i in range(num_returns):
    result = pipe.decode(list(numeric_result[i]))
    print(result)
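As in the GPT-2 example, each row of numeric_result decodes to one sampled continuation of the prompt.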