conda install pytorch torchvision -c pytorch
import torch
x = torch.Tensor(5, 3)  # uninitialized 5x3 tensor; values are whatever happened to be in memory
print(x)
1.00000e-44 *
  0.0000  0.0000  0.0000
  0.0000  1.6816  0.0000
  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000
[torch.FloatTensor of size 5x3]
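Freshly created tensors support the usual arithmetic straight away. A minimal sketch of a few basic operations (torch.rand and torch.add are standard torch API; the variable y is introduced here purely for illustration):

y = torch.rand(5, 3)    # uniform random values in [0, 1)
print(x + y)            # element-wise addition
print(torch.add(x, y))  # the same operation in functional form
print(x.size())         # torch.Size([5, 3])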
import torch

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random tensors to hold inputs and outputs. Since PyTorch 0.4,
# plain tensors carry autograd state, so the old Variable wrapper is
# no longer needed.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules and applies them in sequence to
# produce its output. Each Linear Module computes its output from its input
# using a linear function, and holds internal tensors for its weight and bias.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(reduction='sum')  # size_average=False is the deprecated spelling
learning_rate = 1e-4
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model. Module
    # objects override the __call__ operator, so you can call them like
    # functions: you pass in a tensor of input data and get back a tensor
    # of output data.
    y_pred = model(x)

    # Compute and print loss. We pass tensors containing the predicted and
    # true values of y, and the loss function returns a tensor containing
    # the loss.
    loss = loss_fn(y_pred, y)
    if t % 50 == 0:
        print(t, loss.item())  # loss.data[0] no longer works on 0-dim tensors

    # Zero the gradients before running the backward pass.
    model.zero_grad()

    # Backward pass: compute the gradient of the loss with respect to all
    # learnable parameters of the model. Internally, the parameters of each
    # Module are tensors with requires_grad=True, so this call computes
    # gradients for all learnable parameters in the model.
    loss.backward()

    # Update the weights using gradient descent. We wrap the update in
    # torch.no_grad() because the parameters require gradients, but we do
    # not want autograd to track the update itself.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
0 717.2719116210938
50 35.097198486328125
100 1.8821511268615723
150 0.1728428155183792
200 0.02194761298596859
250 0.0034840735606849194
300 0.0006572074489668012
350 0.00014404028479475528
400 3.580378324841149e-05
450 9.810625670070294e-06
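The manual weight update above is deliberately bare-metal. In practice the update rule is usually delegated to the torch.optim package; here is a minimal sketch of the same training loop with Adam (the choice of Adam and its default hyperparameters are mine, not part of the original example):

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(500):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    optimizer.zero_grad()  # zero the gradients held by the optimizer
    loss.backward()        # compute gradients of the loss w.r.t. parameters
    optimizer.step()       # apply the Adam update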
PyTorch-Transformers is a library of state-of-the-art pretrained models for Natural Language Processing (NLP).
!pip install pytorch-transformers
Collecting pytorch-transformers
  Downloading https://files.pythonhosted.org/packages/40/b5/2d78e74001af0152ee61d5ad4e290aec9a1e43925b21df2dc74ec100f1ab/pytorch_transformers-1.0.0-py3-none-any.whl (137kB)
Collecting sentencepiece (from pytorch-transformers)
Collecting boto3 (from pytorch-transformers)
Collecting regex (from pytorch-transformers)
Requirement already satisfied: torch>=0.4.1 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from pytorch-transformers) (1.1.0)
...
Successfully built regex
Installing collected packages: sentencepiece, jmespath, botocore, s3transfer, boto3, regex, pytorch-transformers
Successfully installed boto3-1.9.194 botocore-1.12.194 jmespath-0.9.4 pytorch-transformers-1.0.0 regex-2019.6.8 s3transfer-0.2.1 sentencepiece-0.1.82
import torch
from pytorch_transformers import *
# PyTorch-Transformers has a unified API
# for 6 transformer architectures and 27 pretrained weights.
# Model | Tokenizer | Pretrained weights shortcut
MODELS = [(BertModel,      BertTokenizer,      'bert-base-uncased'),
          (OpenAIGPTModel, OpenAIGPTTokenizer, 'openai-gpt'),
          (GPT2Model,      GPT2Tokenizer,      'gpt2'),
          (TransfoXLModel, TransfoXLTokenizer, 'transfo-xl-wt103'),
          (XLNetModel,     XLNetTokenizer,     'xlnet-base-cased'),
          (XLMModel,       XLMTokenizer,       'xlm-mlm-enfr-1024')]
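Given these (model, tokenizer, weights) triples, the same few lines can load and run any of the six architectures. A sketch of the unified API, closely following the library's own README; the shortcut string downloads the vocabulary and weights on first use:

for model_class, tokenizer_class, pretrained_weights in MODELS:
    # Load the pretrained tokenizer and model
    tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
    model = model_class.from_pretrained(pretrained_weights)
    # Encode text and extract the last hidden states
    input_ids = torch.tensor([tokenizer.encode("Here is some text to encode")])
    with torch.no_grad():
        last_hidden_states = model(input_ids)[0]  # models output tuples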
import torch
from pytorch_transformers import BertTokenizer, BertModel, BertForMaskedLM
# OPTIONAL: if you want to have more information on what's happening under the hood, activate the logger as follows
import logging
logging.basicConfig(level=logging.INFO)
# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Tokenize input
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
# Mask a token that we will try to predict back with `BertForMaskedLM`
masked_index = 8
tokenized_text[masked_index] = '[MASK]'
assert tokenized_text == ['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', '[MASK]', 'was', 'a', 'puppet', '##eer', '[SEP]']
# Convert token to vocabulary indices
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
# Define the sentence A and B indices associated with the 1st and 2nd sentences (see the BERT paper)
segments_ids = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
# Convert inputs to PyTorch tensors
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /Users/datalab/.cache/torch/pytorch_transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
tokenized_text
['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', '[MASK]', 'was', 'a', 'puppet', '##eer', '[SEP]']
?BertModel.from_pretrained
model = BertModel.from_pretrained(u"/Users/datalab/bigdata/bert-base-uncased.bin")
INFO:pytorch_transformers.modeling_utils:loading configuration file /Users/datalab/bigdata/bert-base-uncased.bin
---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-13-f9dd09faa64c> in <module>
----> 1 model = BertModel.from_pretrained(u"/Users/datalab/bigdata/bert-base-uncased.bin")

~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    360         # Load config
    361         if config is None:
--> 362             config = cls.config_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py in from_json_file(cls, json_file)
    163         """Constructs a `BertConfig` from a json file of parameters."""
    164         with open(json_file, "r", encoding='utf-8') as reader:
--> 165             text = reader.read()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte
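The traceback is informative: from_pretrained expects a shortcut name or a path to a directory containing a config.json plus the weights file, not the weights file itself. Given only the .bin path, the library tries to parse the binary checkpoint as the JSON config and fails on the first byte; the second attempt below fails the same way. A sketch of loading from a local directory instead (the directory name is illustrative; it must hold both config.json and pytorch_model.bin):

model = BertModel.from_pretrained('/Users/datalab/bigdata/bert-base-uncased/')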
# Load pre-trained model (weights)
model = BertModel.from_pretrained('/Users/datalab/bigdata/bert-base-uncased-pytorch_model.bin')
# Set the model in evaluation mode to deactivate the dropout modules.
# This is IMPORTANT for reproducible results during evaluation!
model.eval()
# If you have a GPU, put everything on cuda
# tokens_tensor = tokens_tensor.to('cuda')
# segments_tensors = segments_tensors.to('cuda')
# model.to('cuda')
# Predict hidden states features for each layer
with torch.no_grad():
    # See the models' docstrings for the details of the inputs
    outputs = model(tokens_tensor, token_type_ids=segments_tensors)
    # PyTorch-Transformers models always output tuples.
    # See the models' docstrings for the details of all the outputs.
    # In our case, the first element is the hidden state of the last
    # layer of the BERT model.
    encoded_layers = outputs[0]

# We have encoded our input sequence in a FloatTensor of shape
# (batch size, sequence length, model hidden dimension)
assert tuple(encoded_layers.shape) == (1, len(indexed_tokens), model.config.hidden_size)
INFO:pytorch_transformers.modeling_utils:loading configuration file /Users/datalab/bigdata/bert-base-uncased-pytorch_model.bin
---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-3-a62f7f60e32b> in <module>
      1 # Load pre-trained model (weights)
----> 2 model = BertModel.from_pretrained('/Users/datalab/bigdata/bert-base-uncased-pytorch_model.bin')

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte
# Load pre-trained model (weights)
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()
# Put everything on the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokens_tensor = tokens_tensor.to(device)
segments_tensors = segments_tensors.to(device)
model.to(device)
# Predict all tokens
with torch.no_grad():
    outputs = model(tokens_tensor, token_type_ids=segments_tensors)
    predictions = outputs[0]
# confirm we were able to predict 'henson'
predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
assert predicted_token == 'henson'
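Beyond checking the single argmax, it can be instructive to inspect the model's runner-up candidates for the masked position. A small sketch using torch.topk (the choice of five candidates is arbitrary):

_, top_ids = torch.topk(predictions[0, masked_index], 5)
print(tokenizer.convert_ids_to_tokens(top_ids.tolist()))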