In [1]:
import pickle

# Local project modules: dataset wrapper, model, and saliency methods
from dataset import NewsDataset
from model import DistilBertForSequenceClassification

from smooth_gradient import SmoothGradient
from integrated_gradient import IntegratedGradient

import torch
from torch import nn
from torch.utils.data import DataLoader
from transformers import DistilBertConfig, DistilBertTokenizer

from IPython.display import display, HTML
In [3]:
config = DistilBertConfig()
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
# Custom classification head defined in model.py; 93 target classes.
distilbert = DistilBertForSequenceClassification(config, num_labels=93)

criterion = nn.CrossEntropyLoss()

batch_size = 1

# Path to the fine-tuned checkpoint.
path = '/media/vitaliy/9C690A1791D68B8B/after/learningfolder/distilbert_medium_titles/distilbert.pth'

# map_location lets the checkpoint load on CPU-only machines as well.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
distilbert.load_state_dict(torch.load(path, map_location=device))
    
# Label encoder fitted during training; maps class ids back to names.
with open('../label_encoder.sklrn', 'rb') as f:
    le = pickle.load(f)
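
As a quick sanity check (assuming le is a scikit-learn LabelEncoder, as the inverse_transform call in In [7] suggests), its class count should match the model's num_labels:

In [ ]:
# Hypothetical check, not part of the original run: the encoder should
# cover all 93 classes the classification head was built with.
assert len(le.classes_) == 93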
In [4]:
test_example = [
    ["Interpretation of HuggingFase's model decision"], 
    ["Transformer-based models have taken a leading role "
     "in NLP today."]
]

test_dataset = NewsDataset(
    data_list=test_example,
    tokenizer=tokenizer,
    # DistilBERT's maximum sequence length (512 for the default config).
    max_length=config.max_position_embeddings,
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
In [5]:
# Compute Integrated Gradients saliency for every instance in the loader.
integrated_grad = IntegratedGradient(
    distilbert,
    criterion,
    tokenizer,
    show_progress=False,
    encoder="bert"
)
instances = integrated_grad.saliency_interpret(test_dataloader)
In [6]:
colored_string = integrated_grad.colorize(instances[0])
display(HTML(colored_string))
Using bos_token, but it is not set yet.
[CLS] interpretation of huggingface ' s model decision [SEP] transformer - based models have taken a leading role in nlp today . [SEP] Label: 44 |47.84%|
In [7]:
label = instances[0]['label']
print(f"Converted label #{label}: {le.inverse_transform([label])[0]}")
Converted label #44: machine-learning
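
SmoothGradient is imported above but not exercised in this notebook. As a sketch, it should drop in wherever IntegratedGradient is used, assuming its constructor and saliency_interpret/colorize methods mirror those shown in In [5] and In [6] (an assumption about this repo's API, not confirmed by the run above):

In [ ]:
# Sketch only: assumes SmoothGradient exposes the same interface
# as IntegratedGradient above (constructor, saliency_interpret, colorize).
smooth_grad = SmoothGradient(
    distilbert,
    criterion,
    tokenizer,
    show_progress=False,
    encoder="bert",
)
smooth_instances = smooth_grad.saliency_interpret(test_dataloader)
display(HTML(smooth_grad.colorize(smooth_instances[0])))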