import pickle
from dataset import NewsDataset
from model import DistilBertForSequenceClassification
from smooth_gradient import SmoothGradient
from integrated_gradient import IntegratedGradient
import torch
from torch import nn
from torch.utils.data import DataLoader
from transformers import DistilBertConfig, DistilBertTokenizer
from IPython.display import display, HTML
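# Build the tokenizer and a DistilBERT classifier with 93 target classes
# (one per category known to the label encoder loaded below).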
config = DistilBertConfig()
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
distilbert = DistilBertForSequenceClassification(config, num_labels=93)
criterion = nn.CrossEntropyLoss()
batch_size = 1
path = '/media/vitaliy/9C690A1791D68B8B/after/learningfolder/distilbert_medium_titles/distilbert.pth'
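# Load the fine-tuned weights, falling back to CPU deserialization when no GPU is available.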
if torch.cuda.is_available():
    distilbert.load_state_dict(
        torch.load(path)
    )
else:
    distilbert.load_state_dict(
        torch.load(path, map_location=torch.device('cpu'))
    )
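# Restore the fitted label encoder (a pickled scikit-learn LabelEncoder) so predicted
# class ids can be mapped back to human-readable names.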
with open('../label_encoder.sklrn', 'rb') as f:
    le = pickle.load(f)
test_example = [
    ["Interpretation of HuggingFace's model decision"],
    ["Transformer-based models have taken a leading role "
     "in NLP today."]
]
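# Wrap the example in the same Dataset/DataLoader pipeline used for training,
# truncating inputs to the model's maximum sequence length.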
test_dataset = NewsDataset(
    data_list=test_example,
    tokenizer=tokenizer,
    max_length=config.max_position_embeddings,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
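# Build the Integrated Gradients interpreter around the model, loss, and tokenizer.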
integrated_grad = IntegratedGradient(
    distilbert,
    criterion,
    tokenizer,
    show_progress=False,
    encoder="bert"
)
instances = integrated_grad.saliency_interpret(test_dataloader)
colored_string = integrated_grad.colorize(instances[0])
display(HTML(colored_string))
# Output: Using bos_token, but it is not set yet.
label = instances[0]['label']
print(f"Converted label #{label}: {le.inverse_transform([label])[0]}")
# Output: Converted label #44: machine-learning
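# SmoothGradient is imported above but never exercised in this snippet. As a minimal
# sketch, assuming it exposes the same constructor and saliency_interpret/colorize
# interface as IntegratedGradient (an assumption, not shown here), a smoothed saliency
# map could be produced analogously:
smooth_grad = SmoothGradient(
    distilbert,
    criterion,
    tokenizer,
    show_progress=False,
    encoder="bert"
)
smooth_instances = smooth_grad.saliency_interpret(test_dataloader)
display(HTML(smooth_grad.colorize(smooth_instances[0])))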