Under oxidative stress, cysteines can undergo oxidative post-translational modifications (PTMs). The study by Akter et al. compares S-sulfinylation (R-SO2H) and S-sulfenylation (R-SOH) in A549 and HeLa cell lines.
In this notebook we map the positions of these PTMs on available 3D structures in the Protein Data Bank.
We use the datasets provided in the supplementary materials of the following paper:
Chemical proteomics reveals new targets of cysteine sulfinic acid reductase. Akter S, Fu L, Jung Y, Conte ML, Lawson JR, Lowther WT, Sun, Liu, Yang J, Carroll KS. Nat Chem Biol. 2018 Sep 3. doi: 10.1038/s41589-018-0116-2
import pandas as pd
import numpy as np
from io import BytesIO
import xlrd
from ipywidgets import interact, IntSlider, widgets
import py3Dmol
from pyspark.sql import SparkSession
from pyspark.sql.functions import asc, collect_set, collect_list, col, concat_ws, sort_array
from mmtfPyspark.datasets import pdbToUniProt, pdbPtmDataset
# One checkbox per dataset (cell line + modification type).
# Only the two A549 datasets are ticked initially.
w1 = widgets.Checkbox(description='A549-RSO2H', value=True, disabled=False)
w2 = widgets.Checkbox(description='HeLa-RSO2H', value=False, disabled=False)
w3 = widgets.Checkbox(description='A549-RSOH', value=True, disabled=False)
w4 = widgets.Checkbox(description='HeLa-RSOH', value=False, disabled=False)
display(w1, w2, w3, w4)
Checkbox(value=True, description='A549-RSO2H')
Checkbox(value=False, description='HeLa-RSO2H')
Checkbox(value=True, description='A549-RSOH')
Checkbox(value=False, description='HeLa-RSOH')
def read_datasets():
    """Download the datasets whose checkbox is ticked.

    Reads the module-level checkboxes ``w1``..``w4`` and, for every ticked
    one, loads the matching sheet of the supplementary Excel workbook with
    all cells read as strings.  Each resulting frame gets a constant
    ``ptms`` column holding the dataset label, and its site / accession
    columns are renamed to ``modifiedSite`` / ``uniprotAccession``.

    Each workbook is downloaded at most once, even when both of its sheets
    are selected.

    Returns
    -------
    list of pandas.DataFrame
        One frame per ticked checkbox, in checkbox order (w1, w2, w3, w4).
        The list is empty when nothing is ticked.
    """
    rso2h_url = 'https://static-content.springer.com/esm/art%3A10.1038%2Fs41589-018-0116-2/MediaObjects/41589_2018_116_MOESM32_ESM.xlsx'
    rsoh_url = 'https://static-content.springer.com/esm/art%3A10.1038%2Fs41589-018-0116-2/MediaObjects/41589_2018_116_MOESM33_ESM.xlsx'

    # (checkbox, workbook URL, sheet name, site column in that sheet, label)
    sources = [
        (w1, rso2h_url, 'A549', 'Modified site', 'A549-RSO2H'),
        (w2, rso2h_url, 'HeLa', 'Modified site', 'HeLa-RSO2H'),
        (w3, rsoh_url, 'A549', 'Site #', 'A549-RSOH'),
        (w4, rsoh_url, 'HeLa', 'Site #', 'HeLa-RSOH'),
    ]
    selected = [source for source in sources if source[0].value]

    workbooks = {}  # URL -> {sheet name: DataFrame}
    dfs = []
    for _, url, sheet, site_column, label in selected:
        if url not in workbooks:
            # Fetch every selected sheet of this workbook in one request;
            # a list for sheet_name makes read_excel return a dict of frames.
            wanted_sheets = [s[2] for s in selected if s[1] == url]
            workbooks[url] = pd.read_excel(url, sheet_name=wanted_sheets, dtype=str)

        frame = workbooks[url][sheet].assign(ptms=label)
        frame = frame.rename(
            index=str,
            columns={site_column: "modifiedSite",
                     "Uniprot Accession #": "uniprotAccession"},
        )
        dfs.append(frame)
    return dfs
# concatenate and process dataset
dfs = read_datasets()
if not dfs:
    # pd.concat([]) would fail with a less helpful "No objects to concatenate"
    raise ValueError("No dataset selected: tick at least one checkbox above.")
df = pd.concat(dfs, ignore_index=True, sort=False)
# Keep only the columns used downstream.  The explicit copy makes df an
# independent frame, so the column assignment below does not trigger
# pandas' SettingWithCopyWarning.
df = df[['ptms', 'modifiedSite', 'uniprotAccession', 'Description']].copy()
# The sheets are read with dtype=str; residue numbers must be integers to
# be compared with the UniProt residue numbers in the join further down.
df['modifiedSite'] = df['modifiedSite'].astype(np.int64)
df.head()
ptms | modifiedSite | uniprotAccession | Description | |
---|---|---|---|---|
0 | A549-RSO2H | 25 | P63104 | 14-3-3 protein zeta/delta OS=Homo sapiens GN=Y... |
1 | A549-RSO2H | 171 | P52209 | 6-phosphogluconate dehydrogenase, decarboxylat... |
2 | A549-RSO2H | 122 | Q9H7C9 | Mth938 domain-containing protein OS=Homo sapie... |
3 | A549-RSO2H | 187 | P00505 | Aspartate aminotransferase, mitochondrial OS=H... |
4 | A549-RSO2H | 477 | P49748 | Very long-chain specific acyl-CoA dehydrogenas... |
# Start (or reuse) a Spark session and load the Pandas table into it,
# ordered by UniProt accession and modified residue number.
spark = (
    SparkSession.builder
    .appName("CysOxidationTo3DStructure")
    .getOrCreate()
)
ds = spark.createDataFrame(df)
ds = ds.sort("uniprotAccession", "modifiedSite")
Download PDB to UniProt mappings and filter out residues that were not observed in the 3D structure.
# Residue-level PDB <-> UniProt mappings, restricted to rows that carry a
# PDB residue number.
residue_mappings = pdbToUniProt.get_cached_residue_mappings()
up = residue_mappings.filter(col("pdbResNum").isNotNull())
Join the PTM data with the PDB data where the UniProt ID and the UniProt residue number match.
# A mapping row matches a PTM row when both the UniProt accession and the
# UniProt residue number agree.
same_protein = up.uniprotId == ds.uniprotAccession
same_residue = up.uniprotNum == ds.modifiedSite
st = up.join(ds, on=same_protein & same_residue)
# Aggregate data
# Step 1: one row per residue of a chain, gathering the labels of every
# dataset that reports a modification at that residue.  collect_list gives
# no ordering guarantee after a shuffle, so the labels are sorted to make
# the combined label string reproducible from run to run.
st = st.groupBy("structureChainId","pdbResNum","uniprotAccession","uniprotNum","Description").agg(sort_array(collect_list("ptms")).alias("ptms"))
# join the labels of one residue into a single comma-separated string
st = st.withColumn("ptms", concat_ws((","), col("ptms")))
# Step 2: one row per chain, with the per-residue labels and residue numbers
# gathered into lists.  These lists are deliberately not sorted: sorting them
# independently would break the pairing between residues and their labels.
st = st.groupBy("structureChainId","uniprotAccession","Description").agg(collect_list("ptms").alias("ptms"), collect_list("pdbResNum").alias("pdbResNum"), collect_list("uniprotNum").alias("uniprotNum"))
Keep only a single structural representative
# Chains with the same UniProt accession and the same list of UniProt
# residue numbers are collapsed into a single row.
representative_key = ["uniprotAccession", "uniprotNum"]
st = st.dropDuplicates(representative_key)
PDB residue numbers do not always match UniProt residue numbers. The table below shows the mapping for each protein chain.
# convert Spark dataframe back to a Pandas dataframe
# (the viewer defined further down indexes rows with DataFrame.iloc)
sp = st.toPandas()
# preview the first rows of the per-chain mapping table
sp.head()
structureChainId | uniprotAccession | Description | ptms | pdbResNum | uniprotNum | |
---|---|---|---|---|---|---|
0 | 1K4Q.A | P00390 | Glutathione reductase, mitochondrial OS=Homo s... | [A549-RSOH, A549-RSOH, A549-RSOH] | [63, 417, 423] | [107, 461, 467] |
1 | 1DGB.D | P04040 | Catalase OS=Homo sapiens GN=CAT PE=1 SV=3 | [A549-RSO2H,A549-RSOH, A549-RSO2H,A549-RSOH] | [460, 232] | [460, 232] |
2 | 1QKI.D | P11413 | Glucose-6-phosphate 1-dehydrogenase OS=Homo sa... | [A549-RSOH, A549-RSOH, A549-RSOH, A549-RSOH] | [13, 385, 158, 294] | [13, 385, 158, 294] |
3 | 2O8E.B | P52701 | DNA mismatch repair protein Msh6 OS=Homo sapie... | [A549-RSO2H,A549-RSOH] | [765] | [765] |
4 | 2KTV.A | P62495 | Eukaryotic peptide chain release factor subuni... | [A549-RSOH, A549-RSOH] | [335, 302] | [335, 302] |
Residues with reported modifications are shown in an all-atom representation as red sticks with transparent spheres. Each modified residue is labeled with its PDB residue number and the type of modification. Residues surrounding a modified residue (within 6 Å) are highlighted as orange sticks. Small molecules within the structure are rendered as gray sticks.
def view_modifications(df, cutoff_distance, *args):
    """Create an interactive py3Dmol viewer for the modified residues in ``df``.

    A slider selects a row of ``df``; the corresponding PDB entry is loaded
    and the residues listed in that row are highlighted and labeled.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per structure.  The following columns are read:
        ``structureChainId`` (a string of the form ``<pdbId>.<chainId>``),
        ``pdbResNum`` and ``ptms`` (two sequences that are iterated in
        lockstep: residue number and its label).
    cutoff_distance : number
        Used as the ``expand`` value of the selection that highlights
        residues surrounding the modified residues.
    *args : str
        Names of additional ``df`` columns whose values are printed above
        the structure.  Values of any type are accepted; they are converted
        with ``str()``.

    Returns
    -------
    The object returned by ``ipywidgets.interact``.
    """

    def view3d(show_labels=True, show_bio_assembly=False, show_surface=False, i=0):
        row = df.iloc[i]
        pdb_id, chain_id = row['structureChainId'].split('.')
        res_num = row['pdbResNum']
        labels = row['ptms']

        # print header
        print("PDB Id: " + pdb_id + " chain Id: " + chain_id)

        # print any specified additional columns from the dataframe;
        # str() keeps this working for non-string columns (lists, numbers)
        for a in args:
            print(a + ": " + str(row[a]))

        mod_res = {'chain': chain_id, 'resi': res_num}

        # select neighboring residues by distance
        surroundings = {'chain': chain_id, 'resi': res_num, 'byres': True, 'expand': cutoff_distance}

        viewer = py3Dmol.view(query='pdb:' + pdb_id, options={'doAssembly': show_bio_assembly})

        # polymer style
        viewer.setStyle({'cartoon': {'color': 'spectrum', 'width': 0.6, 'opacity': 0.8}})
        # non-polymer style
        viewer.setStyle({'hetflag': True}, {'stick': {'radius': 0.3, 'singleBond': False}})

        # style for modifications: thin sticks for the surroundings, thick
        # sticks plus semi-transparent spheres for the modified residues
        viewer.addStyle(surroundings, {'stick': {'colorscheme': 'orangeCarbon', 'radius': 0.15}})
        viewer.addStyle(mod_res, {'stick': {'colorscheme': 'redCarbon', 'radius': 0.4}})
        viewer.addStyle(mod_res, {'sphere': {'colorscheme': 'gray', 'opacity': 0.7}})

        # set residue labels
        if show_labels:
            for residue, label in zip(res_num, labels):
                viewer.addLabel(
                    str(residue) + ": " + str(label),
                    {'fontColor': 'black', 'fontSize': 9, 'backgroundColor': 'lightgray'},
                    {'chain': chain_id, 'resi': residue},
                )

        viewer.zoomTo(surroundings)

        if show_surface:
            viewer.addSurface(py3Dmol.SES, {'opacity': 0.8, 'color': 'lightblue'})

        return viewer.show()

    # one slider position per row of df
    s_widget = IntSlider(min=0, max=len(df)-1, description='Structure', continuous_update=False)

    return interact(view3d, show_labels=True, show_bio_assembly=False, show_surface=False, i=s_widget)
# Open the viewer on the per-chain table: 6 is the cutoff distance used to
# highlight surrounding residues, and the two column names are printed above
# each structure.
# NOTE(review): the trailing ';' presumably suppresses echoing the returned
# interact object in the notebook output - confirm.
view_modifications(sp, 6, 'uniprotAccession', 'Description');
PDB Id: 1K4Q chain Id: A uniprotAccession: P00390 Description: Glutathione reductase, mitochondrial OS=Homo sapiens GN=GSR PE=1 SV=2
You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
jupyter labextension install jupyterlab_3dmol
# shut down the Spark session created earlier in this notebook
spark.stop()