Skip to content

Commit

Permalink
Merge pull request #308 from debbiemarkslab/fix_mmcif_parser
Browse files: browse the repository at this point in the history
fix KeyError due to absence of secondary structure sections
  • Loading branch information
thomashopf committed Jul 23, 2024
2 parents 374d4c5 + f32491f commit b7abe35
Showing 1 changed file with 39 additions and 19 deletions.
58 changes: 39 additions & 19 deletions evcouplings/compare/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,15 +500,23 @@ def __init__(self, filehandle, keep_full_data=False):
name: _decode(data[source_column]) for source_column, name in ATOM_TARGET_COLS.items()
})

# decode information into dataframe with BioPython helper method
self.helix_table = pd.DataFrame({
name: _decode(data[source_column]) for source_column, name in HELIX_TARGET_COLS.items()
})

# decode information into dataframe with BioPython helper method
self.sheet_table = pd.DataFrame({
name: _decode(data[source_column]) for source_column, name in SHEET_TARGET_COLS.items()
})
# decode information into dataframe with BioPython helper method; note this section may not be
# present if no helices exist in the structure
try:
self.helix_table = pd.DataFrame({
name: _decode(data[source_column]) for source_column, name in HELIX_TARGET_COLS.items()
})
except KeyError:
self.helix_table = None

# decode information into dataframe with BioPython helper method; note this section may not be
# present if no sheets exist in the structure
try:
self.sheet_table = pd.DataFrame({
name: _decode(data[source_column]) for source_column, name in SHEET_TARGET_COLS.items()
})
except KeyError:
self.sheet_table = None

# create secondary structure table for merging to chain tables
# (will only contain helix/H and strand/E, coil/C will need to be filled in)
Expand All @@ -517,6 +525,10 @@ def __init__(self, filehandle, keep_full_data=False):
("H", self.helix_table),
("E", self.sheet_table)
]:
# skip if secondary structure element not present in PDB file at all
if sse_table is None:
continue

for _, row in sse_table.iterrows():
assert row.beg_label_asym_id == row.end_label_asym_id
for seq_id in range(row.beg_label_seq_id, row.end_label_seq_id + 1):
Expand All @@ -527,11 +539,14 @@ def __init__(self, filehandle, keep_full_data=False):
})

# drop duplicates, since there are overlapping helix segment annotations (e.g. for PDB 6cup:A:Asp92)
self.secondary_structure = pd.DataFrame(
sse_raw
).drop_duplicates(
subset=["label_asym_id", "label_seq_id"]
)
if len(sse_raw) > 0:
self.secondary_structure = pd.DataFrame(
sse_raw
).drop_duplicates(
subset=["label_asym_id", "label_seq_id"]
)
else:
self.secondary_structure = None

# store information about models/chains for quick retrieval and verification;
# subtract 0 to start numbering consistently with how this was handled with MMTF
Expand Down Expand Up @@ -692,11 +707,16 @@ def get_chain(self, chain, model=0, is_author_id=True):
res.index.name = "residue_index"

# merge secondary structure information (left outer join as coil is missing from table)
res_sse = res.merge(
self.secondary_structure,
on=("label_seq_id", "label_asym_id"),
how="left"
)
if self.secondary_structure is not None:
res_sse = res.merge(
self.secondary_structure,
on=("label_seq_id", "label_asym_id"),
how="left"
)
else:
res_sse = res.assign(
sec_struct_3state=np.nan
)

res_sse.loc[
res_sse.sec_struct_3state.isnull() & (res_sse.label_seq_id > 0),
Expand Down

0 comments on commit b7abe35

Please sign in to comment.