Visualising InterPro Domain Annotations (Pfam and CATH-Gene3D)
This example demonstrates fetching domain information from InterPro member databases, specifically Pfam and CATH-Gene3D, and visualising them.
This script uses the InterProClient to fetch domain annotations from both Pfam and CATH-Gene3D for the protein p53 (P04637).
- An `AxisTrack` sets the sequence scale.
- Two `InterProTrack` instances are created: one for Pfam data and one for CATH-Gene3D data.
- Both tracks use `plotting_option="full"` and `show_domain_labels=True` to display each domain signature in its own lane, labelled with its accession, name, and InterPro entry type. The `database_name_for_label` parameter is crucial for accurate labelling in "full" mode.

This allows for a clear comparison of domain architectures as defined by different InterPro member databases.
from protviz import plot_protein_tracks
from protviz.data_retrieval import InterProClient, get_protein_sequence_length
from protviz.tracks import AxisTrack, InterProTrack
def main():
    """Plot Pfam and CATH-Gene3D domain annotations for human p53.

    Looks up the sequence length for the UniProt accession, fetches the Pfam
    and CATH-Gene3D annotations through ``InterProClient``, prints how many of
    each were found, and draws an axis track plus one ``InterProTrack`` per
    member database with ``plot_protein_tracks``.

    Any exception raised along the way is caught here: a short message is
    printed, followed by the traceback, and the function returns normally.
    """
    # P04637 is the accession the example description names for p53 (it has
    # Pfam and CATH-Gene3D entries via InterPro). The previous literal,
    # "Q92794", did not match the protein named in the comment and the docs.
    uniprot_id = "P04637"
    interpro_client = InterProClient()

    try:
        seq_length = get_protein_sequence_length(uniprot_id)
        print(f"Sequence length for {uniprot_id}: {seq_length}")

        # Fetch Pfam annotations
        pfam_annotations = interpro_client.get_pfam_annotations(uniprot_id)
        if pfam_annotations:
            print(f"Found {len(pfam_annotations)} Pfam annotations.")
        else:
            print("No Pfam annotations found.")

        # Fetch CATH-Gene3D annotations
        cath_gene3d_annotations = interpro_client.get_cathgene3d_annotations(
            uniprot_id
        )
        if cath_gene3d_annotations:
            print(f"Found {len(cath_gene3d_annotations)} CATH-Gene3D annotations.")
        else:
            print("No CATH-Gene3D annotations found.")

        # Create tracks: one axis for the sequence scale, then one track per
        # member database so the two domain architectures can be compared.
        axis_trk = AxisTrack(sequence_length=seq_length, label="Sequence")

        pfam_trk = InterProTrack(
            domain_data=pfam_annotations,
            database_name_for_label="Pfam",  # Important for labelling
            label="Pfam Domains",
            plotting_option="full",
            show_domain_labels=True,
        )

        cath_gene3d_trk = InterProTrack(
            domain_data=cath_gene3d_annotations,
            database_name_for_label="CATH-Gene3D",  # Important for labelling
            label="CATH-Gene3D Domains",
            plotting_option="full",
            show_domain_labels=True,
        )

        # Plot the tracks
        plot_protein_tracks(
            protein_id=uniprot_id,
            sequence_length=seq_length,
            tracks=[axis_trk, pfam_trk, cath_gene3d_trk],
            figure_width=14,
            figure_height=6,
            save_option=True,
        )
        # NOTE(review): the file name below assumes plot_protein_tracks saves
        # to "<protein_id>_plot.png" when save_option=True -- confirm.
        print(f"InterPro example plot saved as {uniprot_id}_plot.png")

    except Exception as e:
        # Top-level boundary of an example script: report the failure with
        # its full traceback rather than letting the exception propagate.
        print(f"An error occurred during the InterPro example: {e}")
        import traceback

        traceback.print_exc()
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
The previous example will generate a plot like this one:
