#!/usr/bin/env python
# coding: utf-8

# # Topological feature extraction using `VietorisRipsPersistence` and `PersistenceEntropy`
#
# In this notebook, we showcase the ease of use of one of the core components of `giotto-tda`: `VietorisRipsPersistence`, along with vectorisation methods. We first list the steps in a typical topological-feature extraction routine and then show how to encapsulate them in a standard `scikit-learn`–like pipeline.
#
# If you are looking at a static version of this notebook and would like to run its contents, head over to [github](https://github.com/giotto-ai/giotto-tda/blob/master/examples/vietoris_rips_quickstart.ipynb).
#
# **License: AGPLv3**

# ## Import libraries

# In[ ]:


from gtda.diagrams import PersistenceEntropy
from gtda.homology import VietorisRipsPersistence

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


# ## Generate data
#
# Let's begin by generating 3D point clouds of spheres and tori, along with a label of 0 (1) for each sphere (torus). We also add noise to each point cloud; its effect is to displace the points sampling the surfaces by a random amount in a random direction. **Note**: You will need the auxiliary module [datasets.py](https://github.com/giotto-ai/giotto-tda/blob/master/examples/datasets.py) to run this cell.

# In[ ]:


from datasets import generate_point_clouds

point_clouds, labels = generate_point_clouds(100, 10, 0.1)


# ## Calculate persistent homology
#
# Instantiate a `VietorisRipsPersistence` transformer and calculate persistence diagrams for this collection of point clouds.

# In[ ]:


vietorisrips_tr = VietorisRipsPersistence()
diagrams = vietorisrips_tr.fit_transform(point_clouds)


# ## Extract features
#
# Instantiate a `PersistenceEntropy` transformer and extract features from the persistence diagrams.

# In[ ]:


entropy_tr = PersistenceEntropy()
features = entropy_tr.fit_transform(diagrams)


# ## Use the new features in a standard classifier
#
# Leverage the compatibility with `scikit-learn` to perform a train-test split and score the features.

# In[ ]:


X_train, X_valid, y_train, y_valid = train_test_split(features, labels)
model = RandomForestClassifier()
model.fit(X_train, y_train)
model.score(X_valid, y_valid)


# ## Encapsulate the steps above in a pipeline
#
# Subdivide into train and validation sets first, then use the pipeline.

# In[ ]:


from gtda.pipeline import make_pipeline


# ## Define the pipeline
#
# Chain transformers from `giotto-tda` with `scikit-learn` ones.

# In[ ]:


steps = [VietorisRipsPersistence(),
         PersistenceEntropy(),
         RandomForestClassifier()]
pipeline = make_pipeline(*steps)


# ## Prepare the data
#
# Perform a train-test split on the point-cloud data.

# In[ ]:


pcs_train, pcs_valid, labels_train, labels_valid = train_test_split(
    point_clouds, labels)


# ## Train and score

# In[ ]:


pipeline.fit(pcs_train, labels_train)
pipeline.score(pcs_valid, labels_valid)
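
# ## Going further: higher homology dimensions
#
# By default, `VietorisRipsPersistence` tracks homology in dimensions 0 and 1 only. A sphere encloses a 2-dimensional void while a torus has both a void and two independent loops, so features that also capture dimension 2 may discriminate the two classes better. The cell below is a minimal sketch of this variant using the `homology_dimensions` parameter of `VietorisRipsPersistence`; the specific choice `[0, 1, 2]` is our suggestion and is not part of the steps above. It reuses the train/validation split defined earlier.

# In[ ]:


# Sketch: same pipeline as above, but computing persistent homology in
# dimensions 0, 1 and 2. PersistenceEntropy then produces one feature per
# homology dimension, giving the classifier three features per point cloud.
pipeline_h2 = make_pipeline(
    VietorisRipsPersistence(homology_dimensions=[0, 1, 2]),
    PersistenceEntropy(),
    RandomForestClassifier())
pipeline_h2.fit(pcs_train, labels_train)
pipeline_h2.score(pcs_valid, labels_valid)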