11_graph_spectral.py

Download
python 373 lines 10.7 KB
  1"""
  2Graph Theory and Spectral Graph Theory
  3
  4This script demonstrates:
  5- Graph construction (adjacency, degree, Laplacian matrices)
  6- Spectral decomposition of graph Laplacian
  7- Spectral clustering algorithm
  8- Simple Graph Neural Network (GNN) message passing
  9- PageRank computation
 10
 11Spectral graph theory connects graph structure to eigenvalues/eigenvectors
 12of graph matrices. It's foundational for:
 13- Graph clustering
 14- Graph Neural Networks (GNNs)
 15- Graph signal processing
 16- Community detection
 17"""
 18
 19import numpy as np
 20import matplotlib.pyplot as plt
 21from scipy.linalg import eigh
 22from scipy.sparse import csr_matrix
 23from scipy.sparse.linalg import eigs
 24import torch
 25import torch.nn.functional as F
 26
 27
 28def construct_graph_matrices(edges, num_nodes):
 29    """
 30    Construct graph matrices from edge list.
 31
 32    Args:
 33        edges: List of (source, target) tuples
 34        num_nodes: Number of nodes in graph
 35
 36    Returns:
 37        A: Adjacency matrix
 38        D: Degree matrix
 39        L: Laplacian matrix (D - A)
 40        L_norm: Normalized Laplacian
 41    """
 42    # Adjacency matrix
 43    A = np.zeros((num_nodes, num_nodes))
 44    for i, j in edges:
 45        A[i, j] = 1
 46        A[j, i] = 1  # Undirected graph
 47
 48    # Degree matrix
 49    degrees = np.sum(A, axis=1)
 50    D = np.diag(degrees)
 51
 52    # Laplacian matrix: L = D - A
 53    L = D - A
 54
 55    # Normalized Laplacian: L_norm = D^(-1/2) L D^(-1/2) = I - D^(-1/2) A D^(-1/2)
 56    D_inv_sqrt = np.diag(1.0 / np.sqrt(degrees + 1e-10))
 57    L_norm = np.eye(num_nodes) - D_inv_sqrt @ A @ D_inv_sqrt
 58
 59    return A, D, L, L_norm
 60
 61
 62def spectral_decomposition(L):
 63    """
 64    Compute eigenvalues and eigenvectors of Laplacian.
 65
 66    The eigenvalues of the Laplacian encode important graph properties:
 67    - Number of zero eigenvalues = number of connected components
 68    - Second smallest eigenvalue (Fiedler value) measures connectivity
 69    - Eigenvectors can be used for embedding and clustering
 70    """
 71    eigenvalues, eigenvectors = eigh(L)
 72
 73    # Sort by eigenvalue
 74    idx = np.argsort(eigenvalues)
 75    eigenvalues = eigenvalues[idx]
 76    eigenvectors = eigenvectors[:, idx]
 77
 78    return eigenvalues, eigenvectors
 79
 80
 81def spectral_clustering(L_norm, num_clusters=2):
 82    """
 83    Spectral clustering algorithm.
 84
 85    Steps:
 86    1. Compute k smallest eigenvectors of normalized Laplacian
 87    2. Use these as feature representations
 88    3. Apply k-means clustering
 89
 90    Args:
 91        L_norm: Normalized Laplacian matrix
 92        num_clusters: Number of clusters
 93
 94    Returns:
 95        Cluster assignments
 96    """
 97    # Get k smallest eigenvectors
 98    eigenvalues, eigenvectors = spectral_decomposition(L_norm)
 99    features = eigenvectors[:, :num_clusters]
100
101    # Simple k-means clustering (manual implementation)
102    # Initialize centroids randomly
103    num_nodes = features.shape[0]
104    centroid_indices = np.random.choice(num_nodes, num_clusters, replace=False)
105    centroids = features[centroid_indices]
106
107    # Iterative assignment and update
108    max_iters = 100
109    for _ in range(max_iters):
110        # Assign to nearest centroid
111        distances = np.linalg.norm(features[:, np.newaxis, :] - centroids[np.newaxis, :, :], axis=2)
112        assignments = np.argmin(distances, axis=1)
113
114        # Update centroids
115        new_centroids = np.array([features[assignments == k].mean(axis=0) for k in range(num_clusters)])
116
117        # Check convergence
118        if np.allclose(centroids, new_centroids):
119            break
120
121        centroids = new_centroids
122
123    return assignments
124
125
126def simple_gnn_forward_pass(A, X, W):
127    """
128    Simple Graph Neural Network (GNN) forward pass.
129
130    Message passing: H^(l+1) = σ(D^(-1/2) A D^(-1/2) H^(l) W^(l))
131
132    Args:
133        A: Adjacency matrix
134        X: Node features (num_nodes x feature_dim)
135        W: Weight matrix (feature_dim x output_dim)
136
137    Returns:
138        Updated node features
139    """
140    # Add self-loops
141    A_tilde = A + np.eye(A.shape[0])
142
143    # Degree matrix
144    D_tilde = np.diag(np.sum(A_tilde, axis=1))
145
146    # Normalized adjacency: D^(-1/2) A D^(-1/2)
147    D_inv_sqrt = np.diag(1.0 / np.sqrt(np.diag(D_tilde) + 1e-10))
148    A_norm = D_inv_sqrt @ A_tilde @ D_inv_sqrt
149
150    # Message passing
151    H = A_norm @ X @ W
152
153    # Apply activation (ReLU)
154    H = np.maximum(0, H)
155
156    return H
157
158
159def pagerank(A, damping=0.85, max_iters=100, tol=1e-6):
160    """
161    PageRank algorithm.
162
163    PageRank models a random walk on the graph:
164    r = (1-d)/N * 1 + d * A^T D^(-1) r
165
166    where d is damping factor, N is number of nodes.
167
168    Args:
169        A: Adjacency matrix
170        damping: Damping factor (probability of following links)
171        max_iters: Maximum iterations
172        tol: Convergence tolerance
173
174    Returns:
175        PageRank scores
176    """
177    num_nodes = A.shape[0]
178
179    # Out-degree (column sums for directed graph, row sums for undirected)
180    out_degree = np.sum(A, axis=1)
181    out_degree[out_degree == 0] = 1  # Avoid division by zero
182
183    # Transition matrix: P = A^T D^(-1)
184    P = (A.T / out_degree).T
185
186    # Initialize ranks uniformly
187    r = np.ones(num_nodes) / num_nodes
188
189    # Power iteration
190    for i in range(max_iters):
191        r_new = (1 - damping) / num_nodes + damping * P.T @ r
192
193        # Check convergence
194        if np.linalg.norm(r_new - r, 1) < tol:
195            print(f"PageRank converged in {i+1} iterations")
196            break
197
198        r = r_new
199
200    return r
201
202
203def visualize_graph_clustering():
204    """
205    Create a graph with clear community structure and apply spectral clustering.
206    """
207    print("=== Spectral Clustering Example ===\n")
208
209    # Create a graph with two communities
210    # Community 1: nodes 0-4
211    # Community 2: nodes 5-9
212    edges_c1 = [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4), (2, 4)]
213    edges_c2 = [(5, 6), (5, 7), (6, 7), (6, 8), (7, 8), (8, 9), (7, 9)]
214    edges_between = [(4, 5)]  # Weak connection between communities
215
216    all_edges = edges_c1 + edges_c2 + edges_between
217    num_nodes = 10
218
219    # Construct matrices
220    A, D, L, L_norm = construct_graph_matrices(all_edges, num_nodes)
221
222    print(f"Number of nodes: {num_nodes}")
223    print(f"Number of edges: {len(all_edges)}")
224    print(f"Average degree: {np.sum(A) / num_nodes:.2f}\n")
225
226    # Spectral decomposition
227    eigenvalues, eigenvectors = spectral_decomposition(L_norm)
228    print(f"Smallest 5 eigenvalues: {eigenvalues[:5]}")
229    print(f"Fiedler value (2nd smallest): {eigenvalues[1]:.4f}\n")
230
231    # Spectral clustering
232    assignments = spectral_clustering(L_norm, num_clusters=2)
233    print(f"Cluster assignments: {assignments}\n")
234
235    # Visualize
236    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
237
238    # Plot 1: Adjacency matrix
239    im1 = axes[0].imshow(A, cmap='Blues', interpolation='nearest')
240    axes[0].set_title('Adjacency Matrix')
241    axes[0].set_xlabel('Node')
242    axes[0].set_ylabel('Node')
243    plt.colorbar(im1, ax=axes[0])
244
245    # Plot 2: First 3 eigenvectors
246    axes[1].plot(eigenvectors[:, 0], 'o-', label='λ=0 (constant)')
247    axes[1].plot(eigenvectors[:, 1], 's-', label=f'λ={eigenvalues[1]:.3f} (Fiedler)')
248    axes[1].plot(eigenvectors[:, 2], '^-', label=f'λ={eigenvalues[2]:.3f}')
249    axes[1].set_title('First 3 Eigenvectors of Laplacian')
250    axes[1].set_xlabel('Node Index')
251    axes[1].set_ylabel('Eigenvector Value')
252    axes[1].legend()
253    axes[1].grid(True, alpha=0.3)
254
255    # Plot 3: Spectral embedding (2D)
256    colors = ['red' if a == 0 else 'blue' for a in assignments]
257    axes[2].scatter(eigenvectors[:, 1], eigenvectors[:, 2], c=colors, s=100, alpha=0.6)
258    for i in range(num_nodes):
259        axes[2].annotate(str(i), (eigenvectors[i, 1], eigenvectors[i, 2]),
260                        ha='center', va='center', fontsize=8, color='white', weight='bold')
261    axes[2].set_title('Spectral Embedding (Fiedler vs 3rd eigenvector)')
262    axes[2].set_xlabel('2nd Eigenvector (Fiedler)')
263    axes[2].set_ylabel('3rd Eigenvector')
264    axes[2].grid(True, alpha=0.3)
265
266    plt.tight_layout()
267    plt.savefig('/tmp/graph_spectral_clustering.png', dpi=150, bbox_inches='tight')
268    print("Plot saved to /tmp/graph_spectral_clustering.png\n")
269    plt.close()
270
271
272def demonstrate_gnn():
273    """
274    Demonstrate simple GNN message passing.
275    """
276    print("=== Graph Neural Network (GNN) Message Passing ===\n")
277
278    # Small graph
279    edges = [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)]
280    num_nodes = 4
281    A, _, _, _ = construct_graph_matrices(edges, num_nodes)
282
283    # Random node features (4 nodes, 3 features each)
284    X = np.random.randn(num_nodes, 3)
285    print(f"Input node features shape: {X.shape}")
286    print(f"Input features:\n{X}\n")
287
288    # Random weight matrix (3 input features -> 2 output features)
289    W = np.random.randn(3, 2)
290
291    # Forward pass
292    H = simple_gnn_forward_pass(A, X, W)
293    print(f"Output node features shape: {H.shape}")
294    print(f"Output features:\n{H}\n")
295
296    print("Message passing aggregates information from neighbors,")
297    print("weighted by the normalized adjacency matrix.\n")
298
299
300def demonstrate_pagerank():
301    """
302    Demonstrate PageRank on a simple directed graph.
303    """
304    print("=== PageRank Algorithm ===\n")
305
306    # Create a simple directed graph
307    # Node 0 links to 1, 2
308    # Node 1 links to 2
309    # Node 2 links to 0
310    # Node 3 links to 2 (dangling node pointing in)
311    A = np.array([
312        [0, 1, 1, 0],
313        [0, 0, 1, 0],
314        [1, 0, 0, 0],
315        [0, 0, 1, 0]
316    ], dtype=float)
317
318    print("Adjacency matrix (directed graph):")
319    print(A)
320    print()
321
322    # Compute PageRank
323    ranks = pagerank(A, damping=0.85)
324
325    print("PageRank scores:")
326    for i, r in enumerate(ranks):
327        print(f"  Node {i}: {r:.4f}")
328    print(f"  Sum: {np.sum(ranks):.4f} (should be 1.0)\n")
329
330    # Visualize
331    fig, ax = plt.subplots(figsize=(8, 5))
332    bars = ax.bar(range(len(ranks)), ranks, color='steelblue', alpha=0.7)
333    ax.set_title('PageRank Scores')
334    ax.set_xlabel('Node')
335    ax.set_ylabel('PageRank Score')
336    ax.set_xticks(range(len(ranks)))
337    ax.grid(True, alpha=0.3, axis='y')
338
339    # Annotate values
340    for i, (bar, rank) in enumerate(zip(bars, ranks)):
341        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
342               f'{rank:.3f}', ha='center', va='bottom', fontsize=10)
343
344    plt.tight_layout()
345    plt.savefig('/tmp/pagerank.png', dpi=150, bbox_inches='tight')
346    print("Plot saved to /tmp/pagerank.png\n")
347    plt.close()
348
349
350if __name__ == "__main__":
351    print("=" * 60)
352    print("Graph Theory and Spectral Graph Theory")
353    print("=" * 60)
354    print()
355
356    # Set random seed
357    np.random.seed(42)
358
359    # Run demonstrations
360    visualize_graph_clustering()
361
362    demonstrate_gnn()
363
364    demonstrate_pagerank()
365
366    print("=" * 60)
367    print("Summary:")
368    print("- Graph Laplacian eigenvalues encode connectivity structure")
369    print("- Spectral clustering uses eigenvectors as node embeddings")
370    print("- GNNs aggregate neighbor information via message passing")
371    print("- PageRank measures node importance via random walks")
372    print("=" * 60)