1"""
2Graph Theory and Spectral Graph Theory
3
4This script demonstrates:
5- Graph construction (adjacency, degree, Laplacian matrices)
6- Spectral decomposition of graph Laplacian
7- Spectral clustering algorithm
8- Simple Graph Neural Network (GNN) message passing
9- PageRank computation
10
11Spectral graph theory connects graph structure to eigenvalues/eigenvectors
12of graph matrices. It's foundational for:
13- Graph clustering
14- Graph Neural Networks (GNNs)
15- Graph signal processing
16- Community detection
17"""
18
19import numpy as np
20import matplotlib.pyplot as plt
21from scipy.linalg import eigh
22from scipy.sparse import csr_matrix
23from scipy.sparse.linalg import eigs
24import torch
25import torch.nn.functional as F
26
27
28def construct_graph_matrices(edges, num_nodes):
29 """
30 Construct graph matrices from edge list.
31
32 Args:
33 edges: List of (source, target) tuples
34 num_nodes: Number of nodes in graph
35
36 Returns:
37 A: Adjacency matrix
38 D: Degree matrix
39 L: Laplacian matrix (D - A)
40 L_norm: Normalized Laplacian
41 """
42 # Adjacency matrix
43 A = np.zeros((num_nodes, num_nodes))
44 for i, j in edges:
45 A[i, j] = 1
46 A[j, i] = 1 # Undirected graph
47
48 # Degree matrix
49 degrees = np.sum(A, axis=1)
50 D = np.diag(degrees)
51
52 # Laplacian matrix: L = D - A
53 L = D - A
54
55 # Normalized Laplacian: L_norm = D^(-1/2) L D^(-1/2) = I - D^(-1/2) A D^(-1/2)
56 D_inv_sqrt = np.diag(1.0 / np.sqrt(degrees + 1e-10))
57 L_norm = np.eye(num_nodes) - D_inv_sqrt @ A @ D_inv_sqrt
58
59 return A, D, L, L_norm
60
61
62def spectral_decomposition(L):
63 """
64 Compute eigenvalues and eigenvectors of Laplacian.
65
66 The eigenvalues of the Laplacian encode important graph properties:
67 - Number of zero eigenvalues = number of connected components
68 - Second smallest eigenvalue (Fiedler value) measures connectivity
69 - Eigenvectors can be used for embedding and clustering
70 """
71 eigenvalues, eigenvectors = eigh(L)
72
73 # Sort by eigenvalue
74 idx = np.argsort(eigenvalues)
75 eigenvalues = eigenvalues[idx]
76 eigenvectors = eigenvectors[:, idx]
77
78 return eigenvalues, eigenvectors
79
80
81def spectral_clustering(L_norm, num_clusters=2):
82 """
83 Spectral clustering algorithm.
84
85 Steps:
86 1. Compute k smallest eigenvectors of normalized Laplacian
87 2. Use these as feature representations
88 3. Apply k-means clustering
89
90 Args:
91 L_norm: Normalized Laplacian matrix
92 num_clusters: Number of clusters
93
94 Returns:
95 Cluster assignments
96 """
97 # Get k smallest eigenvectors
98 eigenvalues, eigenvectors = spectral_decomposition(L_norm)
99 features = eigenvectors[:, :num_clusters]
100
101 # Simple k-means clustering (manual implementation)
102 # Initialize centroids randomly
103 num_nodes = features.shape[0]
104 centroid_indices = np.random.choice(num_nodes, num_clusters, replace=False)
105 centroids = features[centroid_indices]
106
107 # Iterative assignment and update
108 max_iters = 100
109 for _ in range(max_iters):
110 # Assign to nearest centroid
111 distances = np.linalg.norm(features[:, np.newaxis, :] - centroids[np.newaxis, :, :], axis=2)
112 assignments = np.argmin(distances, axis=1)
113
114 # Update centroids
115 new_centroids = np.array([features[assignments == k].mean(axis=0) for k in range(num_clusters)])
116
117 # Check convergence
118 if np.allclose(centroids, new_centroids):
119 break
120
121 centroids = new_centroids
122
123 return assignments
124
125
126def simple_gnn_forward_pass(A, X, W):
127 """
128 Simple Graph Neural Network (GNN) forward pass.
129
130 Message passing: H^(l+1) = σ(D^(-1/2) A D^(-1/2) H^(l) W^(l))
131
132 Args:
133 A: Adjacency matrix
134 X: Node features (num_nodes x feature_dim)
135 W: Weight matrix (feature_dim x output_dim)
136
137 Returns:
138 Updated node features
139 """
140 # Add self-loops
141 A_tilde = A + np.eye(A.shape[0])
142
143 # Degree matrix
144 D_tilde = np.diag(np.sum(A_tilde, axis=1))
145
146 # Normalized adjacency: D^(-1/2) A D^(-1/2)
147 D_inv_sqrt = np.diag(1.0 / np.sqrt(np.diag(D_tilde) + 1e-10))
148 A_norm = D_inv_sqrt @ A_tilde @ D_inv_sqrt
149
150 # Message passing
151 H = A_norm @ X @ W
152
153 # Apply activation (ReLU)
154 H = np.maximum(0, H)
155
156 return H
157
158
159def pagerank(A, damping=0.85, max_iters=100, tol=1e-6):
160 """
161 PageRank algorithm.
162
163 PageRank models a random walk on the graph:
164 r = (1-d)/N * 1 + d * A^T D^(-1) r
165
166 where d is damping factor, N is number of nodes.
167
168 Args:
169 A: Adjacency matrix
170 damping: Damping factor (probability of following links)
171 max_iters: Maximum iterations
172 tol: Convergence tolerance
173
174 Returns:
175 PageRank scores
176 """
177 num_nodes = A.shape[0]
178
179 # Out-degree (column sums for directed graph, row sums for undirected)
180 out_degree = np.sum(A, axis=1)
181 out_degree[out_degree == 0] = 1 # Avoid division by zero
182
183 # Transition matrix: P = A^T D^(-1)
184 P = (A.T / out_degree).T
185
186 # Initialize ranks uniformly
187 r = np.ones(num_nodes) / num_nodes
188
189 # Power iteration
190 for i in range(max_iters):
191 r_new = (1 - damping) / num_nodes + damping * P.T @ r
192
193 # Check convergence
194 if np.linalg.norm(r_new - r, 1) < tol:
195 print(f"PageRank converged in {i+1} iterations")
196 break
197
198 r = r_new
199
200 return r
201
202
203def visualize_graph_clustering():
204 """
205 Create a graph with clear community structure and apply spectral clustering.
206 """
207 print("=== Spectral Clustering Example ===\n")
208
209 # Create a graph with two communities
210 # Community 1: nodes 0-4
211 # Community 2: nodes 5-9
212 edges_c1 = [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4), (2, 4)]
213 edges_c2 = [(5, 6), (5, 7), (6, 7), (6, 8), (7, 8), (8, 9), (7, 9)]
214 edges_between = [(4, 5)] # Weak connection between communities
215
216 all_edges = edges_c1 + edges_c2 + edges_between
217 num_nodes = 10
218
219 # Construct matrices
220 A, D, L, L_norm = construct_graph_matrices(all_edges, num_nodes)
221
222 print(f"Number of nodes: {num_nodes}")
223 print(f"Number of edges: {len(all_edges)}")
224 print(f"Average degree: {np.sum(A) / num_nodes:.2f}\n")
225
226 # Spectral decomposition
227 eigenvalues, eigenvectors = spectral_decomposition(L_norm)
228 print(f"Smallest 5 eigenvalues: {eigenvalues[:5]}")
229 print(f"Fiedler value (2nd smallest): {eigenvalues[1]:.4f}\n")
230
231 # Spectral clustering
232 assignments = spectral_clustering(L_norm, num_clusters=2)
233 print(f"Cluster assignments: {assignments}\n")
234
235 # Visualize
236 fig, axes = plt.subplots(1, 3, figsize=(15, 4))
237
238 # Plot 1: Adjacency matrix
239 im1 = axes[0].imshow(A, cmap='Blues', interpolation='nearest')
240 axes[0].set_title('Adjacency Matrix')
241 axes[0].set_xlabel('Node')
242 axes[0].set_ylabel('Node')
243 plt.colorbar(im1, ax=axes[0])
244
245 # Plot 2: First 3 eigenvectors
246 axes[1].plot(eigenvectors[:, 0], 'o-', label='λ=0 (constant)')
247 axes[1].plot(eigenvectors[:, 1], 's-', label=f'λ={eigenvalues[1]:.3f} (Fiedler)')
248 axes[1].plot(eigenvectors[:, 2], '^-', label=f'λ={eigenvalues[2]:.3f}')
249 axes[1].set_title('First 3 Eigenvectors of Laplacian')
250 axes[1].set_xlabel('Node Index')
251 axes[1].set_ylabel('Eigenvector Value')
252 axes[1].legend()
253 axes[1].grid(True, alpha=0.3)
254
255 # Plot 3: Spectral embedding (2D)
256 colors = ['red' if a == 0 else 'blue' for a in assignments]
257 axes[2].scatter(eigenvectors[:, 1], eigenvectors[:, 2], c=colors, s=100, alpha=0.6)
258 for i in range(num_nodes):
259 axes[2].annotate(str(i), (eigenvectors[i, 1], eigenvectors[i, 2]),
260 ha='center', va='center', fontsize=8, color='white', weight='bold')
261 axes[2].set_title('Spectral Embedding (Fiedler vs 3rd eigenvector)')
262 axes[2].set_xlabel('2nd Eigenvector (Fiedler)')
263 axes[2].set_ylabel('3rd Eigenvector')
264 axes[2].grid(True, alpha=0.3)
265
266 plt.tight_layout()
267 plt.savefig('/tmp/graph_spectral_clustering.png', dpi=150, bbox_inches='tight')
268 print("Plot saved to /tmp/graph_spectral_clustering.png\n")
269 plt.close()
270
271
272def demonstrate_gnn():
273 """
274 Demonstrate simple GNN message passing.
275 """
276 print("=== Graph Neural Network (GNN) Message Passing ===\n")
277
278 # Small graph
279 edges = [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)]
280 num_nodes = 4
281 A, _, _, _ = construct_graph_matrices(edges, num_nodes)
282
283 # Random node features (4 nodes, 3 features each)
284 X = np.random.randn(num_nodes, 3)
285 print(f"Input node features shape: {X.shape}")
286 print(f"Input features:\n{X}\n")
287
288 # Random weight matrix (3 input features -> 2 output features)
289 W = np.random.randn(3, 2)
290
291 # Forward pass
292 H = simple_gnn_forward_pass(A, X, W)
293 print(f"Output node features shape: {H.shape}")
294 print(f"Output features:\n{H}\n")
295
296 print("Message passing aggregates information from neighbors,")
297 print("weighted by the normalized adjacency matrix.\n")
298
299
300def demonstrate_pagerank():
301 """
302 Demonstrate PageRank on a simple directed graph.
303 """
304 print("=== PageRank Algorithm ===\n")
305
306 # Create a simple directed graph
307 # Node 0 links to 1, 2
308 # Node 1 links to 2
309 # Node 2 links to 0
310 # Node 3 links to 2 (dangling node pointing in)
311 A = np.array([
312 [0, 1, 1, 0],
313 [0, 0, 1, 0],
314 [1, 0, 0, 0],
315 [0, 0, 1, 0]
316 ], dtype=float)
317
318 print("Adjacency matrix (directed graph):")
319 print(A)
320 print()
321
322 # Compute PageRank
323 ranks = pagerank(A, damping=0.85)
324
325 print("PageRank scores:")
326 for i, r in enumerate(ranks):
327 print(f" Node {i}: {r:.4f}")
328 print(f" Sum: {np.sum(ranks):.4f} (should be 1.0)\n")
329
330 # Visualize
331 fig, ax = plt.subplots(figsize=(8, 5))
332 bars = ax.bar(range(len(ranks)), ranks, color='steelblue', alpha=0.7)
333 ax.set_title('PageRank Scores')
334 ax.set_xlabel('Node')
335 ax.set_ylabel('PageRank Score')
336 ax.set_xticks(range(len(ranks)))
337 ax.grid(True, alpha=0.3, axis='y')
338
339 # Annotate values
340 for i, (bar, rank) in enumerate(zip(bars, ranks)):
341 ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
342 f'{rank:.3f}', ha='center', va='bottom', fontsize=10)
343
344 plt.tight_layout()
345 plt.savefig('/tmp/pagerank.png', dpi=150, bbox_inches='tight')
346 print("Plot saved to /tmp/pagerank.png\n")
347 plt.close()
348
349
350if __name__ == "__main__":
351 print("=" * 60)
352 print("Graph Theory and Spectral Graph Theory")
353 print("=" * 60)
354 print()
355
356 # Set random seed
357 np.random.seed(42)
358
359 # Run demonstrations
360 visualize_graph_clustering()
361
362 demonstrate_gnn()
363
364 demonstrate_pagerank()
365
366 print("=" * 60)
367 print("Summary:")
368 print("- Graph Laplacian eigenvalues encode connectivity structure")
369 print("- Spectral clustering uses eigenvectors as node embeddings")
370 print("- GNNs aggregate neighbor information via message passing")
371 print("- PageRank measures node importance via random walks")
372 print("=" * 60)