1"""
2Hashing and Message Authentication Demo
3========================================
4
5Educational demonstration of hashing concepts:
6- SHA-256, SHA-3, BLAKE2 with hashlib
7- Password hashing with PBKDF2 (hashlib built-in)
8- HMAC generation and verification
9- Constant-time comparison
10- Simplified Merkle tree implementation
11
12All examples use Python's standard library (hashlib, hmac, secrets).
13No external dependencies required.
14"""
15
16import hashlib
17import hmac
18import os
19import secrets
20import time
21import struct
22
23print("=" * 65)
24print(" Hashing and Message Authentication Demo")
25print("=" * 65)
26print()
27
28
29# ============================================================
30# Section 1: Cryptographic Hash Functions
31# ============================================================
32
33print("-" * 65)
34print(" Section 1: Cryptographic Hash Functions")
35print("-" * 65)
36
37message = b"The quick brown fox jumps over the lazy dog"
38print(f"\n Input message: {message.decode()}")
39print(f" Input length: {len(message)} bytes")
40print()
41
42# SHA-256 (SHA-2 family)
43sha256_hash = hashlib.sha256(message).hexdigest()
44print(f" SHA-256: {sha256_hash}")
45
46# SHA-384 (SHA-2 family)
47sha384_hash = hashlib.sha384(message).hexdigest()
48print(f" SHA-384: {sha384_hash[:48]}...")
49
50# SHA-512 (SHA-2 family)
51sha512_hash = hashlib.sha512(message).hexdigest()
52print(f" SHA-512: {sha512_hash[:48]}...")
53
54# SHA-3 (Keccak-based, different internal structure from SHA-2)
55sha3_256 = hashlib.sha3_256(message).hexdigest()
56print(f" SHA3-256: {sha3_256}")
57
58sha3_512 = hashlib.sha3_512(message).hexdigest()
59print(f" SHA3-512: {sha3_512[:48]}...")
60
61# BLAKE2 (fast, secure, used in libsodium/WireGuard)
62blake2b_hash = hashlib.blake2b(message).hexdigest()
63print(f" BLAKE2b: {blake2b_hash[:48]}...")
64
65blake2s_hash = hashlib.blake2s(message).hexdigest()
66print(f" BLAKE2s: {blake2s_hash}")
67print()
68
69# --- Avalanche effect demonstration ---
70print(" -- Avalanche Effect --")
71msg1 = b"Hello World"
72msg2 = b"Hello World!" # One character added
73h1 = hashlib.sha256(msg1).hexdigest()
74h2 = hashlib.sha256(msg2).hexdigest()
75print(f" Input 1: 'Hello World' -> {h1[:32]}...")
76print(f" Input 2: 'Hello World!' -> {h2[:32]}...")
77
78# Count differing bits
79bits1 = bin(int(h1, 16))[2:].zfill(256)
80bits2 = bin(int(h2, 16))[2:].zfill(256)
81diff_bits = sum(b1 != b2 for b1, b2 in zip(bits1, bits2))
82print(f" Bits changed: {diff_bits}/256 ({diff_bits/256*100:.1f}%)")
83print(f" (Ideal avalanche: ~50% bits change)")
84print()
85
86# --- Incremental hashing for large data ---
87print(" -- Incremental Hashing (for large files) --")
88hasher = hashlib.sha256()
89chunks = [b"chunk1_", b"chunk2_", b"chunk3_done"]
90for chunk in chunks:
91 hasher.update(chunk)
92incremental = hasher.hexdigest()
93
94all_at_once = hashlib.sha256(b"chunk1_chunk2_chunk3_done").hexdigest()
95print(f" Incremental: {incremental[:32]}...")
96print(f" All-at-once: {all_at_once[:32]}...")
97print(f" Match: {incremental == all_at_once}")
98print()
99
100
101# ============================================================
102# Section 2: Password Hashing with PBKDF2
103# ============================================================
104
105print("-" * 65)
106print(" Section 2: Password Hashing (PBKDF2)")
107print("-" * 65)
108
109print("""
110 Why not just SHA-256 for passwords?
111 - SHA-256 is TOO FAST (~1 billion hashes/sec on GPU)
112 - Attackers can brute-force quickly
113 - Password hashing must be SLOW (key stretching)
114
115 Recommended algorithms (best to worst):
116 1. Argon2id (memory-hard, state of the art)
117 2. bcrypt (CPU-hard, widely supported)
118 3. PBKDF2 (CPU-hard, NIST approved, in stdlib)
119""")
120
121password = "MyS3cur3P@ssw0rd!"
122salt = os.urandom(16) # Unique per password
123
124# PBKDF2 with high iteration count
125iterations = 600_000 # NIST recommends >= 600,000 for SHA-256
126
127start = time.time()
128derived_key = hashlib.pbkdf2_hmac(
129 "sha256",
130 password.encode("utf-8"),
131 salt,
132 iterations,
133 dklen=32,
134)
135elapsed = time.time() - start
136
137print(f" Password: {password}")
138print(f" Salt (hex): {salt.hex()}")
139print(f" Iterations: {iterations:,}")
140print(f" Derived key: {derived_key.hex()}")
141print(f" Time: {elapsed:.3f}s")
142print()
143
144# Verify password
145print(" -- Password Verification --")
146correct_key = hashlib.pbkdf2_hmac(
147 "sha256", "MyS3cur3P@ssw0rd!".encode(), salt, iterations, dklen=32
148)
149wrong_key = hashlib.pbkdf2_hmac(
150 "sha256", "WrongPassword".encode(), salt, iterations, dklen=32
151)
152print(f" Correct password match: {hmac.compare_digest(derived_key, correct_key)}")
153print(f" Wrong password match: {hmac.compare_digest(derived_key, wrong_key)}")
154print()
155
156# Storage format
157stored = f"pbkdf2:sha256:{iterations}${salt.hex()}${derived_key.hex()}"
158print(f" Storage format: {stored[:50]}...")
159print(f" (algorithm:hash:iterations$salt$key)")
160print()
161
162
163# ============================================================
164# Section 3: HMAC Generation and Verification
165# ============================================================
166
167print("-" * 65)
168print(" Section 3: HMAC (Hash-based Message Authentication Code)")
169print("-" * 65)
170
171print("""
172 HMAC = Hash(Key || Hash(Key || Message))
173 - Proves both integrity AND authenticity
174 - Requires shared secret key
175 - Used in: JWT, API authentication, TLS
176""")
177
178api_secret = secrets.token_bytes(32)
179payload = b'{"user":"admin","action":"delete","id":42}'
180
181# Generate HMAC
182mac = hmac.new(api_secret, payload, hashlib.sha256).hexdigest()
183print(f" API Secret: {api_secret.hex()[:24]}...")
184print(f" Payload: {payload.decode()}")
185print(f" HMAC-SHA256: {mac}")
186print()
187
188# Verify HMAC
189print(" -- HMAC Verification --")
190received_mac = mac # Simulating received MAC
191computed_mac = hmac.new(api_secret, payload, hashlib.sha256).hexdigest()
192is_valid = hmac.compare_digest(received_mac, computed_mac)
193print(f" Valid MAC: {is_valid}")
194
195# Tampered payload
196tampered_payload = b'{"user":"admin","action":"delete","id":9999}'
197tampered_mac = hmac.new(api_secret, tampered_payload, hashlib.sha256).hexdigest()
198is_valid_tampered = hmac.compare_digest(received_mac, tampered_mac)
199print(f" Tampered MAC: {is_valid_tampered} (tamper detected!)")
200print()
201
202
203# ============================================================
204# Section 4: Constant-Time Comparison
205# ============================================================
206
207print("-" * 65)
208print(" Section 4: Constant-Time Comparison")
209print("-" * 65)
210
211print("""
212 Why constant-time comparison matters:
213 - Regular '==' comparison short-circuits on first mismatch
214 - Attacker can measure response time to guess bytes one by one
215 - This is called a "timing side-channel attack"
216 - hmac.compare_digest() always compares ALL bytes
217""")
218
219secret_token = "a1b2c3d4e5f6g7h8"
220
221# Demonstrate timing difference (educational - results may vary)
222def unsafe_compare(a: str, b: str) -> bool:
223 """VULNERABLE: short-circuits on first mismatch."""
224 if len(a) != len(b):
225 return False
226 for x, y in zip(a, b):
227 if x != y:
228 return False
229 return True
230
231
232def safe_compare(a: str, b: str) -> bool:
233 """SAFE: always compares all characters."""
234 return hmac.compare_digest(a.encode(), b.encode())
235
236
237# Timing test
238test_inputs = [
239 ("Completely wrong token!!", "no match at start"),
240 ("a1b2c3d4XXXXXXXX", "partial match (8 chars)"),
241 ("a1b2c3d4e5f6g7h8", "exact match"),
242]
243
244print(f"\n Secret token: {secret_token}")
245print()
246print(f" {'Input':<28} {'Unsafe':<12} {'Safe':<12}")
247print(f" {'-'*28} {'-'*12} {'-'*12}")
248
249for test_input, desc in test_inputs:
250 padded = test_input.ljust(len(secret_token))[:len(secret_token)]
251
252 # Unsafe comparison timing
253 start = time.perf_counter_ns()
254 for _ in range(10000):
255 unsafe_compare(secret_token, padded)
256 unsafe_ns = (time.perf_counter_ns() - start) // 10000
257
258 # Safe comparison timing
259 start = time.perf_counter_ns()
260 for _ in range(10000):
261 safe_compare(secret_token, padded)
262 safe_ns = (time.perf_counter_ns() - start) // 10000
263
264 print(f" {desc:<28} {unsafe_ns:>6}ns {safe_ns:>6}ns")
265
266print()
267print(" Note: Unsafe times may vary with match length; safe times")
268print(" should be roughly constant regardless of input.")
269print()
270
271
272# ============================================================
273# Section 5: Merkle Tree Implementation
274# ============================================================
275
276print("-" * 65)
277print(" Section 5: Merkle Tree (Hash Tree)")
278print("-" * 65)
279
280print("""
281 Merkle trees efficiently verify data integrity:
282 - Used in: Git, Bitcoin, IPFS, certificate transparency
283 - Verify any single block without downloading entire dataset
284 - Proof size: O(log n) hashes for n data blocks
285""")
286
287
288class MerkleTree:
289 """Simplified Merkle tree for educational purposes."""
290
291 def __init__(self, data_blocks: list[bytes]):
292 self.data_blocks = data_blocks
293 self.leaves = [self._hash_leaf(d) for d in data_blocks]
294 self.tree = self._build_tree(self.leaves[:])
295
296 @staticmethod
297 def _hash_leaf(data: bytes) -> str:
298 return hashlib.sha256(b"\x00" + data).hexdigest()
299
300 @staticmethod
301 def _hash_pair(left: str, right: str) -> str:
302 combined = b"\x01" + bytes.fromhex(left) + bytes.fromhex(right)
303 return hashlib.sha256(combined).hexdigest()
304
305 def _build_tree(self, nodes: list[str]) -> list[list[str]]:
306 tree = [nodes]
307 while len(nodes) > 1:
308 next_level = []
309 for i in range(0, len(nodes), 2):
310 if i + 1 < len(nodes):
311 next_level.append(self._hash_pair(nodes[i], nodes[i + 1]))
312 else:
313 # Odd node: promote to next level
314 next_level.append(nodes[i])
315 tree.append(next_level)
316 nodes = next_level
317 return tree
318
319 @property
320 def root(self) -> str:
321 return self.tree[-1][0]
322
323 def get_proof(self, index: int) -> list[tuple[str, str]]:
324 """Get Merkle proof for a leaf at given index."""
325 proof = []
326 for level in self.tree[:-1]:
327 if index % 2 == 0:
328 sibling_idx = index + 1
329 direction = "right"
330 else:
331 sibling_idx = index - 1
332 direction = "left"
333 if sibling_idx < len(level):
334 proof.append((direction, level[sibling_idx]))
335 index //= 2
336 return proof
337
338 @classmethod
339 def verify_proof(cls, leaf_data: bytes, proof: list, root: str) -> bool:
340 """Verify a Merkle proof against the root hash."""
341 current = cls._hash_leaf(leaf_data)
342 for direction, sibling in proof:
343 if direction == "right":
344 current = cls._hash_pair(current, sibling)
345 else:
346 current = cls._hash_pair(sibling, current)
347 return current == root
348
349
350# Build a Merkle tree
351blocks = [f"Transaction {i}: Alice pays Bob ${i * 10}".encode() for i in range(8)]
352tree = MerkleTree(blocks)
353
354print(f"\n Data blocks: {len(blocks)} transactions")
355print(f" Tree levels: {len(tree.tree)}")
356print(f" Root hash: {tree.root[:32]}...")
357print()
358
359# Show tree structure
360print(" Tree structure (abbreviated hashes):")
361for i, level in enumerate(tree.tree):
362 indent = " " * (len(tree.tree) - i)
363 hashes = " ".join(h[:8] for h in level)
364 label = "Root" if i == len(tree.tree) - 1 else f"L{i}"
365 print(f" {indent}{label}: [{hashes}]")
366print()
367
368# Generate and verify a Merkle proof
369target_idx = 3
370proof = tree.get_proof(target_idx)
371print(f" -- Merkle Proof for block {target_idx} --")
372print(f" Block data: {blocks[target_idx].decode()}")
373print(f" Proof path ({len(proof)} hashes):")
374for direction, h in proof:
375 print(f" {direction}: {h[:16]}...")
376
377is_valid = MerkleTree.verify_proof(blocks[target_idx], proof, tree.root)
378print(f" Proof valid: {is_valid}")
379
380# Tampered data
381is_valid_tampered = MerkleTree.verify_proof(b"TAMPERED DATA", proof, tree.root)
382print(f" Tampered proof valid: {is_valid_tampered}")
383print()
384
385
386# ============================================================
387# Section 6: Summary
388# ============================================================
389
390print("=" * 65)
391print(" Summary")
392print("=" * 65)
393print("""
394 Algorithm | Output | Speed | Use Case
395 --------------+----------+----------+---------------------------
396 SHA-256 | 256 bit | Fast | Data integrity, Git
397 SHA-3 | 256 bit | Medium | Quantum-resistant backup
398 BLAKE2b | 512 bit | Fastest | General purpose, libsodium
399 PBKDF2 | Variable | Slow* | Password hashing
400 HMAC-SHA256 | 256 bit | Fast | Message authentication
401 Merkle Tree | 256 bit | O(log n) | Blockchain, Git, IPFS
402
403 * Slow by design - prevents brute-force attacks
404
405 Key Takeaways:
406 - Use PBKDF2/bcrypt/Argon2 for passwords (NEVER plain SHA-256)
407 - Use HMAC for message authentication (not bare hashes)
408 - Use constant-time comparison for security-critical comparisons
409 - Merkle trees enable efficient verification of large datasets
410""")