hashing_demo.py

Download
python 411 lines 12.7 KB
  1"""
  2Hashing and Message Authentication Demo
  3========================================
  4
  5Educational demonstration of hashing concepts:
  6- SHA-256, SHA-3, BLAKE2 with hashlib
  7- Password hashing with PBKDF2 (hashlib built-in)
  8- HMAC generation and verification
  9- Constant-time comparison
 10- Simplified Merkle tree implementation
 11
 12All examples use Python's standard library (hashlib, hmac, secrets).
 13No external dependencies required.
 14"""
 15
 16import hashlib
 17import hmac
 18import os
 19import secrets
 20import time
 21import struct
 22
 23print("=" * 65)
 24print("  Hashing and Message Authentication Demo")
 25print("=" * 65)
 26print()
 27
 28
 29# ============================================================
 30# Section 1: Cryptographic Hash Functions
 31# ============================================================
 32
 33print("-" * 65)
 34print("  Section 1: Cryptographic Hash Functions")
 35print("-" * 65)
 36
 37message = b"The quick brown fox jumps over the lazy dog"
 38print(f"\n  Input message: {message.decode()}")
 39print(f"  Input length:  {len(message)} bytes")
 40print()
 41
 42# SHA-256 (SHA-2 family)
 43sha256_hash = hashlib.sha256(message).hexdigest()
 44print(f"  SHA-256:    {sha256_hash}")
 45
 46# SHA-384 (SHA-2 family)
 47sha384_hash = hashlib.sha384(message).hexdigest()
 48print(f"  SHA-384:    {sha384_hash[:48]}...")
 49
 50# SHA-512 (SHA-2 family)
 51sha512_hash = hashlib.sha512(message).hexdigest()
 52print(f"  SHA-512:    {sha512_hash[:48]}...")
 53
 54# SHA-3 (Keccak-based, different internal structure from SHA-2)
 55sha3_256 = hashlib.sha3_256(message).hexdigest()
 56print(f"  SHA3-256:   {sha3_256}")
 57
 58sha3_512 = hashlib.sha3_512(message).hexdigest()
 59print(f"  SHA3-512:   {sha3_512[:48]}...")
 60
 61# BLAKE2 (fast, secure, used in libsodium/WireGuard)
 62blake2b_hash = hashlib.blake2b(message).hexdigest()
 63print(f"  BLAKE2b:    {blake2b_hash[:48]}...")
 64
 65blake2s_hash = hashlib.blake2s(message).hexdigest()
 66print(f"  BLAKE2s:    {blake2s_hash}")
 67print()
 68
 69# --- Avalanche effect demonstration ---
 70print("  -- Avalanche Effect --")
 71msg1 = b"Hello World"
 72msg2 = b"Hello World!"  # One character added
 73h1 = hashlib.sha256(msg1).hexdigest()
 74h2 = hashlib.sha256(msg2).hexdigest()
 75print(f"  Input 1: 'Hello World'  -> {h1[:32]}...")
 76print(f"  Input 2: 'Hello World!' -> {h2[:32]}...")
 77
 78# Count differing bits
 79bits1 = bin(int(h1, 16))[2:].zfill(256)
 80bits2 = bin(int(h2, 16))[2:].zfill(256)
 81diff_bits = sum(b1 != b2 for b1, b2 in zip(bits1, bits2))
 82print(f"  Bits changed: {diff_bits}/256 ({diff_bits/256*100:.1f}%)")
 83print(f"  (Ideal avalanche: ~50% bits change)")
 84print()
 85
 86# --- Incremental hashing for large data ---
 87print("  -- Incremental Hashing (for large files) --")
 88hasher = hashlib.sha256()
 89chunks = [b"chunk1_", b"chunk2_", b"chunk3_done"]
 90for chunk in chunks:
 91    hasher.update(chunk)
 92incremental = hasher.hexdigest()
 93
 94all_at_once = hashlib.sha256(b"chunk1_chunk2_chunk3_done").hexdigest()
 95print(f"  Incremental: {incremental[:32]}...")
 96print(f"  All-at-once: {all_at_once[:32]}...")
 97print(f"  Match:       {incremental == all_at_once}")
 98print()
 99
100
101# ============================================================
102# Section 2: Password Hashing with PBKDF2
103# ============================================================
104
105print("-" * 65)
106print("  Section 2: Password Hashing (PBKDF2)")
107print("-" * 65)
108
109print("""
110  Why not just SHA-256 for passwords?
111  - SHA-256 is TOO FAST (~1 billion hashes/sec on GPU)
112  - Attackers can brute-force quickly
113  - Password hashing must be SLOW (key stretching)
114
115  Recommended algorithms (best to worst):
116  1. Argon2id  (memory-hard, state of the art)
117  2. bcrypt    (CPU-hard, widely supported)
118  3. PBKDF2    (CPU-hard, NIST approved, in stdlib)
119""")
120
121password = "MyS3cur3P@ssw0rd!"
122salt = os.urandom(16)  # Unique per password
123
124# PBKDF2 with high iteration count
125iterations = 600_000  # NIST recommends >= 600,000 for SHA-256
126
127start = time.time()
128derived_key = hashlib.pbkdf2_hmac(
129    "sha256",
130    password.encode("utf-8"),
131    salt,
132    iterations,
133    dklen=32,
134)
135elapsed = time.time() - start
136
137print(f"  Password:       {password}")
138print(f"  Salt (hex):     {salt.hex()}")
139print(f"  Iterations:     {iterations:,}")
140print(f"  Derived key:    {derived_key.hex()}")
141print(f"  Time:           {elapsed:.3f}s")
142print()
143
144# Verify password
145print("  -- Password Verification --")
146correct_key = hashlib.pbkdf2_hmac(
147    "sha256", "MyS3cur3P@ssw0rd!".encode(), salt, iterations, dklen=32
148)
149wrong_key = hashlib.pbkdf2_hmac(
150    "sha256", "WrongPassword".encode(), salt, iterations, dklen=32
151)
152print(f"  Correct password match: {hmac.compare_digest(derived_key, correct_key)}")
153print(f"  Wrong password match:   {hmac.compare_digest(derived_key, wrong_key)}")
154print()
155
156# Storage format
157stored = f"pbkdf2:sha256:{iterations}${salt.hex()}${derived_key.hex()}"
158print(f"  Storage format: {stored[:50]}...")
159print(f"  (algorithm:hash:iterations$salt$key)")
160print()
161
162
163# ============================================================
164# Section 3: HMAC Generation and Verification
165# ============================================================
166
167print("-" * 65)
168print("  Section 3: HMAC (Hash-based Message Authentication Code)")
169print("-" * 65)
170
171print("""
172  HMAC = Hash(Key || Hash(Key || Message))
173  - Proves both integrity AND authenticity
174  - Requires shared secret key
175  - Used in: JWT, API authentication, TLS
176""")
177
178api_secret = secrets.token_bytes(32)
179payload = b'{"user":"admin","action":"delete","id":42}'
180
181# Generate HMAC
182mac = hmac.new(api_secret, payload, hashlib.sha256).hexdigest()
183print(f"  API Secret:    {api_secret.hex()[:24]}...")
184print(f"  Payload:       {payload.decode()}")
185print(f"  HMAC-SHA256:   {mac}")
186print()
187
188# Verify HMAC
189print("  -- HMAC Verification --")
190received_mac = mac  # Simulating received MAC
191computed_mac = hmac.new(api_secret, payload, hashlib.sha256).hexdigest()
192is_valid = hmac.compare_digest(received_mac, computed_mac)
193print(f"  Valid MAC:     {is_valid}")
194
195# Tampered payload
196tampered_payload = b'{"user":"admin","action":"delete","id":9999}'
197tampered_mac = hmac.new(api_secret, tampered_payload, hashlib.sha256).hexdigest()
198is_valid_tampered = hmac.compare_digest(received_mac, tampered_mac)
199print(f"  Tampered MAC:  {is_valid_tampered}  (tamper detected!)")
200print()
201
202
203# ============================================================
204# Section 4: Constant-Time Comparison
205# ============================================================
206
207print("-" * 65)
208print("  Section 4: Constant-Time Comparison")
209print("-" * 65)
210
211print("""
212  Why constant-time comparison matters:
213  - Regular '==' comparison short-circuits on first mismatch
214  - Attacker can measure response time to guess bytes one by one
215  - This is called a "timing side-channel attack"
216  - hmac.compare_digest() always compares ALL bytes
217""")
218
219secret_token = "a1b2c3d4e5f6g7h8"
220
221# Demonstrate timing difference (educational - results may vary)
222def unsafe_compare(a: str, b: str) -> bool:
223    """VULNERABLE: short-circuits on first mismatch."""
224    if len(a) != len(b):
225        return False
226    for x, y in zip(a, b):
227        if x != y:
228            return False
229    return True
230
231
232def safe_compare(a: str, b: str) -> bool:
233    """SAFE: always compares all characters."""
234    return hmac.compare_digest(a.encode(), b.encode())
235
236
237# Timing test
238test_inputs = [
239    ("Completely wrong token!!", "no match at start"),
240    ("a1b2c3d4XXXXXXXX", "partial match (8 chars)"),
241    ("a1b2c3d4e5f6g7h8", "exact match"),
242]
243
244print(f"\n  Secret token: {secret_token}")
245print()
246print(f"  {'Input':<28} {'Unsafe':<12} {'Safe':<12}")
247print(f"  {'-'*28} {'-'*12} {'-'*12}")
248
249for test_input, desc in test_inputs:
250    padded = test_input.ljust(len(secret_token))[:len(secret_token)]
251
252    # Unsafe comparison timing
253    start = time.perf_counter_ns()
254    for _ in range(10000):
255        unsafe_compare(secret_token, padded)
256    unsafe_ns = (time.perf_counter_ns() - start) // 10000
257
258    # Safe comparison timing
259    start = time.perf_counter_ns()
260    for _ in range(10000):
261        safe_compare(secret_token, padded)
262    safe_ns = (time.perf_counter_ns() - start) // 10000
263
264    print(f"  {desc:<28} {unsafe_ns:>6}ns     {safe_ns:>6}ns")
265
266print()
267print("  Note: Unsafe times may vary with match length; safe times")
268print("  should be roughly constant regardless of input.")
269print()
270
271
272# ============================================================
273# Section 5: Merkle Tree Implementation
274# ============================================================
275
276print("-" * 65)
277print("  Section 5: Merkle Tree (Hash Tree)")
278print("-" * 65)
279
280print("""
281  Merkle trees efficiently verify data integrity:
282  - Used in: Git, Bitcoin, IPFS, certificate transparency
283  - Verify any single block without downloading entire dataset
284  - Proof size: O(log n) hashes for n data blocks
285""")
286
287
288class MerkleTree:
289    """Simplified Merkle tree for educational purposes."""
290
291    def __init__(self, data_blocks: list[bytes]):
292        self.data_blocks = data_blocks
293        self.leaves = [self._hash_leaf(d) for d in data_blocks]
294        self.tree = self._build_tree(self.leaves[:])
295
296    @staticmethod
297    def _hash_leaf(data: bytes) -> str:
298        return hashlib.sha256(b"\x00" + data).hexdigest()
299
300    @staticmethod
301    def _hash_pair(left: str, right: str) -> str:
302        combined = b"\x01" + bytes.fromhex(left) + bytes.fromhex(right)
303        return hashlib.sha256(combined).hexdigest()
304
305    def _build_tree(self, nodes: list[str]) -> list[list[str]]:
306        tree = [nodes]
307        while len(nodes) > 1:
308            next_level = []
309            for i in range(0, len(nodes), 2):
310                if i + 1 < len(nodes):
311                    next_level.append(self._hash_pair(nodes[i], nodes[i + 1]))
312                else:
313                    # Odd node: promote to next level
314                    next_level.append(nodes[i])
315            tree.append(next_level)
316            nodes = next_level
317        return tree
318
319    @property
320    def root(self) -> str:
321        return self.tree[-1][0]
322
323    def get_proof(self, index: int) -> list[tuple[str, str]]:
324        """Get Merkle proof for a leaf at given index."""
325        proof = []
326        for level in self.tree[:-1]:
327            if index % 2 == 0:
328                sibling_idx = index + 1
329                direction = "right"
330            else:
331                sibling_idx = index - 1
332                direction = "left"
333            if sibling_idx < len(level):
334                proof.append((direction, level[sibling_idx]))
335            index //= 2
336        return proof
337
338    @classmethod
339    def verify_proof(cls, leaf_data: bytes, proof: list, root: str) -> bool:
340        """Verify a Merkle proof against the root hash."""
341        current = cls._hash_leaf(leaf_data)
342        for direction, sibling in proof:
343            if direction == "right":
344                current = cls._hash_pair(current, sibling)
345            else:
346                current = cls._hash_pair(sibling, current)
347        return current == root
348
349
350# Build a Merkle tree
351blocks = [f"Transaction {i}: Alice pays Bob ${i * 10}".encode() for i in range(8)]
352tree = MerkleTree(blocks)
353
354print(f"\n  Data blocks: {len(blocks)} transactions")
355print(f"  Tree levels: {len(tree.tree)}")
356print(f"  Root hash:   {tree.root[:32]}...")
357print()
358
359# Show tree structure
360print("  Tree structure (abbreviated hashes):")
361for i, level in enumerate(tree.tree):
362    indent = "  " * (len(tree.tree) - i)
363    hashes = " ".join(h[:8] for h in level)
364    label = "Root" if i == len(tree.tree) - 1 else f"L{i}"
365    print(f"  {indent}{label}: [{hashes}]")
366print()
367
368# Generate and verify a Merkle proof
369target_idx = 3
370proof = tree.get_proof(target_idx)
371print(f"  -- Merkle Proof for block {target_idx} --")
372print(f"  Block data: {blocks[target_idx].decode()}")
373print(f"  Proof path ({len(proof)} hashes):")
374for direction, h in proof:
375    print(f"    {direction}: {h[:16]}...")
376
377is_valid = MerkleTree.verify_proof(blocks[target_idx], proof, tree.root)
378print(f"  Proof valid: {is_valid}")
379
380# Tampered data
381is_valid_tampered = MerkleTree.verify_proof(b"TAMPERED DATA", proof, tree.root)
382print(f"  Tampered proof valid: {is_valid_tampered}")
383print()
384
385
386# ============================================================
387# Section 6: Summary
388# ============================================================
389
390print("=" * 65)
391print("  Summary")
392print("=" * 65)
393print("""
394  Algorithm     | Output   | Speed    | Use Case
395  --------------+----------+----------+---------------------------
396  SHA-256       | 256 bit  | Fast     | Data integrity, Git
397  SHA-3         | 256 bit  | Medium   | Quantum-resistant backup
398  BLAKE2b       | 512 bit  | Fastest  | General purpose, libsodium
399  PBKDF2        | Variable | Slow*    | Password hashing
400  HMAC-SHA256   | 256 bit  | Fast     | Message authentication
401  Merkle Tree   | 256 bit  | O(log n) | Blockchain, Git, IPFS
402
403  * Slow by design - prevents brute-force attacks
404
405  Key Takeaways:
406  - Use PBKDF2/bcrypt/Argon2 for passwords (NEVER plain SHA-256)
407  - Use HMAC for message authentication (not bare hashes)
408  - Use constant-time comparison for security-critical comparisons
409  - Merkle trees enable efficient verification of large datasets
410""")