security_scanner.py

Download
python 519 lines 17.0 KB
"""
보안 테스트 도구 데모
Security Testing Tools Demo

정적 분석(위험 함수 탐지), 의존성 검사, 코드 패턴 스캐너(하드코딩 패스워드,
SQL 인젝션 패턴), 보안 체크리스트 검증 등을 구현합니다.

Demonstrates simple static analysis for dangerous function calls,
dependency checking, regex-based code pattern scanning (hardcoded passwords,
SQL injection patterns), and security checklist validation.
"""
 12
 13import re
 14import os
 15import json
 16import tempfile
 17from pathlib import Path
 18from dataclasses import dataclass, field
 19
 20
# =============================================================================
# 1. Static Analysis: Dangerous Function Detector (위험 함수 탐지)
# =============================================================================
 24
 25@dataclass
 26class Finding:
 27    """Represents a single security finding."""
 28    severity: str       # CRITICAL, HIGH, MEDIUM, LOW, INFO
 29    category: str
 30    message: str
 31    file: str
 32    line: int
 33    code_snippet: str
 34
 35
 36# Dangerous patterns in Python code
 37DANGEROUS_FUNCTIONS = [
 38    # (pattern, severity, message)
 39    (r'\beval\s*\(', "CRITICAL",
 40     "eval() executes arbitrary code - use ast.literal_eval() instead"),
 41    (r'\bexec\s*\(', "CRITICAL",
 42     "exec() executes arbitrary code - avoid or sandbox carefully"),
 43    (r'\b__import__\s*\(', "HIGH",
 44     "__import__() can load arbitrary modules - use importlib instead"),
 45    (r'\bos\.system\s*\(', "HIGH",
 46     "os.system() is vulnerable to shell injection - use subprocess.run()"),
 47    (r'\bos\.popen\s*\(', "HIGH",
 48     "os.popen() is vulnerable to shell injection - use subprocess.run()"),
 49    (r'\bsubprocess\.\w+\(.*shell\s*=\s*True', "HIGH",
 50     "subprocess with shell=True is vulnerable to injection"),
 51    (r'\bpickle\.loads?\s*\(', "HIGH",
 52     "pickle.load() can execute arbitrary code during deserialization"),
 53    (r'\byaml\.load\s*\([^)]*\)(?!.*Loader)', "MEDIUM",
 54     "yaml.load() without SafeLoader can execute arbitrary code"),
 55    (r'\brandom\.(random|randint|choice|shuffle)\s*\(', "LOW",
 56     "random module is not cryptographically secure - use secrets module"),
 57    (r'\bmd5\s*\(', "MEDIUM",
 58     "MD5 is cryptographically broken - use SHA-256 or better"),
 59    (r'\bsha1\s*\(', "MEDIUM",
 60     "SHA-1 is deprecated for security - use SHA-256 or better"),
 61    (r'\bDESede|DES\b', "HIGH",
 62     "DES/3DES is deprecated - use AES-256"),
 63    (r'\.execute\s*\(\s*["\'].*%s', "HIGH",
 64     "String formatting in SQL query - use parameterized queries"),
 65    (r'\.execute\s*\(\s*f["\']', "HIGH",
 66     "f-string in SQL query - use parameterized queries"),
 67    (r'\bassert\s+', "INFO",
 68     "assert statements are removed with -O flag - don't use for security checks"),
 69]
 70
 71
 72def scan_python_file(content: str, filename: str = "<input>") -> list[Finding]:
 73    """Scan Python source code for dangerous function calls."""
 74    findings = []
 75    in_comment = False
 76
 77    for line_num, line in enumerate(content.splitlines(), 1):
 78        stripped = line.strip()
 79
 80        # Skip comments and docstrings (simplified)
 81        if stripped.startswith("#"):
 82            continue
 83        if '"""' in stripped or "'''" in stripped:
 84            in_comment = not in_comment
 85            continue
 86        if in_comment:
 87            continue
 88
 89        for pattern, severity, message in DANGEROUS_FUNCTIONS:
 90            if re.search(pattern, line):
 91                findings.append(Finding(
 92                    severity=severity,
 93                    category="Dangerous Function",
 94                    message=message,
 95                    file=filename,
 96                    line=line_num,
 97                    code_snippet=stripped[:80],
 98                ))
 99
100    return findings
101
102
def demo_static_analysis():
    """Scan a built-in code sample with scan_python_file and print the report."""
    rule = "=" * 60
    print(rule)
    print("1. Static Analysis: Dangerous Function Detector")
    print(rule)

    sample_code = '''
import os
import pickle
import subprocess
import random
import sqlite3

# Dangerous: eval with user input
user_input = "2 + 3"
result = eval(user_input)

# Dangerous: shell injection
os.system("ls " + user_input)

# Dangerous: pickle deserialization
data = pickle.loads(some_bytes)

# Dangerous: shell=True in subprocess
subprocess.run(f"echo {user_input}", shell=True)

# Dangerous: SQL injection via f-string
db.execute(f"SELECT * FROM users WHERE name = '{name}'")

# Insecure random for tokens
token = random.randint(100000, 999999)

# Safe alternative examples
import secrets
token = secrets.token_hex(16)
subprocess.run(["ls", "-la"], shell=False)
'''

    print("\n  Scanning sample Python code...\n")
    findings = scan_python_file(sample_code, "sample_app.py")

    # Text markers shown in front of each severity level.
    markers = {
        "CRITICAL": "!!!",
        "HIGH": "!! ",
        "MEDIUM": "!  ",
        "LOW": ".  ",
        "INFO": "   ",
    }
    # Most severe first.
    severity_order = ("CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO")

    ranked = sorted(findings, key=lambda item: severity_order.index(item.severity))
    for finding in ranked:
        marker = markers.get(finding.severity, "   ")
        print(f"  {marker} [{finding.severity:8s}] Line {finding.line:3d}: {finding.message}")
        print(f"          Code: {finding.code_snippet}")
        print()

    # Per-severity tally, printed with keys in alphabetical order.
    counts = {}
    for finding in findings:
        counts.setdefault(finding.severity, 0)
        counts[finding.severity] += 1
    print(f"  Summary: {dict(sorted(counts.items()))}")
    print(f"  Total findings: {len(findings)}")
    print()
164
165
166# =============================================================================
167# 2. Dependency Checker (μ˜μ‘΄μ„± 검사)
168# =============================================================================
169
# Simulated known-vulnerability database.
# Maps a package name to a regex that is matched (anchored at the start)
# against a lower-cased requirement line, plus the advisory text and severity
# to report when it matches.
KNOWN_VULNERABLE_PATTERNS = {
    "django": {
        "pattern": r"django\s*[<>=]*\s*(1\.\d|2\.[01])",
        "advisory": "Django < 2.2 has known security vulnerabilities",
        "severity": "HIGH",
    },
    "flask": {
        "pattern": r"flask\s*[<>=]*\s*(0\.)",
        "advisory": "Flask 0.x has known security issues, upgrade to 2.x+",
        "severity": "MEDIUM",
    },
    "requests": {
        # \b after the minor digit accepts both "2.5" and "2.5.1" while
        # still rejecting two-digit minors such as "2.28".
        "pattern": r"requests\s*[<>=]*\s*2\.[0-5]\b",
        "advisory": "requests < 2.6.0 vulnerable to CVE-2014-1829",
        "severity": "HIGH",
    },
    "pyyaml": {
        "pattern": r"pyyaml\s*[<>=]*\s*(3\.|4\.)",
        "advisory": "PyYAML < 5.1 vulnerable to arbitrary code execution",
        "severity": "CRITICAL",
    },
    "cryptography": {
        "pattern": r"cryptography\s*[<>=]*\s*(1\.|2\.[0-4])",
        "advisory": "cryptography < 2.5 has known vulnerabilities",
        "severity": "HIGH",
    },
    "jinja2": {
        # \d+ so that two-digit minors (2.10, 2.11) are covered as well;
        # a single \d followed by \b cannot match "2.11".
        "pattern": r"jinja2\s*[<>=]*\s*2\.\d+\b",
        "advisory": "Jinja2 2.x has known sandbox escape vulnerabilities",
        "severity": "MEDIUM",
    },
}
203
204
def check_dependencies(requirements_text: str) -> list[dict]:
    """Match each requirement line against KNOWN_VULNERABLE_PATTERNS.

    Lines are stripped and lower-cased first. Blank lines, ``#`` comments and
    lines starting with ``-`` are ignored.

    Returns:
        One dict per (line, matching rule) with the keys ``package`` (the
        normalized line), ``severity`` and ``advisory``.
    """
    flagged = []

    for raw in requirements_text.splitlines():
        requirement = raw.strip().lower()
        # Nothing to check on empty, comment, or dash-prefixed lines.
        if requirement == "" or requirement[0] in "#-":
            continue

        flagged.extend(
            {
                "package": requirement,
                "severity": rule["severity"],
                "advisory": rule["advisory"],
            }
            for rule in KNOWN_VULNERABLE_PATTERNS.values()
            if re.match(rule["pattern"], requirement, re.IGNORECASE)
        )

    return flagged
223
224
def demo_dependency_checker():
    """Run check_dependencies on sample requirement lists and print the results."""
    rule = "=" * 60
    print(rule)
    print("2. Dependency Checker")
    print(rule)

    sample_requirements = """
# requirements.txt
django==2.0.13
flask==2.3.1
requests==2.28.0
pyyaml==3.13
cryptography==2.3
numpy==1.24.0
jinja2==2.11.3
pandas>=1.5.0
"""

    print("\n  Checking sample requirements.txt...\n")
    issues = check_dependencies(sample_requirements)

    if not issues:
        print("  No known vulnerabilities detected.")
    else:
        # Most severe first.
        severity_order = ("CRITICAL", "HIGH", "MEDIUM")
        ranked = sorted(issues, key=lambda entry: severity_order.index(entry["severity"]))
        for entry in ranked:
            print(f"  [{entry['severity']:8s}] {entry['package']}")
            print(f"            {entry['advisory']}\n")
        print(f"  Total vulnerable packages: {len(issues)}")

    # A list of current versions should come back clean.
    safe_req = "flask==3.0.0\ndjango==4.2.0\nrequests==2.31.0\n"
    safe_issues = check_dependencies(safe_req)
    print(f"\n  Check modern versions: {len(safe_issues)} issues found (expected 0)")
    print()
258
259
# =============================================================================
# 3. Code Pattern Scanner (코드 패턴 스캐너)
# =============================================================================
263
# Security anti-pattern rules: category name -> list of regexes.
# A line belongs to a category when any of that category's regexes matches it.
SECURITY_PATTERNS = {
    "Hardcoded Password": [
        r'(?i)(password|passwd|pwd)\s*=\s*["\'][^"\']{4,}["\']',
        r'(?i)(password|passwd|pwd)\s*:\s*["\'][^"\']{4,}["\']',
    ],
    "Hardcoded Secret/Key": [
        r'(?i)(secret|api_key|apikey|access_key)\s*=\s*["\'][^"\']{8,}["\']',
        r'(?i)(token|auth_token)\s*=\s*["\'][^"\']{8,}["\']',
    ],
    "SQL Injection Risk": [
        r'(?i)(execute|query)\s*\(\s*["\'].*\+',
        r'(?i)(execute|query)\s*\(\s*f["\']',
        r'(?i)(execute|query)\s*\(\s*["\'].*%\s',
        # The SQL keyword comes first: in "SELECT ... {}".format(x) the
        # statement text precedes the .format() call.
        r'(?i)\b(?:SELECT|INSERT|UPDATE|DELETE)\b.*\.format\s*\(',
    ],
    "Command Injection Risk": [
        r'os\.system\s*\(.*\+',
        r'os\.popen\s*\(.*\+',
        r'subprocess\.\w+\(.*\+.*shell\s*=\s*True',
    ],
    "Insecure HTTP": [
        r'http://(?!localhost|127\.0\.0\.1|0\.0\.0\.0)',
    ],
    "Debug Mode in Production": [
        # Case-insensitive, so one pattern covers both debug= and DEBUG=.
        r'(?i)debug\s*=\s*True',
    ],
    "Hardcoded IP Address": [
        r'\b(?:(?:25[0-5]|2[0-4]\d|1?\d{1,2})\.){3}(?:25[0-5]|2[0-4]\d|1?\d{1,2})\b',
    ],
}
295
296
def scan_code_patterns(content: str, filename: str = "<input>") -> list[dict]:
    """Check every non-comment line of *content* against SECURITY_PATTERNS.

    Returns:
        One dict per (line, category) hit with the keys ``category``,
        ``file``, ``line`` (1-based) and ``code`` (the stripped line cut to
        80 characters). A category is reported at most once per line.
    """
    hits = []

    for number, text in enumerate(content.splitlines(), 1):
        trimmed = text.strip()
        if trimmed.startswith("#"):
            continue

        for category, regexes in SECURITY_PATTERNS.items():
            # any() stops at the first matching regex, so each category
            # contributes a single hit per line.
            if any(re.search(regex, text) for regex in regexes):
                hits.append({
                    "category": category,
                    "file": filename,
                    "line": number,
                    "code": trimmed[:80],
                })

    return hits
318
319
def demo_pattern_scanner():
    """Run scan_code_patterns on a sample config file and print hits per category."""
    rule = "=" * 60
    print(rule)
    print("3. Code Pattern Scanner")
    print(rule)

    sample = '''
# Application config
DATABASE_PASSWORD = "MyDbPass123!"
API_KEY = "sk-proj-abcdef123456789"
DEBUG = True

# SQL query building (vulnerable)
query = "SELECT * FROM users WHERE id = " + user_id
cursor.execute(f"DELETE FROM sessions WHERE user = '{username}'")

# Command execution (vulnerable)
os.system("ping " + target_host)

# Insecure HTTP
response = requests.get("http://api.example.com/data")

# Hardcoded server
server = "192.168.1.100"

# Safe examples (should not trigger)
password = os.environ.get("DB_PASSWORD")
cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
'''

    print("\n  Scanning code for security anti-patterns...\n")
    results = scan_code_patterns(sample, "app_config.py")

    # Group hits by category, keeping first-seen category order.
    grouped: dict[str, list] = {}
    for hit in results:
        bucket = grouped.get(hit["category"])
        if bucket is None:
            bucket = grouped[hit["category"]] = []
        bucket.append(hit)

    for category, items in grouped.items():
        plural = "s" if len(items) > 1 else ""
        print(f"  [{category}] ({len(items)} finding{plural})")
        for entry in items:
            print(f"    Line {entry['line']:3d}: {entry['code']}")
        print()

    print(f"  Total findings: {len(results)} across {len(grouped)} categories")
    print()
364
365
# =============================================================================
# 4. Security Checklist Validator (보안 체크리스트 검증)
# =============================================================================
369
@dataclass
class ChecklistItem:
    """A single checklist entry together with the outcome of its last run."""
    # Short title shown in the report.
    name: str
    # One-line statement of what the check expects.
    description: str
    # Callable that performs the check.
    check_fn: object
    # Severity label shown in the report.
    severity: str
    # Outcome fields; they keep these defaults until the check is run.
    passed: bool = False
    details: str = ""
378
379
class SecurityChecklist:
    """Ordered set of security checks that can be run and summarized."""

    def __init__(self):
        # Registered checks, in the order they were added.
        self.items: list[ChecklistItem] = []

    def add_check(self, name: str, description: str, check_fn, severity: str):
        """Register a check.

        ``check_fn`` is later called with the context dict and must return a
        ``(passed, details)`` pair.
        """
        entry = ChecklistItem(name, description, check_fn, severity)
        self.items.append(entry)

    def run_all(self, context: dict) -> list[ChecklistItem]:
        """Run every registered check against *context* and store the outcome.

        Returns the same list that ``self.items`` refers to, with ``passed``
        and ``details`` updated on each entry.
        """
        for entry in self.items:
            try:
                ok, note = entry.check_fn(context)
            except Exception as exc:
                # A check that raises counts as failed; the error text is
                # kept so the report can show what went wrong.
                ok, note = False, f"Check error: {exc}"
            entry.passed = ok
            entry.details = note
        return self.items

    def report(self) -> str:
        """Return a plain-text report: a pass-count header, then one section per check."""
        pass_count = len([entry for entry in self.items if entry.passed])
        out = [f"\n  Security Checklist: {pass_count}/{len(self.items)} passed\n"]

        for entry in self.items:
            status = "PASS" if entry.passed else "FAIL"
            out.append(f"  [{status}] [{entry.severity:6s}] {entry.name}")
            out.append(f"          {entry.description}")
            if entry.details:
                out.append(f"          -> {entry.details}")
            # Blank line between sections.
            out.append("")

        return "\n".join(out)
416
417
418# Check functions
def check_no_hardcoded_secrets(ctx):
    """Fail when the pattern scanner reports a password or secret in ctx["source_code"].

    Returns a ``(passed, details)`` pair.
    """
    source = ctx.get("source_code", "")
    secret_count = 0
    for finding in scan_code_patterns(source):
        category = finding["category"]
        if "Secret" in category or "Password" in category:
            secret_count += 1

    if secret_count == 0:
        return True, "No hardcoded secrets detected"
    return False, f"Found {secret_count} hardcoded secret(s)"
426
427
def check_no_debug_mode(ctx):
    """Fail when ctx["source_code"] contains a ``DEBUG = True`` assignment.

    The match is case-insensitive. Returns a ``(passed, details)`` pair.
    """
    source = ctx.get("source_code", "")
    debug_enabled = re.search(r'(?i)\bDEBUG\s*=\s*True\b', source) is not None
    if not debug_enabled:
        return True, "Debug mode not enabled"
    return False, "DEBUG=True found in source"
433
434
def check_https_only(ctx):
    """Fail when ctx["source_code"] contains a plain-HTTP URL to a non-local host.

    URLs whose host starts with ``localhost``, ``127.0.0.1`` or ``0.0.0.0``
    are treated as local and ignored, the same exemptions the
    "Insecure HTTP" rule in SECURITY_PATTERNS applies.

    Returns a ``(passed, details)`` pair.
    """
    code = ctx.get("source_code", "")
    matches = re.findall(r'http://(?!localhost|127\.0\.0\.1|0\.0\.0\.0)', code)
    if matches:
        return False, f"Found {len(matches)} insecure HTTP URL(s)"
    return True, "All external URLs use HTTPS"
441
442
def check_sql_parameterized(ctx):
    """Fail when ctx["source_code"] passes an f-string to execute() or uses "+" inside the call.

    Returns a ``(passed, details)`` pair.
    """
    source = ctx.get("source_code", "")
    # execute(f"...") and execute(... + ...)
    unsafe_forms = (r'execute\s*\(\s*f["\']', r'execute\s*\(.*\+')
    if any(re.search(form, source) for form in unsafe_forms):
        return False, "SQL queries use string formatting instead of parameters"
    return True, "SQL queries appear to use parameterized queries"
450
451
def check_dependencies_updated(ctx):
    """Fail when check_dependencies reports any issue for ctx["requirements"].

    Returns a ``(passed, details)`` pair.
    """
    requirements = ctx.get("requirements", "")
    vulnerable = check_dependencies(requirements)
    if not vulnerable:
        return True, "No known vulnerable dependencies"
    return False, f"{len(vulnerable)} vulnerable package(s) found"
458
459
def check_input_validation(ctx):
    """Pass when ctx["source_code"] mentions any validation-related keyword.

    The keywords are searched case-insensitively anywhere in the text.
    Returns a ``(passed, details)`` pair.
    """
    source = ctx.get("source_code", "")
    # Names that hint at validation or sanitizing being done somewhere.
    hints = (r'validate', r'sanitize', r'clean', r'escape', r'strip\(\)')
    if any(re.search(hint, source, re.IGNORECASE) for hint in hints):
        return True, "Input validation functions detected"
    return False, "No input validation patterns found"
468
469
def demo_security_checklist():
    """Build the six-item checklist, run it on a deliberately flawed sample, and print the report."""
    rule = "=" * 60
    print(rule)
    print("4. Security Checklist Validator")
    print(rule)

    # (name, description, check function, severity), in report order.
    checks = (
        ("No Hardcoded Secrets", "Source code should not contain hardcoded passwords or keys", check_no_hardcoded_secrets, "HIGH"),
        ("Debug Mode Off", "DEBUG should not be True in production", check_no_debug_mode, "MEDIUM"),
        ("HTTPS Only", "All external URLs should use HTTPS", check_https_only, "HIGH"),
        ("Parameterized SQL", "SQL queries should use parameter binding", check_sql_parameterized, "HIGH"),
        ("Dependencies Updated", "No known vulnerable packages", check_dependencies_updated, "HIGH"),
        ("Input Validation", "User input should be validated", check_input_validation, "MEDIUM"),
    )

    checklist = SecurityChecklist()
    for name, description, check_fn, severity in checks:
        checklist.add_check(name, description, check_fn, severity)

    # Context: a project with some issues
    context = {
        "source_code": '''
DATABASE_PASSWORD = "MyPass123"
DEBUG = True
cursor.execute(f"SELECT * FROM users WHERE id = '{uid}'")
response = requests.get("http://api.example.com/data")
user_input = sanitize(request.form["name"])
''',
        "requirements": "django==2.0.13\nflask==3.0.0\n",
    }

    checklist.run_all(context)
    print(checklist.report())
497
498
499# =============================================================================
500# Main
501# =============================================================================
502
if __name__ == "__main__":
    # Opening banner with the bilingual (English / Korean) title.
    print("\n" + "=" * 60)
    print("  Security Testing Tools Demo")
    print("  보안 테스트 도구 데모")
    print("=" * 60 + "\n")

    # Run the four demos in section order.
    demo_static_analysis()
    demo_dependency_checker()
    demo_pattern_scanner()
    demo_security_checklist()

    # Closing note.
    print("=" * 60)
    print("  Demo complete. All examples use stdlib only.")
    print("  These tools are educational - use professional SAST/DAST")
    print("  tools (Bandit, Safety, Semgrep) for production scanning.")
    print("=" * 60)