- Split embedded OpenCL kernel_source into kernel_parts[] array so no single string literal exceeds MSVC's 65535-byte limit. clCreateProgramWithSource now receives multiple source strings. - Added batch-inversion kernels (field_inv, affine_add, jac_to_affine) using per-workgroup Montgomery's trick with __local memory. - OpenCL BIP352 benchmark scaffold and kernel stubs. - Source graph kit for indexed codebase exploration. - Assorted doc, benchmark, and audit report updates.
9218 lines
396 KiB
Python
9218 lines
396 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Source Graph Universal — Multi-language SQLite-based codebase index.
|
|
|
|
Supported languages: C++, Python, TypeScript/JavaScript, Rust, Go, Java, C#
|
|
|
|
Usage:
|
|
python source_graph.py build # Full rebuild of the database
|
|
python source_graph.py build -i # Incremental build (changed files only)
|
|
python source_graph.py find <term> # Search everything (FTS5 full-text)
|
|
python source_graph.py singleton <name> # Find singleton by macro or class
|
|
python source_graph.py file <pattern> # Find files by name pattern
|
|
python source_graph.py handler <headcode> # Find packet handler
|
|
python source_graph.py event <name> # Find event/dungeon system
|
|
python source_graph.py method <name> # Find method by name
|
|
python source_graph.py player <domain> # Find Player partial file by domain
|
|
python source_graph.py ai <target> # Find AI handler
|
|
python source_graph.py const <name> # Find constant/define
|
|
python source_graph.py config <name> # Find config XML
|
|
python source_graph.py deps <class> # Find what depends on a class
|
|
python source_graph.py enum <name> # Find enum by name or value
|
|
python source_graph.py struct <name> # Find struct/packet layout
|
|
python source_graph.py class <name> # Find class hierarchy (parent/children)
|
|
python source_graph.py todo [filter] # Find TODO/FIXME/HACK comments
|
|
python source_graph.py dbtable <name> # Find database table usage in queries
|
|
python source_graph.py query <name> # Find prepared statement by QUERY_ name
|
|
python source_graph.py cfgkey <name> # Find config key usage (sConfig->GetXxx)
|
|
python source_graph.py define <name> # Find #define macros by name or value
|
|
python source_graph.py complexity # Show largest files by line count
|
|
python source_graph.py leaks # Memory leak risk analysis
|
|
python source_graph.py crashes [file] # Crash risk patterns
|
|
python source_graph.py nullrisks [file] # NULL-check risk analysis
|
|
python source_graph.py rawptrs [file] # Raw pointer class members
|
|
python source_graph.py casts [file] # C-style casts
|
|
python source_graph.py looprisks [file] # Infinite loop risk patterns
|
|
python source_graph.py deadmethods # Unreferenced methods
|
|
python source_graph.py duplicates # Duplicate code blocks
|
|
python source_graph.py context <file> # Unified context: summary + functions + deps
|
|
python source_graph.py func <name> # Find function with line ranges
|
|
python source_graph.py body <name> # Print function source from DB (no file read)
|
|
python source_graph.py gaps # Find documentation/coverage gaps
|
|
python source_graph.py summary # Project overview with statistics
|
|
python source_graph.py stats # Database statistics
|
|
python source_graph.py tags <term> # Find semantic tags
|
|
python source_graph.py hotspots [term] # Semantic hotspots with risk/gain scores
|
|
python source_graph.py coverage [term] # Audit/test coverage signals
|
|
python source_graph.py churn [term] # Git history/churn signals
|
|
python source_graph.py reviewqueue [term] # Prioritized review queue
|
|
python source_graph.py ownership [term] # Git blame ownership signals
|
|
python source_graph.py testmap [term] # Test-to-function mappings
|
|
python source_graph.py calls <term> # Function call graph edges
|
|
python source_graph.py symbols [term] # Symbol-level semantic metadata
|
|
python source_graph.py bottlenecks [term] # Scored optimization queue
|
|
python source_graph.py auditmap [term] # Symbol audit coverage
|
|
python source_graph.py aitasks [term] # Proactive AI task queue
|
|
python source_graph.py trace <term> # Minimal code/config/test trace
|
|
python source_graph.py impact <term> # Change impact analysis
|
|
python source_graph.py research [term] # Search research assets
|
|
python source_graph.py validateai [path] # Validate Monster AI XML
|
|
python source_graph.py sql "<query>" # Run raw SQL query
|
|
|
|
Token-saving commands:
|
|
python source_graph.py focus <term> [budget] [--core] # Compact ranked snapshot for agents
|
|
python source_graph.py slice <term> [budget] [--core] # Minimal dependency/call slice
|
|
python source_graph.py bundle bugfix <term> # Assemble bugfix context bundle
|
|
python source_graph.py bundle feature <term> # Assemble feature context bundle
|
|
python source_graph.py bundle refactor <term> # Assemble refactor context bundle
|
|
python source_graph.py bundle audit <term> # Assemble audit/security context bundle
|
|
python source_graph.py bundle optimize <term> # Assemble optimization context bundle
|
|
python source_graph.py bundle explore <term> # Assemble general exploration bundle
|
|
python source_graph.py summarize [pattern] # Show/generate function summaries
|
|
python source_graph.py decide "<text>" --why Y # Log architecture decision
|
|
python source_graph.py decisions [term] # Query decision log
|
|
python source_graph.py claudemd # Generate CLAUDE.md documentation
|
|
|
|
Project setup:
|
|
python source_graph.py init # Generate starter source_graph.toml
|
|
python source_graph.py export-config # Export seed data to TOML config
|
|
"""
|
|
import json
|
|
import sqlite3
|
|
import sys
|
|
import os
|
|
import re
|
|
import glob
|
|
import hashlib
|
|
import subprocess
|
|
from datetime import date, datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
from xml.etree import ElementTree as ET
|
|
|
|
try:
|
|
import tomllib
|
|
except ImportError:
|
|
tomllib = None
|
|
|
|
SCRIPT_DIR = Path(__file__).parent
|
|
REPO_ROOT = SCRIPT_DIR.parent
|
|
DB_PATH = SCRIPT_DIR / "source_graph.db"
|
|
SCHEMA_VERSION = 5
|
|
EXTRACTOR_VERSION = "2026.03.18.3"
|
|
|
|
# ---------- Config-driven project settings ----------
|
|
# These are set by load_config() at startup, with fallback defaults.
|
|
SOURCE_DIRS = [] # list of (label, Path, [extensions])
|
|
CATEGORY_RULES = [] # list of dicts: {match, pattern, category, source_dir?}
|
|
EXTERNAL_PATHS = {} # e.g. {"runtime_data": Path(...), "research_dir": Path(...)}
|
|
SEED_DATA = {} # e.g. {"singletons": [...], "events": [...], ...}
|
|
CONFIG_PATH = None # path to loaded config file, or None
|
|
|
|
# Legacy aliases — set by load_config() for backward compat with existing scanners
|
|
GAME_DIR = REPO_ROOT / "Game"
|
|
COMMON_DIR = REPO_ROOT / "Common"
|
|
RUNTIME_DATA_ROOT = Path(r"E:\Dev\DevEmuMuServer\Data")
|
|
RESEARCH_DIR = REPO_ROOT / "ReversingResearch"
|
|
|
|
# Active language adapter — set by load_config()
|
|
LANG_ADAPTER = None # type: LanguageAdapter
|
|
|
|
|
|
# ============================================================
|
|
# LANGUAGE ADAPTER SYSTEM
|
|
# ============================================================
|
|
|
|
class LanguageAdapter:
|
|
"""Base class for language-specific parsing logic.
|
|
|
|
Each adapter provides regex patterns, comment-stripping, scope detection,
|
|
and risk-analysis patterns for a single language family.
|
|
"""
|
|
|
|
name = "base"
|
|
extensions = [] # e.g. ["*.py"]
|
|
header_extensions = [] # e.g. ["*.h"] for C++, [] for Python
|
|
uses_braces = True # False for Python (indent-based)
|
|
|
|
# --- Import / Dependency ---
|
|
def import_pattern(self):
|
|
"""Return compiled regex that captures the imported module/file name."""
|
|
return None
|
|
|
|
# --- Enum ---
|
|
def enum_pattern(self):
|
|
"""Return compiled regex matching enum declarations. Group(1) = enum name."""
|
|
return None
|
|
|
|
# --- Struct / Data class ---
|
|
def struct_pattern(self):
|
|
"""Return compiled regex matching struct/dataclass. Group(1) = name."""
|
|
return None
|
|
|
|
# --- Class ---
|
|
def class_pattern(self):
|
|
"""Return compiled regex matching class declaration.
|
|
Group(1) = class name, Group(2) = parent class (optional)."""
|
|
return None
|
|
|
|
# --- Method declaration (in headers/interfaces) ---
|
|
def method_pattern(self):
|
|
"""Return compiled regex for method declarations.
|
|
Group(1) = return type or decorator, Group(2) = method name."""
|
|
return None
|
|
|
|
def method_skip_names(self):
|
|
"""Return frozenset of keywords that look like methods but aren't."""
|
|
return frozenset()
|
|
|
|
# --- Function definition ---
|
|
def function_sig_pattern(self):
|
|
"""Return compiled regex for function/method definition.
|
|
Group(1) = class_name (optional), Group(2) = function name."""
|
|
return None
|
|
|
|
def constructor_pattern(self):
|
|
"""Return compiled regex for constructor definitions, or None."""
|
|
return None
|
|
|
|
def function_skip_names(self):
|
|
"""Return frozenset of names to skip when scanning function defs."""
|
|
return frozenset()
|
|
|
|
# --- Comment / string stripping ---
|
|
def strip_comments_and_strings(self, text):
|
|
"""Remove comments and string literals, preserving newlines."""
|
|
return text
|
|
|
|
def strip_block_comments_from_lines(self, lines):
|
|
"""Return list of cleaned lines with block/line comments removed."""
|
|
return lines
|
|
|
|
# --- Scope detection ---
|
|
def find_opening_scope(self, lines, start):
|
|
"""Find the line containing the opening scope marker ('{' or ':' for Python).
|
|
Returns line index or None."""
|
|
if self.uses_braces:
|
|
return _find_opening_brace(lines, start)
|
|
return self._find_indent_scope_start(lines, start)
|
|
|
|
def find_scope_end(self, lines, scope_start):
|
|
"""Find the line where the scope ends.
|
|
Returns line index or None."""
|
|
if self.uses_braces:
|
|
return _find_function_end(lines, scope_start)
|
|
return self._find_indent_scope_end(lines, scope_start)
|
|
|
|
def _find_indent_scope_start(self, lines, start):
|
|
"""For indent-based languages: find the colon that opens the block."""
|
|
for i in range(start, min(start + 10, len(lines))):
|
|
if ':' in lines[i] and not lines[i].strip().startswith('#'):
|
|
return i
|
|
return None
|
|
|
|
def _find_indent_scope_end(self, lines, scope_start):
|
|
"""For indent-based languages: find where indentation returns to base level."""
|
|
if scope_start >= len(lines):
|
|
return None
|
|
# Find the indentation of the first line inside the block
|
|
base_indent = len(lines[scope_start]) - len(lines[scope_start].lstrip())
|
|
body_indent = None
|
|
for i in range(scope_start + 1, len(lines)):
|
|
stripped = lines[i].strip()
|
|
if not stripped:
|
|
continue
|
|
indent = len(lines[i]) - len(lines[i].lstrip())
|
|
if body_indent is None:
|
|
if indent > base_indent:
|
|
body_indent = indent
|
|
else:
|
|
return scope_start # empty body
|
|
else:
|
|
if indent < body_indent and stripped and not stripped.startswith('#'):
|
|
return i - 1
|
|
# Reached end of file
|
|
return len(lines) - 1
|
|
|
|
def is_declaration_not_definition(self, lines, sig_line, scope_line):
|
|
"""Check if a matched signature is a forward declaration, not a definition."""
|
|
if self.uses_braces:
|
|
for check_idx in range(sig_line, scope_line + 1):
|
|
check_line = lines[check_idx]
|
|
paren_pos = check_line.rfind(')')
|
|
brace_pos = check_line.find('{')
|
|
semi_pos = check_line.find(';')
|
|
if semi_pos >= 0 and (brace_pos < 0 or semi_pos < brace_pos):
|
|
if paren_pos >= 0 and semi_pos > paren_pos:
|
|
return True
|
|
return False
|
|
|
|
# --- Define / Constant patterns ---
|
|
def define_pattern(self):
|
|
"""Return compiled regex for constant/define declarations, or None.
|
|
Group(1) = name, Group(2) = value."""
|
|
return None
|
|
|
|
# --- Call detection ---
|
|
def call_patterns(self):
|
|
"""Return dict of compiled regex patterns for call detection.
|
|
Keys: 'plain', 'scoped', 'member'. Each captures callee name."""
|
|
return {
|
|
'plain': re.compile(r'\b([A-Za-z_~]\w*)\s*\('),
|
|
'scoped': re.compile(r'\b([A-Za-z_]\w*)(?:::|\.)\s*([A-Za-z_~]\w*)\s*\('),
|
|
'member': re.compile(r'(?:->|\.)\s*([A-Za-z_~]\w*)\s*\('),
|
|
}
|
|
|
|
def call_skip_names(self):
|
|
"""Return frozenset of names to skip in call graph analysis."""
|
|
return frozenset()
|
|
|
|
# --- Null / None check risks ---
|
|
def null_risk_calls(self):
|
|
"""Return list of function names that return nullable values."""
|
|
return []
|
|
|
|
def null_check_pattern(self, var_name):
|
|
"""Return True if the given window text contains a null check for var_name."""
|
|
return False
|
|
|
|
# --- Leak / Resource risks ---
|
|
def leak_patterns(self):
|
|
"""Return dict with 'alloc' and 'dealloc' compiled regex patterns, or None."""
|
|
return None
|
|
|
|
# --- Unsafe cast patterns ---
|
|
def unsafe_cast_pattern(self):
|
|
"""Return compiled regex for unsafe casts, or None."""
|
|
return None
|
|
|
|
# --- TODO comment pattern ---
|
|
def todo_pattern(self):
|
|
"""Return compiled regex for TODO/FIXME/HACK comments."""
|
|
return re.compile(r'(?://|/\*|#)\s*(TODO|FIXME|HACK|BUG|XXX|NOTE)\s*:?\s*(.*)', re.IGNORECASE)
|
|
|
|
# --- Raw pointer / ownership risks ---
|
|
def raw_pointer_pattern(self):
|
|
"""Return compiled regex for raw pointer members, or None."""
|
|
return None
|
|
|
|
# --- Crash risk patterns ---
|
|
def crash_risk_patterns(self):
|
|
"""Return list of (pattern_re, risk_type, severity) tuples, or empty list."""
|
|
return []
|
|
|
|
|
|
# ============================================================
|
|
# CONCRETE LANGUAGE ADAPTERS
|
|
# ============================================================
|
|
|
|
class CppAdapter(LanguageAdapter):
|
|
"""C/C++ language adapter."""
|
|
name = "cpp"
|
|
extensions = ["*.cpp", "*.h", "*.cc", "*.cxx", "*.hpp", "*.hxx", "*.c"]
|
|
header_extensions = ["*.h", "*.hpp", "*.hxx"]
|
|
uses_braces = True
|
|
|
|
def import_pattern(self):
|
|
return re.compile(r'#include\s+[<"]([^>"]+)[>"]')
|
|
|
|
def enum_pattern(self):
|
|
return re.compile(r'^\s*enum\s+(?:class\s+)?(\w+)')
|
|
|
|
def struct_pattern(self):
|
|
return re.compile(r'^\s*(?:typedef\s+)?struct\s+(\w+)')
|
|
|
|
def class_pattern(self):
|
|
return re.compile(r'^\s*class\s+(\w+)\s*(?::\s*(?:public|protected|private)\s+(\w+))?')
|
|
|
|
def method_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:virtual\s+|static\s+|inline\s+)*'
|
|
r'(?:const\s+)?(\w[\w:*&<> ]*?)\s+'
|
|
r'(\w+)\s*\([^)]*\)\s*'
|
|
r'(?:const\s*)?(?:override\s*)?(?:=\s*0\s*)?;'
|
|
)
|
|
|
|
def method_skip_names(self):
|
|
return frozenset({'if', 'for', 'while', 'switch', 'return', 'delete', 'new',
|
|
'class', 'struct', 'enum', 'typedef', 'using', 'namespace'})
|
|
|
|
def function_sig_pattern(self):
|
|
return re.compile(
|
|
r'^[\s]*'
|
|
r'(?:[\w:*&<>,\s]+?)\s+'
|
|
r'(?:(\w+)\s*::\s*)?'
|
|
r'(~?\w+)\s*\('
|
|
)
|
|
|
|
def constructor_pattern(self):
|
|
return re.compile(r'^\s*(\w+)\s*::\s*(\1)\s*\(')
|
|
|
|
def function_skip_names(self):
|
|
return frozenset({
|
|
'if', 'else', 'for', 'while', 'switch', 'catch', 'do', 'return',
|
|
'case', 'throw', 'new', 'delete', 'sizeof', 'typeof', 'alignof',
|
|
'noexcept', 'static_assert', 'ASSERT', 'CHECK', 'TEST_CASE',
|
|
'LOG', 'define', 'ifdef', 'ifndef', 'elif', 'endif',
|
|
'SingletonInstance', 'DECLARE', 'IMPLEMENT', 'macro', 'emit',
|
|
})
|
|
|
|
def strip_comments_and_strings(self, text):
|
|
return _strip_cpp_comments_and_strings(text)
|
|
|
|
def strip_block_comments_from_lines(self, lines):
|
|
in_block_comment = False
|
|
cleaned = []
|
|
for line in lines:
|
|
result = []
|
|
i = 0
|
|
while i < len(line):
|
|
if in_block_comment:
|
|
end = line.find('*/', i)
|
|
if end >= 0:
|
|
in_block_comment = False
|
|
i = end + 2
|
|
else:
|
|
break
|
|
else:
|
|
start = line.find('/*', i)
|
|
start_line_comment = line.find('//', i)
|
|
if start_line_comment >= 0 and (start < 0 or start_line_comment < start):
|
|
result.append(line[i:start_line_comment])
|
|
break
|
|
if start >= 0:
|
|
result.append(line[i:start])
|
|
end = line.find('*/', start + 2)
|
|
if end >= 0:
|
|
i = end + 2
|
|
else:
|
|
in_block_comment = True
|
|
break
|
|
else:
|
|
result.append(line[i:])
|
|
break
|
|
cleaned.append(''.join(result))
|
|
return cleaned
|
|
|
|
def define_pattern(self):
|
|
return re.compile(r'^\s*#define\s+(\w+)\s+(.+?)\s*(?://.*)?$')
|
|
|
|
def call_patterns(self):
|
|
return {
|
|
'plain': re.compile(r'\b([A-Za-z_~]\w*)\s*\('),
|
|
'scoped': re.compile(r'\b([A-Za-z_]\w*)\s*::\s*([A-Za-z_~]\w*)\s*\('),
|
|
'member': re.compile(r'(?:->|\.)\s*([A-Za-z_~]\w*)\s*\('),
|
|
}
|
|
|
|
def call_skip_names(self):
|
|
return frozenset({
|
|
"if", "else", "for", "while", "switch", "catch", "return", "sizeof", "alignof", "decltype",
|
|
"static_cast", "dynamic_cast", "reinterpret_cast", "const_cast", "ASSERT", "CHECK", "TEST_CASE",
|
|
"min", "max", "defined", "new", "delete", "noexcept", "throw"
|
|
})
|
|
|
|
def null_risk_calls(self):
|
|
return [
|
|
'FindPlayer', 'GetPlayer', 'GetCharacter', 'GetWorld', 'GetParty',
|
|
'GetGuild', 'GetMonster', 'GetUnit', 'GetItem', 'GetNpc',
|
|
'GetDamageData', 'GetMember', 'GetLeader', 'GetBoss', 'GetFreeConnection',
|
|
]
|
|
|
|
def null_check_pattern(self, var_name):
|
|
"""Build a check string for C++ null checks."""
|
|
return re.compile(
|
|
rf'if\s*\(?\s*!?\s*{re.escape(var_name)}\b|'
|
|
rf'{re.escape(var_name)}\s*==\s*nullptr|'
|
|
rf'{re.escape(var_name)}\s*==\s*NULL|'
|
|
rf'!\s*{re.escape(var_name)}\b'
|
|
)
|
|
|
|
def leak_patterns(self):
|
|
return {
|
|
'alloc': re.compile(r'\bnew\s+\w'),
|
|
'dealloc': re.compile(r'\bdelete\s'),
|
|
'smart_unique': re.compile(r'\bmake_unique\b|\bunique_ptr\b'),
|
|
'smart_shared': re.compile(r'\bmake_shared\b|\bshared_ptr\b'),
|
|
}
|
|
|
|
def unsafe_cast_pattern(self):
|
|
return re.compile(r'\(\s*(\w[\w:]+\s*\*+)\s*\)\s*(\w+)')
|
|
|
|
def raw_pointer_pattern(self):
|
|
return re.compile(r'^\s+(\w[\w:*<> ]+\*)\s+(m_\w+|_\w+)\s*[;=]')
|
|
|
|
def todo_pattern(self):
|
|
return re.compile(r'(?://|/\*)\s*(TODO|FIXME|HACK|BUG|XXX|NOTE)\s*:?\s*(.*)', re.IGNORECASE)
|
|
|
|
def crash_risk_patterns(self):
|
|
return [
|
|
(re.compile(r'(?:->|\.)(?:GetTarget|GetSummoner|GetWorld|GetOwner|GetParty|GetGuild|GetSummoned|GetInterfaceState)\s*\(\)\s*->\s*(\w+)'),
|
|
'null_chain', 'critical'),
|
|
]
|
|
|
|
|
|
class PythonAdapter(LanguageAdapter):
|
|
"""Python language adapter."""
|
|
name = "python"
|
|
extensions = ["*.py"]
|
|
header_extensions = [] # Python has no headers
|
|
uses_braces = False
|
|
|
|
def import_pattern(self):
|
|
return re.compile(r'^\s*(?:import\s+([\w.]+)|from\s+([\w.]+)\s+import)')
|
|
|
|
def enum_pattern(self):
|
|
return re.compile(r'^\s*class\s+(\w+)\s*\(\s*(?:enum\.)?(?:Enum|IntEnum|StrEnum|Flag|IntFlag)\s*\)')
|
|
|
|
def struct_pattern(self):
|
|
return re.compile(r'^\s*@dataclass[^)]*\)?\s*\n\s*class\s+(\w+)', re.MULTILINE)
|
|
|
|
def class_pattern(self):
|
|
return re.compile(r'^\s*class\s+(\w+)\s*(?:\(\s*(\w+))?')
|
|
|
|
def method_pattern(self):
|
|
return re.compile(r'^\s+def\s+(\w+)\s*\(self')
|
|
|
|
def method_skip_names(self):
|
|
return frozenset({'__init__', '__del__', '__repr__', '__str__'})
|
|
|
|
def function_sig_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:async\s+)?def\s+(?:(\w+)\.)?(\w+)\s*\('
|
|
)
|
|
|
|
def constructor_pattern(self):
|
|
return re.compile(r'^\s+def\s+(__init__)\s*\(self')
|
|
|
|
def function_skip_names(self):
|
|
return frozenset()
|
|
|
|
def strip_comments_and_strings(self, text):
|
|
result = []
|
|
i = 0
|
|
in_triple_dq = False
|
|
in_triple_sq = False
|
|
in_string_dq = False
|
|
in_string_sq = False
|
|
|
|
while i < len(text):
|
|
ch = text[i]
|
|
# Triple-quoted strings
|
|
if not in_triple_dq and not in_triple_sq and not in_string_dq and not in_string_sq:
|
|
if text[i:i+3] == '"""':
|
|
in_triple_dq = True
|
|
i += 3
|
|
continue
|
|
if text[i:i+3] == "'''":
|
|
in_triple_sq = True
|
|
i += 3
|
|
continue
|
|
if ch == '#':
|
|
# Line comment — skip to newline
|
|
while i < len(text) and text[i] != '\n':
|
|
i += 1
|
|
if i < len(text):
|
|
result.append('\n')
|
|
i += 1
|
|
continue
|
|
if ch == '"':
|
|
in_string_dq = True
|
|
i += 1
|
|
continue
|
|
if ch == "'":
|
|
in_string_sq = True
|
|
i += 1
|
|
continue
|
|
result.append(ch)
|
|
i += 1
|
|
elif in_triple_dq:
|
|
if text[i:i+3] == '"""':
|
|
in_triple_dq = False
|
|
i += 3
|
|
else:
|
|
if ch == '\n':
|
|
result.append('\n')
|
|
i += 1
|
|
elif in_triple_sq:
|
|
if text[i:i+3] == "'''":
|
|
in_triple_sq = False
|
|
i += 3
|
|
else:
|
|
if ch == '\n':
|
|
result.append('\n')
|
|
i += 1
|
|
elif in_string_dq:
|
|
if ch == '\\' and i + 1 < len(text):
|
|
i += 2
|
|
elif ch == '"':
|
|
in_string_dq = False
|
|
i += 1
|
|
else:
|
|
i += 1
|
|
elif in_string_sq:
|
|
if ch == '\\' and i + 1 < len(text):
|
|
i += 2
|
|
elif ch == "'":
|
|
in_string_sq = False
|
|
i += 1
|
|
else:
|
|
i += 1
|
|
|
|
return "".join(result)
|
|
|
|
def strip_block_comments_from_lines(self, lines):
|
|
cleaned = []
|
|
in_triple = False
|
|
quote_char = None
|
|
for line in lines:
|
|
if in_triple:
|
|
idx = line.find(quote_char * 3)
|
|
if idx >= 0:
|
|
in_triple = False
|
|
cleaned.append(line[idx + 3:])
|
|
else:
|
|
cleaned.append('')
|
|
else:
|
|
# Check for triple quotes
|
|
for qc in ('"""', "'''"):
|
|
idx = line.find(qc)
|
|
if idx >= 0:
|
|
end_idx = line.find(qc, idx + 3)
|
|
if end_idx >= 0:
|
|
# Single-line docstring, remove it
|
|
cleaned.append(line[:idx] + line[end_idx + 3:])
|
|
break
|
|
else:
|
|
in_triple = True
|
|
quote_char = qc[0]
|
|
cleaned.append(line[:idx])
|
|
break
|
|
else:
|
|
# Strip inline comments
|
|
comment_idx = line.find('#')
|
|
if comment_idx >= 0:
|
|
cleaned.append(line[:comment_idx])
|
|
else:
|
|
cleaned.append(line)
|
|
return cleaned
|
|
|
|
def define_pattern(self):
|
|
# Python constants: UPPER_CASE = value
|
|
return re.compile(r'^\s*([A-Z][A-Z0-9_]{2,})\s*(?::\s*\w+\s*)?=\s*(.+?)$')
|
|
|
|
def call_patterns(self):
|
|
return {
|
|
'plain': re.compile(r'\b([A-Za-z_]\w*)\s*\('),
|
|
'scoped': re.compile(r'\b([A-Za-z_]\w*)\.\s*([A-Za-z_]\w*)\s*\('),
|
|
'member': re.compile(r'\.\s*([A-Za-z_]\w*)\s*\('),
|
|
}
|
|
|
|
def call_skip_names(self):
|
|
return frozenset({
|
|
"if", "elif", "else", "for", "while", "with", "return", "yield",
|
|
"print", "len", "range", "enumerate", "zip", "map", "filter",
|
|
"isinstance", "issubclass", "type", "super", "property",
|
|
"staticmethod", "classmethod", "abstractmethod",
|
|
})
|
|
|
|
def null_risk_calls(self):
|
|
return ['get', 'find', 'search', 'match', 'fetchone']
|
|
|
|
def leak_patterns(self):
|
|
return {
|
|
'alloc': re.compile(r'\bopen\s*\('),
|
|
'dealloc': re.compile(r'\.close\s*\('),
|
|
'smart_unique': re.compile(r'\bwith\s+open\b'),
|
|
'smart_shared': re.compile(r'contextmanager|__enter__|__exit__'),
|
|
}
|
|
|
|
def todo_pattern(self):
|
|
return re.compile(r'#\s*(TODO|FIXME|HACK|BUG|XXX|NOTE)\s*:?\s*(.*)', re.IGNORECASE)
|
|
|
|
|
|
class TypeScriptAdapter(LanguageAdapter):
|
|
"""TypeScript/JavaScript language adapter."""
|
|
name = "typescript"
|
|
extensions = ["*.ts", "*.tsx", "*.js", "*.jsx", "*.mts", "*.mjs"]
|
|
header_extensions = ["*.d.ts"]
|
|
uses_braces = True
|
|
|
|
def import_pattern(self):
|
|
return re.compile(r'''(?:import\s+.*?from\s+['"]([^'"]+)['"]|require\s*\(\s*['"]([^'"]+)['"]\s*\))''')
|
|
|
|
def enum_pattern(self):
|
|
return re.compile(r'^\s*(?:export\s+)?(?:const\s+)?enum\s+(\w+)')
|
|
|
|
def struct_pattern(self):
|
|
return re.compile(r'^\s*(?:export\s+)?(?:interface|type)\s+(\w+)')
|
|
|
|
def class_pattern(self):
|
|
return re.compile(r'^\s*(?:export\s+)?(?:abstract\s+)?class\s+(\w+)\s*(?:extends\s+(\w+))?')
|
|
|
|
def method_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:public|private|protected|static|async|readonly|abstract|override)*\s*'
|
|
r'(\w+)\s*(?:<[^>]*>)?\s*\([^)]*\)\s*(?::\s*\w[^{;]*)?[;]'
|
|
)
|
|
|
|
def method_skip_names(self):
|
|
return frozenset({'if', 'for', 'while', 'switch', 'return', 'new', 'delete',
|
|
'class', 'interface', 'type', 'enum', 'import', 'export'})
|
|
|
|
def function_sig_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:export\s+)?(?:async\s+)?'
|
|
r'(?:function\s+(\w+)|(?:(\w+)\s*(?:=|:)\s*(?:async\s+)?(?:function|\([^)]*\)\s*(?:=>|:)))|'
|
|
r'(?:(?:public|private|protected|static|async|override)\s+)*(\w+)\s*\()'
|
|
)
|
|
|
|
def constructor_pattern(self):
|
|
return re.compile(r'^\s*constructor\s*\(')
|
|
|
|
def function_skip_names(self):
|
|
return frozenset({
|
|
'if', 'else', 'for', 'while', 'switch', 'catch', 'return',
|
|
'case', 'throw', 'new', 'delete', 'typeof', 'instanceof',
|
|
'import', 'export', 'from', 'require', 'const', 'let', 'var',
|
|
})
|
|
|
|
def strip_comments_and_strings(self, text):
|
|
# JS/TS uses same comment style as C++ plus template literals
|
|
result = []
|
|
i = 0
|
|
in_block = False
|
|
in_line = False
|
|
in_dq = False
|
|
in_sq = False
|
|
in_template = False
|
|
|
|
while i < len(text):
|
|
ch = text[i]
|
|
nxt = text[i + 1] if i + 1 < len(text) else ""
|
|
|
|
if in_block:
|
|
if ch == "*" and nxt == "/":
|
|
in_block = False
|
|
i += 2
|
|
else:
|
|
if ch == '\n':
|
|
result.append('\n')
|
|
i += 1
|
|
continue
|
|
|
|
if in_line:
|
|
if ch == "\n":
|
|
in_line = False
|
|
result.append("\n")
|
|
i += 1
|
|
continue
|
|
|
|
if in_dq:
|
|
if ch == "\\" and nxt:
|
|
i += 2
|
|
elif ch == '"':
|
|
in_dq = False
|
|
i += 1
|
|
else:
|
|
i += 1
|
|
continue
|
|
|
|
if in_sq:
|
|
if ch == "\\" and nxt:
|
|
i += 2
|
|
elif ch == "'":
|
|
in_sq = False
|
|
i += 1
|
|
else:
|
|
i += 1
|
|
continue
|
|
|
|
if in_template:
|
|
if ch == "\\" and nxt:
|
|
i += 2
|
|
elif ch == '`':
|
|
in_template = False
|
|
i += 1
|
|
else:
|
|
if ch == '\n':
|
|
result.append('\n')
|
|
i += 1
|
|
continue
|
|
|
|
if ch == "/" and nxt == "*":
|
|
in_block = True
|
|
i += 2
|
|
continue
|
|
if ch == "/" and nxt == "/":
|
|
in_line = True
|
|
i += 2
|
|
continue
|
|
if ch == '"':
|
|
in_dq = True
|
|
i += 1
|
|
continue
|
|
if ch == "'":
|
|
in_sq = True
|
|
i += 1
|
|
continue
|
|
if ch == '`':
|
|
in_template = True
|
|
i += 1
|
|
continue
|
|
|
|
result.append(ch)
|
|
i += 1
|
|
|
|
return "".join(result)
|
|
|
|
def strip_block_comments_from_lines(self, lines):
|
|
# Same as C++ (/* */ and //)
|
|
return CppAdapter().strip_block_comments_from_lines(lines)
|
|
|
|
def define_pattern(self):
|
|
return re.compile(r'^\s*(?:export\s+)?const\s+([A-Z][A-Z0-9_]{2,})\s*(?::\s*\w+\s*)?=\s*(.+?)$')
|
|
|
|
def call_patterns(self):
|
|
return {
|
|
'plain': re.compile(r'\b([A-Za-z_$]\w*)\s*\('),
|
|
'scoped': re.compile(r'\b([A-Za-z_$]\w*)\.\s*([A-Za-z_$]\w*)\s*\('),
|
|
'member': re.compile(r'\.\s*([A-Za-z_$]\w*)\s*\('),
|
|
}
|
|
|
|
def call_skip_names(self):
|
|
return frozenset({
|
|
"if", "else", "for", "while", "switch", "catch", "return", "typeof",
|
|
"instanceof", "new", "delete", "throw", "require", "import", "from",
|
|
"console", "setTimeout", "setInterval", "clearTimeout", "clearInterval",
|
|
})
|
|
|
|
def null_risk_calls(self):
|
|
return ['getElementById', 'querySelector', 'find', 'get', 'match', 'exec']
|
|
|
|
def leak_patterns(self):
|
|
return {
|
|
'alloc': re.compile(r'\baddEventListener\b|\bnew\s+\w'),
|
|
'dealloc': re.compile(r'\bremoveEventListener\b'),
|
|
'smart_unique': re.compile(r'\busing\b|\bfinally\b'),
|
|
'smart_shared': re.compile(r'\bdispose\b|\bcleanup\b'),
|
|
}
|
|
|
|
def todo_pattern(self):
|
|
return re.compile(r'(?://|/\*)\s*(TODO|FIXME|HACK|BUG|XXX|NOTE)\s*:?\s*(.*)', re.IGNORECASE)
|
|
|
|
|
|
class RustAdapter(LanguageAdapter):
|
|
"""Rust language adapter."""
|
|
name = "rust"
|
|
extensions = ["*.rs"]
|
|
header_extensions = []
|
|
uses_braces = True
|
|
|
|
def import_pattern(self):
|
|
return re.compile(r'^\s*use\s+([\w:]+)')
|
|
|
|
def enum_pattern(self):
|
|
return re.compile(r'^\s*(?:pub\s+)?enum\s+(\w+)')
|
|
|
|
def struct_pattern(self):
|
|
return re.compile(r'^\s*(?:pub\s+)?struct\s+(\w+)')
|
|
|
|
def class_pattern(self):
|
|
# Rust uses impl blocks instead of classes
|
|
return re.compile(r'^\s*impl\s+(?:<[^>]*>\s+)?(\w+)\s*(?:for\s+(\w+))?')
|
|
|
|
def method_pattern(self):
|
|
return re.compile(r'^\s*(?:pub\s+)?(?:async\s+)?fn\s+(\w+)\s*\(')
|
|
|
|
def method_skip_names(self):
|
|
return frozenset()
|
|
|
|
def function_sig_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:pub(?:\(crate\))?\s+)?(?:async\s+)?(?:unsafe\s+)?(?:extern\s+"C"\s+)?'
|
|
r'fn\s+(\w+)\s*(?:<[^>]*>)?\s*\('
|
|
)
|
|
|
|
def constructor_pattern(self):
|
|
return re.compile(r'^\s*(?:pub\s+)?fn\s+(new)\s*\(')
|
|
|
|
def function_skip_names(self):
|
|
return frozenset({'macro_rules'})
|
|
|
|
def strip_comments_and_strings(self, text):
|
|
# Rust uses // and /* */ like C++, plus raw strings r#""#
|
|
return _strip_cpp_comments_and_strings(text)
|
|
|
|
def strip_block_comments_from_lines(self, lines):
|
|
return CppAdapter().strip_block_comments_from_lines(lines)
|
|
|
|
def define_pattern(self):
|
|
return re.compile(r'^\s*(?:pub\s+)?const\s+([A-Z][A-Z0-9_]*)\s*:\s*\w+\s*=\s*(.+?)\s*;')
|
|
|
|
def call_patterns(self):
|
|
return {
|
|
'plain': re.compile(r'\b([A-Za-z_]\w*)\s*[!(]\s*'),
|
|
'scoped': re.compile(r'\b([A-Za-z_]\w*)\s*::\s*([A-Za-z_]\w*)\s*\('),
|
|
'member': re.compile(r'\.\s*([A-Za-z_]\w*)\s*\('),
|
|
}
|
|
|
|
def call_skip_names(self):
|
|
return frozenset({
|
|
"if", "else", "for", "while", "match", "loop", "return",
|
|
"let", "mut", "ref", "Box", "Vec", "Some", "None", "Ok", "Err",
|
|
"println", "eprintln", "format", "write", "writeln",
|
|
"panic", "unreachable", "unimplemented", "todo", "assert",
|
|
})
|
|
|
|
def null_risk_calls(self):
|
|
return ['unwrap', 'expect', 'unwrap_or']
|
|
|
|
def leak_patterns(self):
|
|
return {
|
|
'alloc': re.compile(r'\bBox::new\b|\bunsafe\b'),
|
|
'dealloc': re.compile(r'\bdrop\s*\('),
|
|
'smart_unique': re.compile(r'\bBox<|Arc<|Rc<'),
|
|
'smart_shared': re.compile(r'\bRc::new\b|\bArc::new\b'),
|
|
}
|
|
|
|
def todo_pattern(self):
|
|
return re.compile(r'(?://|/\*)\s*(TODO|FIXME|HACK|BUG|XXX|NOTE)\s*:?\s*(.*)', re.IGNORECASE)
|
|
|
|
|
|
class GoAdapter(LanguageAdapter):
|
|
"""Go language adapter."""
|
|
name = "go"
|
|
extensions = ["*.go"]
|
|
header_extensions = []
|
|
uses_braces = True
|
|
|
|
def import_pattern(self):
|
|
return re.compile(r'^\s*(?:import\s+)?"([\w./\-]+)"')
|
|
|
|
def enum_pattern(self):
|
|
# Go uses const blocks with iota as enums
|
|
return re.compile(r'^\s*type\s+(\w+)\s+(?:int|uint|string)\s*$')
|
|
|
|
def struct_pattern(self):
|
|
return re.compile(r'^\s*type\s+(\w+)\s+struct\s*\{?')
|
|
|
|
def class_pattern(self):
|
|
# Go uses interfaces instead of classes
|
|
return re.compile(r'^\s*type\s+(\w+)\s+interface\s*\{?')
|
|
|
|
def method_pattern(self):
|
|
return re.compile(r'^\s*func\s+\(\s*\w+\s+\*?(\w+)\s*\)\s+(\w+)\s*\(')
|
|
|
|
def method_skip_names(self):
|
|
return frozenset()
|
|
|
|
def function_sig_pattern(self):
|
|
return re.compile(
|
|
r'^\s*func\s+(?:\(\s*\w+\s+\*?(\w+)\s*\)\s+)?(\w+)\s*\('
|
|
)
|
|
|
|
def constructor_pattern(self):
|
|
return re.compile(r'^\s*func\s+New(\w+)\s*\(')
|
|
|
|
def function_skip_names(self):
|
|
return frozenset({'init'})
|
|
|
|
def strip_comments_and_strings(self, text):
|
|
# Go uses // and /* */ like C++, plus backtick raw strings
|
|
result = []
|
|
i = 0
|
|
in_block = False
|
|
in_line_comment = False
|
|
in_dq = False
|
|
in_raw = False
|
|
|
|
while i < len(text):
|
|
ch = text[i]
|
|
nxt = text[i + 1] if i + 1 < len(text) else ""
|
|
|
|
if in_block:
|
|
if ch == '*' and nxt == '/':
|
|
in_block = False
|
|
i += 2
|
|
else:
|
|
if ch == '\n':
|
|
result.append('\n')
|
|
i += 1
|
|
continue
|
|
|
|
if in_line_comment:
|
|
if ch == '\n':
|
|
in_line_comment = False
|
|
result.append('\n')
|
|
i += 1
|
|
continue
|
|
|
|
if in_dq:
|
|
if ch == '\\' and nxt:
|
|
i += 2
|
|
elif ch == '"':
|
|
in_dq = False
|
|
i += 1
|
|
else:
|
|
i += 1
|
|
continue
|
|
|
|
if in_raw:
|
|
if ch == '`':
|
|
in_raw = False
|
|
i += 1
|
|
else:
|
|
if ch == '\n':
|
|
result.append('\n')
|
|
i += 1
|
|
continue
|
|
|
|
if ch == '/' and nxt == '*':
|
|
in_block = True
|
|
i += 2
|
|
continue
|
|
if ch == '/' and nxt == '/':
|
|
in_line_comment = True
|
|
i += 2
|
|
continue
|
|
if ch == '"':
|
|
in_dq = True
|
|
i += 1
|
|
continue
|
|
if ch == '`':
|
|
in_raw = True
|
|
i += 1
|
|
continue
|
|
|
|
result.append(ch)
|
|
i += 1
|
|
|
|
return "".join(result)
|
|
|
|
def strip_block_comments_from_lines(self, lines):
|
|
return CppAdapter().strip_block_comments_from_lines(lines)
|
|
|
|
def define_pattern(self):
|
|
return re.compile(r'^\s*(?:const\s+)?([A-Z][A-Za-z0-9_]*)\s*(?:\w+\s*)?=\s*(.+?)$')
|
|
|
|
def call_patterns(self):
|
|
return {
|
|
'plain': re.compile(r'\b([A-Za-z_]\w*)\s*\('),
|
|
'scoped': re.compile(r'\b([A-Za-z_]\w*)\.\s*([A-Za-z_]\w*)\s*\('),
|
|
'member': re.compile(r'\.\s*([A-Za-z_]\w*)\s*\('),
|
|
}
|
|
|
|
def call_skip_names(self):
|
|
return frozenset({
|
|
"if", "else", "for", "switch", "select", "case", "return", "go",
|
|
"defer", "range", "make", "append", "len", "cap", "new", "close",
|
|
"delete", "copy", "panic", "recover", "print", "println",
|
|
"fmt", "log", "errors",
|
|
})
|
|
|
|
def null_risk_calls(self):
|
|
return [] # Go uses error returns, not null pointers
|
|
|
|
def leak_patterns(self):
|
|
return {
|
|
'alloc': re.compile(r'\bos\.Open\b|\bnet\.Listen\b|\bhttp\.Get\b'),
|
|
'dealloc': re.compile(r'\.Close\s*\('),
|
|
'smart_unique': re.compile(r'\bdefer\s+\w+\.Close\b'),
|
|
'smart_shared': re.compile(r'\bdefer\b'),
|
|
}
|
|
|
|
def todo_pattern(self):
|
|
return re.compile(r'(?://|/\*)\s*(TODO|FIXME|HACK|BUG|XXX|NOTE)\s*:?\s*(.*)', re.IGNORECASE)
|
|
|
|
|
|
class JavaAdapter(LanguageAdapter):
|
|
"""Java language adapter."""
|
|
name = "java"
|
|
extensions = ["*.java"]
|
|
header_extensions = []
|
|
uses_braces = True
|
|
|
|
def import_pattern(self):
|
|
return re.compile(r'^\s*import\s+(?:static\s+)?([\w.]+)')
|
|
|
|
def enum_pattern(self):
|
|
return re.compile(r'^\s*(?:public\s+|private\s+|protected\s+)?enum\s+(\w+)')
|
|
|
|
def struct_pattern(self):
|
|
# Java records (Java 16+)
|
|
return re.compile(r'^\s*(?:public\s+)?record\s+(\w+)')
|
|
|
|
def class_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:public\s+|private\s+|protected\s+)?(?:abstract\s+|final\s+)?'
|
|
r'(?:class|interface)\s+(\w+)\s*(?:extends\s+(\w+))?'
|
|
)
|
|
|
|
def method_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:public|private|protected|static|final|abstract|synchronized|native|default)*\s*'
|
|
r'(?:<[^>]*>\s+)?(\w[\w<>\[\], ]*?)\s+(\w+)\s*\([^)]*\)\s*'
|
|
r'(?:throws\s+\w[\w, ]*)?\s*[;{]'
|
|
)
|
|
|
|
def method_skip_names(self):
|
|
return frozenset({'if', 'for', 'while', 'switch', 'return', 'new',
|
|
'class', 'interface', 'enum', 'import', 'package'})
|
|
|
|
def function_sig_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:@\w+\s*(?:\([^)]*\)\s*)?)*'
|
|
r'(?:public|private|protected|static|final|abstract|synchronized|native|default|\s)*'
|
|
r'(?:<[^>]*>\s+)?(?:[\w<>\[\], ]+?)\s+'
|
|
r'(\w+)\s*\('
|
|
)
|
|
|
|
def constructor_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:public|private|protected)\s+(\w+)\s*\('
|
|
)
|
|
|
|
def function_skip_names(self):
|
|
return frozenset({
|
|
'if', 'else', 'for', 'while', 'switch', 'catch', 'return',
|
|
'case', 'throw', 'new', 'class', 'interface', 'enum',
|
|
'import', 'package', 'assert',
|
|
})
|
|
|
|
def strip_comments_and_strings(self, text):
|
|
return _strip_cpp_comments_and_strings(text)
|
|
|
|
def strip_block_comments_from_lines(self, lines):
|
|
return CppAdapter().strip_block_comments_from_lines(lines)
|
|
|
|
def define_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:public|private|protected)?\s*static\s+final\s+\w+\s+([A-Z][A-Z0-9_]*)\s*=\s*(.+?)\s*;'
|
|
)
|
|
|
|
def call_patterns(self):
|
|
return {
|
|
'plain': re.compile(r'\b([A-Za-z_]\w*)\s*\('),
|
|
'scoped': re.compile(r'\b([A-Za-z_]\w*)\.\s*([A-Za-z_]\w*)\s*\('),
|
|
'member': re.compile(r'\.\s*([A-Za-z_]\w*)\s*\('),
|
|
}
|
|
|
|
def call_skip_names(self):
|
|
return frozenset({
|
|
"if", "else", "for", "while", "switch", "catch", "return",
|
|
"throw", "new", "instanceof", "assert",
|
|
"System", "String", "Integer", "Long", "Double", "Boolean",
|
|
})
|
|
|
|
def null_risk_calls(self):
|
|
return ['get', 'find', 'findById', 'getParameter', 'getAttribute',
|
|
'getElementById', 'querySelector']
|
|
|
|
def leak_patterns(self):
|
|
return {
|
|
'alloc': re.compile(r'\bnew\s+(?:FileInputStream|BufferedReader|Connection|Socket)\b'),
|
|
'dealloc': re.compile(r'\.close\s*\('),
|
|
'smart_unique': re.compile(r'\btry\s*\('), # try-with-resources
|
|
'smart_shared': re.compile(r'\bAutoCloseable\b|\bCloseable\b'),
|
|
}
|
|
|
|
def todo_pattern(self):
|
|
return re.compile(r'(?://|/\*)\s*(TODO|FIXME|HACK|BUG|XXX|NOTE)\s*:?\s*(.*)', re.IGNORECASE)
|
|
|
|
|
|
class CSharpAdapter(LanguageAdapter):
|
|
"""C# language adapter."""
|
|
name = "csharp"
|
|
extensions = ["*.cs"]
|
|
header_extensions = []
|
|
uses_braces = True
|
|
|
|
def import_pattern(self):
|
|
return re.compile(r'^\s*using\s+(?:static\s+)?([\w.]+)\s*;')
|
|
|
|
def enum_pattern(self):
|
|
return re.compile(r'^\s*(?:public\s+|private\s+|protected\s+|internal\s+)?enum\s+(\w+)')
|
|
|
|
def struct_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:public\s+|private\s+|protected\s+|internal\s+)?'
|
|
r'(?:readonly\s+)?(?:ref\s+)?struct\s+(\w+)'
|
|
)
|
|
|
|
def class_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:public|private|protected|internal|abstract|sealed|static|partial|\s)*'
|
|
r'class\s+(\w+)\s*(?::\s*(\w+))?'
|
|
)
|
|
|
|
def method_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:public|private|protected|internal|static|virtual|override|abstract|sealed|async|partial|\s)*'
|
|
r'(?:[\w<>\[\]?, ]+?)\s+(\w+)\s*\([^)]*\)\s*[;{]'
|
|
)
|
|
|
|
def method_skip_names(self):
|
|
return frozenset({'if', 'for', 'foreach', 'while', 'switch', 'return', 'new',
|
|
'class', 'struct', 'enum', 'interface', 'using', 'namespace'})
|
|
|
|
def function_sig_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:\[[\w()]+\]\s*)*'
|
|
r'(?:public|private|protected|internal|static|virtual|override|abstract|sealed|async|partial|\s)*'
|
|
r'(?:[\w<>\[\]?, ]+?)\s+'
|
|
r'(\w+)\s*\('
|
|
)
|
|
|
|
def constructor_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:public|private|protected|internal|static)\s+(\w+)\s*\('
|
|
)
|
|
|
|
def function_skip_names(self):
|
|
return frozenset({
|
|
'if', 'else', 'for', 'foreach', 'while', 'switch', 'catch', 'return',
|
|
'case', 'throw', 'new', 'typeof', 'sizeof', 'nameof',
|
|
'class', 'struct', 'interface', 'enum', 'namespace', 'using',
|
|
})
|
|
|
|
def strip_comments_and_strings(self, text):
|
|
# C# uses same comment syntax as C++, plus verbatim strings @""
|
|
result = []
|
|
i = 0
|
|
in_block = False
|
|
in_line = False
|
|
in_dq = False
|
|
in_verbatim = False
|
|
in_char = False
|
|
|
|
while i < len(text):
|
|
ch = text[i]
|
|
nxt = text[i + 1] if i + 1 < len(text) else ""
|
|
|
|
if in_block:
|
|
if ch == '*' and nxt == '/':
|
|
in_block = False
|
|
i += 2
|
|
else:
|
|
if ch == '\n':
|
|
result.append('\n')
|
|
i += 1
|
|
continue
|
|
|
|
if in_line:
|
|
if ch == '\n':
|
|
in_line = False
|
|
result.append('\n')
|
|
i += 1
|
|
continue
|
|
|
|
if in_verbatim:
|
|
if ch == '"' and nxt == '"':
|
|
i += 2 # escaped quote inside verbatim
|
|
elif ch == '"':
|
|
in_verbatim = False
|
|
i += 1
|
|
else:
|
|
if ch == '\n':
|
|
result.append('\n')
|
|
i += 1
|
|
continue
|
|
|
|
if in_dq:
|
|
if ch == '\\' and nxt:
|
|
i += 2
|
|
elif ch == '"':
|
|
in_dq = False
|
|
i += 1
|
|
else:
|
|
i += 1
|
|
continue
|
|
|
|
if in_char:
|
|
if ch == '\\' and nxt:
|
|
i += 2
|
|
elif ch == "'":
|
|
in_char = False
|
|
i += 1
|
|
else:
|
|
i += 1
|
|
continue
|
|
|
|
if ch == '/' and nxt == '*':
|
|
in_block = True
|
|
i += 2
|
|
continue
|
|
if ch == '/' and nxt == '/':
|
|
in_line = True
|
|
i += 2
|
|
continue
|
|
if ch == '@' and nxt == '"':
|
|
in_verbatim = True
|
|
i += 2
|
|
continue
|
|
if ch == '"':
|
|
in_dq = True
|
|
i += 1
|
|
continue
|
|
if ch == "'":
|
|
in_char = True
|
|
i += 1
|
|
continue
|
|
|
|
result.append(ch)
|
|
i += 1
|
|
|
|
return "".join(result)
|
|
|
|
def strip_block_comments_from_lines(self, lines):
|
|
return CppAdapter().strip_block_comments_from_lines(lines)
|
|
|
|
def define_pattern(self):
|
|
return re.compile(
|
|
r'^\s*(?:public|private|protected|internal)?\s*'
|
|
r'(?:static\s+)?(?:readonly\s+)?const\s+\w+\s+([A-Z][A-Za-z0-9_]*)\s*=\s*(.+?)\s*;'
|
|
)
|
|
|
|
def call_patterns(self):
|
|
return {
|
|
'plain': re.compile(r'\b([A-Za-z_]\w*)\s*\('),
|
|
'scoped': re.compile(r'\b([A-Za-z_]\w*)\.\s*([A-Za-z_]\w*)\s*\('),
|
|
'member': re.compile(r'\.\s*([A-Za-z_]\w*)\s*\('),
|
|
}
|
|
|
|
def call_skip_names(self):
|
|
return frozenset({
|
|
"if", "else", "for", "foreach", "while", "switch", "catch", "return",
|
|
"throw", "new", "typeof", "sizeof", "nameof", "is", "as",
|
|
"Console", "String", "Math", "Convert",
|
|
})
|
|
|
|
def null_risk_calls(self):
|
|
return ['Find', 'FindObjectOfType', 'GetComponent', 'FirstOrDefault',
|
|
'SingleOrDefault', 'Find', 'FindById']
|
|
|
|
def leak_patterns(self):
|
|
return {
|
|
'alloc': re.compile(r'\bnew\s+(?:FileStream|StreamReader|SqlConnection|HttpClient)\b'),
|
|
'dealloc': re.compile(r'\.Dispose\s*\(|\.Close\s*\('),
|
|
'smart_unique': re.compile(r'\busing\s*\('),
|
|
'smart_shared': re.compile(r'\bIDisposable\b|\bIAsyncDisposable\b'),
|
|
}
|
|
|
|
def todo_pattern(self):
|
|
return re.compile(r'(?://|/\*)\s*(TODO|FIXME|HACK|BUG|XXX|NOTE)\s*:?\s*(.*)', re.IGNORECASE)
|
|
|
|
|
|
# --- Adapter registry ---
|
|
ADAPTER_REGISTRY = {
|
|
"cpp": CppAdapter,
|
|
"c++": CppAdapter,
|
|
"c": CppAdapter,
|
|
"python": PythonAdapter,
|
|
"py": PythonAdapter,
|
|
"typescript": TypeScriptAdapter,
|
|
"ts": TypeScriptAdapter,
|
|
"javascript": TypeScriptAdapter,
|
|
"js": TypeScriptAdapter,
|
|
"rust": RustAdapter,
|
|
"rs": RustAdapter,
|
|
"go": GoAdapter,
|
|
"golang": GoAdapter,
|
|
"java": JavaAdapter,
|
|
"csharp": CSharpAdapter,
|
|
"c#": CSharpAdapter,
|
|
"cs": CSharpAdapter,
|
|
}
|
|
|
|
|
|
def get_adapter(language_name):
|
|
"""Get language adapter by name. Returns CppAdapter as default."""
|
|
cls = ADAPTER_REGISTRY.get(language_name.lower(), CppAdapter)
|
|
return cls()
|
|
|
|
|
|
# ============================================================
|
|
# SCHEMA
|
|
# ============================================================
|
|
SCHEMA = """
|
|
CREATE TABLE IF NOT EXISTS graph_metadata (
|
|
id INTEGER PRIMARY KEY,
|
|
schema_version INTEGER NOT NULL,
|
|
extractor_version TEXT NOT NULL,
|
|
graph_build_revision TEXT,
|
|
built_at TEXT NOT NULL,
|
|
repo_root TEXT NOT NULL
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS files (
|
|
id INTEGER PRIMARY KEY,
|
|
path TEXT UNIQUE NOT NULL,
|
|
project TEXT NOT NULL, -- 'Game', 'Common', 'Server_Link', etc.
|
|
category TEXT, -- 'core', 'event', 'ai', 'script', 'player', 'monster', 'network', 'db', 'util'
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS singletons (
|
|
id INTEGER PRIMARY KEY,
|
|
macro TEXT NOT NULL, -- e.g. 'sGameServer'
|
|
class_name TEXT NOT NULL, -- e.g. 'GameServer'
|
|
header TEXT NOT NULL, -- e.g. 'GameServer.h'
|
|
project TEXT DEFAULT 'Game',
|
|
category TEXT, -- 'core', 'inter_server', 'player_social', 'item', 'skill', 'event', 'vip', 'quest', 'misc', 'common'
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS classes (
|
|
id INTEGER PRIMARY KEY,
|
|
name TEXT NOT NULL,
|
|
header TEXT,
|
|
cpp_file TEXT,
|
|
parent_class TEXT,
|
|
project TEXT DEFAULT 'Game',
|
|
category TEXT,
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS methods (
|
|
id INTEGER PRIMARY KEY,
|
|
class_name TEXT NOT NULL,
|
|
method_name TEXT NOT NULL,
|
|
file TEXT NOT NULL,
|
|
line_hint INTEGER, -- approximate line number
|
|
category TEXT, -- 'combat', 'db', 'network', 'trade', 'party', etc.
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS packet_handlers (
|
|
id INTEGER PRIMARY KEY,
|
|
headcode_name TEXT NOT NULL,
|
|
headcode_value INTEGER,
|
|
headcode_hex TEXT,
|
|
handler_method TEXT,
|
|
source_file TEXT NOT NULL, -- 'Player.cpp', 'ServerLink.cpp', etc.
|
|
handler_type TEXT NOT NULL, -- 'client', 'inter_server', 'login', 'connect'
|
|
category TEXT,
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS events (
|
|
id INTEGER PRIMARY KEY,
|
|
name TEXT NOT NULL,
|
|
singleton_macro TEXT,
|
|
cpp_file TEXT,
|
|
header_file TEXT,
|
|
def_file TEXT,
|
|
ai_file TEXT,
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS ai_handlers (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
target TEXT NOT NULL,
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS inventory_scripts (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS constants (
|
|
id INTEGER PRIMARY KEY,
|
|
name TEXT NOT NULL,
|
|
value TEXT,
|
|
header TEXT NOT NULL,
|
|
category TEXT,
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS config_files (
|
|
id INTEGER PRIMARY KEY,
|
|
path TEXT NOT NULL,
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS player_files (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
domain TEXT NOT NULL,
|
|
key_methods TEXT,
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS dependencies (
|
|
id INTEGER PRIMARY KEY,
|
|
source_file TEXT NOT NULL,
|
|
target_file TEXT NOT NULL,
|
|
dep_type TEXT -- 'include', 'singleton_use', 'call'
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS enums (
|
|
id INTEGER PRIMARY KEY,
|
|
name TEXT NOT NULL,
|
|
file TEXT NOT NULL,
|
|
line INTEGER,
|
|
value_count INTEGER, -- number of enum values
|
|
values_preview TEXT, -- first few values as comma-separated string
|
|
category TEXT,
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS structs (
|
|
id INTEGER PRIMARY KEY,
|
|
name TEXT NOT NULL,
|
|
file TEXT NOT NULL,
|
|
line INTEGER,
|
|
size_bytes INTEGER, -- sizeof if detectable
|
|
field_count INTEGER,
|
|
is_packed INTEGER DEFAULT 0,-- 1 if inside #pragma pack(1)
|
|
fields_preview TEXT, -- first few fields
|
|
category TEXT,
|
|
description TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS todos (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
line INTEGER NOT NULL,
|
|
todo_type TEXT NOT NULL, -- 'TODO', 'FIXME', 'HACK', 'BUG', 'XXX', 'NOTE'
|
|
text TEXT NOT NULL,
|
|
project TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS db_tables (
|
|
id INTEGER PRIMARY KEY,
|
|
table_name TEXT NOT NULL,
|
|
source_file TEXT NOT NULL,
|
|
line INTEGER,
|
|
query_type TEXT, -- 'SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE'
|
|
context TEXT -- snippet of surrounding code
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS prepared_statements (
|
|
id INTEGER PRIMARY KEY,
|
|
query_name TEXT NOT NULL, -- e.g. 'QUERY_PARTY_DELETE'
|
|
sql_text TEXT NOT NULL, -- the SQL string
|
|
connection_type TEXT, -- 'CONNECTION_ASYNC', 'CONNECTION_SYNCH'
|
|
source_file TEXT NOT NULL,
|
|
line INTEGER
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS config_keys (
|
|
id INTEGER PRIMARY KEY,
|
|
key_name TEXT NOT NULL, -- the config key string
|
|
getter_type TEXT, -- 'GetInt', 'GetString', 'GetBool', 'GetFloat'
|
|
source_file TEXT NOT NULL,
|
|
line INTEGER,
|
|
context TEXT -- surrounding code snippet
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS defines (
|
|
id INTEGER PRIMARY KEY,
|
|
name TEXT NOT NULL,
|
|
value TEXT,
|
|
file TEXT NOT NULL,
|
|
line INTEGER,
|
|
category TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS file_lines (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
project TEXT,
|
|
line_count INTEGER NOT NULL,
|
|
category TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS leak_risks (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
project TEXT,
|
|
new_count INTEGER DEFAULT 0,
|
|
delete_count INTEGER DEFAULT 0,
|
|
make_unique_count INTEGER DEFAULT 0,
|
|
make_shared_count INTEGER DEFAULT 0,
|
|
risk_score INTEGER DEFAULT 0, -- new_count - delete_count - smart_ptr_count
|
|
sample_lines TEXT -- first few 'new' lines for reference
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS null_risks (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
line INTEGER NOT NULL,
|
|
function_call TEXT NOT NULL, -- e.g. 'FindPlayer', 'GetWorld'
|
|
pointer_var TEXT, -- the variable being assigned
|
|
risk_type TEXT, -- 'unchecked_deref', 'no_null_check'
|
|
context TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS raw_pointers (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
line INTEGER NOT NULL,
|
|
class_name TEXT,
|
|
member_type TEXT NOT NULL, -- e.g. 'Player*'
|
|
member_name TEXT NOT NULL, -- e.g. 'm_pOwner'
|
|
has_destructor_delete INTEGER DEFAULT 0
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS unsafe_casts (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
line INTEGER NOT NULL,
|
|
cast_expr TEXT NOT NULL, -- the (Type*)ptr expression
|
|
context TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS dead_methods (
|
|
id INTEGER PRIMARY KEY,
|
|
class_name TEXT NOT NULL,
|
|
method_name TEXT NOT NULL,
|
|
header_file TEXT NOT NULL,
|
|
header_line INTEGER,
|
|
ref_count INTEGER DEFAULT 0 -- 0 = truly dead
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS duplicate_blocks (
|
|
id INTEGER PRIMARY KEY,
|
|
block_hash TEXT NOT NULL,
|
|
file_a TEXT NOT NULL,
|
|
line_a INTEGER NOT NULL,
|
|
file_b TEXT NOT NULL,
|
|
line_b INTEGER NOT NULL,
|
|
line_count INTEGER NOT NULL, -- how many lines are duplicated
|
|
preview TEXT -- first line of the block
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS research_assets (
|
|
id INTEGER PRIMARY KEY,
|
|
path TEXT NOT NULL,
|
|
file_name TEXT NOT NULL,
|
|
asset_type TEXT NOT NULL, -- 'markdown', 'asm', 'text', 'binary', 'other'
|
|
size_bytes INTEGER NOT NULL,
|
|
title TEXT,
|
|
summary TEXT,
|
|
symbol_refs TEXT,
|
|
protocol_refs TEXT,
|
|
notes TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS research_mentions (
|
|
id INTEGER PRIMARY KEY,
|
|
asset_path TEXT NOT NULL,
|
|
symbol TEXT NOT NULL,
|
|
mention_type TEXT NOT NULL, -- 'symbol', 'protocol', 'asm_label'
|
|
context TEXT
|
|
);
|
|
|
|
-- Full-text search virtual table
|
|
CREATE VIRTUAL TABLE IF NOT EXISTS fts_index USING fts5(
|
|
entity_type, -- 'singleton', 'class', 'method', 'handler', 'event', 'ai', 'constant', 'config', 'file'
|
|
name,
|
|
file,
|
|
category,
|
|
description,
|
|
tokenize='unicode61'
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_singletons_macro ON singletons(macro);
|
|
CREATE INDEX IF NOT EXISTS idx_singletons_class ON singletons(class_name);
|
|
CREATE INDEX IF NOT EXISTS idx_methods_class ON methods(class_name);
|
|
CREATE INDEX IF NOT EXISTS idx_methods_name ON methods(method_name);
|
|
CREATE INDEX IF NOT EXISTS idx_handlers_name ON packet_handlers(headcode_name);
|
|
CREATE INDEX IF NOT EXISTS idx_handlers_type ON packet_handlers(handler_type);
|
|
CREATE INDEX IF NOT EXISTS idx_constants_name ON constants(name);
|
|
CREATE INDEX IF NOT EXISTS idx_deps_source ON dependencies(source_file);
|
|
CREATE INDEX IF NOT EXISTS idx_deps_target ON dependencies(target_file);
|
|
CREATE INDEX IF NOT EXISTS idx_enums_name ON enums(name);
|
|
CREATE INDEX IF NOT EXISTS idx_structs_name ON structs(name);
|
|
CREATE INDEX IF NOT EXISTS idx_todos_type ON todos(todo_type);
|
|
CREATE INDEX IF NOT EXISTS idx_todos_file ON todos(file);
|
|
CREATE INDEX IF NOT EXISTS idx_db_tables_name ON db_tables(table_name);
|
|
CREATE INDEX IF NOT EXISTS idx_classes_name ON classes(name);
|
|
CREATE INDEX IF NOT EXISTS idx_classes_parent ON classes(parent_class);
|
|
CREATE INDEX IF NOT EXISTS idx_prepared_name ON prepared_statements(query_name);
|
|
CREATE INDEX IF NOT EXISTS idx_config_keys_name ON config_keys(key_name);
|
|
CREATE INDEX IF NOT EXISTS idx_defines_name ON defines(name);
|
|
CREATE INDEX IF NOT EXISTS idx_file_lines_count ON file_lines(line_count DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_leak_risks_score ON leak_risks(risk_score DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_null_risks_file ON null_risks(file);
|
|
CREATE INDEX IF NOT EXISTS idx_raw_pointers_file ON raw_pointers(file);
|
|
CREATE INDEX IF NOT EXISTS idx_unsafe_casts_file ON unsafe_casts(file);
|
|
CREATE INDEX IF NOT EXISTS idx_dead_methods_class ON dead_methods(class_name);
|
|
CREATE INDEX IF NOT EXISTS idx_research_assets_name ON research_assets(file_name);
|
|
CREATE INDEX IF NOT EXISTS idx_research_assets_type ON research_assets(asset_type);
|
|
CREATE INDEX IF NOT EXISTS idx_research_mentions_symbol ON research_mentions(symbol);
|
|
CREATE INDEX IF NOT EXISTS idx_research_mentions_asset ON research_mentions(asset_path);
|
|
CREATE TABLE IF NOT EXISTS crash_risks (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
line INTEGER NOT NULL,
|
|
risk_type TEXT NOT NULL, -- 'null_chain', 'div_zero', 'use_after_free', 'unchecked_target', 'unchecked_world', 'unchecked_summoner'
|
|
severity TEXT NOT NULL, -- 'critical', 'high', 'medium'
|
|
expression TEXT NOT NULL, -- the risky code expression
|
|
context TEXT -- full line of code
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_crash_risks_file ON crash_risks(file);
|
|
CREATE INDEX IF NOT EXISTS idx_crash_risks_type ON crash_risks(risk_type);
|
|
CREATE INDEX IF NOT EXISTS idx_crash_risks_severity ON crash_risks(severity);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_duplicate_blocks_hash ON duplicate_blocks(block_hash);
|
|
|
|
CREATE TABLE IF NOT EXISTS infinite_loop_risks (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
line INTEGER NOT NULL,
|
|
risk_type TEXT NOT NULL,
|
|
severity TEXT NOT NULL,
|
|
expression TEXT NOT NULL,
|
|
context TEXT
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_infinite_loop_risks_file ON infinite_loop_risks(file);
|
|
CREATE INDEX IF NOT EXISTS idx_infinite_loop_risks_type ON infinite_loop_risks(risk_type);
|
|
|
|
CREATE TABLE IF NOT EXISTS file_summaries (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT UNIQUE NOT NULL,
|
|
project TEXT,
|
|
summary TEXT NOT NULL,
|
|
category TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS function_index (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
function_name TEXT NOT NULL,
|
|
class_name TEXT,
|
|
start_line INTEGER NOT NULL,
|
|
end_line INTEGER NOT NULL,
|
|
signature TEXT,
|
|
project TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS edges (
|
|
id INTEGER PRIMARY KEY,
|
|
source TEXT NOT NULL,
|
|
target TEXT NOT NULL,
|
|
edge_type TEXT NOT NULL,
|
|
detail TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS semantic_tags (
|
|
id INTEGER PRIMARY KEY,
|
|
entity_type TEXT NOT NULL, -- 'file', 'function', 'class', 'handler', 'event', 'config'
|
|
entity_name TEXT NOT NULL,
|
|
file TEXT NOT NULL,
|
|
line INTEGER,
|
|
tag TEXT NOT NULL,
|
|
confidence INTEGER DEFAULT 100,
|
|
evidence TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS semantic_profiles (
|
|
id INTEGER PRIMARY KEY,
|
|
entity_type TEXT NOT NULL,
|
|
entity_name TEXT NOT NULL,
|
|
file TEXT NOT NULL,
|
|
line INTEGER,
|
|
security_sensitive INTEGER DEFAULT 0,
|
|
hot_path_candidate INTEGER DEFAULT 0,
|
|
optimization_candidate INTEGER DEFAULT 0,
|
|
network_surface INTEGER DEFAULT 0,
|
|
fragile_surface INTEGER DEFAULT 0,
|
|
test_surface INTEGER DEFAULT 0,
|
|
risk_score INTEGER DEFAULT 0,
|
|
gain_score INTEGER DEFAULT 0,
|
|
evidence TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS audit_coverage (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
project TEXT,
|
|
unit_test_refs INTEGER DEFAULT 0,
|
|
has_summary INTEGER DEFAULT 0,
|
|
has_function_index INTEGER DEFAULT 0,
|
|
todo_count INTEGER DEFAULT 0,
|
|
crash_risk_count INTEGER DEFAULT 0,
|
|
null_risk_count INTEGER DEFAULT 0,
|
|
duplicate_pair_count INTEGER DEFAULT 0,
|
|
dead_method_count INTEGER DEFAULT 0,
|
|
leak_risk_score INTEGER DEFAULT 0,
|
|
semantic_risk_max INTEGER DEFAULT 0,
|
|
semantic_gain_max INTEGER DEFAULT 0,
|
|
coverage_score INTEGER DEFAULT 0,
|
|
notes TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS history_metrics (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
commit_count INTEGER DEFAULT 0,
|
|
recent_commit_count INTEGER DEFAULT 0,
|
|
unique_authors INTEGER DEFAULT 0,
|
|
bugfix_commits INTEGER DEFAULT 0,
|
|
perf_commits INTEGER DEFAULT 0,
|
|
audit_commits INTEGER DEFAULT 0,
|
|
last_commit_date TEXT,
|
|
churn_score INTEGER DEFAULT 0
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS review_queue (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
queue_type TEXT NOT NULL,
|
|
priority_score INTEGER DEFAULT 0,
|
|
rationale TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS ownership_metrics (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
primary_author TEXT,
|
|
primary_author_share INTEGER DEFAULT 0,
|
|
author_count INTEGER DEFAULT 0,
|
|
blamed_lines INTEGER DEFAULT 0,
|
|
most_recent_line_date TEXT,
|
|
bus_factor_risk INTEGER DEFAULT 0
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS test_function_map (
|
|
id INTEGER PRIMARY KEY,
|
|
test_file TEXT NOT NULL,
|
|
target_file TEXT NOT NULL,
|
|
function_name TEXT NOT NULL,
|
|
class_name TEXT,
|
|
mapping_type TEXT NOT NULL
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS call_edges (
|
|
id INTEGER PRIMARY KEY,
|
|
caller_symbol TEXT NOT NULL,
|
|
caller_file TEXT NOT NULL,
|
|
caller_project TEXT,
|
|
callee_symbol TEXT NOT NULL,
|
|
callee_file TEXT NOT NULL,
|
|
callee_project TEXT,
|
|
confidence INTEGER DEFAULT 0,
|
|
call_count INTEGER DEFAULT 1,
|
|
evidence TEXT,
|
|
UNIQUE(caller_symbol, caller_file, callee_symbol, callee_file)
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS symbol_metadata (
|
|
id INTEGER PRIMARY KEY,
|
|
symbol_name TEXT NOT NULL,
|
|
file_path TEXT NOT NULL,
|
|
project TEXT,
|
|
class_name TEXT,
|
|
summary TEXT,
|
|
semantic_tags TEXT,
|
|
hot_path INTEGER DEFAULT 0,
|
|
ct_sensitive INTEGER DEFAULT 0,
|
|
batchable INTEGER DEFAULT 0,
|
|
gpu_candidate INTEGER DEFAULT 0,
|
|
risk_level TEXT DEFAULT 'low',
|
|
review_priority INTEGER DEFAULT 0,
|
|
risk_score INTEGER DEFAULT 0,
|
|
gain_score INTEGER DEFAULT 0,
|
|
audit_coverage_score INTEGER DEFAULT 0,
|
|
change_frequency INTEGER DEFAULT 0,
|
|
line_span INTEGER DEFAULT 0,
|
|
loop_count INTEGER DEFAULT 0,
|
|
branch_count INTEGER DEFAULT 0,
|
|
caller_count INTEGER DEFAULT 0,
|
|
callee_count INTEGER DEFAULT 0,
|
|
reasons TEXT,
|
|
UNIQUE(symbol_name, file_path)
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS analysis_scores (
|
|
id INTEGER PRIMARY KEY,
|
|
symbol_name TEXT NOT NULL,
|
|
file_path TEXT NOT NULL,
|
|
hotness_score INTEGER DEFAULT 0,
|
|
complexity_score INTEGER DEFAULT 0,
|
|
fanin_score INTEGER DEFAULT 0,
|
|
fanout_score INTEGER DEFAULT 0,
|
|
optimization_score INTEGER DEFAULT 0,
|
|
gpu_score INTEGER DEFAULT 0,
|
|
ct_risk_score INTEGER DEFAULT 0,
|
|
audit_gap_score INTEGER DEFAULT 0,
|
|
perf_priority INTEGER DEFAULT 0,
|
|
safe_priority INTEGER DEFAULT 0,
|
|
overall_priority INTEGER DEFAULT 0,
|
|
reasons TEXT NOT NULL,
|
|
UNIQUE(symbol_name, file_path)
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS symbol_audit_coverage (
|
|
id INTEGER PRIMARY KEY,
|
|
symbol_name TEXT NOT NULL,
|
|
file_path TEXT NOT NULL,
|
|
covered_by_tests INTEGER DEFAULT 0,
|
|
test_count INTEGER DEFAULT 0,
|
|
mapping_types TEXT,
|
|
review_queue_types TEXT,
|
|
audit_modules TEXT,
|
|
coverage_score INTEGER DEFAULT 0,
|
|
last_status TEXT DEFAULT 'unknown',
|
|
historical_failures INTEGER DEFAULT 0,
|
|
evidence TEXT,
|
|
UNIQUE(symbol_name, file_path)
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS ai_tasks (
|
|
id INTEGER PRIMARY KEY,
|
|
task_type TEXT NOT NULL,
|
|
symbol_name TEXT NOT NULL,
|
|
file_path TEXT NOT NULL,
|
|
prompt TEXT NOT NULL,
|
|
status TEXT DEFAULT 'pending',
|
|
priority INTEGER DEFAULT 0,
|
|
created_at TEXT NOT NULL,
|
|
rationale TEXT,
|
|
UNIQUE(task_type, symbol_name, file_path)
|
|
);
|
|
|
|
CREATE VIEW IF NOT EXISTS v_bottleneck_queue AS
|
|
SELECT
|
|
a.symbol_name,
|
|
a.file_path,
|
|
a.hotness_score,
|
|
a.complexity_score,
|
|
a.fanin_score,
|
|
a.fanout_score,
|
|
a.optimization_score,
|
|
a.gpu_score,
|
|
a.ct_risk_score,
|
|
a.audit_gap_score,
|
|
a.perf_priority,
|
|
a.safe_priority,
|
|
a.overall_priority,
|
|
a.reasons,
|
|
sm.summary,
|
|
sm.semantic_tags,
|
|
sm.hot_path,
|
|
sm.ct_sensitive,
|
|
sm.batchable,
|
|
sm.gpu_candidate,
|
|
sm.risk_level,
|
|
sm.review_priority,
|
|
sm.audit_coverage_score,
|
|
sm.change_frequency
|
|
FROM analysis_scores a
|
|
LEFT JOIN symbol_metadata sm
|
|
ON sm.symbol_name = a.symbol_name AND sm.file_path = a.file_path;
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_file_summaries_file ON file_summaries(file);
|
|
CREATE INDEX IF NOT EXISTS idx_function_index_file ON function_index(file);
|
|
CREATE INDEX IF NOT EXISTS idx_function_index_name ON function_index(function_name);
|
|
CREATE INDEX IF NOT EXISTS idx_function_index_class ON function_index(class_name);
|
|
CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source);
|
|
CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target);
|
|
CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(edge_type);
|
|
CREATE INDEX IF NOT EXISTS idx_semantic_tags_tag ON semantic_tags(tag);
|
|
CREATE INDEX IF NOT EXISTS idx_semantic_tags_file ON semantic_tags(file);
|
|
CREATE INDEX IF NOT EXISTS idx_semantic_tags_entity ON semantic_tags(entity_type, entity_name);
|
|
CREATE INDEX IF NOT EXISTS idx_semantic_profiles_risk ON semantic_profiles(risk_score DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_semantic_profiles_gain ON semantic_profiles(gain_score DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_semantic_profiles_file ON semantic_profiles(file);
|
|
CREATE INDEX IF NOT EXISTS idx_audit_coverage_file ON audit_coverage(file);
|
|
CREATE INDEX IF NOT EXISTS idx_audit_coverage_score ON audit_coverage(coverage_score);
|
|
CREATE INDEX IF NOT EXISTS idx_history_metrics_file ON history_metrics(file);
|
|
CREATE INDEX IF NOT EXISTS idx_history_metrics_churn ON history_metrics(churn_score DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_review_queue_type ON review_queue(queue_type);
|
|
CREATE INDEX IF NOT EXISTS idx_review_queue_priority ON review_queue(priority_score DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_ownership_metrics_file ON ownership_metrics(file);
|
|
CREATE INDEX IF NOT EXISTS idx_test_function_map_target ON test_function_map(target_file);
|
|
CREATE INDEX IF NOT EXISTS idx_test_function_map_test ON test_function_map(test_file);
|
|
CREATE INDEX IF NOT EXISTS idx_call_edges_caller ON call_edges(caller_symbol, caller_file);
|
|
CREATE INDEX IF NOT EXISTS idx_call_edges_callee ON call_edges(callee_symbol, callee_file);
|
|
CREATE INDEX IF NOT EXISTS idx_call_edges_confidence ON call_edges(confidence DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_symbol_metadata_priority ON symbol_metadata(review_priority DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_symbol_metadata_hot ON symbol_metadata(hot_path, gpu_candidate, ct_sensitive);
|
|
CREATE INDEX IF NOT EXISTS idx_symbol_metadata_risk ON symbol_metadata(risk_score DESC, gain_score DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_analysis_scores_priority ON analysis_scores(overall_priority DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_analysis_scores_perf ON analysis_scores(perf_priority DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_symbol_audit_coverage_symbol ON symbol_audit_coverage(symbol_name, file_path);
|
|
CREATE INDEX IF NOT EXISTS idx_symbol_audit_coverage_score ON symbol_audit_coverage(coverage_score);
|
|
CREATE INDEX IF NOT EXISTS idx_ai_tasks_status_priority ON ai_tasks(status, priority DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_ai_tasks_type ON ai_tasks(task_type);
|
|
|
|
CREATE TABLE IF NOT EXISTS file_hashes (
|
|
id INTEGER PRIMARY KEY,
|
|
file_path TEXT UNIQUE NOT NULL,
|
|
content_hash TEXT NOT NULL,
|
|
mtime_ns INTEGER NOT NULL,
|
|
size_bytes INTEGER NOT NULL,
|
|
last_scan_at TEXT NOT NULL
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS function_bodies (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
function_name TEXT NOT NULL,
|
|
class_name TEXT,
|
|
start_line INTEGER NOT NULL,
|
|
end_line INTEGER NOT NULL,
|
|
body TEXT NOT NULL,
|
|
body_hash TEXT NOT NULL,
|
|
line_count INTEGER NOT NULL,
|
|
project TEXT
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_function_bodies_name ON function_bodies(function_name);
|
|
CREATE INDEX IF NOT EXISTS idx_function_bodies_file ON function_bodies(file);
|
|
|
|
CREATE TABLE IF NOT EXISTS function_summaries (
|
|
id INTEGER PRIMARY KEY,
|
|
file TEXT NOT NULL,
|
|
function_name TEXT NOT NULL,
|
|
class_name TEXT,
|
|
start_line INTEGER NOT NULL,
|
|
end_line INTEGER NOT NULL,
|
|
summary TEXT,
|
|
params TEXT,
|
|
return_type TEXT,
|
|
side_effects TEXT,
|
|
body_hash TEXT NOT NULL,
|
|
stale INTEGER DEFAULT 0,
|
|
generated_at TEXT,
|
|
generator TEXT,
|
|
UNIQUE(file, function_name, start_line)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_function_summaries_file ON function_summaries(file);
|
|
CREATE INDEX IF NOT EXISTS idx_function_summaries_name ON function_summaries(function_name);
|
|
CREATE INDEX IF NOT EXISTS idx_function_summaries_stale ON function_summaries(stale);
|
|
|
|
CREATE TABLE IF NOT EXISTS decisions (
|
|
id INTEGER PRIMARY KEY,
|
|
created_at TEXT NOT NULL,
|
|
file TEXT,
|
|
function_name TEXT,
|
|
decision TEXT NOT NULL,
|
|
rationale TEXT NOT NULL,
|
|
alternatives TEXT,
|
|
author TEXT,
|
|
tags TEXT,
|
|
status TEXT DEFAULT 'active'
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_decisions_file ON decisions(file);
|
|
CREATE INDEX IF NOT EXISTS idx_decisions_status ON decisions(status);
|
|
CREATE INDEX IF NOT EXISTS idx_decisions_tags ON decisions(tags);
|
|
"""
|
|
|
|
|
|
# ============================================================
|
|
# CONFIG SYSTEM
|
|
# ============================================================
|
|
|
|
_DEFAULT_CATEGORY_RULES = [
|
|
# Common project rules
|
|
{"match": "contains", "pattern": "Database", "category": "db", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "MySQL", "category": "db", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Query", "category": "db", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Transaction", "category": "db", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Statement", "category": "db", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Field", "category": "db", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Socket", "category": "network", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "TCP", "category": "network", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Acceptor", "category": "network", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Buffer", "category": "network", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Packet", "category": "network", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Log", "category": "logging", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Appender", "category": "logging", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Timer", "category": "util", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Util", "category": "util", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Random", "category": "util", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Byte", "category": "util", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Enc", "category": "security", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "MD5", "category": "security", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Sha", "category": "security", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "base64", "category": "security", "source_dir": "Common"},
|
|
{"match": "contains", "pattern": "Security", "category": "security", "source_dir": "Common"},
|
|
# Game project rules
|
|
{"match": "prefix", "pattern": "ai_", "category": "ai"},
|
|
{"match": "prefix", "pattern": "Player", "category": "player"},
|
|
{"match": "prefix", "pattern": "Monster", "category": "monster"},
|
|
{"match": "contains", "pattern": "Script", "category": "script"},
|
|
{"match": "prefix", "pattern": "World", "category": "world"},
|
|
{"match": "suffix", "pattern": "Def.h", "category": "definition"},
|
|
{"match": "suffix", "pattern": "Packet.h", "category": "packet"},
|
|
]
|
|
|
|
|
|
def _find_config_file():
|
|
"""Search for source_graph.toml in standard locations."""
|
|
candidates = [
|
|
SCRIPT_DIR / "source_graph.toml",
|
|
REPO_ROOT / "source_graph.toml",
|
|
]
|
|
for p in candidates:
|
|
if p.exists():
|
|
return p
|
|
return None
|
|
|
|
|
|
def _parse_toml(path):
|
|
"""Parse a TOML config file. Requires Python 3.11+ tomllib or fallback."""
|
|
if tomllib is not None:
|
|
with open(path, "rb") as f:
|
|
return tomllib.load(f)
|
|
# Minimal fallback for older Python: try json with toml-like keys
|
|
# (users on <3.11 can use JSON config instead)
|
|
json_path = path.with_suffix(".json")
|
|
if json_path.exists():
|
|
with open(json_path, "r", encoding="utf-8") as f:
|
|
return json.load(f)
|
|
print(f"[!] Python 3.11+ required for TOML config, or provide {json_path}")
|
|
sys.exit(1)
|
|
|
|
|
|
def _detect_language(directory):
|
|
"""Detect the primary language in a directory by counting files."""
|
|
lang_exts = {
|
|
"cpp": ["*.cpp", "*.h", "*.cc", "*.cxx", "*.hpp", "*.c"],
|
|
"python": ["*.py"],
|
|
"typescript": ["*.ts", "*.tsx", "*.js", "*.jsx"],
|
|
"rust": ["*.rs"],
|
|
"go": ["*.go"],
|
|
"java": ["*.java"],
|
|
"csharp": ["*.cs"],
|
|
}
|
|
counts = {}
|
|
for lang, exts in lang_exts.items():
|
|
total = 0
|
|
for ext in exts:
|
|
total += len(list(directory.glob(ext)))
|
|
# Also check one level of subdirectories
|
|
total += len(list(directory.glob(f"*/{ext}")))
|
|
if total > 0:
|
|
counts[lang] = total
|
|
if not counts:
|
|
return "cpp", ["*.cpp", "*.h"] # default fallback
|
|
best = max(counts, key=counts.get)
|
|
adapter = get_adapter(best)
|
|
return best, adapter.extensions
|
|
|
|
|
|
def _auto_detect_config():
|
|
"""Auto-detect project structure and language when no config file exists."""
|
|
skip_dirs = {
|
|
"tools", "build", "bin", "obj", "Debug", "Release", "x64", ".vs",
|
|
"packages", "UnitTests", "node_modules", "__pycache__", "target",
|
|
"dist", "out", ".git", ".idea", ".vscode", "vendor", "venv",
|
|
".env", "env", ".tox", ".mypy_cache", ".pytest_cache",
|
|
}
|
|
|
|
# First detect project-level language
|
|
lang, exts = _detect_language(REPO_ROOT)
|
|
|
|
cfg = {
|
|
"project": {"name": REPO_ROOT.name, "language": lang},
|
|
"source_dirs": [],
|
|
"external_paths": {},
|
|
"category_rules": _DEFAULT_CATEGORY_RULES if lang == "cpp" else [],
|
|
}
|
|
|
|
# Detect source directories containing files of the detected language
|
|
for subdir in sorted(REPO_ROOT.iterdir()):
|
|
if not subdir.is_dir():
|
|
continue
|
|
if subdir.name.startswith(".") or subdir.name in skip_dirs:
|
|
continue
|
|
file_count = 0
|
|
for ext in exts:
|
|
file_count += len(list(subdir.glob(ext)))
|
|
if file_count > 0:
|
|
cfg["source_dirs"].append({
|
|
"label": subdir.name,
|
|
"path": subdir.name,
|
|
"extensions": exts,
|
|
})
|
|
|
|
if not cfg["source_dirs"]:
|
|
# Check root directory itself for source files
|
|
file_count = sum(len(list(REPO_ROOT.glob(ext))) for ext in exts)
|
|
if file_count > 0:
|
|
cfg["source_dirs"].append({
|
|
"label": REPO_ROOT.name,
|
|
"path": ".",
|
|
"extensions": exts,
|
|
})
|
|
# Also check common project structures: src/, lib/, app/
|
|
for common_dir in ("src", "lib", "app", "pkg", "cmd", "internal"):
|
|
d = REPO_ROOT / common_dir
|
|
if d.exists() and d.is_dir():
|
|
file_count = sum(len(list(d.rglob(ext))) for ext in exts)
|
|
if file_count > 0:
|
|
cfg["source_dirs"].append({
|
|
"label": common_dir,
|
|
"path": common_dir,
|
|
"extensions": exts,
|
|
})
|
|
|
|
if not cfg["source_dirs"]:
|
|
cfg["source_dirs"].append({
|
|
"label": REPO_ROOT.name,
|
|
"path": ".",
|
|
"extensions": exts,
|
|
})
|
|
return cfg
|
|
|
|
|
|
def load_config():
|
|
"""Load project config and set global variables."""
|
|
global SOURCE_DIRS, CATEGORY_RULES, EXTERNAL_PATHS, SEED_DATA, CONFIG_PATH
|
|
global GAME_DIR, COMMON_DIR, RUNTIME_DATA_ROOT, RESEARCH_DIR
|
|
global LANG_ADAPTER
|
|
|
|
config_path = _find_config_file()
|
|
CONFIG_PATH = config_path
|
|
|
|
if config_path:
|
|
cfg = _parse_toml(config_path)
|
|
config_base = config_path.parent
|
|
else:
|
|
cfg = _auto_detect_config()
|
|
config_base = REPO_ROOT
|
|
|
|
# Language adapter
|
|
project_cfg = cfg.get("project", {})
|
|
language = project_cfg.get("language", "cpp")
|
|
LANG_ADAPTER = get_adapter(language)
|
|
print(f"[*] Language: {LANG_ADAPTER.name} ({language})")
|
|
|
|
# Source directories
|
|
SOURCE_DIRS = []
|
|
default_exts = LANG_ADAPTER.extensions
|
|
for sd in cfg.get("source_dirs", []):
|
|
label = sd["label"]
|
|
path = Path(sd["path"])
|
|
if not path.is_absolute():
|
|
path = REPO_ROOT / path
|
|
exts = sd.get("extensions", default_exts)
|
|
optional = sd.get("optional", False)
|
|
if path.exists() or not optional:
|
|
SOURCE_DIRS.append((label, path, exts))
|
|
|
|
# Category rules
|
|
if "category_rules" in cfg:
|
|
CATEGORY_RULES = cfg["category_rules"]
|
|
else:
|
|
CATEGORY_RULES = _DEFAULT_CATEGORY_RULES
|
|
|
|
# External paths
|
|
ext = cfg.get("external_paths", {})
|
|
EXTERNAL_PATHS = {}
|
|
for key, val in ext.items():
|
|
p = Path(val)
|
|
if not p.is_absolute():
|
|
p = REPO_ROOT / p
|
|
EXTERNAL_PATHS[key] = p
|
|
|
|
# Seed data
|
|
SEED_DATA = {}
|
|
for table in ("singletons", "player_files", "events", "ai_handlers",
|
|
"inventory_scripts", "constants", "config_files", "packet_handlers"):
|
|
if table in cfg:
|
|
SEED_DATA[table] = cfg[table]
|
|
|
|
# Legacy aliases for backward compat
|
|
for label, path, _exts in SOURCE_DIRS:
|
|
if label == "Game":
|
|
GAME_DIR = path
|
|
elif label == "Common":
|
|
COMMON_DIR = path
|
|
if "runtime_data" in EXTERNAL_PATHS:
|
|
RUNTIME_DATA_ROOT = EXTERNAL_PATHS["runtime_data"]
|
|
if "research_dir" in EXTERNAL_PATHS:
|
|
RESEARCH_DIR = EXTERNAL_PATHS["research_dir"]
|
|
|
|
|
|
def create_db(preserve_persistent=False):
|
|
"""Create fresh database with schema.
|
|
|
|
If preserve_persistent=True, keeps decisions and function_summaries tables
|
|
(marks summaries as stale instead of deleting).
|
|
"""
|
|
if DB_PATH.exists():
|
|
if preserve_persistent:
|
|
conn = sqlite3.connect(str(DB_PATH))
|
|
conn.row_factory = sqlite3.Row
|
|
# Preserve persistent tables
|
|
persistent_tables = {"decisions", "function_summaries"}
|
|
existing = {r[0] for r in conn.execute(
|
|
"SELECT name FROM sqlite_master WHERE type='table'"
|
|
).fetchall()}
|
|
# Mark summaries stale
|
|
if "function_summaries" in existing:
|
|
conn.execute("UPDATE function_summaries SET stale = 1")
|
|
conn.commit()
|
|
# Save persistent data
|
|
saved = {}
|
|
for tbl in persistent_tables:
|
|
if tbl in existing:
|
|
cur = conn.execute(f"SELECT * FROM {tbl}")
|
|
rows = cur.fetchall()
|
|
col_names = [d[0] for d in cur.description]
|
|
saved[tbl] = (col_names, rows)
|
|
conn.close()
|
|
DB_PATH.unlink()
|
|
conn = sqlite3.connect(str(DB_PATH))
|
|
conn.row_factory = sqlite3.Row
|
|
conn.executescript(SCHEMA)
|
|
# Restore persistent data
|
|
for tbl, (cols, rows) in saved.items():
|
|
if rows:
|
|
placeholders = ",".join("?" * len(cols))
|
|
col_list = ",".join(cols)
|
|
conn.executemany(
|
|
f"INSERT OR IGNORE INTO {tbl} ({col_list}) VALUES ({placeholders})",
|
|
[tuple(r) for r in rows]
|
|
)
|
|
conn.commit()
|
|
return conn
|
|
else:
|
|
DB_PATH.unlink()
|
|
conn = sqlite3.connect(str(DB_PATH))
|
|
conn.row_factory = sqlite3.Row
|
|
conn.executescript(SCHEMA)
|
|
conn.commit()
|
|
return conn
|
|
|
|
|
|
def populate_graph_metadata(conn):
|
|
"""Store graph schema/build provenance for drift checks and debugging."""
|
|
revision = None
|
|
try:
|
|
result = subprocess.run(
|
|
["git", "rev-parse", "HEAD"],
|
|
cwd=str(REPO_ROOT),
|
|
capture_output=True,
|
|
text=True,
|
|
encoding="utf-8",
|
|
errors="ignore",
|
|
check=True,
|
|
)
|
|
revision = result.stdout.strip() or None
|
|
except Exception:
|
|
revision = None
|
|
|
|
conn.execute(
|
|
"INSERT INTO graph_metadata (schema_version, extractor_version, graph_build_revision, built_at, repo_root) VALUES (?,?,?,?,?)",
|
|
(
|
|
SCHEMA_VERSION,
|
|
EXTRACTOR_VERSION,
|
|
revision,
|
|
datetime.now(timezone.utc).isoformat(),
|
|
str(REPO_ROOT),
|
|
)
|
|
)
|
|
|
|
|
|
# ============================================================
|
|
# DATA POPULATION — All known entities from graph
|
|
# ============================================================
|
|
|
|
def populate_singletons(conn):
|
|
"""Insert all 120+ singletons (Game + Common)."""
|
|
data = [
|
|
# Core Infrastructure
|
|
("sGameServer", "GameServer", "GameServer.h", "Game", "core", "Main server config and state"),
|
|
("sMain", "MainApplication", "MainApp.h", "Game", "core", "Application entry point and main loop"),
|
|
("sObjectMgr", "CObjectMgr", "ObjectManager.h", "Game", "core", "Find players/monsters by ID, character map"),
|
|
("sWorldMgr", "WorldMgr", "WorldManager.h", "Game", "core", "World data loader, map flags, exp rates"),
|
|
("sMonsterManager", "MonsterManager", "MonsterManager.h", "Game", "core", "Monster templates, skills, AI configs, respawn"),
|
|
# Inter-Server
|
|
("sAuthServer", "AuthServer", "AuthServer.h", "Game", "inter_server", "LoginServer communication, auth"),
|
|
("sConnectServer", "ConnectServer", "ConnectServer.h", "Game", "inter_server", "ConnectServer comm, channels"),
|
|
("sServerLink", "ServerLink", "ServerLink.h", "Game", "inter_server", "Inter-server communication hub"),
|
|
("sServerToServer", "ServerToServer", "ServerToServer.h", "Game", "inter_server", "Server-to-server direct messaging"),
|
|
("sPacketRecorder", "PacketRecorder", "PacketRecorder.h", "Game", "inter_server", "Packet recording and replay"),
|
|
# Player & Social
|
|
("sCharacterBase", "CharacterBaseMgr", "CharacterBase.h", "Game", "player_social", "Character data structures"),
|
|
("sGuildMgr", "CGuildMgr", "GuildMgr.h", "Game", "player_social", "Guild management"),
|
|
("sPartyMgr", "PartyMgr", "PartyMgr.h", "Game", "player_social", "Party management"),
|
|
("sGuildMatching", "GuildMatching", "GuildMatching.h", "Game", "player_social", "Guild matching system"),
|
|
("sPartyMatching", "PartyMatching", "PartyMatching.h", "Game", "player_social", "Party matching system"),
|
|
("sGenMgr", "CGenMgr", "GenMgr.h", "Game", "player_social", "Loyalty faction system"),
|
|
("sDuelMgr", "DuelMgr", "DuelMgr.h", "Game", "player_social", "Duel management"),
|
|
("sDuelBetItem", "CDuelBetIem", "DuelBetItem.h", "Game", "player_social", "Duel bet items"),
|
|
("sGuildWarMgr", "GuildWarMgr", "GuildWar.h", "Game", "player_social", "Guild wars"),
|
|
# Items & Equipment
|
|
("sItemMgr", "CItemMgr", "ItemMgr.h", "Game", "item", "Item loading, templates, management"),
|
|
("sMixMgr", "MixMgr", "MixMgr.h", "Game", "item", "Mix/craft system"),
|
|
("sJewelMix", "JewelMix", "JewelMix.h", "Game", "item", "Jewel combination"),
|
|
("sPentagramSystem", "PentagramSystem", "PentagramSystem.h", "Game", "item", "Pentagram system"),
|
|
("sSocketSystem", "SocketSystem", "SocketSystem.h", "Game", "item", "Socket item system"),
|
|
("sWingSystem", "WingSystem", "WingSystem.h", "Game", "item", "Wing system"),
|
|
("sExcellentSystem", "ExcellentSystem", "ExcellentSystem.h", "Game", "item", "Excellent option system"),
|
|
("sNewLegendarySystem", "NewLegendarySystem", "NewLegendarySystem.h", "Game", "item", "Legendary item system"),
|
|
("sNewItemBagSystem", "NewItemBagSystem", "NewItemBagSystem.h", "Game", "item", "Item bag system"),
|
|
("sItemDisassambly", "ItemDisassembleSystem", "ItemDisassembleSystem.h", "Game", "item", "Item disassemble"),
|
|
("sCashShopMgr", "CashShopMgr", "CashShop.h", "Game", "item", "Cash shop microtransactions"),
|
|
("sPetExperience", "CPetItemExperience", "Item.h", "Game", "item", "Pet item experience"),
|
|
("sDropSystem", "DropSystem", "DropSystem.h", "Game", "item", "Drop system"),
|
|
("sCustomJewelsManager", "CustomJewelsManager", "CustomJewelsManager.h", "Game", "item", "Custom jewels"),
|
|
# Skills & Combat
|
|
("sSkillMgr", "SkillMgr", "SkillMgr.h", "Game", "skill", "Skill data management"),
|
|
("sNewSkillSystem", "FiveClassSkillSystem", "FiveClassSkillSystem.h", "Game", "skill", "5th class skill system"),
|
|
("sMasteryChangeOption", "MasteryChangeOption", "MasteryChangeOption.h", "Game", "skill", "Mastery change options"),
|
|
# VIP & Progression
|
|
("sVipMgr", "CVipMgr", "VIPMgr.h", "Game", "vip", "VIP level system"),
|
|
("sResetSystemMgr", "CResetMgr", "ResetSystemMgr.h", "Game", "vip", "Reset system manager"),
|
|
("sResetSystem", "ResetSystem", "ResetSystem.h", "Game", "vip", "Reset system"),
|
|
("sDynamicExperienceMgr", "CDynamicExpMgr", "DynamicExperienceMgr.h", "Game", "vip", "Dynamic exp rates"),
|
|
("sLevelUpCompensationMgr", "LevelUpCompensationMgr", "LevelUpCompensation.h", "Game", "vip", "Level-up compensation"),
|
|
("sExpConfigs", "ExpConfigs", "ExpConfigs.h", "Game", "vip", "Exp rate configs"),
|
|
("sHuntingRecord", "HuntingRecord", "HuntingRecord.h", "Game", "vip", "Hunting records"),
|
|
("sCustomRankTitle", "CCustomRankTitle", "CustomRankTitle.h", "Game", "vip", "Custom rank titles"),
|
|
("sCustomRankServer", "CCustomRankServer", "CustomRankServer.h", "Game", "vip", "Custom rank server"),
|
|
# Quest & Event Management
|
|
("sQuestMgr", "CQuestMgr", "QuestMgr.h", "Game", "quest", "Quest data management"),
|
|
("sEventMgr", "CEventMgr", "EventManager.h", "Game", "quest", "Event scheduling engine"),
|
|
("sEventInventory", "EventInventory", "EventInventory.h", "Game", "quest", "Event inventory system"),
|
|
("sNewEventWindow", "NewEventWindow", "NewEventWindow.h", "Game", "quest", "New event window UI"),
|
|
("sMessage", "SystemMessage", "System_Message.h", "Game", "quest", "System messages"),
|
|
("sNoticeSystem", "CNoticeSystem", "NoticeSystem.h", "Game", "quest", "Notice system"),
|
|
("sRuudSystem", "RuudSystem", "RuudSystem.h", "Game", "quest", "Ruud currency system"),
|
|
("sPlayTimeEventMgr", "PlayTimeEventMgr", "PlayTimeEvent.h", "Game", "quest", "Play time events"),
|
|
("sHappyHour", "HappyHour", "HappyHour.h", "Game", "quest", "Happy hour boost events"),
|
|
("sMuPassEvent", "MuPassEvent", "MuPassEvent.h", "Game", "quest", "Battle pass system"),
|
|
# Events/Dungeons
|
|
("sBloodCastleMgr", "CBloodCastleMgr", "BloodCastle.h", "Game", "event", "Blood Castle dungeon"),
|
|
("sDevilSquareMgr", "DevilSquareMgr", "DevilSquare.h", "Game", "event", "Devil Square event"),
|
|
("sChaosCastleMgr", "CChaosCastleMgr", "ChaosCastle.h", "Game", "event", "Chaos Castle event"),
|
|
("sChaosCastleSurvivalMgr", "ChaosCastleSurvivalMgr", "ChaosCastleSurvival.h", "Game", "event", "Chaos Castle Survival"),
|
|
("sImperialFortressMgr", "ImperialFortressMgr", "ImperialFortress.h", "Game", "event", "Imperial Fortress"),
|
|
("sCastleSiege", "CCastleSiege", "CastleSiege.h", "Game", "event", "Castle Siege warfare"),
|
|
("sIllusionTemple", "IllusionTemple", "IllusionTemple.h", "Game", "event", "Illusion Temple"),
|
|
("sDoppelganger", "Doppelganger", "Doppelganger.h", "Game", "event", "Doppelganger dungeon"),
|
|
("sInvasionMgr", "InvasionMgr", "Invasion.h", "Game", "event", "Monster invasion events"),
|
|
("sCrywolf", "Crywolf", "Crywolf.h", "Game", "event", "Crywolf altar defense"),
|
|
("sKanturuMgr", "KanturuMgr", "Kanturu.h", "Game", "event", "Kanturu 3-stage tower"),
|
|
("sRaklion", "Raklion", "Raklion.h", "Game", "event", "Raklion event"),
|
|
("sDungeon", "Dungeon", "Dungeon.h", "Game", "event", "General dungeon framework"),
|
|
("sAllBossEvent", "AllBossTogetherEvent", "AllBossTogetherEvent.h", "Game", "event", "All Boss Together event"),
|
|
("sBattleSoccerMgr", "CBattleSoccerMgr", "BattleSoccer.h", "Game", "event", "Battle Soccer mini-game"),
|
|
("sProtectorOfAcheron", "ProtectorOfAcheron", "ProtectorOfAcheron.h", "Game", "event", "Acheron raid boss"),
|
|
("sTormentedSquare", "TormentedSquare", "TormentedSquare.h", "Game", "event", "Tormented Square waves"),
|
|
("sTormentedSquareSurvival", "TormentedSquareSurvival", "TormentedSquareSurvival.h", "Game", "event", "Tormented Square Survival"),
|
|
("sGremoryCase", "GremoryCase", "GremoryCase.h", "Game", "event", "Gremory Case item container"),
|
|
("sEvomon", "Evomon", "Evomon.h", "Game", "event", "Evomon evolution event"),
|
|
("sLastManStanding", "LastManStanding", "LastManStanding.h", "Game", "event", "Last Man Standing FFA"),
|
|
("sMiniBomb", "MiniBomb", "MiniBomb.h", "Game", "event", "MiniBomb mini-game"),
|
|
("sJewelBingo", "JewelBingo", "JewelBingo.h", "Game", "event", "Jewel Bingo lottery"),
|
|
("sMossMerchant", "MossMerchant", "MossMerchant.h", "Game", "event", "Moss Merchant NPC"),
|
|
("sSwampOfDarkness", "SwampOfDarkness", "SwampOfDarkness.h", "Game", "event", "Swamp of Darkness"),
|
|
("sNumericBaseball", "NumericBaseball", "NumericBaseball.h", "Game", "event", "Numeric Baseball mini-game"),
|
|
("sLabyrinthDimensions", "LabyrinthDimensions", "LabyrinthDimensions.h", "Game", "event", "Labyrinth of Dimensions"),
|
|
("sNixiesLake", "NixiesLake", "NixiesLake.h", "Game", "event", "Nixies Lake dungeon"),
|
|
("sWorldBoss", "WorldBoss", "WorldBoss.h", "Game", "event", "Open-world raid bosses"),
|
|
("sArkaWar", "ArkaWar", "ArkaWar.h", "Game", "event", "Arka War PvP faction war"),
|
|
("sScramble", "Scramble", "Scramble.h", "Game", "event", "Scramble box collection"),
|
|
("sCastleDeep", "CastleDeep", "CastleDeep.h", "Game", "event", "Castle Deep multi-floor dungeon"),
|
|
# Misc Game singletons
|
|
("sTeleport", "TeleportManager", "TeleportManager.h", "Game", "misc", "Teleport/gate system"),
|
|
("sShopMgr", "ShopMgr", "ShopMgr.h", "Game", "misc", "NPC shop management"),
|
|
("sFormulaMgr", "FormulaMgr", "FormulaData.h", "Game", "misc", "Damage/exp formulas"),
|
|
("sCommandMgr", "CommandMgr", "CommandMgr.h", "Game", "misc", "Admin/GM commands"),
|
|
("sParticleMgr", "ParticleMgr", "ParticleMgr.h", "Game", "misc", "Particle effects"),
|
|
("sMiniMap", "MiniMap", "MiniMap.h", "Game", "misc", "Mini-map system"),
|
|
("sSummonScroll", "SummonScroll", "SummonScroll.h", "Game", "misc", "Summon scroll system"),
|
|
("sArtifactmMgr", "ArtifactMgr", "ArtifactMgr.h", "Game", "misc", "Artifact system"),
|
|
("sBlessingBoxMgr", "BlessingBoxMgr", "BlessingBox.h", "Game", "misc", "Blessing box rewards"),
|
|
("sHelperPlusMgr", "HelperPlusMgr", "HelperPlusMgr.h", "Game", "misc", "Helper Plus auto-hunt zones"),
|
|
("sMonsterSoul", "MonsterSoul", "MonsterSoul.h", "Game", "misc", "Monster soul converter"),
|
|
("sScriptAI", "ScriptAIMgr", "ScriptAI.h", "Game", "misc", "Script-based AI"),
|
|
("sMiningSystem", "MiningSystem", "MiningSystem.h", "Game", "misc", "Mining/gathering system"),
|
|
("sMuRoomy", "MuRoomy", "MuRoomy.h", "Game", "misc", "Mu Room event"),
|
|
("sRegExMngr", "RegExMngr", "RegExMngr.h", "Game", "misc", "Regex manager"),
|
|
("sHttpRequest", "MyHttpRequest", "MyHttpRequest.h", "Game", "misc", "HTTP request handler"),
|
|
("sDiscord", "Discord", "Discord.h", "Game", "misc", "Discord webhook integration"),
|
|
("sScriptLoader", "CScriptLoader", "CScriptLoader.h", "Game", "misc", "Script file loader"),
|
|
("sClientLuaMgr", "ClientLuaMgr", "ClientLuaMgr.h", "Game", "misc", "Client Lua bridge"),
|
|
("sPath", "CPath", "CPath.h", "Game", "misc", "File path manager"),
|
|
("sSGlobals", "GSGlobalObjects", "GSGlobalObjects.h", "Game", "misc", "Global objects container"),
|
|
("sNewExpRecoverySystem", "NewExpRecoverySystem", "NewExpRecoverySystem.h", "Game", "misc", "Exp recovery system"),
|
|
("sDungeonRace", "DungeonRace", "DungeonRace.h", "Game", "misc", "Dungeon race event"),
|
|
("sLosttowerRace", "LosttowerRace", "LosttowerRace.h", "Game", "misc", "Lost Tower race"),
|
|
("sPartyUpdateQueue", "PartyUpdateQueue", "GamePCH.h", "Game", "misc", "Party update queue"),
|
|
# Common singletons
|
|
("sLog", "Log", "Log.h", "Common", "common", "Async logging system"),
|
|
("sEncDec", "EncodeDecode", "EncDec.h", "Common", "common", "XOR packet encryption"),
|
|
("sLargeRandom", "CLargeRandom", "LargeRandom.h", "Common", "common", "MT19937 RNG"),
|
|
("sSecurity", "Security", "Security.h", "Common", "common", "MAC + system security"),
|
|
("sCustomUtil", "CCustomUtil", "Util.h", "Common", "common", "Utility functions"),
|
|
("sOpcodeMgr", "OpcodeMgr", "CustomPacket.h", "Common", "common", "Opcode management"),
|
|
("sMiniDump", "MiniDump", "MiniDump.h", "Common", "common", "Crash dump generation"),
|
|
("sConfig", "Config", "Config.h", "Common", "common", "INI config reader"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO singletons (macro, class_name, header, project, category, description) VALUES (?,?,?,?,?,?)",
|
|
data
|
|
)
|
|
|
|
|
|
def populate_player_files(conn):
|
|
"""Insert Player partial file mapping."""
|
|
data = [
|
|
("Player.cpp", "core", "Player(), ~Player(), protocol_core(), PlayerSaveTransaction", "Core lifecycle, protocol dispatch, save transaction"),
|
|
("PlayerDB.cpp", "database", "LoadDBNew(), LoadDBInfoNew(), LoadDBItemNew(), LoadDBSkillNew()", "Database loading (~30 methods)"),
|
|
("PlayerSocket.cpp", "network", "sendPacket(), SendPacket(), CloseSocket(), kick()", "Network I/O"),
|
|
("PlayerSkillUse.cpp", "combat", "NormalAttack(), NormalMagicAttack()", "Combat/skill execution"),
|
|
("PlayerCharacter.cpp", "init", "ClearCharacter()", "Character init/reset"),
|
|
("PlayerChat.cpp", "chat", "ChatRequest(), ChatProcess()", "Chat routing"),
|
|
("PlayerTrade.cpp", "trade", "TradeRequest(), TradeRequestAnswer(), TradeBegin()", "Player-to-player trade"),
|
|
("PlayerParty.cpp", "party", "PartyRequest()", "Party system"),
|
|
("PlayerGuild.cpp", "guild", "GuildCreateRequest(), GuildJoinRequest/Result()", "Guild operations"),
|
|
("PlayerDuel.cpp", "duel", "DuelRequest(), DuelRequestAnswer()", "PVP duels"),
|
|
("PlayerPVP.cpp", "pvp", "AttackAllowedToPlayer()", "PVP rules (29+ world checks)"),
|
|
("PlayerFriend.cpp", "friend", "IsFriend(), AddFriend(), FriendAddRequest()", "Friends list (max 50)"),
|
|
("PlayerMix.cpp", "mix", "ChaosMixButton(), HarmonyMixConfirmOption()", "Crafting/alchemy"),
|
|
("PlayerCashShop.cpp", "cashShop", "CashShopSendPath(), CashShopGetItem()", "Microtransactions"),
|
|
("PlayerHelper.cpp", "helper", "HelperSendSettings(), HandleHelperPlusRun()", "Auto-hunt helper + HelperPlus"),
|
|
("PlayerDarkSpirit.cpp", "darkSpirit", "DarkSpiritCalculate(), DarkSpiritRun()", "Dark Lord pet AI"),
|
|
("PlayerGen.cpp", "gen", "GenJoinRequest(), GenJoinResult()", "Loyalty factions"),
|
|
("PlayerArtifact.cpp", "artifact", "ArtifactInsertIn(), GetFullsetStatus()", "Artifact equipment"),
|
|
("PlayerInterface.cpp", "interface", "InterfaceSharedCheck(), TransactionSerialCheck()", "UI window state"),
|
|
("PlayerSkillTree.cpp", "skillTree", "MasterSendStatus(), MasterSkillPointAdd()", "Master skill tree"),
|
|
("PlayerMajesticSkillTree.cpp", "majestic", "MajesticSkillTreeSend()", "Majestic (4th class) skills"),
|
|
("PlayerQuestEvolution.cpp", "questEvo", "QuestEvolutionUpdateState()", "Class evolution quests"),
|
|
("PlayerQuestGuided.cpp", "questGuided", "QuestGuidedSend(), QuestGuidedMonsterKill()", "Tutorial quests"),
|
|
("PlayerQuestMU.cpp", "questMU", "GetQuestMU(), AddQuestMU()", "Open-world quests"),
|
|
("PlayerPersonalStore.cpp", "personalStore", "PersonalStoreClose(), UpdatePersonalStore()", "Offline shop"),
|
|
("PlayerMuPassEvent.cpp", "muPass", "EnableMuPassEvent(), SendMuPassEventData()", "Battle pass"),
|
|
("PlayerStatFruit.cpp", "statFruit", "UsePlusStatFruit(), UseStatFruitResult()", "Stat fruits"),
|
|
("PlayerWingMixSystem.cpp", "wingMix", "OpenWingMixWindow(), GrantWingOptions()", "Wing socket upgrades"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO player_files (file, domain, key_methods, description) VALUES (?,?,?,?)",
|
|
data
|
|
)
|
|
|
|
|
|
def populate_events(conn):
|
|
"""Insert all event/dungeon systems."""
|
|
data = [
|
|
("AllBossTogether", "sAllBossEvent", "AllBossTogetherEvent.cpp", "AllBossTogetherEvent.h", None, "ai_all_boss_together.cpp", "4 bosses simultaneous spawn"),
|
|
("ArkaWar", "sArkaWar", "ArkaWar.cpp", "ArkaWar.h", None, "ai_arka_war.cpp", "PvP faction war"),
|
|
("BattleSoccer", "sBattleSoccerMgr", "BattleSoccer.cpp", "BattleSoccer.h", "BattleSoccerDef.h", "ai_soccer_ball.cpp", "Soccer mini-game"),
|
|
("BloodCastle", "sBloodCastleMgr", "BloodCastle.cpp", "BloodCastle.h", "BloodCastleDef.h", "ai_blood_castle.cpp", "Gate-Trap-Boss dungeon"),
|
|
("CastleDeep", "sCastleDeep", "CastleDeep.cpp", "CastleDeep.h", None, "ai_castle_deep.cpp", "Multi-floor dungeon"),
|
|
("CastleSiege", "sCastleSiege", "CastleSiege.cpp", "CastleSiege.h", "CastleSiegeDef.h", "ai_castle_siege.cpp", "Guild siege warfare"),
|
|
("ChaosCastle", "sChaosCastleMgr", "ChaosCastle.cpp", "ChaosCastle.h", "ChaosCastleDef.h", "ai_chaos_castle.cpp", "Kill-count event"),
|
|
("ChaosCastleSurvival", "sChaosCastleSurvivalMgr", "ChaosCastleSurvival.cpp", "ChaosCastleSurvival.h", None, None, "Survival waves"),
|
|
("Crywolf", "sCrywolf", "Crywolf.cpp", "Crywolf.h", "CrywolfDef.h", "ai_crywolf.cpp", "Altar defense event"),
|
|
("DevilSquare", "sDevilSquareMgr", "DevilSquare.cpp", "DevilSquare.h", "DevilSquareDef.h", "ai_devil_square.cpp", "Kill-counter event"),
|
|
("Doppelganger", "sDoppelganger", "Doppelganger.cpp", "Doppelganger.h", None, "ai_doppelganger.cpp", "Clone boss dungeon"),
|
|
("Dungeon", "sDungeon", "Dungeon.cpp", "Dungeon.h", "DungeonDef.h", None, "General dungeon framework"),
|
|
("DungeonInstance", None, "DungeonInstance.cpp", "DungeonInstance.h", None, "ai_instanced_dungeon.cpp", "Private party instances"),
|
|
("DungeonRace", "sDungeonRace", "DungeonRace.cpp", "DungeonRace.h", None, None, "Timed race event"),
|
|
("Evomon", "sEvomon", "Evomon.cpp", "Evomon.h", None, "ai_evomon.cpp", "Evolution boss event"),
|
|
("HappyHour", "sHappyHour", "HappyHour.cpp", "HappyHour.h", None, None, "Exp/Zen/Drop boost"),
|
|
("IllusionTemple", "sIllusionTemple", "IllusionTemple.cpp", "IllusionTemple.h", "IllusionTempleDef.h", None, "Puzzle dungeon"),
|
|
("ImperialFortress", "sImperialFortressMgr", "ImperialFortress.cpp", "ImperialFortress.h", "ImperialFortressDef.h", "ai_imperial_fortress.cpp", "5-zone fortress"),
|
|
("Invasion", "sInvasionMgr", "Invasion.cpp", "Invasion.h", None, "ai_invasion.cpp", "Random monster invasion"),
|
|
("JewelBingo", "sJewelBingo", "JewelBingo.cpp", "JewelBingo.h", None, None, "Jewel lottery game"),
|
|
("Kanturu", "sKanturuMgr", "Kanturu.cpp", "Kanturu.h", "KanturuDef.h", "ai_kanturu.cpp", "3-stage tower event"),
|
|
("LabyrinthDimensions", "sLabyrinthDimensions", "LabyrinthDimensions.cpp", "LabyrinthDimensions.h", "LabyrinthDimensionsDef.h", "ai_labyrinth_of_dimensions.cpp", "Chaos maze event"),
|
|
("LastManStanding", "sLastManStanding", "LastManStanding.cpp", "LastManStanding.h", None, None, "Battle royale FFA"),
|
|
("LosttowerRace", "sLosttowerRace", "LosttowerRace.cpp", "LosttowerRace.h", None, None, "Lost Tower ascent"),
|
|
("MiniBomb", "sMiniBomb", "MiniBomb.cpp", "MiniBomb.h", None, None, "Bomb mini-game"),
|
|
("MiningSystem", "sMiningSystem", "MiningSystem.cpp", "MiningSystem.h", None, None, "Item mining/gathering"),
|
|
("MossMerchant", "sMossMerchant", "MossMerchant.cpp", "MossMerchant.h", None, "ai_moss_merchant.cpp", "Rare item exchange NPC"),
|
|
("MuRoomy", "sMuRoomy", "MuRoomy.cpp", "MuRoomy.h", None, None, "Room event mini-game"),
|
|
("NixiesLake", "sNixiesLake", "NixiesLake.cpp", "NixiesLake.h", None, "ai_nixies_lake.cpp", "Water hazard dungeon"),
|
|
("ProtectorOfAcheron", "sProtectorOfAcheron", "ProtectorOfAcheron.cpp", "ProtectorOfAcheron.h", None, "ai_protector_of_acheron.cpp", "Raid boss"),
|
|
("Scramble", "sScramble", "Scramble.cpp", "Scramble.h", "ScrambleDef.h", None, "Box collection event"),
|
|
("SwampOfDarkness", "sSwampOfDarkness", "SwampOfDarkness.cpp", "SwampOfDarkness.h", None, "ai_swamp_of_darkness.cpp", "Poison/trap dungeon"),
|
|
("TormentedSquare", "sTormentedSquare", "TormentedSquare.cpp", "TormentedSquare.h", "TormentedSquareDef.h", "ai_tormented_square.cpp", "Wave dungeon"),
|
|
("TormentedSquareSurvival", "sTormentedSquareSurvival", "TormentedSquareSurvival.cpp", "TormentedSquareSurvival.h", None, None, "Survival mode"),
|
|
("WorldBoss", "sWorldBoss", "WorldBoss.cpp", "WorldBoss.h", None, "ai_world_boss.cpp", "Open-world raid bosses"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO events (name, singleton_macro, cpp_file, header_file, def_file, ai_file, description) VALUES (?,?,?,?,?,?,?)",
|
|
data
|
|
)
|
|
|
|
|
|
def populate_ai_handlers(conn):
|
|
"""Insert AI handler files."""
|
|
data = [
|
|
("ai_all_boss_together.cpp", "AllBossTogether", "All Boss Together event mob AI"),
|
|
("ai_arka_war.cpp", "ArkaWar", "Arka War event AI"),
|
|
("ai_blood_castle.cpp", "BloodCastle", "Blood Castle gates/statues/bosses"),
|
|
("ai_castle_deep.cpp", "CastleDeep", "Castle Deep instance mobs"),
|
|
("ai_castle_siege.cpp", "CastleSiege", "Castle Siege NPCs & defense"),
|
|
("ai_champion.cpp", "Champion", "Champion/boss monster specials"),
|
|
("ai_chaos_castle.cpp", "ChaosCastle", "Chaos Castle mobs"),
|
|
("ai_crywolf.cpp", "Crywolf", "Crywolf altar guardians"),
|
|
("ai_custom_boss.cpp", "CustomBoss", "Custom boss encounters"),
|
|
("ai_devil_square.cpp", "DevilSquare", "Devil Square spawns"),
|
|
("ai_doppelganger.cpp", "Doppelganger", "Doppelganger dungeon mobs"),
|
|
("ai_evomon.cpp", "Evomon", "Evomon evolution bosses"),
|
|
("ai_ferea.cpp", "Ferea", "Ferea raid boss (Gods of Darkness)"),
|
|
("ai_ground_darkness.cpp", "GroundDarkness", "Swamp of Darkness mobs"),
|
|
("ai_guard.cpp", "Guard", "Guard NPCs (gates, turrets)"),
|
|
("ai_imperial_fortress.cpp", "ImperialFortress", "Imperial Fortress traps & mobs"),
|
|
("ai_instanced_dungeon.cpp", "InstancedDungeon", "Dungeon instance spawning"),
|
|
("ai_invasion.cpp", "Invasion", "Invasion event mobs"),
|
|
("ai_kalima_gate.cpp", "KalimaGate", "Kalima gate guardians"),
|
|
("ai_kanturu.cpp", "Kanturu", "Kanturu event enemies"),
|
|
("ai_kundun.cpp", "Kundun", "Kundun final boss mechanics"),
|
|
("ai_labyrinth_of_dimensions.cpp", "LabyrinthDimensions", "Labyrinth maze mobs"),
|
|
("ai_majestic_debuff.cpp", "MajesticDebuff", "Majestic skill debuffs"),
|
|
("ai_medusa.cpp", "Medusa", "Medusa raid boss"),
|
|
("ai_moss_merchant.cpp", "MossMerchant", "Moss Merchant NPC AI"),
|
|
("ai_moving_npc.cpp", "MovingNPC", "NPCs with patrol routes"),
|
|
("ai_nars.cpp", "Nars", "Nars vendor AI"),
|
|
("ai_nixies_lake.cpp", "NixiesLake", "Nixie's Lake mobs"),
|
|
("ai_personal_merchant.cpp", "PersonalMerchant", "Personal store NPCs"),
|
|
("ai_protector_of_acheron.cpp", "ProtectorOfAcheron", "Acheron raid boss"),
|
|
("ai_quest.cpp", "Quest", "Quest-specific mob behaviors"),
|
|
("ai_raklion.cpp", "Raklion", "Raklion event mobs"),
|
|
("ai_soccer_ball.cpp", "SoccerBall", "Battle Soccer ball physics"),
|
|
("ai_special_map.cpp", "SpecialMap", "Special map mobs"),
|
|
("ai_summon.cpp", "Summon", "Summoned creature AI"),
|
|
("ai_summoner_debuff.cpp", "SummonerDebuff", "Summoner debuff mechanics"),
|
|
("ai_summon_player.cpp", "SummonPlayer", "Player summon (DL) AI"),
|
|
("ai_swamp_of_darkness.cpp", "SwampOfDarkness", "Swamp water/traps"),
|
|
("ai_tormented_square.cpp", "TormentedSquare", "Tormented Square wave mobs"),
|
|
("ai_trap.cpp", "Trap", "Environmental trap triggers"),
|
|
("ai_uruk_mountain.cpp", "UrukMountain", "Uruk Mountain mobs"),
|
|
("ai_world_boss.cpp", "WorldBoss", "World Boss AI"),
|
|
("ai_world_boss_new.cpp", "WorldBossNew", "New Gen World Boss AI"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO ai_handlers (file, target, description) VALUES (?,?,?)",
|
|
data
|
|
)
|
|
|
|
|
|
def populate_inventory_scripts(conn):
|
|
"""Insert inventory/script files."""
|
|
data = [
|
|
("InventoryScript.cpp", "Main player inventory: item grid, wear slots"),
|
|
("WarehouseScript.cpp", "Bank warehouse multi-tab storage"),
|
|
("EventInventoryScript.cpp", "Event temporary bags"),
|
|
("ArtifactInventoryScript.cpp", "Artifact bag slots"),
|
|
("ExpRecoveryInventoryScript.cpp", "Exp recovery items"),
|
|
("GremoryCaseScript.cpp", "Gremory Case container"),
|
|
("MuunScript.cpp", "Muun pet inventory"),
|
|
("SkillBookInventoryScript.cpp", "Skill book collection"),
|
|
("WingCoreInventoryScript.cpp", "Wing core container"),
|
|
("PersonalStoreScript.cpp", "Player shop display"),
|
|
("NPCSellScript.cpp", "NPC shop layout"),
|
|
("StoreScript.cpp", "Base storage class (parent)"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO inventory_scripts (file, description) VALUES (?,?)",
|
|
data
|
|
)
|
|
|
|
|
|
def populate_constants(conn):
|
|
"""Insert key constants and defines."""
|
|
data = [
|
|
# CommonDef.h
|
|
("MAX_VIEWPORT", "120", "CommonDef.h", "limits", "Max viewport objects"),
|
|
("normal_inventory_size", "64", "CommonDef.h", "limits", "Normal inventory slots"),
|
|
("inventory_size", "239", "CommonDef.h", "limits", "Total inventory size"),
|
|
("SkillBookInventorySize", "540", "CommonDef.h", "limits", "Skill book inventory"),
|
|
("EVENT_INVENTORY_SIZE", "32", "CommonDef.h", "limits", "Event inventory size"),
|
|
# Common.h / Define.h
|
|
("MAX_BUFFER_SIZE", "524288", "Common.h", "limits", "Network I/O buffer (512KB)"),
|
|
("MAX_QUERY_LEN", "65536", "Common.h", "limits", "Single SQL query size (64KB)"),
|
|
("MAX_CHARACTER_LENGTH", "10", "Common.h", "limits", "Character name max"),
|
|
("MAX_ACCOUNT_LENGTH", "10", "Common.h", "limits", "Account name max"),
|
|
("MAX_GUILD_MEMBER", "80", "Common.h", "limits", "Guild size cap"),
|
|
("MAX_CHARACTER_PER_ACCOUNT", "15", "Common.h", "limits", "Characters per account"),
|
|
("MAX_MULTI_WAREHOUSE", "5", "Common.h", "limits", "Warehouse slots (normal)"),
|
|
("MAX_SERVER_PER_GROUP", "20", "Common.h", "limits", "Channels per server group"),
|
|
# BuffDef.h
|
|
("MAX_BUFF", "32", "BuffDef.h", "limits", "Max active buffs"),
|
|
# PlayerDef.h
|
|
("FRIEND_MAX", "50", "PlayerDef.h", "limits", "Max friends"),
|
|
("MAIL_MAX", "150", "PlayerDef.h", "limits", "Max mails"),
|
|
("MAX_SKILL", "60", "SkillDef.h", "limits", "Playable skill slots"),
|
|
# Key buff IDs
|
|
("BUFF_GREATER_ATTACK", "1", "BuffDef.h", "buff", "Greater attack buff"),
|
|
("BUFF_GREATER_DEFENSE", "2", "BuffDef.h", "buff", "Greater defense buff"),
|
|
("BUFF_MANA_SHIELD", "4", "BuffDef.h", "buff", "Mana shield"),
|
|
("BUFF_POISON", "55", "BuffDef.h", "buff", "Poison debuff"),
|
|
("BUFF_ICE", "56", "BuffDef.h", "buff", "Ice debuff"),
|
|
("BUFF_STUN", "61", "BuffDef.h", "buff", "Stun debuff"),
|
|
# World IDs
|
|
("WORLD_LORENCIA", "0", "WorldDef.h", "world", "Lorencia"),
|
|
("WORLD_DUNGEON", "1", "WorldDef.h", "world", "Dungeon"),
|
|
("WORLD_DEVIAS", "2", "WorldDef.h", "world", "Devias"),
|
|
("WORLD_NORIA", "3", "WorldDef.h", "world", "Noria"),
|
|
("WORLD_LOSTTOWER", "4", "WorldDef.h", "world", "Lost Tower"),
|
|
("WORLD_ATLANS", "7", "WorldDef.h", "world", "Atlans"),
|
|
("WORLD_TARKAN", "8", "WorldDef.h", "world", "Tarkan"),
|
|
("WORLD_DEVIL_SQUARE", "9", "WorldDef.h", "world", "Devil Square"),
|
|
("WORLD_ICARUS", "10", "WorldDef.h", "world", "Icarus"),
|
|
("WORLD_BLOOD_CASTLE_1", "11", "WorldDef.h", "world", "Blood Castle 1"),
|
|
("WORLD_CASTLE_SIEGE", "30", "WorldDef.h", "world", "Castle Siege"),
|
|
("WORLD_CRYWOLF", "34", "WorldDef.h", "world", "Crywolf"),
|
|
("WORLD_AIDA", "33", "WorldDef.h", "world", "Aida"),
|
|
("WORLD_ELBELAND", "51", "WorldDef.h", "world", "Elbeland"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO constants (name, value, header, category, description) VALUES (?,?,?,?,?)",
|
|
data
|
|
)
|
|
|
|
|
|
def populate_config_files(conn):
|
|
"""Insert config file paths."""
|
|
data = [
|
|
("ServerMap.xml", "Server topology, ports, default spawn"),
|
|
("FilterText.xml", "Profanity filter"),
|
|
("Notice.xml", "Server notices"),
|
|
("GoblinPoint.xml", "Goblin Point level restrictions"),
|
|
("Commands/Commands.xml", "Admin/GM commands"),
|
|
("Commands/LuaCommands.xml", "Lua commands"),
|
|
("Lang/LangBase.xml", "Server language strings"),
|
|
("Lang/ClientTexts.xml", "Client text strings"),
|
|
("FormulaData.xml", "Damage/exp formulas"),
|
|
("World/WorldTemplate.xml", "World definitions & properties"),
|
|
("World/WorldAI.xml", "World AI path data"),
|
|
("World/WorldBuff.xml", "World-specific buffs"),
|
|
("World/WorldAreaRestriction.xml", "Area access rules"),
|
|
("World/WorldAttribute.xml", "Map tile attributes"),
|
|
("World/WorldExpParty.xml", "Party exp bonuses"),
|
|
("Item/ItemList.xml", "Item database"),
|
|
("Item/ItemDecomposition.xml", "Decomposition recipes"),
|
|
("Artifact/ArtifactSystem.xml", "Artifact mechanics"),
|
|
("EventData/HelperPlus.xml", "Helper Plus zones/spots"),
|
|
("game_common.conf", "Main server INI config"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO config_files (path, description) VALUES (?,?)",
|
|
data
|
|
)
|
|
|
|
|
|
def populate_packet_handlers(conn):
|
|
"""Insert known packet handler mappings."""
|
|
# Client handlers (Player::protocol_core)
|
|
client = [
|
|
("HEADCODE_ACCOUNT_DATA_IN", 67, "0x43", "LoginRequest", "Player.cpp", "client", "account", "Account login"),
|
|
("HEADCODE_MISC_CHARACTER_DATA_IN", 66, "0x42", "CharacterCreate/Delete/Select", "Player.cpp", "client", "account", "Character management"),
|
|
("HEADCODE_MOVE_IN", 215, "0xD7", "CharacterMove", "Player.cpp", "client", "movement", "Player movement"),
|
|
("HEADCODE_POSITION_SET_IN", 16, "0x10", "PositionSet", "Player.cpp", "client", "movement", "Position sync"),
|
|
("HEADCODE_ATTACK_NORMAL_IN", 223, "0xDF", "NormalAttack", "Player.cpp", "client", "combat", "Normal attack"),
|
|
("HEADCODE_NORMAL_MAGIC_ATTACK_IN", 52, "0x34", "NormalMagicAttack", "Player.cpp", "client", "combat", "Magic attack"),
|
|
("HEADCODE_DURATION_MAGIC_ATTACK_IN", 30, "0x1E", "DurationMagicAttack", "Player.cpp", "client", "combat", "Duration skill"),
|
|
("HEADCODE_ATTACK_MULTI_TARGET_IN", 211, "0xD3", "MultiTargetMagicAttack", "Player.cpp", "client", "combat", "Multi-target attack"),
|
|
("HEADCODE_CANCEL_MAGIC_IN", 27, "0x1B", "MagicCancel", "Player.cpp", "client", "combat", "Cancel magic"),
|
|
("HEADCODE_ITEM_GET_IN", 178, "0xB2", "ItemGet", "Player.cpp", "client", "item", "Pick up item"),
|
|
("HEADCODE_ITEM_DROP_IN", 134, "0x86", "ItemDrop", "Player.cpp", "client", "item", "Drop item"),
|
|
("HEADCODE_ITEM_MOVE_IN", 97, "0x61", "ItemMove", "Player.cpp", "client", "item", "Move item"),
|
|
("HEADCODE_ITEM_USE_IN", 150, "0x96", "ItemUse", "Player.cpp", "client", "item", "Use item"),
|
|
("HEADCODE_CHAT_IN", 114, "0x72", "ChatRequest", "Player.cpp", "client", "chat", "Chat message"),
|
|
("HEADCODE_WHISPER_IN", 60, "0x3C", "WhisperRequest", "Player.cpp", "client", "chat", "Whisper message"),
|
|
("HEADCODE_TRADE_REQUEST_IN", 75, "0x4B", "TradeRequest", "Player.cpp", "client", "trade", "Trade request"),
|
|
("HEADCODE_NPC_TALK_IN", 145, "0x91", "TalkToNpc", "Player.cpp", "client", "npc", "NPC interaction"),
|
|
("HEADCODE_CLOSE_INTERFACE_IN", 131, "0x83", "CloseInterface", "Player.cpp", "client", "npc", "Close NPC UI"),
|
|
("HEADCODE_PARTY_REQUEST_IN", 36, "0x24", "PartyRequest", "Player.cpp", "client", "party", "Party request"),
|
|
("HEADCODE_TIME_CHECK_IN", 50, "0x32", "Ping", "Player.cpp", "client", "misc", "Client keepalive"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO packet_handlers (headcode_name, headcode_value, headcode_hex, handler_method, source_file, handler_type, category, description) VALUES (?,?,?,?,?,?,?,?)",
|
|
client
|
|
)
|
|
|
|
# Inter-server handlers (ServerLink)
|
|
interserver = [
|
|
("HEADCODE_SERVER_LINK_ON_CONNECT", 0, "0x00", "HandleHeadcodeOnConnect", "ServerLink.cpp", "inter_server", "connection", "Handshake"),
|
|
("HEADCODE_SERVER_LINK_GUILD_CHAT", 1, "0x01", "HandleHeadcodeGuildChat", "ServerLink.cpp", "inter_server", "guild", "Guild chat sync"),
|
|
("HEADCODE_SERVER_LINK_ALLIANCE_CHAT", 2, "0x02", "HandleAllianceChat", "ServerLink.cpp", "inter_server", "guild", "Alliance chat sync"),
|
|
("HEADCODE_SERVER_LINK_GUILD_ADD", 7, "0x07", "GuildCreateResult", "ServerLink.cpp", "inter_server", "guild", "Guild create result"),
|
|
("HEADCODE_SERVER_LINK_GUILD_REMOVE", 8, "0x08", "GuildDeleteResult", "ServerLink.cpp", "inter_server", "guild", "Guild delete result"),
|
|
("HEADCODE_SERVER_LINK_CHARACTER_ON_OFF", 25, "0x19", "CharacterOnOff", "ServerLink.cpp", "inter_server", "character", "Online/offline status"),
|
|
("HEADCODE_SERVER_LINK_PARTY_DESTROY", 57, "0x39", "CharacterPartyDestroyHandler", "ServerLink.cpp", "inter_server", "party", "Party destroy sync"),
|
|
("HEADCODE_SERVER_LINK_PARTY_DELETE_MEMBER", 58, "0x3A", "CharacterPartyDeleteMemberHandler", "ServerLink.cpp", "inter_server", "party", "Party member delete"),
|
|
("HEADCODE_SERVER_LINK_EVENT_NOTIFICATION", 45, "0x2D", "EventNotification", "ServerLink.cpp", "inter_server", "event", "Event notification"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO packet_handlers (headcode_name, headcode_value, headcode_hex, handler_method, source_file, handler_type, category, description) VALUES (?,?,?,?,?,?,?,?)",
|
|
interserver
|
|
)
|
|
|
|
# Login server handlers (AuthServer)
|
|
login = [
|
|
("HEADCODE_LOGIN_SERVER_CONNECT", 0, "0x00", "HandleHeadcodeOnConnect", "AuthServer.cpp", "login", "connection", "LoginServer handshake"),
|
|
("HEADCODE_LOGIN_SERVER_ACCOUNT_LOGIN", 3, "0x03", "PlayerLoginResult", "AuthServer.cpp", "login", "account", "Login result"),
|
|
("HEADCODE_LOGIN_SERVER_ACCOUNT_SERVER_MOVE", 5, "0x05", "PlayerServerMoveResult", "AuthServer.cpp", "login", "account", "Server move result"),
|
|
("HEADCODE_LOGIN_SERVER_ACCOUNT_SERVER_AUTH", 6, "0x06", "PlayerServerMoveAuthResult", "AuthServer.cpp", "login", "account", "Server move auth"),
|
|
("HEADCODE_LOGIN_SERVER_ACCOUNT_KICK", 7, "0x07", "PlayerAccountKick", "AuthServer.cpp", "login", "account", "Kick account"),
|
|
("HEADCODE_LOGIN_SERVER_COMPLETE_BAN", 9, "0x09", "AccountCompleteBan", "AuthServer.cpp", "login", "account", "Complete ban"),
|
|
("HEADCODE_LOGIN_SERVER_ACCOUNT_OFFLINE_LOGIN", 14, "0x0E", "PlayerOfflineLoginResult", "AuthServer.cpp", "login", "account", "Offline login"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO packet_handlers (headcode_name, headcode_value, headcode_hex, handler_method, source_file, handler_type, category, description) VALUES (?,?,?,?,?,?,?,?)",
|
|
login
|
|
)
|
|
|
|
# ConnectServer handlers
|
|
connect = [
|
|
("HEADCODE_CONNECT_SERVER_CONNECT", 0, "0x00", "HandleHeadcodeOnConnect", "ConnectServer.cpp", "connect", "connection", "ConnectServer handshake"),
|
|
("HEADCODE_CONNECT_SERVER_FLAG", 2, "0x02", "HandleHeadcodeFlag", "ConnectServer.cpp", "connect", "connection", "Server flag update"),
|
|
("HEADCODE_CONNECT_SERVER_CHANNELS", 3, "0x03", "HandleHeadcodeChannel", "ConnectServer.cpp", "connect", "connection", "Channel list update"),
|
|
]
|
|
conn.executemany(
|
|
"INSERT INTO packet_handlers (headcode_name, headcode_value, headcode_hex, handler_method, source_file, handler_type, category, description) VALUES (?,?,?,?,?,?,?,?)",
|
|
connect
|
|
)
|
|
|
|
|
|
# ============================================================
|
|
# LIVE SCANNER — Scans actual source files
|
|
# ============================================================
|
|
|
|
def _rel_name(f, base_dir):
|
|
"""Return POSIX-style relative path from base_dir, e.g. 'editor/codeeditor.cpp'."""
|
|
try:
|
|
return str(f.relative_to(base_dir)).replace("\\", "/")
|
|
except ValueError:
|
|
return f.name
|
|
|
|
|
|
def scan_files(conn):
|
|
"""Scan source directories and register all source files."""
|
|
for project, base_dir, extensions in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in extensions:
|
|
for f in base_dir.rglob(ext):
|
|
rel = _rel_name(f, base_dir)
|
|
category = categorize_file(rel, project)
|
|
try:
|
|
conn.execute(
|
|
"INSERT OR IGNORE INTO files (path, project, category) VALUES (?,?,?)",
|
|
(rel, project, category)
|
|
)
|
|
except sqlite3.IntegrityError:
|
|
pass
|
|
|
|
|
|
def categorize_file(name, project):
|
|
"""Auto-categorize a file by its name pattern using config rules."""
|
|
for rule in CATEGORY_RULES:
|
|
# Optional source_dir filter
|
|
rule_dir = rule.get("source_dir")
|
|
if rule_dir and rule_dir != project:
|
|
continue
|
|
match_type = rule["match"]
|
|
pattern = rule["pattern"]
|
|
if match_type == "prefix" and name.startswith(pattern):
|
|
return rule["category"]
|
|
if match_type == "suffix" and name.endswith(pattern):
|
|
return rule["category"]
|
|
if match_type == "contains" and pattern in name:
|
|
return rule["category"]
|
|
if match_type == "regex" and re.search(pattern, name):
|
|
return rule["category"]
|
|
# Default: 'common' for Common project, 'core' for everything else
|
|
if project == "Common":
|
|
return "common"
|
|
return "core"
|
|
|
|
|
|
def populate_seed_data(conn, cfg_seed=None):
|
|
"""Insert seed data from config. Falls back to hardcoded data if no config."""
|
|
seed = cfg_seed or SEED_DATA
|
|
|
|
if "singletons" in seed and seed["singletons"]:
|
|
items = seed["singletons"]
|
|
if isinstance(items[0], dict):
|
|
data = [(s["macro"], s["class_name"], s["header"],
|
|
s.get("project", "Game"), s.get("category"), s.get("description"))
|
|
for s in items]
|
|
else:
|
|
data = items
|
|
conn.executemany(
|
|
"INSERT INTO singletons (macro, class_name, header, project, category, description) VALUES (?,?,?,?,?,?)",
|
|
data
|
|
)
|
|
|
|
if "player_files" in seed and seed["player_files"]:
|
|
items = seed["player_files"]
|
|
if isinstance(items[0], dict):
|
|
data = [(p["file"], p["domain"], p.get("key_methods", ""), p.get("description", ""))
|
|
for p in items]
|
|
else:
|
|
data = items
|
|
conn.executemany(
|
|
"INSERT INTO player_files (file, domain, key_methods, description) VALUES (?,?,?,?)",
|
|
data
|
|
)
|
|
|
|
if "events" in seed and seed["events"]:
|
|
items = seed["events"]
|
|
if isinstance(items[0], dict):
|
|
data = [(e["name"], e.get("singleton_macro"), e.get("cpp_file"),
|
|
e.get("header_file"), e.get("def_file"), e.get("ai_file"),
|
|
e.get("description", ""))
|
|
for e in items]
|
|
else:
|
|
data = items
|
|
conn.executemany(
|
|
"INSERT INTO events (name, singleton_macro, cpp_file, header_file, def_file, ai_file, description) VALUES (?,?,?,?,?,?,?)",
|
|
data
|
|
)
|
|
|
|
if "ai_handlers" in seed and seed["ai_handlers"]:
|
|
items = seed["ai_handlers"]
|
|
if isinstance(items[0], dict):
|
|
data = [(a["file"], a["target"], a.get("description", "")) for a in items]
|
|
else:
|
|
data = items
|
|
conn.executemany(
|
|
"INSERT INTO ai_handlers (file, target, description) VALUES (?,?,?)",
|
|
data
|
|
)
|
|
|
|
if "inventory_scripts" in seed and seed["inventory_scripts"]:
|
|
items = seed["inventory_scripts"]
|
|
if isinstance(items[0], dict):
|
|
data = [(s["file"], s.get("description", "")) for s in items]
|
|
else:
|
|
data = items
|
|
conn.executemany(
|
|
"INSERT INTO inventory_scripts (file, description) VALUES (?,?)",
|
|
data
|
|
)
|
|
|
|
if "constants" in seed and seed["constants"]:
|
|
items = seed["constants"]
|
|
if isinstance(items[0], dict):
|
|
data = [(c["name"], c.get("value"), c["header"],
|
|
c.get("category"), c.get("description", ""))
|
|
for c in items]
|
|
else:
|
|
data = items
|
|
conn.executemany(
|
|
"INSERT INTO constants (name, value, header, category, description) VALUES (?,?,?,?,?)",
|
|
data
|
|
)
|
|
|
|
if "config_files" in seed and seed["config_files"]:
|
|
items = seed["config_files"]
|
|
if isinstance(items[0], dict):
|
|
data = [(c["path"], c.get("description", "")) for c in items]
|
|
else:
|
|
data = items
|
|
conn.executemany(
|
|
"INSERT INTO config_files (path, description) VALUES (?,?)",
|
|
data
|
|
)
|
|
|
|
if "packet_handlers" in seed and seed["packet_handlers"]:
|
|
items = seed["packet_handlers"]
|
|
if isinstance(items[0], dict):
|
|
data = [(h["headcode_name"], h.get("headcode_value"), h.get("headcode_hex"),
|
|
h.get("handler_method"), h["source_file"], h["handler_type"],
|
|
h.get("category"), h.get("description", ""))
|
|
for h in items]
|
|
else:
|
|
data = items
|
|
conn.executemany(
|
|
"INSERT INTO packet_handlers (headcode_name, headcode_value, headcode_hex, handler_method, source_file, handler_type, category, description) VALUES (?,?,?,?,?,?,?,?)",
|
|
data
|
|
)
|
|
|
|
|
|
RESEARCH_SYMBOL_RE = re.compile(
|
|
r"\b(?:"
|
|
r"HEADCODE_[A-Z0-9_]+|"
|
|
r"sub_[0-9A-Fa-f]+|"
|
|
r"(?:GC|CG)[A-Za-z0-9_]+|"
|
|
r"[A-Z][A-Za-z0-9_]+::[A-Za-z0-9_]+|"
|
|
r"[A-Za-z_][A-Za-z0-9_]*(?:Recv|Send|Handler|Manager|System|Window|Protocol|Packet|Attack|Damage|Viewport|Automata|AI)"
|
|
r"[A-Za-z0-9_]*"
|
|
r")\b"
|
|
)
|
|
RESEARCH_PROTOCOL_RE = re.compile(r"\b(?:HEADCODE_[A-Z0-9_]+|HC\s*\d+|0x[0-9A-Fa-f]{2,4})\b")
|
|
RESEARCH_ASM_LABEL_RE = re.compile(r"^\s*(sub_[0-9A-Fa-f]+):")
|
|
|
|
|
|
def _trim_text(text, limit=240):
|
|
text = re.sub(r"\s+", " ", str(text or "")).strip()
|
|
if len(text) <= limit:
|
|
return text
|
|
return text[: limit - 3].rstrip() + "..."
|
|
|
|
|
|
def _iter_research_files():
|
|
if not RESEARCH_DIR.exists():
|
|
return []
|
|
return sorted(
|
|
[path for path in RESEARCH_DIR.rglob("*") if path.is_file()],
|
|
key=lambda item: str(item).lower(),
|
|
)
|
|
|
|
|
|
def _research_asset_type(path):
|
|
suffix = path.suffix.lower()
|
|
if suffix in {".md", ".markdown"}:
|
|
return "markdown"
|
|
if suffix in {".txt", ".log"}:
|
|
return "text"
|
|
if suffix == ".asm":
|
|
return "asm"
|
|
if suffix in {".exe", ".dll", ".bin"}:
|
|
return "binary"
|
|
return "other"
|
|
|
|
|
|
def _extract_research_symbols(text, limit=80):
|
|
seen = set()
|
|
symbols = []
|
|
for match in RESEARCH_SYMBOL_RE.finditer(text or ""):
|
|
symbol = match.group(0).strip()
|
|
if len(symbol) < 4 or symbol in seen:
|
|
continue
|
|
seen.add(symbol)
|
|
symbols.append(symbol)
|
|
if len(symbols) >= limit:
|
|
break
|
|
return symbols
|
|
|
|
|
|
def _extract_protocol_refs(text, limit=40):
|
|
seen = set()
|
|
refs = []
|
|
for match in RESEARCH_PROTOCOL_RE.finditer(text or ""):
|
|
ref = match.group(0).strip()
|
|
if ref in seen:
|
|
continue
|
|
seen.add(ref)
|
|
refs.append(ref)
|
|
if len(refs) >= limit:
|
|
break
|
|
return refs
|
|
|
|
|
|
def _scan_markdown_research_file(path):
|
|
try:
|
|
text = path.read_text(encoding="utf-8", errors="ignore")
|
|
except Exception:
|
|
return None
|
|
|
|
title = None
|
|
summary = None
|
|
mentions = []
|
|
seen_mentions = set()
|
|
lines = text.splitlines()
|
|
for line in lines:
|
|
stripped = line.strip()
|
|
if not stripped:
|
|
continue
|
|
if title is None and stripped.startswith("#"):
|
|
title = stripped.lstrip("#").strip()
|
|
continue
|
|
if summary is None and not stripped.startswith("#") and not set(stripped) <= {"-", "="}:
|
|
summary = _trim_text(stripped, 260)
|
|
for symbol in _extract_research_symbols(stripped, limit=12):
|
|
key = ("symbol", symbol)
|
|
if key not in seen_mentions:
|
|
seen_mentions.add(key)
|
|
mentions.append((symbol, "symbol", _trim_text(stripped, 220)))
|
|
for ref in _extract_protocol_refs(stripped, limit=8):
|
|
key = ("protocol", ref)
|
|
if key not in seen_mentions:
|
|
seen_mentions.add(key)
|
|
mentions.append((ref, "protocol", _trim_text(stripped, 220)))
|
|
|
|
if title is None:
|
|
title = path.stem.replace("_", " ")
|
|
symbols = [entry[0] for entry in mentions if entry[1] == "symbol"]
|
|
protocols = [entry[0] for entry in mentions if entry[1] == "protocol"]
|
|
return {
|
|
"title": title,
|
|
"summary": summary or f"Research asset: {path.name}",
|
|
"symbol_refs": symbols[:40],
|
|
"protocol_refs": protocols[:20],
|
|
"notes": f"lines={len(lines)}",
|
|
"mentions": mentions,
|
|
}
|
|
|
|
|
|
def _scan_asm_research_file(path):
|
|
mentions = []
|
|
seen_labels = set()
|
|
label_count = 0
|
|
preview = []
|
|
try:
|
|
with open(path, "r", encoding="utf-8", errors="ignore") as handle:
|
|
for line in handle:
|
|
match = RESEARCH_ASM_LABEL_RE.match(line)
|
|
if not match:
|
|
continue
|
|
label = match.group(1)
|
|
label_count += 1
|
|
if label not in seen_labels:
|
|
seen_labels.add(label)
|
|
mentions.append((label, "asm_label", label))
|
|
if len(preview) < 16:
|
|
preview.append(label)
|
|
except Exception:
|
|
return None
|
|
|
|
summary = f"Client disassembly listing with {label_count} labeled subroutines"
|
|
notes = f"preview={', '.join(preview[:8])}" if preview else "raw disassembly"
|
|
return {
|
|
"title": path.name,
|
|
"summary": summary,
|
|
"symbol_refs": preview,
|
|
"protocol_refs": [],
|
|
"notes": notes,
|
|
"mentions": mentions,
|
|
}
|
|
|
|
|
|
def scan_research_assets(conn):
|
|
"""Index ReversingResearch notes and lightweight symbol references."""
|
|
for path in _iter_research_files():
|
|
rel_path = str(path.relative_to(REPO_ROOT)).replace("\\", "/")
|
|
asset_type = _research_asset_type(path)
|
|
parsed = None
|
|
if asset_type in {"markdown", "text"}:
|
|
parsed = _scan_markdown_research_file(path)
|
|
elif asset_type == "asm":
|
|
parsed = _scan_asm_research_file(path)
|
|
|
|
title = path.stem
|
|
summary = None
|
|
symbol_refs = []
|
|
protocol_refs = []
|
|
notes = None
|
|
mentions = []
|
|
if parsed:
|
|
title = parsed["title"] or title
|
|
summary = parsed["summary"]
|
|
symbol_refs = parsed["symbol_refs"]
|
|
protocol_refs = parsed["protocol_refs"]
|
|
notes = parsed["notes"]
|
|
mentions = parsed["mentions"]
|
|
else:
|
|
summary = f"Research asset: {path.name}"
|
|
|
|
conn.execute(
|
|
"INSERT INTO research_assets (path, file_name, asset_type, size_bytes, title, summary, symbol_refs, protocol_refs, notes) "
|
|
"VALUES (?,?,?,?,?,?,?,?,?)",
|
|
(
|
|
rel_path,
|
|
path.name,
|
|
asset_type,
|
|
path.stat().st_size,
|
|
title,
|
|
summary,
|
|
", ".join(symbol_refs[:20]),
|
|
", ".join(protocol_refs[:12]),
|
|
notes,
|
|
),
|
|
)
|
|
|
|
for symbol, mention_type, context in mentions:
|
|
conn.execute(
|
|
"INSERT INTO research_mentions (asset_path, symbol, mention_type, context) VALUES (?,?,?,?)",
|
|
(rel_path, symbol, mention_type, context),
|
|
)
|
|
|
|
|
|
def scan_includes(conn):
|
|
"""Scan import/include directives to build dependency graph."""
|
|
import_re = LANG_ADAPTER.import_pattern()
|
|
if not import_re:
|
|
return
|
|
for project, base_dir, exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in exts:
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
for line in fh:
|
|
m = import_re.search(line)
|
|
if m:
|
|
# Get first non-None group
|
|
inc = next((g for g in m.groups() if g), None)
|
|
if not inc:
|
|
continue
|
|
inc = inc.replace("\\", "/").split("/")[-1]
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO dependencies (source_file, target_file, dep_type) VALUES (?,?,?)",
|
|
(_rel_name(f, base_dir), inc, "include")
|
|
)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_singleton_usage(conn):
|
|
"""Scan for singleton macro usage patterns (sXxx->) in .cpp files."""
|
|
singleton_macros = set()
|
|
for row in conn.execute("SELECT macro FROM singletons"):
|
|
singleton_macros.add(row[0])
|
|
|
|
pattern = re.compile(r'\b(' + '|'.join(re.escape(m) for m in singleton_macros) + r')->')
|
|
|
|
for _label, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for f in base_dir.rglob("*.cpp"):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
content = fh.read()
|
|
found = set(pattern.findall(content))
|
|
for macro in found:
|
|
# Find which header the singleton is in
|
|
row = conn.execute("SELECT header FROM singletons WHERE macro=?", (macro,)).fetchone()
|
|
if row:
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO dependencies (source_file, target_file, dep_type) VALUES (?,?,?)",
|
|
(_rel_name(f, base_dir), row[0], "singleton_use")
|
|
)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_enums(conn):
|
|
"""Scan source files for enum declarations and extract values."""
|
|
enum_re = LANG_ADAPTER.enum_pattern()
|
|
if not enum_re:
|
|
return
|
|
# For non-brace languages (Python), scan all files; for C++ scan headers too
|
|
header_exts = LANG_ADAPTER.header_extensions or LANG_ADAPTER.extensions
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
scan_exts = header_exts if header_exts else _exts
|
|
for ext in scan_exts:
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
lines = fh.readlines()
|
|
i = 0
|
|
while i < len(lines):
|
|
m = enum_re.match(lines[i])
|
|
if m:
|
|
enum_name = m.group(1)
|
|
enum_line = i + 1
|
|
values = []
|
|
if LANG_ADAPTER.uses_braces:
|
|
j = i + 1
|
|
brace_found = '{' in lines[i]
|
|
while not brace_found and j < min(i + 3, len(lines)):
|
|
if '{' in lines[j]:
|
|
brace_found = True
|
|
j += 1
|
|
if not brace_found:
|
|
i += 1
|
|
continue
|
|
while j < len(lines):
|
|
line = lines[j].strip()
|
|
if '}' in line:
|
|
break
|
|
vm = re.match(r'(\w+)\s*(?:=\s*([^,/\n]+))?\s*[,]?', line)
|
|
if vm and not line.startswith('//') and not line.startswith('#'):
|
|
values.append(vm.group(1))
|
|
j += 1
|
|
else:
|
|
# Python/indent-based: read indented values
|
|
j = i + 1
|
|
base_indent = len(lines[i]) - len(lines[i].lstrip())
|
|
while j < len(lines):
|
|
ln = lines[j]
|
|
stripped = ln.strip()
|
|
if not stripped or stripped.startswith('#'):
|
|
j += 1
|
|
continue
|
|
indent = len(ln) - len(ln.lstrip())
|
|
if indent <= base_indent:
|
|
break
|
|
vm = re.match(r'(\w+)\s*=', stripped)
|
|
if vm:
|
|
values.append(vm.group(1))
|
|
j += 1
|
|
preview = ", ".join(values[:8])
|
|
if len(values) > 8:
|
|
preview += f", ... ({len(values)} total)"
|
|
cat = categorize_file(_rel_name(f, base_dir), project)
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO enums (name, file, line, value_count, values_preview, category) VALUES (?,?,?,?,?,?)",
|
|
(enum_name, _rel_name(f, base_dir), enum_line, len(values), preview, cat)
|
|
)
|
|
except Exception:
|
|
pass
|
|
i += 1
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_structs(conn):
|
|
"""Scan source files for struct/dataclass/record declarations."""
|
|
struct_re = LANG_ADAPTER.struct_pattern()
|
|
if not struct_re:
|
|
return
|
|
scan_exts = LANG_ADAPTER.header_extensions or LANG_ADAPTER.extensions
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in (scan_exts if scan_exts else _exts):
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
lines = fh.readlines()
|
|
in_pack = False
|
|
i = 0
|
|
while i < len(lines):
|
|
line = lines[i].strip()
|
|
# Track C++ #pragma pack state
|
|
if LANG_ADAPTER.name == "cpp" and '#pragma' in line and 'pack' in line:
|
|
if 'pack(1)' in line or 'pack(push' in line:
|
|
in_pack = True
|
|
elif 'pack()' in line or 'pack(pop' in line:
|
|
in_pack = False
|
|
i += 1
|
|
continue
|
|
m = struct_re.match(lines[i])
|
|
if m:
|
|
struct_name = m.group(1)
|
|
struct_line = i + 1
|
|
fields = []
|
|
if LANG_ADAPTER.uses_braces:
|
|
j = i + 1
|
|
brace_found = '{' in lines[i]
|
|
while not brace_found and j < min(i + 3, len(lines)):
|
|
if '{' in lines[j]:
|
|
brace_found = True
|
|
j += 1
|
|
if not brace_found:
|
|
i += 1
|
|
continue
|
|
brace_depth = 1
|
|
while j < len(lines) and brace_depth > 0:
|
|
fl = lines[j].strip()
|
|
brace_depth += fl.count('{') - fl.count('}')
|
|
if brace_depth <= 0:
|
|
break
|
|
fm = re.match(r'((?:const\s+)?(?:unsigned\s+)?\w+(?:\s*\*)?)\s+(\w+)(?:\[.*\])?\s*;', fl)
|
|
if fm and brace_depth == 1:
|
|
fields.append(f"{fm.group(1)} {fm.group(2)}")
|
|
j += 1
|
|
else:
|
|
# Indent-based (Python dataclass): read indented fields
|
|
j = i + 1
|
|
base_indent = len(lines[i]) - len(lines[i].lstrip())
|
|
while j < len(lines):
|
|
ln = lines[j]
|
|
stripped = ln.strip()
|
|
if not stripped or stripped.startswith('#'):
|
|
j += 1
|
|
continue
|
|
indent = len(ln) - len(ln.lstrip())
|
|
if indent <= base_indent and stripped not in ('', 'pass'):
|
|
break
|
|
fm = re.match(r'(\w+)\s*(?::\s*([\w\[\], ]+))?', stripped)
|
|
if fm and ':' in stripped:
|
|
fields.append(f"{fm.group(2) or '?'} {fm.group(1)}")
|
|
j += 1
|
|
preview = "; ".join(fields[:6])
|
|
if len(fields) > 6:
|
|
preview += f"; ... ({len(fields)} total)"
|
|
cat = "packet" if in_pack else "data"
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO structs (name, file, line, field_count, is_packed, fields_preview, category) VALUES (?,?,?,?,?,?,?)",
|
|
(struct_name, _rel_name(f, base_dir), struct_line, len(fields), 1 if in_pack else 0, preview, cat)
|
|
)
|
|
except Exception:
|
|
pass
|
|
i += 1
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_classes(conn):
|
|
"""Scan source files for class/interface/impl declarations and inheritance."""
|
|
class_re = LANG_ADAPTER.class_pattern()
|
|
if not class_re:
|
|
return
|
|
scan_exts = LANG_ADAPTER.header_extensions or LANG_ADAPTER.extensions
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in (scan_exts if scan_exts else _exts):
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
for line in fh:
|
|
m = class_re.match(line)
|
|
if m:
|
|
class_name = m.group(1)
|
|
parent = m.group(2) if m.lastindex and m.lastindex >= 2 else None
|
|
# Try to find a companion implementation file
|
|
impl_ext = ".cpp" if LANG_ADAPTER.name == "cpp" else f.suffix
|
|
impl_name = f.stem + impl_ext
|
|
impl_path = f.parent / impl_name
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO classes (name, header, cpp_file, parent_class, project) VALUES (?,?,?,?,?)",
|
|
(class_name, _rel_name(f, base_dir), impl_name if impl_path.exists() else None, parent, project)
|
|
)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_methods(conn):
|
|
"""Scan source files for method/function declarations in classes."""
|
|
method_re = LANG_ADAPTER.method_pattern()
|
|
class_re = LANG_ADAPTER.class_pattern()
|
|
if not method_re:
|
|
return
|
|
skip_names = LANG_ADAPTER.method_skip_names()
|
|
scan_exts = LANG_ADAPTER.header_extensions or LANG_ADAPTER.extensions
|
|
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in (scan_exts if scan_exts else _exts):
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
current_class = None
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
for lineno, line in enumerate(fh, 1):
|
|
# Track current class context
|
|
if class_re:
|
|
cm = class_re.match(line)
|
|
if cm:
|
|
current_class = cm.group(1)
|
|
continue
|
|
m = method_re.match(line)
|
|
if m:
|
|
# For Python: group(1) is method name; for C++: group(2)
|
|
method_name = m.group(2) if m.lastindex and m.lastindex >= 2 else m.group(1)
|
|
if not method_name or method_name in skip_names:
|
|
continue
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO methods (class_name, method_name, file, line_hint) VALUES (?,?,?,?)",
|
|
(current_class, method_name, _rel_name(f, base_dir), lineno)
|
|
)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_todos(conn):
|
|
"""Scan source files for TODO, FIXME, HACK, BUG, XXX, NOTE comments."""
|
|
todo_re = LANG_ADAPTER.todo_pattern()
|
|
for project, base_dir, exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in exts:
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
for lineno, line in enumerate(fh, 1):
|
|
m = todo_re.search(line)
|
|
if m:
|
|
todo_type = m.group(1).upper()
|
|
text = m.group(2).strip().rstrip('*/')
|
|
if len(text) < 3:
|
|
continue
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO todos (file, line, todo_type, text, project) VALUES (?,?,?,?,?)",
|
|
(_rel_name(f, base_dir), lineno, todo_type, text[:200], project)
|
|
)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_db_tables(conn):
|
|
"""Scan source files for SQL table references in queries."""
|
|
# Match common SQL patterns: FROM table, INTO table, UPDATE table, JOIN table, TABLE table
|
|
sql_re = re.compile(
|
|
r'(?:FROM|INTO|UPDATE|JOIN|TABLE)\s+`?(\w+)`?',
|
|
re.IGNORECASE
|
|
)
|
|
# Also match DirectDB and MuDatabase query strings
|
|
query_type_re = re.compile(r'\b(SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|DROP)\b', re.IGNORECASE)
|
|
|
|
seen = set()
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in ("*.cpp", "*.h"):
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
for lineno, line in enumerate(fh, 1):
|
|
# Only look at lines with SQL-like content
|
|
if not any(kw in line.upper() for kw in ('SELECT', 'INSERT', 'UPDATE', 'DELETE', 'FROM', 'CREATE TABLE', 'ALTER TABLE')):
|
|
continue
|
|
tables = sql_re.findall(line)
|
|
qt_match = query_type_re.search(line)
|
|
query_type = qt_match.group(1).upper() if qt_match else None
|
|
for table_name in tables:
|
|
# Filter out common false positives
|
|
if table_name.upper() in ('SET', 'WHERE', 'AND', 'OR', 'NOT', 'NULL', 'VALUES', 'INTO', 'AS', 'ON', 'BY', 'IF', 'IN', 'IS'):
|
|
continue
|
|
key = (table_name, _rel_name(f, base_dir), query_type)
|
|
if key in seen:
|
|
continue
|
|
seen.add(key)
|
|
context = line.strip()[:150]
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO db_tables (table_name, source_file, line, query_type, context) VALUES (?,?,?,?,?)",
|
|
(table_name, _rel_name(f, base_dir), lineno, query_type, context)
|
|
)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_prepared_statements(conn):
|
|
"""Scan MuDatabase.cpp (and similar) for PrepareStatement calls."""
|
|
prep_re = re.compile(
|
|
r'PrepareStatement\s*\(\s*(QUERY_\w+)\s*,\s*"([^"]+)"\s*,\s*(CONNECTION_\w+)',
|
|
re.DOTALL
|
|
)
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in ("*.cpp", "*.h"):
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
content = fh.read()
|
|
for m in prep_re.finditer(content):
|
|
query_name = m.group(1)
|
|
sql_text = m.group(2).strip()
|
|
conn_type = m.group(3)
|
|
# Calculate line number
|
|
lineno = content[:m.start()].count('\n') + 1
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO prepared_statements (query_name, sql_text, connection_type, source_file, line) VALUES (?,?,?,?,?)",
|
|
(query_name, sql_text, conn_type, _rel_name(f, base_dir), lineno)
|
|
)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_config_keys(conn):
|
|
"""Scan source files for sConfig->GetXxx(\"key\") calls."""
|
|
cfg_re = re.compile(
|
|
r'sConfig->\s*(Get(?:Int|String|Bool|Float|IntDefault))\s*\(\s*"([^"]+)"'
|
|
)
|
|
seen = set()
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in ("*.cpp", "*.h"):
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
for lineno, line in enumerate(fh, 1):
|
|
for m in cfg_re.finditer(line):
|
|
getter = m.group(1)
|
|
key = m.group(2)
|
|
dedup = (key, _rel_name(f, base_dir), getter)
|
|
if dedup in seen:
|
|
continue
|
|
seen.add(dedup)
|
|
context = line.strip()[:150]
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO config_keys (key_name, getter_type, source_file, line, context) VALUES (?,?,?,?,?)",
|
|
(key, getter, _rel_name(f, base_dir), lineno, context)
|
|
)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_defines(conn):
|
|
"""Scan source files for constant/define declarations."""
|
|
define_re = LANG_ADAPTER.define_pattern()
|
|
if not define_re:
|
|
return
|
|
# C++ scans headers; other languages scan all source files
|
|
scan_exts = LANG_ADAPTER.header_extensions if LANG_ADAPTER.header_extensions else LANG_ADAPTER.extensions
|
|
# C++ guard skip patterns
|
|
cpp_skip_patterns = {'_H', '_H_', 'INCLUDED', 'GUARD'}
|
|
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in (scan_exts if scan_exts else _exts):
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
for lineno, line in enumerate(fh, 1):
|
|
m = define_re.match(line)
|
|
if m:
|
|
name = m.group(1)
|
|
value = m.group(2).strip() if m.lastindex and m.lastindex >= 2 else ""
|
|
if not name:
|
|
continue
|
|
# C++ specific: skip include guards and function-like macros
|
|
if LANG_ADAPTER.name == "cpp":
|
|
if any(name.endswith(s) for s in cpp_skip_patterns):
|
|
continue
|
|
if name.startswith('_') and name.endswith('_'):
|
|
continue
|
|
if '(' in name:
|
|
continue
|
|
if value.startswith('\\'):
|
|
continue
|
|
# Auto-categorize
|
|
cat = "misc"
|
|
name_upper = name.upper()
|
|
if any(k in name_upper for k in ("MAX", "MIN", "SIZE", "COUNT", "LIMIT")):
|
|
cat = "limits"
|
|
elif any(k in name_upper for k in ("WORLD", "MAP", "ZONE")):
|
|
cat = "world"
|
|
elif "ITEM" in name_upper:
|
|
cat = "item"
|
|
elif "SKILL" in name_upper:
|
|
cat = "skill"
|
|
elif "BUFF" in name_upper:
|
|
cat = "buff"
|
|
elif any(k in name_upper for k in ("PACKET", "HEADCODE", "OPCODE")):
|
|
cat = "packet"
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO defines (name, value, file, line, category) VALUES (?,?,?,?,?)",
|
|
(name, value[:100], _rel_name(f, base_dir), lineno, cat)
|
|
)
|
|
except Exception:
|
|
pass
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_file_lines(conn):
|
|
"""Count lines in each source file for complexity metrics."""
|
|
for project, base_dir, exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in exts:
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
count = sum(1 for _ in fh)
|
|
category = categorize_file(_rel_name(f, base_dir), project)
|
|
conn.execute(
|
|
"INSERT INTO file_lines (file, project, line_count, category) VALUES (?,?,?,?)",
|
|
(_rel_name(f, base_dir), project, count, category)
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_leak_risks(conn):
|
|
"""Analyze resource alloc/dealloc balance per file to find potential leaks."""
|
|
patterns = LANG_ADAPTER.leak_patterns()
|
|
if not patterns:
|
|
return
|
|
alloc_re = patterns['alloc']
|
|
dealloc_re = patterns['dealloc']
|
|
smart1_re = patterns.get('smart_unique')
|
|
smart2_re = patterns.get('smart_shared')
|
|
|
|
# Scan implementation files (not headers)
|
|
impl_exts = [e for e in LANG_ADAPTER.extensions if e not in LANG_ADAPTER.header_extensions]
|
|
if not impl_exts:
|
|
impl_exts = LANG_ADAPTER.extensions
|
|
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
scan_exts = [e for e in _exts if e not in (LANG_ADAPTER.header_extensions or [])]
|
|
if not scan_exts:
|
|
scan_exts = _exts
|
|
for ext in scan_exts:
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
lines = fh.readlines()
|
|
content = "".join(lines)
|
|
alloc_count = len(alloc_re.findall(content))
|
|
dealloc_count = len(dealloc_re.findall(content))
|
|
smart1_count = len(smart1_re.findall(content)) if smart1_re else 0
|
|
smart2_count = len(smart2_re.findall(content)) if smart2_re else 0
|
|
if alloc_count == 0 and dealloc_count == 0:
|
|
continue
|
|
risk = alloc_count - dealloc_count - smart1_count - smart2_count
|
|
samples = []
|
|
for i, line in enumerate(lines):
|
|
if alloc_re.search(line) and len(samples) < 5:
|
|
samples.append(f"L{i+1}: {line.strip()[:80]}")
|
|
conn.execute(
|
|
"INSERT INTO leak_risks (file, project, new_count, delete_count, make_unique_count, make_shared_count, risk_score, sample_lines) VALUES (?,?,?,?,?,?,?,?)",
|
|
(_rel_name(f, base_dir), project, alloc_count, dealloc_count, smart1_count, smart2_count, risk, "\n".join(samples))
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_null_risks(conn):
|
|
"""Find nullable-returning calls used without null checks."""
|
|
risky_calls = LANG_ADAPTER.null_risk_calls()
|
|
if not risky_calls:
|
|
return
|
|
call_pattern = re.compile(
|
|
r'(?:\w+\s*(?:->|\.)\s*)?(' + '|'.join(re.escape(c) for c in risky_calls) + r')\s*\([^)]*\)'
|
|
)
|
|
# For C++ use -> dereference check; for others use attribute access
|
|
deref_op = '->' if LANG_ADAPTER.name == "cpp" else '.'
|
|
|
|
scan_exts = [e for e in LANG_ADAPTER.extensions if e not in (LANG_ADAPTER.header_extensions or [])]
|
|
if not scan_exts:
|
|
scan_exts = LANG_ADAPTER.extensions
|
|
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in [e for e in _exts if e not in (LANG_ADAPTER.header_extensions or [])]:
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
lines = fh.readlines()
|
|
for i, line in enumerate(lines):
|
|
for m in call_pattern.finditer(line):
|
|
func_call = m.group(1)
|
|
assign_match = re.search(r'(\w+)\s*=\s*' + re.escape(m.group(0)), line)
|
|
ptr_var = assign_match.group(1) if assign_match else None
|
|
if ptr_var:
|
|
window = "".join(lines[i+1:i+4]) if i+1 < len(lines) else ""
|
|
null_re = LANG_ADAPTER.null_check_pattern(ptr_var)
|
|
has_check = bool(null_re.search(window)) if null_re else False
|
|
if not has_check and 'if' not in line and '?' not in line:
|
|
conn.execute(
|
|
"INSERT INTO null_risks (file, line, function_call, pointer_var, risk_type, context) VALUES (?,?,?,?,?,?)",
|
|
(_rel_name(f, base_dir), i+1, func_call, ptr_var, 'no_null_check', line.strip()[:150])
|
|
)
|
|
else:
|
|
# Direct dereference on same line
|
|
if deref_op in line[m.end():]:
|
|
conn.execute(
|
|
"INSERT INTO null_risks (file, line, function_call, pointer_var, risk_type, context) VALUES (?,?,?,?,?,?)",
|
|
(_rel_name(f, base_dir), i+1, func_call, None, 'unchecked_deref', line.strip()[:150])
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_raw_pointers(conn):
|
|
"""Find raw pointer / unmanaged resource class members (RAII/ownership candidates)."""
|
|
member_re = LANG_ADAPTER.raw_pointer_pattern()
|
|
class_re = LANG_ADAPTER.class_pattern()
|
|
if not member_re:
|
|
return
|
|
scan_exts = LANG_ADAPTER.header_extensions or LANG_ADAPTER.extensions
|
|
for _label, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in (scan_exts if scan_exts else _exts):
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
lines = fh.readlines()
|
|
current_class = None
|
|
for i, line in enumerate(lines):
|
|
if class_re:
|
|
cm = class_re.match(line)
|
|
if cm:
|
|
current_class = cm.group(1)
|
|
m = member_re.match(line)
|
|
if m and current_class:
|
|
member_type = m.group(1).strip()
|
|
member_name = m.group(2).strip()
|
|
if member_type in ('char*', 'const char*', 'void*'):
|
|
continue
|
|
conn.execute(
|
|
"INSERT INTO raw_pointers (file, line, class_name, member_type, member_name) VALUES (?,?,?,?,?)",
|
|
(_rel_name(f, base_dir), i+1, current_class, member_type, member_name)
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_unsafe_casts(conn):
|
|
"""Find unsafe type casts (language-specific)."""
|
|
cast_re = LANG_ADAPTER.unsafe_cast_pattern()
|
|
if not cast_re:
|
|
return
|
|
safe_types = {'void', 'char', 'unsigned', 'const', 'BYTE', 'LPBYTE', 'LPSTR', 'LPCSTR', 'LPVOID'}
|
|
for project, base_dir, exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in exts:
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
for lineno, line in enumerate(fh, 1):
|
|
stripped = line.strip()
|
|
if stripped.startswith('//') or stripped.startswith('/*'):
|
|
continue
|
|
for m in cast_re.finditer(line):
|
|
cast_type = m.group(1).strip().rstrip('*').strip()
|
|
if cast_type.lower() in {s.lower() for s in safe_types}:
|
|
continue
|
|
expr = m.group(0)
|
|
conn.execute(
|
|
"INSERT INTO unsafe_casts (file, line, cast_expr, context) VALUES (?,?,?,?)",
|
|
(_rel_name(f, base_dir), lineno, expr[:80], stripped[:150])
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_crash_risks(conn):
|
|
"""Detect crash-prone patterns: null chains, division by zero, use-after-free.
|
|
|
|
For non-C++ languages, runs adapter-provided crash risk patterns only.
|
|
"""
|
|
# Run adapter-provided generic crash risk patterns for any language
|
|
adapter_patterns = LANG_ADAPTER.crash_risk_patterns()
|
|
if adapter_patterns and LANG_ADAPTER.name != "cpp":
|
|
for project, base_dir, exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in exts:
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
lines = fh.readlines()
|
|
for i, line in enumerate(lines):
|
|
for pattern_re, risk_type, severity in adapter_patterns:
|
|
for m in pattern_re.finditer(line):
|
|
conn.execute(
|
|
"INSERT INTO crash_risks (file, line, risk_type, severity, expression, context) VALUES (?,?,?,?,?,?)",
|
|
(_rel_name(f, base_dir), i+1, risk_type, severity, m.group(0)[:100], line.strip()[:200])
|
|
)
|
|
except Exception:
|
|
pass
|
|
return
|
|
|
|
# C++ specific analysis below
|
|
if LANG_ADAPTER.name != "cpp":
|
|
return
|
|
|
|
# --- Pattern 1: Unchecked chain dereferences ---
|
|
# Matches: expr->Method()->AnotherMethod() (triple+ chain without null guard)
|
|
chain_re = re.compile(
|
|
r'(\w+(?:->|\.)(?:Get\w+|To\w+)\s*\([^)]*\))\s*->\s*(\w+)'
|
|
)
|
|
# Specific dangerous chains: GetTarget()-> GetSummoner()-> GetWorld()->
|
|
target_chain_re = re.compile(
|
|
r'(?:->|\.)(?:GetTarget|GetSummoner|GetWorld|GetOwner|GetParty|GetGuild|GetSummoned|GetInterfaceState)\s*\(\)\s*->\s*(\w+)'
|
|
)
|
|
# Division patterns: / varname or / expr (excluding /= and // comments)
|
|
div_re = re.compile(
|
|
r'(?<!/)\s/\s+(?!/)(?!\*)(?!0[^.])([a-zA-Z_]\w*)'
|
|
)
|
|
# Use-after patterns: ->Method() after ->Remove() or SetTarget(nullptr) on same object
|
|
remove_then_use_re = re.compile(
|
|
r'->(?:Remove|SetTarget\s*\(\s*nullptr\s*\))'
|
|
)
|
|
|
|
cpp_impl_exts = ["*.cpp", "*.cc", "*.cxx"]
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for cpp_ext in cpp_impl_exts:
|
|
for f in base_dir.rglob(cpp_ext):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
lines = fh.readlines()
|
|
|
|
for i, line in enumerate(lines):
|
|
stripped = line.strip()
|
|
# Skip comments and blank lines
|
|
if stripped.startswith('//') or stripped.startswith('/*') or stripped.startswith('*') or not stripped:
|
|
continue
|
|
|
|
# --- Dangerous chain dereferences ---
|
|
for m in target_chain_re.finditer(line):
|
|
window = "".join(lines[max(0,i-5):i+1])
|
|
call_name = m.group(0).split('(')[0].split('.')[-1].split('>')[-1]
|
|
has_guard = False
|
|
guard_funcs = ['GetTarget', 'GetSummoner', 'GetWorld', 'GetOwner',
|
|
'GetParty', 'GetGuild', 'GetSummoned', 'GetInterfaceState']
|
|
for gf in guard_funcs:
|
|
if gf in call_name:
|
|
if re.search(rf'if\s*\(?\s*!?\s*\w+(?:->|\.)' + gf, window):
|
|
has_guard = True
|
|
if re.search(rf'{gf}\s*\(\)\s*&&', window) or re.search(rf'&&\s*\w+(?:->|\.)' + gf, window):
|
|
has_guard = True
|
|
if re.search(rf'=\s*\w+(?:->|\.)' + gf + r'\s*\([^)]*\)', "".join(lines[max(0,i-3):i])):
|
|
prev_window = "".join(lines[max(0,i-3):i+1])
|
|
if 'if' in prev_window and ('nullptr' in prev_window or '!' in prev_window or 'NULL' in prev_window):
|
|
has_guard = True
|
|
if not has_guard:
|
|
expr = m.group(0).strip()
|
|
severity = 'critical' if any(c in call_name for c in ['GetTarget', 'GetSummoner', 'GetWorld']) else 'high'
|
|
conn.execute(
|
|
"INSERT INTO crash_risks (file, line, risk_type, severity, expression, context) VALUES (?,?,?,?,?,?)",
|
|
(_rel_name(f, base_dir), i+1, 'null_chain', severity, expr[:100], stripped[:200])
|
|
)
|
|
|
|
# --- Division by zero ---
|
|
if '/' in stripped and not stripped.startswith('#') and '//' not in stripped.split('/', 1)[0] + '/' and '/*' not in stripped:
|
|
no_strings = re.sub(r'"[^"]*"', '""', stripped)
|
|
no_strings = re.sub(r"'[^']*'", "''", no_strings)
|
|
no_strings = re.sub(r'//.*$', '', no_strings)
|
|
if '/' in no_strings:
|
|
for dm in re.finditer(r'\b(\w+)\s*/\s*(\w+)\b', no_strings):
|
|
divisor = dm.group(2)
|
|
if divisor in ('2', '3', '4', '5', '8', '10', '16', '32', '64', '100', '128', '255', '256',
|
|
'1000', '1024', '2048', '4096', '0x', 'sizeof', 'MAX', 'CLOCKS_PER_SEC',
|
|
'MINUTE', 'HOUR', 'IN_MILLISECONDS', 'MAX_SERVER_PER_GROUP'):
|
|
continue
|
|
if divisor.isdigit():
|
|
continue
|
|
full_match_end = dm.end()
|
|
if full_match_end < len(no_strings) and no_strings[full_match_end] == '=':
|
|
continue
|
|
window = "".join(lines[max(0,i-5):i+1])
|
|
if re.search(rf'{re.escape(divisor)}\s*>\s*0|{re.escape(divisor)}\s*!=\s*0|{re.escape(divisor)}\s*>=\s*1|if\s*\(\s*{re.escape(divisor)}\s*\)', window):
|
|
continue
|
|
if re.search(rf'{re.escape(divisor)}\s*[>!]=?\s*0\s*\?', stripped) or re.search(rf'{re.escape(divisor)}\s*\?', stripped):
|
|
continue
|
|
conn.execute(
|
|
"INSERT INTO crash_risks (file, line, risk_type, severity, expression, context) VALUES (?,?,?,?,?,?)",
|
|
(_rel_name(f, base_dir), i+1, 'div_zero', 'critical', f'/ {divisor}', stripped[:200])
|
|
)
|
|
|
|
# --- Use after Remove/SetTarget(nullptr) ---
|
|
if '->Remove()' in stripped:
|
|
obj_match = re.search(r'(\w+)->Remove\(\)', stripped)
|
|
if obj_match:
|
|
obj_name = obj_match.group(1)
|
|
for j in range(i+1, min(i+6, len(lines))):
|
|
next_line = lines[j].strip()
|
|
if next_line.startswith('//') or not next_line:
|
|
continue
|
|
if f'{obj_name}->' in next_line and 'return' not in lines[j-1].strip():
|
|
conn.execute(
|
|
"INSERT INTO crash_risks (file, line, risk_type, severity, expression, context) VALUES (?,?,?,?,?,?)",
|
|
(_rel_name(f, base_dir), j+1, 'use_after_free', 'critical', f'{obj_name}->... after Remove()', next_line[:200])
|
|
)
|
|
break
|
|
if 'return' in next_line or 'break' in next_line:
|
|
break
|
|
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_infinite_loop_risks(conn):
|
|
"""Detect potential infinite loop patterns.
|
|
|
|
Risk types:
|
|
- while_true_no_break: while(true/1)/for(;;) without break/return/goto in body
|
|
- retry_no_limit: Retry/reconnect loop without max attempt counter
|
|
- loop_var_unused: Loop condition variable never modified inside body
|
|
- recursive_no_base: Simple recursive call without visible base case
|
|
"""
|
|
# Patterns for unconditional loops
|
|
infinite_loop_re = re.compile(r'^\s*(?:while\s*\(\s*(?:true|1|TRUE)\s*\)|for\s*\(\s*;\s*;\s*\))\s*$')
|
|
# while(condition) — extract condition variable
|
|
while_var_re = re.compile(r'^\s*while\s*\(\s*(!?\s*)(\w+)\s*\)\s*$')
|
|
# Retry pattern keywords
|
|
retry_re = re.compile(r'(?:retry|reconnect|reattempt|try_again)', re.IGNORECASE)
|
|
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in ("*.cpp", "*.h"):
|
|
for filepath in base_dir.rglob(ext):
|
|
try:
|
|
with open(filepath, "r", encoding="utf-8", errors="ignore") as fh:
|
|
lines = fh.readlines()
|
|
_scan_infinite_loops_in_file(conn, _rel_name(filepath, base_dir), lines)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def _scan_infinite_loops_in_file(conn, filename, lines):
|
|
"""Scan a single file for infinite loop risks."""
|
|
infinite_loop_re = re.compile(r'^\s*(?:while\s*\(\s*(?:true|1|TRUE)\s*\)|for\s*\(\s*;\s*;\s*\))')
|
|
while_var_re = re.compile(r'^\s*while\s*\(\s*!?\s*(\w+)\s*\)')
|
|
retry_kw_re = re.compile(r'(?:retry|reconnect|reattempt|try.?again)', re.IGNORECASE)
|
|
|
|
i = 0
|
|
while i < len(lines):
|
|
line = lines[i]
|
|
stripped = line.strip()
|
|
|
|
# Skip comments
|
|
if stripped.startswith('//') or stripped.startswith('/*') or stripped.startswith('*') or not stripped:
|
|
i += 1
|
|
continue
|
|
|
|
# --- Pattern 1: while(true)/for(;;) without break/return ---
|
|
if infinite_loop_re.match(line):
|
|
body_end = _find_brace_end(lines, i)
|
|
if body_end is not None and body_end > i:
|
|
body = ''.join(lines[i+1:body_end])
|
|
body_no_comments = re.sub(r'//.*$', '', body, flags=re.MULTILINE)
|
|
body_no_comments = re.sub(r'/\*.*?\*/', '', body_no_comments, flags=re.DOTALL)
|
|
has_exit = bool(re.search(r'\b(?:break|return|goto|exit|throw|continue)\b', body_no_comments))
|
|
if not has_exit:
|
|
# Check for callback/event-driven patterns (acceptable)
|
|
is_main_loop = bool(re.search(r'(?:Sleep|sleep|select|poll|accept|recv|wait|WaitFor|SleepEx|std::this_thread)', body_no_comments))
|
|
severity = 'medium' if is_main_loop else 'critical'
|
|
conn.execute(
|
|
"INSERT INTO infinite_loop_risks (file, line, risk_type, severity, expression, context) VALUES (?,?,?,?,?,?)",
|
|
(filename, i+1, 'while_true_no_break', severity, stripped[:100], stripped[:200])
|
|
)
|
|
i = (body_end or i) + 1
|
|
continue
|
|
|
|
# --- Pattern 2: Retry loop without max attempts ---
|
|
if retry_kw_re.search(stripped) and ('while' in stripped or 'for' in stripped):
|
|
body_end = _find_brace_end(lines, i)
|
|
if body_end is not None and body_end > i:
|
|
body = ''.join(lines[i:body_end+1])
|
|
has_max = bool(re.search(r'(?:max|MAX|limit|count|attempt|tries|retry_count|num_retries)\s*[<>!=]|\+\+\s*\w*(?:count|attempt|tries|retry)', body, re.IGNORECASE))
|
|
if not has_max:
|
|
conn.execute(
|
|
"INSERT INTO infinite_loop_risks (file, line, risk_type, severity, expression, context) VALUES (?,?,?,?,?,?)",
|
|
(filename, i+1, 'retry_no_limit', 'high', stripped[:100], stripped[:200])
|
|
)
|
|
i = (body_end or i) + 1
|
|
continue
|
|
|
|
# --- Pattern 3: while(var) where var is never modified in body ---
|
|
m = while_var_re.match(line)
|
|
if m and '{' not in stripped:
|
|
var_name = m.group(1)
|
|
# Skip common safe patterns: iterators, pointers, known flags, streams, atomics
|
|
if var_name not in ('true', 'false', '1', '0', 'TRUE', 'FALSE', 'it', 'iter', 'result', 'row', 'node', 'current', 'next', 'p', 'ptr', 'pNode',
|
|
'ss', 'stream', 'iss', 'oss', 'in', 'out', 'file', 'fin', 'fout', 'input', 'output'):
|
|
body_end = _find_brace_end(lines, i)
|
|
if body_end is not None and body_end - i > 2 and body_end - i < 200:
|
|
body = ''.join(lines[i+1:body_end])
|
|
body_no_comments = re.sub(r'//.*$', '', body, flags=re.MULTILINE)
|
|
body_no_comments = re.sub(r'/\*.*?\*/', '', body_no_comments, flags=re.DOTALL)
|
|
# Check if variable is ever modified (assigned, incremented, decremented, or passed by ref)
|
|
var_modified = bool(re.search(
|
|
rf'\b{re.escape(var_name)}\b\s*(?:[+\-*/%&|^]?=|\+\+|\-\-)|'
|
|
rf'(?:\+\+|\-\-)\s*\b{re.escape(var_name)}\b|'
|
|
rf'>>\s*\b{re.escape(var_name)}\b|'
|
|
rf'&\s*\b{re.escape(var_name)}\b',
|
|
body_no_comments
|
|
))
|
|
has_break = bool(re.search(r'\b(?:break|return|goto)\b', body_no_comments))
|
|
# Skip atomic/stop-flag vars (modified from other threads)
|
|
is_atomic = bool(re.search(rf'(?:atomic|_stop|_running|_shutdown|_terminate)', var_name, re.IGNORECASE))
|
|
if not var_modified and not has_break and not is_atomic:
|
|
conn.execute(
|
|
"INSERT INTO infinite_loop_risks (file, line, risk_type, severity, expression, context) VALUES (?,?,?,?,?,?)",
|
|
(filename, i+1, 'loop_var_unused', 'high', f'while({var_name}) — never modified', stripped[:200])
|
|
)
|
|
|
|
i += 1
|
|
|
|
|
|
def _find_brace_end(lines, start_line):
|
|
"""Find the closing brace matching the first opening brace at or after start_line."""
|
|
depth = 0
|
|
found_open = False
|
|
for i in range(start_line, min(start_line + 500, len(lines))):
|
|
for ch in lines[i]:
|
|
if ch == '{':
|
|
depth += 1
|
|
found_open = True
|
|
elif ch == '}':
|
|
depth -= 1
|
|
if found_open and depth == 0:
|
|
return i
|
|
return None
|
|
|
|
|
|
def scan_dead_methods(conn):
|
|
"""Find methods declared in .h files that are never referenced in any .cpp file."""
|
|
# First collect all .cpp content into a single searchable index
|
|
all_cpp_content = {}
|
|
for _label, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for f in base_dir.rglob("*.cpp"):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
all_cpp_content[_rel_name(f, base_dir)] = fh.read()
|
|
except Exception:
|
|
pass
|
|
combined_cpp = "\n".join(all_cpp_content.values())
|
|
|
|
# Check each method from the methods table
|
|
rows = conn.execute("SELECT class_name, method_name, file, line_hint FROM methods").fetchall()
|
|
skip_methods = {'operator', 'instance', 'Instance', 'Get', 'Set', 'Is', 'Has',
|
|
'begin', 'end', 'size', 'empty', 'clear', 'push_back', 'erase',
|
|
'find', 'insert', 'remove', 'update', 'reset', 'init', 'load'}
|
|
for row in rows:
|
|
class_name, method_name, header, line_hint = row
|
|
if method_name in skip_methods or len(method_name) < 4:
|
|
continue
|
|
# Check if method_name appears anywhere in .cpp files
|
|
ref_count = combined_cpp.count(method_name)
|
|
if ref_count == 0:
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO dead_methods (class_name, method_name, header_file, header_line, ref_count) VALUES (?,?,?,?,?)",
|
|
(class_name, method_name, header, line_hint, 0)
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def scan_duplicate_blocks(conn):
|
|
"""Find duplicate code blocks (5+ consecutive identical lines) across files."""
|
|
BLOCK_SIZE = 5 # minimum consecutive lines to count as duplicate
|
|
block_hashes = defaultdict(list) # hash -> [(file, start_line, first_line_text)]
|
|
|
|
for project, base_dir, _exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for f in base_dir.rglob("*.cpp"):
|
|
try:
|
|
with open(f, "r", encoding="utf-8", errors="ignore") as fh:
|
|
lines = [l.strip() for l in fh.readlines()]
|
|
# Slide window of BLOCK_SIZE lines
|
|
for i in range(len(lines) - BLOCK_SIZE + 1):
|
|
block = lines[i:i+BLOCK_SIZE]
|
|
# Skip blocks that are mostly empty/trivial
|
|
non_empty = [l for l in block if l and l != '{' and l != '}' and l != 'break;' and not l.startswith('//')]
|
|
if len(non_empty) < 3:
|
|
continue
|
|
text = "\n".join(block)
|
|
h = hashlib.md5(text.encode()).hexdigest()
|
|
block_hashes[h].append((_rel_name(f, base_dir), i+1, block[0][:80]))
|
|
except Exception:
|
|
pass
|
|
|
|
# Only keep hashes that appear in different files
|
|
for h, locations in block_hashes.items():
|
|
files_set = set(loc[0] for loc in locations)
|
|
if len(files_set) < 2:
|
|
continue
|
|
# Insert cross-file pairs (limit to first 3 pairs per hash)
|
|
seen_pairs = set()
|
|
pairs = 0
|
|
for i, (fa, la, pa) in enumerate(locations):
|
|
for j, (fb, lb, pb) in enumerate(locations[i+1:], i+1):
|
|
if fa == fb:
|
|
continue
|
|
pair_key = tuple(sorted([fa, fb]))
|
|
if pair_key in seen_pairs:
|
|
continue
|
|
seen_pairs.add(pair_key)
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO duplicate_blocks (block_hash, file_a, line_a, file_b, line_b, line_count, preview) VALUES (?,?,?,?,?,?,?)",
|
|
(h, fa, la, fb, lb, BLOCK_SIZE, pa)
|
|
)
|
|
except Exception:
|
|
pass
|
|
pairs += 1
|
|
if pairs >= 3:
|
|
break
|
|
if pairs >= 3:
|
|
break
|
|
|
|
|
|
def scan_file_summaries(conn):
|
|
"""Generate one-line summaries for important source files."""
|
|
comment_re = re.compile(r'^\s*(?://|/\*)\s*(.+?)(?:\*/\s*)?$')
|
|
class_re = re.compile(r'^\s*class\s+(\w+)')
|
|
|
|
dirs = [(label, path) for label, path, _exts in SOURCE_DIRS]
|
|
|
|
for project, base_dir in dirs:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in ("*.cpp", "*.h"):
|
|
for filepath in base_dir.rglob(ext):
|
|
try:
|
|
summary = _extract_file_summary(filepath)
|
|
if summary:
|
|
category = _categorize_file(_rel_name(filepath, base_dir))
|
|
conn.execute(
|
|
"INSERT OR IGNORE INTO file_summaries (file, project, summary, category) VALUES (?,?,?,?)",
|
|
(_rel_name(filepath, base_dir), project, summary, category)
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def _extract_file_summary(filepath):
|
|
"""Extract a one-line summary from a source file."""
|
|
comment_re = re.compile(r'^\s*(?://|/\*)\s*(.+?)(?:\*/\s*)?$')
|
|
class_re = re.compile(r'^\s*class\s+(\w+)')
|
|
struct_re = re.compile(r'^\s*struct\s+(\w+)')
|
|
singleton_re = re.compile(r'SingletonInstance\((\w+)\)')
|
|
|
|
classes = []
|
|
singletons = []
|
|
first_comment = None
|
|
|
|
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
|
for lineno, line in enumerate(f, 1):
|
|
if lineno > 50:
|
|
break
|
|
stripped = line.strip()
|
|
# Skip blank lines and includes
|
|
if not stripped or stripped.startswith('#include') or stripped.startswith('#pragma'):
|
|
continue
|
|
# Capture first meaningful comment
|
|
if first_comment is None:
|
|
m = comment_re.match(line)
|
|
if m:
|
|
text = m.group(1).strip().rstrip('*/')
|
|
# Skip boilerplate
|
|
if len(text) > 10 and not text.startswith('Copyright') and not text.startswith('==='):
|
|
first_comment = text[:120]
|
|
# Track classes
|
|
m = class_re.match(line)
|
|
if m:
|
|
classes.append(m.group(1))
|
|
m = struct_re.match(line)
|
|
if m and not m.group(1).startswith('_'):
|
|
classes.append(m.group(1))
|
|
m = singleton_re.search(line)
|
|
if m:
|
|
singletons.append(m.group(1))
|
|
|
|
# Build summary
|
|
if singletons:
|
|
return f"Singleton: {', '.join(singletons[:3])}" + (f" — {first_comment}" if first_comment else "")
|
|
if first_comment:
|
|
return first_comment
|
|
if classes:
|
|
return f"Defines: {', '.join(classes[:4])}"
|
|
return None
|
|
|
|
|
|
def _categorize_file(filename):
|
|
"""Categorize a file based on its name."""
|
|
name = filename.lower()
|
|
if 'player' in name:
|
|
return 'player'
|
|
if 'monster' in name or 'ai_' in name:
|
|
return 'monster'
|
|
if 'event' in name or 'castle' in name or 'blood' in name or 'devil' in name:
|
|
return 'event'
|
|
if 'packet' in name or 'protocol' in name or 'handler' in name:
|
|
return 'network'
|
|
if 'database' in name or 'query' in name or 'mysql' in name or 'db' in name:
|
|
return 'db'
|
|
if 'item' in name or 'inventory' in name or 'shop' in name:
|
|
return 'item'
|
|
if 'guild' in name or 'party' in name or 'friend' in name:
|
|
return 'social'
|
|
if 'skill' in name or 'magic' in name or 'buff' in name:
|
|
return 'skill'
|
|
if 'world' in name or 'map' in name or 'viewport' in name:
|
|
return 'world'
|
|
if 'config' in name or 'setting' in name:
|
|
return 'config'
|
|
if 'log' in name or 'appender' in name:
|
|
return 'logging'
|
|
if 'quest' in name:
|
|
return 'quest'
|
|
return 'core'
|
|
|
|
|
|
SEMANTIC_TAG_RULES = [
|
|
("security", ("auth", "login", "secure", "password", "warehouse", "otp", "pin")),
|
|
("auth", ("auth", "login", "account", "session", "token")),
|
|
("warehouse", ("warehouse", "vault")),
|
|
("movement", ("move", "path", "teleport", "dir", "viewport", "position")),
|
|
("world-flag", ("world", "map", "terrain", "summon", "partymove", "party_move")),
|
|
("party", ("party", "member", "leader", "matching", "recruit")),
|
|
("boss-tab", ("neweventwindow", "bossfight", "ctrl+t", "ctrlt", "boss tab")),
|
|
("event", ("event", "castle", "blood", "devil", "kanturu", "raklion", "medusa", "kundun", "evomon")),
|
|
("ai", ("monsterai", "monstermovement", "ai_", "automata", "monster ai")),
|
|
("network-packet", ("packet", "protocol", "headcode", "serverlink", "sendpacket", "recv")),
|
|
("mix", ("mix", "chaos", "goblin")),
|
|
("inventory", ("inventory", "itembag", "item", "warehouse")),
|
|
("combat", ("attack", "damage", "skill", "combat", "threat")),
|
|
("db", ("query", "database", "mysql", "sql", "db")),
|
|
("quest", ("quest", "objective", "mission")),
|
|
("npc", ("npc", "guard", "merchant", "shop")),
|
|
]
|
|
|
|
HOT_PATH_TAGS = {"movement", "combat", "ai", "network-packet", "world-flag", "boss-tab"}
|
|
OPTIMIZATION_TAGS = {"movement", "combat", "ai", "db", "network-packet", "world-flag"}
|
|
SECURITY_TAGS = {"security", "auth", "warehouse"}
|
|
FRAGILE_TAGS = {"security", "auth", "warehouse", "boss-tab", "world-flag", "event", "mix"}
|
|
NETWORK_TAGS = {"network-packet", "boss-tab"}
|
|
|
|
|
|
def _collect_semantic_tags(*parts):
|
|
text = " ".join(str(part or "") for part in parts).lower()
|
|
tags = set()
|
|
for tag, keywords in SEMANTIC_TAG_RULES:
|
|
if any(keyword in text for keyword in keywords):
|
|
tags.add(tag)
|
|
return tags
|
|
|
|
|
|
def _emit_semantic_tag(conn, seen, entity_type, entity_name, file, line, tag, confidence, evidence):
|
|
key = (entity_type, entity_name, file, line or 0, tag)
|
|
if key in seen:
|
|
return
|
|
seen.add(key)
|
|
conn.execute(
|
|
"INSERT INTO semantic_tags (entity_type, entity_name, file, line, tag, confidence, evidence) VALUES (?,?,?,?,?,?,?)",
|
|
(entity_type, entity_name, file, line, tag, confidence, evidence[:240] if evidence else None)
|
|
)
|
|
|
|
|
|
def _project_relative_path(project, file_name):
|
|
if project == "Game":
|
|
return f"Game/{file_name}"
|
|
if project == "Common":
|
|
return f"Common/{file_name}"
|
|
if project == "Server_Link":
|
|
return f"Server_Link/{file_name}"
|
|
if project == "LoginServer":
|
|
return f"LoginServer/{file_name}"
|
|
if project == "ConnectServer":
|
|
return f"ConnectServer/{file_name}"
|
|
return file_name
|
|
|
|
|
|
def _project_base_dir(project):
|
|
for label, path, _exts in SOURCE_DIRS:
|
|
if label == project:
|
|
return path
|
|
# Fallback to legacy paths
|
|
if project == "Game":
|
|
return GAME_DIR
|
|
if project == "Common":
|
|
return COMMON_DIR
|
|
fallback = REPO_ROOT / project
|
|
if fallback.exists():
|
|
return fallback
|
|
return None
|
|
|
|
|
|
def _read_project_file_lines(project, file_name, cache):
|
|
key = (project, file_name)
|
|
if key in cache:
|
|
return cache[key]
|
|
|
|
base_dir = _project_base_dir(project)
|
|
if not base_dir:
|
|
cache[key] = None
|
|
return None
|
|
|
|
path = base_dir / file_name
|
|
if not path.exists():
|
|
cache[key] = None
|
|
return None
|
|
|
|
try:
|
|
with open(path, "r", encoding="utf-8", errors="ignore") as handle:
|
|
cache[key] = handle.readlines()
|
|
except Exception:
|
|
cache[key] = None
|
|
return cache[key]
|
|
|
|
|
|
def _strip_cpp_comments_and_strings(text):
|
|
result = []
|
|
i = 0
|
|
in_block_comment = False
|
|
in_line_comment = False
|
|
in_string = False
|
|
in_char = False
|
|
|
|
while i < len(text):
|
|
ch = text[i]
|
|
nxt = text[i + 1] if i + 1 < len(text) else ""
|
|
|
|
if in_block_comment:
|
|
if ch == "*" and nxt == "/":
|
|
in_block_comment = False
|
|
i += 2
|
|
else:
|
|
i += 1
|
|
continue
|
|
|
|
if in_line_comment:
|
|
if ch == "\n":
|
|
in_line_comment = False
|
|
result.append("\n")
|
|
i += 1
|
|
continue
|
|
|
|
if in_string:
|
|
if ch == "\\" and nxt:
|
|
i += 2
|
|
continue
|
|
if ch == '"':
|
|
in_string = False
|
|
i += 1
|
|
continue
|
|
|
|
if in_char:
|
|
if ch == "\\" and nxt:
|
|
i += 2
|
|
continue
|
|
if ch == "'":
|
|
in_char = False
|
|
i += 1
|
|
continue
|
|
|
|
if ch == "/" and nxt == "*":
|
|
in_block_comment = True
|
|
i += 2
|
|
continue
|
|
if ch == "/" and nxt == "/":
|
|
in_line_comment = True
|
|
i += 2
|
|
continue
|
|
if ch == '"':
|
|
in_string = True
|
|
i += 1
|
|
continue
|
|
if ch == "'":
|
|
in_char = True
|
|
i += 1
|
|
continue
|
|
|
|
result.append(ch)
|
|
i += 1
|
|
|
|
return "".join(result)
|
|
|
|
|
|
def _function_label(class_name, function_name):
|
|
return f"{class_name}::{function_name}" if class_name else function_name
|
|
|
|
|
|
def _extract_function_body_text(lines, start_line, end_line):
|
|
snippet = "".join(lines[start_line - 1:end_line])
|
|
if not snippet:
|
|
return ""
|
|
|
|
open_brace = snippet.find("{")
|
|
close_brace = snippet.rfind("}")
|
|
if open_brace >= 0 and close_brace > open_brace:
|
|
snippet = snippet[open_brace + 1:close_brace]
|
|
elif open_brace >= 0:
|
|
snippet = snippet[open_brace + 1:]
|
|
return snippet
|
|
|
|
|
|
CALL_SKIP_NAMES = frozenset({
|
|
"if", "else", "for", "while", "switch", "catch", "return", "sizeof", "alignof", "decltype",
|
|
"static_cast", "dynamic_cast", "reinterpret_cast", "const_cast", "ASSERT", "CHECK", "TEST_CASE",
|
|
"min", "max", "defined", "new", "delete", "noexcept", "throw"
|
|
})
|
|
|
|
|
|
def _get_call_skip_names():
|
|
"""Return the union of the global C++ skip names and the adapter-specific skip names."""
|
|
return CALL_SKIP_NAMES | LANG_ADAPTER.call_skip_names()
|
|
|
|
|
|
def _resolve_call_target(caller_row, scope_name, callee_name, by_name):
|
|
candidates = by_name.get(callee_name, [])
|
|
if not candidates:
|
|
return None, 0
|
|
|
|
if scope_name:
|
|
scoped = [row for row in candidates if (row["class_name"] or "") == scope_name]
|
|
if len(scoped) == 1:
|
|
return scoped[0], 95
|
|
|
|
same_class = [
|
|
row for row in candidates
|
|
if (row["class_name"] or "") == (caller_row["class_name"] or "") and caller_row["class_name"]
|
|
]
|
|
if len(same_class) == 1:
|
|
return same_class[0], 90
|
|
|
|
same_file = [row for row in candidates if row["file"] == caller_row["file"]]
|
|
if len(same_file) == 1:
|
|
return same_file[0], 85
|
|
|
|
same_project = [row for row in candidates if row["project"] == caller_row["project"]]
|
|
if len(same_project) == 1:
|
|
return same_project[0], 72
|
|
|
|
if len(candidates) == 1:
|
|
return candidates[0], 80
|
|
|
|
return None, 0
|
|
|
|
|
|
def scan_semantic_tags(conn):
|
|
"""Derive semantic tags from filenames, symbols, handlers, events, and summaries."""
|
|
seen = set()
|
|
|
|
file_rows = conn.execute(
|
|
"SELECT f.path, f.category, COALESCE(fs.summary, '') AS summary "
|
|
"FROM files f LEFT JOIN file_summaries fs ON f.path = fs.file"
|
|
).fetchall()
|
|
for row in file_rows:
|
|
file_name = row["path"]
|
|
tags = _collect_semantic_tags(file_name, row["category"], row["summary"])
|
|
for tag in tags:
|
|
_emit_semantic_tag(
|
|
conn, seen, "file", file_name, file_name, None, tag, 70,
|
|
f"file/category/summary matched: {file_name}"
|
|
)
|
|
|
|
class_rows = conn.execute("SELECT name, header, cpp_file, parent_class, category, description FROM classes").fetchall()
|
|
for row in class_rows:
|
|
file_name = row["cpp_file"] or row["header"] or ""
|
|
label = row["name"]
|
|
tags = _collect_semantic_tags(label, row["parent_class"], row["category"], row["description"], file_name)
|
|
for tag in tags:
|
|
_emit_semantic_tag(
|
|
conn, seen, "class", label, file_name or label, None, tag, 82,
|
|
f"class metadata matched: {label}"
|
|
)
|
|
|
|
method_rows = conn.execute("SELECT class_name, method_name, file, line_hint, category, description FROM methods").fetchall()
|
|
for row in method_rows:
|
|
label = f"{row['class_name']}::{row['method_name']}"
|
|
tags = _collect_semantic_tags(label, row["category"], row["description"], row["file"])
|
|
for tag in tags:
|
|
_emit_semantic_tag(
|
|
conn, seen, "method", label, row["file"], row["line_hint"], tag, 88,
|
|
f"method metadata matched: {label}"
|
|
)
|
|
|
|
function_rows = conn.execute("SELECT file, function_name, class_name, start_line, signature, project FROM function_index").fetchall()
|
|
for row in function_rows:
|
|
label = f"{row['class_name']}::{row['function_name']}" if row["class_name"] else row["function_name"]
|
|
tags = _collect_semantic_tags(label, row["signature"], row["file"], row["project"])
|
|
for tag in tags:
|
|
_emit_semantic_tag(
|
|
conn, seen, "function", label, row["file"], row["start_line"], tag, 92,
|
|
f"function signature matched: {row['signature'] or label}"
|
|
)
|
|
|
|
handler_rows = conn.execute(
|
|
"SELECT headcode_name, handler_method, source_file, handler_type, category, description FROM packet_handlers"
|
|
).fetchall()
|
|
for row in handler_rows:
|
|
label = f"{row['headcode_name']} {row['handler_method']}"
|
|
tags = _collect_semantic_tags(label, row["handler_type"], row["category"], row["description"], row["source_file"])
|
|
tags.add("network-packet")
|
|
for tag in tags:
|
|
_emit_semantic_tag(
|
|
conn, seen, "handler", label, row["source_file"], None, tag, 95,
|
|
f"packet handler matched: {label}"
|
|
)
|
|
|
|
event_rows = conn.execute("SELECT name, singleton_macro, cpp_file, header_file, def_file, ai_file, description FROM events").fetchall()
|
|
for row in event_rows:
|
|
label = row["name"]
|
|
tags = _collect_semantic_tags(
|
|
row["name"], row["singleton_macro"], row["cpp_file"], row["header_file"], row["def_file"], row["ai_file"], row["description"]
|
|
)
|
|
tags.add("event")
|
|
for tag in tags:
|
|
_emit_semantic_tag(
|
|
conn, seen, "event", label, row["cpp_file"] or row["header_file"] or label, None, tag, 96,
|
|
f"event metadata matched: {label}"
|
|
)
|
|
|
|
config_rows = conn.execute("SELECT path, description FROM config_files").fetchall()
|
|
for row in config_rows:
|
|
tags = _collect_semantic_tags(row["path"], row["description"], "config")
|
|
for tag in tags:
|
|
_emit_semantic_tag(
|
|
conn, seen, "config", row["path"], row["path"], None, tag, 72,
|
|
f"config path matched: {row['path']}"
|
|
)
|
|
|
|
|
|
def scan_semantic_profiles(conn):
|
|
"""Aggregate semantic tags into coarse risk/gain profiles for reasoning queries."""
|
|
rows = conn.execute(
|
|
"SELECT entity_type, entity_name, file, COALESCE(line, 0) AS line, tag, confidence, COALESCE(evidence, '') AS evidence "
|
|
"FROM semantic_tags ORDER BY entity_type, entity_name, file, line"
|
|
).fetchall()
|
|
|
|
grouped = {}
|
|
for row in rows:
|
|
key = (row["entity_type"], row["entity_name"], row["file"], row["line"])
|
|
grouped.setdefault(key, {"tags": set(), "evidence": []})
|
|
grouped[key]["tags"].add(row["tag"])
|
|
if row["evidence"]:
|
|
grouped[key]["evidence"].append(row["evidence"])
|
|
|
|
for (entity_type, entity_name, file_name, line), payload in grouped.items():
|
|
tags = payload["tags"]
|
|
evidence = "; ".join(payload["evidence"][:4])
|
|
lower_name = entity_name.lower()
|
|
lower_file = file_name.lower()
|
|
|
|
test_surface = 1 if ("test" in lower_name or "test" in lower_file) else 0
|
|
security_sensitive = 1 if tags & SECURITY_TAGS else 0
|
|
hot_path_candidate = 1 if tags & HOT_PATH_TAGS else 0
|
|
optimization_candidate = 1 if tags & OPTIMIZATION_TAGS else 0
|
|
network_surface = 1 if tags & NETWORK_TAGS else 0
|
|
fragile_surface = 1 if tags & FRAGILE_TAGS else 0
|
|
|
|
if entity_type in {"function", "method"} and any(token in lower_name for token in ("update", "process", "attack", "move", "send")):
|
|
hot_path_candidate = 1
|
|
optimization_candidate = 1
|
|
if entity_type == "handler":
|
|
network_surface = 1
|
|
fragile_surface = 1
|
|
if entity_type == "event":
|
|
fragile_surface = 1
|
|
|
|
risk_score = (
|
|
security_sensitive * 5 +
|
|
fragile_surface * 3 +
|
|
network_surface * 2 +
|
|
(1 - test_surface) * 1
|
|
)
|
|
gain_score = (
|
|
hot_path_candidate * 4 +
|
|
optimization_candidate * 3 +
|
|
network_surface * 2 +
|
|
(1 if entity_type in {"function", "method"} else 0)
|
|
)
|
|
|
|
conn.execute(
|
|
"INSERT INTO semantic_profiles (entity_type, entity_name, file, line, security_sensitive, hot_path_candidate, "
|
|
"optimization_candidate, network_surface, fragile_surface, test_surface, risk_score, gain_score, evidence) "
|
|
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
|
(
|
|
entity_type, entity_name, file_name, line or None,
|
|
security_sensitive, hot_path_candidate, optimization_candidate,
|
|
network_surface, fragile_surface, test_surface,
|
|
risk_score, gain_score, evidence[:240] if evidence else None
|
|
)
|
|
)
|
|
|
|
|
|
def scan_audit_coverage(conn):
|
|
"""Aggregate test/coverage/risk signals per file."""
|
|
file_rows = conn.execute(
|
|
"SELECT fl.file, fl.project FROM file_lines fl GROUP BY fl.file, fl.project ORDER BY fl.file"
|
|
).fetchall()
|
|
|
|
for row in file_rows:
|
|
file_name = row["file"]
|
|
project = row["project"]
|
|
unit_test_refs = conn.execute(
|
|
"SELECT COUNT(*) FROM edges WHERE target = ? AND edge_type = 'tests'",
|
|
(file_name,)
|
|
).fetchone()[0]
|
|
has_summary = 1 if conn.execute("SELECT 1 FROM file_summaries WHERE file = ? LIMIT 1", (file_name,)).fetchone() else 0
|
|
has_function_index = 1 if conn.execute("SELECT 1 FROM function_index WHERE file = ? LIMIT 1", (file_name,)).fetchone() else 0
|
|
todo_count = conn.execute("SELECT COUNT(*) FROM todos WHERE file = ?", (file_name,)).fetchone()[0]
|
|
crash_risk_count = conn.execute("SELECT COUNT(*) FROM crash_risks WHERE file = ?", (file_name,)).fetchone()[0]
|
|
null_risk_count = conn.execute("SELECT COUNT(*) FROM null_risks WHERE file = ?", (file_name,)).fetchone()[0]
|
|
duplicate_pair_count = conn.execute(
|
|
"SELECT COUNT(*) FROM duplicate_blocks WHERE file_a = ? OR file_b = ?",
|
|
(file_name, file_name)
|
|
).fetchone()[0]
|
|
dead_method_count = conn.execute("SELECT COUNT(*) FROM dead_methods WHERE header_file = ?", (file_name,)).fetchone()[0]
|
|
leak_risk_row = conn.execute("SELECT risk_score FROM leak_risks WHERE file = ? LIMIT 1", (file_name,)).fetchone()
|
|
leak_risk_score = leak_risk_row[0] if leak_risk_row else 0
|
|
semantic_row = conn.execute(
|
|
"SELECT COALESCE(MAX(risk_score), 0), COALESCE(MAX(gain_score), 0) FROM semantic_profiles WHERE file = ?",
|
|
(file_name,)
|
|
).fetchone()
|
|
semantic_risk_max, semantic_gain_max = semantic_row[0], semantic_row[1]
|
|
|
|
coverage_score = 20
|
|
coverage_score += min(unit_test_refs * 20, 40)
|
|
coverage_score += 15 if has_summary else 0
|
|
coverage_score += 15 if has_function_index else 0
|
|
coverage_score -= min(todo_count * 2, 10)
|
|
coverage_score -= min(crash_risk_count, 10)
|
|
coverage_score -= min(null_risk_count // 5, 10)
|
|
coverage_score -= min(duplicate_pair_count // 10, 10)
|
|
coverage_score = max(0, min(100, coverage_score))
|
|
|
|
notes = []
|
|
if unit_test_refs == 0:
|
|
notes.append("no unit test edges")
|
|
if crash_risk_count > 0:
|
|
notes.append(f"{crash_risk_count} crash risks")
|
|
if null_risk_count > 0:
|
|
notes.append(f"{null_risk_count} null risks")
|
|
if duplicate_pair_count > 0:
|
|
notes.append(f"{duplicate_pair_count} duplicate pairs")
|
|
|
|
conn.execute(
|
|
"INSERT INTO audit_coverage (file, project, unit_test_refs, has_summary, has_function_index, todo_count, "
|
|
"crash_risk_count, null_risk_count, duplicate_pair_count, dead_method_count, leak_risk_score, "
|
|
"semantic_risk_max, semantic_gain_max, coverage_score, notes) "
|
|
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
|
(
|
|
file_name, project, unit_test_refs, has_summary, has_function_index, todo_count,
|
|
crash_risk_count, null_risk_count, duplicate_pair_count, dead_method_count, leak_risk_score,
|
|
semantic_risk_max, semantic_gain_max, coverage_score, "; ".join(notes)[:240]
|
|
)
|
|
)
|
|
|
|
|
|
def scan_history_metrics(conn):
|
|
"""Aggregate per-file git history/churn signals."""
|
|
try:
|
|
result = subprocess.run(
|
|
[
|
|
"git", "log", "--date=short", "--pretty=format:__COMMIT__|%ad|%an|%s",
|
|
"--name-only", "--no-renames", "--", "Game", "Common", "Server_Link", "LoginServer", "ConnectServer"
|
|
],
|
|
cwd=str(REPO_ROOT),
|
|
capture_output=True,
|
|
text=True,
|
|
encoding="utf-8",
|
|
errors="ignore",
|
|
check=True,
|
|
)
|
|
except Exception:
|
|
return
|
|
|
|
cutoff = date.today() - timedelta(days=90)
|
|
bugfix_keywords = ("fix", "bug", "crash", "regress", "issue", "hotfix")
|
|
perf_keywords = ("perf", "optim", "speed", "fast", "hot path", "latency", "cache")
|
|
audit_keywords = ("audit", "test", "fuzz", "coverage", "validate", "harden", "security")
|
|
|
|
history = {}
|
|
commit_date = None
|
|
author = ""
|
|
subject = ""
|
|
|
|
for raw_line in result.stdout.splitlines():
|
|
line = raw_line.strip()
|
|
if not line:
|
|
continue
|
|
if line.startswith("__COMMIT__|"):
|
|
parts = line.split("|", 3)
|
|
if len(parts) == 4:
|
|
commit_date = datetime.strptime(parts[1], "%Y-%m-%d").date()
|
|
author = parts[2]
|
|
subject = parts[3].lower()
|
|
continue
|
|
|
|
file_name = os.path.basename(line)
|
|
if not file_name:
|
|
continue
|
|
|
|
entry = history.setdefault(file_name, {
|
|
"commit_count": 0,
|
|
"recent_commit_count": 0,
|
|
"authors": set(),
|
|
"bugfix_commits": 0,
|
|
"perf_commits": 0,
|
|
"audit_commits": 0,
|
|
"last_commit_date": None,
|
|
})
|
|
entry["commit_count"] += 1
|
|
if commit_date and commit_date >= cutoff:
|
|
entry["recent_commit_count"] += 1
|
|
if author:
|
|
entry["authors"].add(author)
|
|
if any(keyword in subject for keyword in bugfix_keywords):
|
|
entry["bugfix_commits"] += 1
|
|
if any(keyword in subject for keyword in perf_keywords):
|
|
entry["perf_commits"] += 1
|
|
if any(keyword in subject for keyword in audit_keywords):
|
|
entry["audit_commits"] += 1
|
|
if commit_date and (entry["last_commit_date"] is None or commit_date > entry["last_commit_date"]):
|
|
entry["last_commit_date"] = commit_date
|
|
|
|
for file_name, entry in history.items():
|
|
churn_score = (
|
|
entry["commit_count"] +
|
|
entry["recent_commit_count"] * 2 +
|
|
entry["bugfix_commits"] * 2 +
|
|
entry["perf_commits"] +
|
|
len(entry["authors"])
|
|
)
|
|
conn.execute(
|
|
"INSERT INTO history_metrics (file, commit_count, recent_commit_count, unique_authors, bugfix_commits, "
|
|
"perf_commits, audit_commits, last_commit_date, churn_score) VALUES (?,?,?,?,?,?,?,?,?)",
|
|
(
|
|
file_name,
|
|
entry["commit_count"],
|
|
entry["recent_commit_count"],
|
|
len(entry["authors"]),
|
|
entry["bugfix_commits"],
|
|
entry["perf_commits"],
|
|
entry["audit_commits"],
|
|
entry["last_commit_date"].isoformat() if entry["last_commit_date"] else None,
|
|
churn_score,
|
|
)
|
|
)
|
|
|
|
|
|
def scan_review_queue(conn):
|
|
"""Build actionable queues for review, hardening, and optimization."""
|
|
rows = conn.execute(
|
|
"SELECT a.file, a.project, a.coverage_score, a.unit_test_refs, a.crash_risk_count, a.null_risk_count, "
|
|
"a.semantic_risk_max, a.semantic_gain_max, a.notes, "
|
|
"COALESCE(h.churn_score, 0) AS churn_score, COALESCE(h.recent_commit_count, 0) AS recent_commit_count, "
|
|
"COALESCE(h.bugfix_commits, 0) AS bugfix_commits, "
|
|
"COALESCE(o.bus_factor_risk, 0) AS bus_factor_risk, COALESCE(o.primary_author_share, 0) AS primary_author_share, "
|
|
"COALESCE(tf.tested_functions, 0) AS tested_functions "
|
|
"FROM audit_coverage a "
|
|
"LEFT JOIN history_metrics h ON a.file = h.file "
|
|
"LEFT JOIN ownership_metrics o ON a.file = o.file "
|
|
"LEFT JOIN (SELECT target_file, COUNT(DISTINCT function_name || '|' || COALESCE(class_name,'')) AS tested_functions "
|
|
" FROM test_function_map GROUP BY target_file) tf ON a.file = tf.target_file"
|
|
).fetchall()
|
|
|
|
for row in rows:
|
|
file_name = row["file"]
|
|
coverage_score = row["coverage_score"]
|
|
semantic_risk = row["semantic_risk_max"]
|
|
semantic_gain = row["semantic_gain_max"]
|
|
churn_score = row["churn_score"]
|
|
recent_commit_count = row["recent_commit_count"]
|
|
crash_risk_count = row["crash_risk_count"]
|
|
null_risk_count = row["null_risk_count"]
|
|
bus_factor_risk = row["bus_factor_risk"]
|
|
tested_functions = row["tested_functions"]
|
|
|
|
if semantic_gain >= 7 and semantic_risk <= 3 and coverage_score >= 45:
|
|
priority = semantic_gain * 5 + coverage_score - semantic_risk * 3
|
|
conn.execute(
|
|
"INSERT INTO review_queue (file, queue_type, priority_score, rationale) VALUES (?,?,?,?)",
|
|
(file_name, "high_gain_low_risk", priority,
|
|
f"gain={semantic_gain} risk={semantic_risk} coverage={coverage_score}")
|
|
)
|
|
|
|
if semantic_risk >= 5 and coverage_score < 45:
|
|
priority = semantic_risk * 8 + max(0, 50 - coverage_score) + crash_risk_count * 3 + null_risk_count // 2
|
|
conn.execute(
|
|
"INSERT INTO review_queue (file, queue_type, priority_score, rationale) VALUES (?,?,?,?)",
|
|
(file_name, "audit_gap", priority,
|
|
f"risk={semantic_risk} coverage={coverage_score} crash={crash_risk_count} null={null_risk_count}")
|
|
)
|
|
|
|
if recent_commit_count >= 2 and coverage_score < 55:
|
|
priority = churn_score + max(0, 60 - coverage_score) + row["bugfix_commits"] * 3
|
|
conn.execute(
|
|
"INSERT INTO review_queue (file, queue_type, priority_score, rationale) VALUES (?,?,?,?)",
|
|
(file_name, "churn_watch", priority,
|
|
f"recent={recent_commit_count} churn={churn_score} coverage={coverage_score}")
|
|
)
|
|
|
|
if crash_risk_count > 0 or null_risk_count >= 10:
|
|
priority = crash_risk_count * 6 + null_risk_count // 2 + semantic_risk * 4
|
|
conn.execute(
|
|
"INSERT INTO review_queue (file, queue_type, priority_score, rationale) VALUES (?,?,?,?)",
|
|
(file_name, "stability_hotspot", priority,
|
|
f"crash={crash_risk_count} null={null_risk_count} risk={semantic_risk}")
|
|
)
|
|
|
|
if bus_factor_risk >= 5 and semantic_gain >= 7:
|
|
priority = bus_factor_risk * 8 + semantic_gain * 4 + max(0, 50 - coverage_score)
|
|
conn.execute(
|
|
"INSERT INTO review_queue (file, queue_type, priority_score, rationale) VALUES (?,?,?,?)",
|
|
(file_name, "ownership_risk", priority,
|
|
f"bus_factor={bus_factor_risk} gain={semantic_gain} coverage={coverage_score} share={row['primary_author_share']}")
|
|
)
|
|
|
|
if semantic_gain >= 7 and tested_functions == 0:
|
|
priority = semantic_gain * 6 + semantic_risk * 3 + max(0, 45 - coverage_score)
|
|
conn.execute(
|
|
"INSERT INTO review_queue (file, queue_type, priority_score, rationale) VALUES (?,?,?,?)",
|
|
(file_name, "untested_hotspot", priority,
|
|
f"gain={semantic_gain} risk={semantic_risk} tested_functions={tested_functions} coverage={coverage_score}")
|
|
)
|
|
|
|
|
|
def scan_ownership_metrics(conn):
|
|
"""Build coarse ownership/bus-factor signals from git blame."""
|
|
files = conn.execute(
|
|
"SELECT file, project FROM file_lines WHERE project IN ('Game', 'Common', 'Server_Link', 'LoginServer', 'ConnectServer')"
|
|
).fetchall()
|
|
|
|
for row in files:
|
|
file_name = row["file"]
|
|
rel_path = _project_relative_path(row["project"], file_name)
|
|
try:
|
|
result = subprocess.run(
|
|
["git", "blame", "--line-porcelain", "--", rel_path],
|
|
cwd=str(REPO_ROOT),
|
|
capture_output=True,
|
|
text=True,
|
|
encoding="utf-8",
|
|
errors="ignore",
|
|
check=True,
|
|
)
|
|
except Exception:
|
|
continue
|
|
|
|
author_lines = defaultdict(int)
|
|
total_lines = 0
|
|
most_recent = None
|
|
current_author = None
|
|
current_time = None
|
|
|
|
for raw_line in result.stdout.splitlines():
|
|
if raw_line.startswith("author "):
|
|
current_author = raw_line[7:].strip()
|
|
elif raw_line.startswith("author-time "):
|
|
try:
|
|
current_time = datetime.fromtimestamp(int(raw_line[12:].strip()), timezone.utc).date()
|
|
except Exception:
|
|
current_time = None
|
|
elif raw_line.startswith("\t"):
|
|
total_lines += 1
|
|
author_lines[current_author or "unknown"] += 1
|
|
if current_time and (most_recent is None or current_time > most_recent):
|
|
most_recent = current_time
|
|
|
|
if total_lines <= 0:
|
|
continue
|
|
|
|
primary_author, primary_lines = max(author_lines.items(), key=lambda item: item[1])
|
|
primary_share = int((primary_lines * 100) / total_lines)
|
|
author_count = len(author_lines)
|
|
bus_factor_risk = 0
|
|
if author_count <= 1:
|
|
bus_factor_risk += 5
|
|
elif author_count == 2:
|
|
bus_factor_risk += 2
|
|
if primary_share >= 80:
|
|
bus_factor_risk += 4
|
|
elif primary_share >= 60:
|
|
bus_factor_risk += 2
|
|
|
|
conn.execute(
|
|
"INSERT INTO ownership_metrics (file, primary_author, primary_author_share, author_count, blamed_lines, most_recent_line_date, bus_factor_risk) "
|
|
"VALUES (?,?,?,?,?,?,?)",
|
|
(
|
|
file_name, primary_author, primary_share, author_count, total_lines,
|
|
most_recent.isoformat() if most_recent else None, bus_factor_risk
|
|
)
|
|
)
|
|
|
|
|
|
def scan_test_function_map(conn):
|
|
"""Map tests to target functions using file edges and name mentions."""
|
|
target_mentions = {}
|
|
test_dir = REPO_ROOT / "UnitTests" / "Tests"
|
|
mention_re = re.compile(r'\b([A-Za-z_]\w*)::([A-Za-z_~]\w*)\b')
|
|
|
|
if test_dir.exists():
|
|
for filepath in test_dir.glob("*.cpp"):
|
|
try:
|
|
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
|
text = f.read()
|
|
target_mentions[_rel_name(filepath, test_dir)] = set(mention_re.findall(text))
|
|
except Exception:
|
|
target_mentions[_rel_name(filepath, test_dir)] = set()
|
|
|
|
edges = conn.execute("SELECT source, target FROM edges WHERE edge_type = 'tests'").fetchall()
|
|
for edge in edges:
|
|
test_file = edge["source"]
|
|
target_file = edge["target"]
|
|
|
|
# Whole-file coverage mapping
|
|
target_funcs = conn.execute(
|
|
"SELECT function_name, class_name FROM function_index WHERE file = ? ORDER BY start_line",
|
|
(target_file,)
|
|
).fetchall()
|
|
for func in target_funcs:
|
|
conn.execute(
|
|
"INSERT INTO test_function_map (test_file, target_file, function_name, class_name, mapping_type) VALUES (?,?,?,?,?)",
|
|
(test_file, target_file, func["function_name"], func["class_name"], "file_edge")
|
|
)
|
|
|
|
# Stronger mapping for explicit symbol mentions inside test source
|
|
mentions = target_mentions.get(test_file, set())
|
|
if not mentions:
|
|
continue
|
|
for class_name, function_name in mentions:
|
|
match = conn.execute(
|
|
"SELECT function_name, class_name FROM function_index WHERE file = ? AND function_name = ? AND COALESCE(class_name, '') = ? LIMIT 1",
|
|
(target_file, function_name, class_name)
|
|
).fetchone()
|
|
if match:
|
|
conn.execute(
|
|
"INSERT INTO test_function_map (test_file, target_file, function_name, class_name, mapping_type) VALUES (?,?,?,?,?)",
|
|
(test_file, target_file, function_name, class_name, "explicit_mention")
|
|
)
|
|
|
|
|
|
def scan_call_edges(conn):
|
|
"""Heuristically build a function call graph from indexed function bodies."""
|
|
rows = conn.execute(
|
|
"SELECT file, function_name, class_name, start_line, end_line, project FROM function_index ORDER BY file, start_line"
|
|
).fetchall()
|
|
by_name = defaultdict(list)
|
|
for row in rows:
|
|
by_name[row["function_name"]].append(row)
|
|
|
|
file_cache = {}
|
|
call_pats = LANG_ADAPTER.call_patterns()
|
|
plain_call_re = call_pats.get('plain', re.compile(r'\b([A-Za-z_~]\w*)\s*\('))
|
|
scoped_call_re = call_pats.get('scoped', re.compile(r'\b([A-Za-z_]\w*)::([A-Za-z_~]\w*)\s*\('))
|
|
member_call_re = call_pats.get('member', re.compile(r'(?:->|\.)\s*([A-Za-z_~]\w*)\s*\('))
|
|
skip_names = _get_call_skip_names()
|
|
|
|
for row in rows:
|
|
lines = _read_project_file_lines(row["project"], row["file"], file_cache)
|
|
if not lines:
|
|
continue
|
|
|
|
body = _extract_function_body_text(lines, row["start_line"], row["end_line"])
|
|
if not body:
|
|
continue
|
|
|
|
sanitized = LANG_ADAPTER.strip_comments_and_strings(body)
|
|
counts = defaultdict(int)
|
|
scoped_counts = {}
|
|
|
|
scoped_matches = scoped_call_re.findall(sanitized)
|
|
for match in scoped_matches:
|
|
if isinstance(match, tuple) and len(match) == 2:
|
|
scope_name, callee_name = match
|
|
else:
|
|
continue
|
|
if callee_name in skip_names:
|
|
continue
|
|
key = (scope_name, callee_name)
|
|
scoped_counts[key] = scoped_counts.get(key, 0) + 1
|
|
|
|
for callee_name in member_call_re.findall(sanitized):
|
|
if callee_name in skip_names:
|
|
continue
|
|
counts[callee_name] += 1
|
|
|
|
for callee_name in plain_call_re.findall(sanitized):
|
|
if callee_name in skip_names:
|
|
continue
|
|
counts[callee_name] += 1
|
|
|
|
caller_symbol = _function_label(row["class_name"], row["function_name"])
|
|
inserted = set()
|
|
|
|
for (scope_name, callee_name), call_count in scoped_counts.items():
|
|
target_row, confidence = _resolve_call_target(row, scope_name, callee_name, by_name)
|
|
if not target_row:
|
|
continue
|
|
callee_symbol = _function_label(target_row["class_name"], target_row["function_name"])
|
|
dedup = (callee_symbol, target_row["file"])
|
|
inserted.add(dedup)
|
|
conn.execute(
|
|
"INSERT OR REPLACE INTO call_edges (caller_symbol, caller_file, caller_project, callee_symbol, callee_file, callee_project, confidence, call_count, evidence) "
|
|
"VALUES (?,?,?,?,?,?,?,?,?)",
|
|
(
|
|
caller_symbol,
|
|
row["file"],
|
|
row["project"],
|
|
callee_symbol,
|
|
target_row["file"],
|
|
target_row["project"],
|
|
confidence,
|
|
call_count,
|
|
f"scoped-call:{scope_name}::{callee_name}"
|
|
)
|
|
)
|
|
|
|
for callee_name, call_count in counts.items():
|
|
target_row, confidence = _resolve_call_target(row, None, callee_name, by_name)
|
|
if not target_row:
|
|
continue
|
|
callee_symbol = _function_label(target_row["class_name"], target_row["function_name"])
|
|
dedup = (callee_symbol, target_row["file"])
|
|
if dedup in inserted:
|
|
continue
|
|
conn.execute(
|
|
"INSERT OR REPLACE INTO call_edges (caller_symbol, caller_file, caller_project, callee_symbol, callee_file, callee_project, confidence, call_count, evidence) "
|
|
"VALUES (?,?,?,?,?,?,?,?,?)",
|
|
(
|
|
caller_symbol,
|
|
row["file"],
|
|
row["project"],
|
|
callee_symbol,
|
|
target_row["file"],
|
|
target_row["project"],
|
|
confidence,
|
|
call_count,
|
|
f"heuristic-call:{callee_name}"
|
|
)
|
|
)
|
|
|
|
|
|
def scan_symbol_metadata(conn):
|
|
"""Aggregate symbol-level metadata for reasoning and prioritization."""
|
|
rows = conn.execute(
|
|
"SELECT file, function_name, class_name, start_line, end_line, signature, project FROM function_index ORDER BY file, start_line"
|
|
).fetchall()
|
|
|
|
file_cache = {}
|
|
file_summaries = {
|
|
row["file"]: row["summary"]
|
|
for row in conn.execute("SELECT file, summary FROM file_summaries").fetchall()
|
|
}
|
|
audit_scores = {
|
|
row["file"]: row["coverage_score"]
|
|
for row in conn.execute("SELECT file, coverage_score FROM audit_coverage").fetchall()
|
|
}
|
|
history_metrics = {
|
|
row["file"]: (row["recent_commit_count"], row["churn_score"])
|
|
for row in conn.execute("SELECT file, recent_commit_count, churn_score FROM history_metrics").fetchall()
|
|
}
|
|
|
|
entity_tags = defaultdict(set)
|
|
for row in conn.execute("SELECT entity_name, file, tag FROM semantic_tags").fetchall():
|
|
entity_tags[(row["entity_name"], row["file"])].add(row["tag"])
|
|
file_tags = defaultdict(set)
|
|
for row in conn.execute("SELECT entity_name, file, tag FROM semantic_tags WHERE entity_type = 'file'").fetchall():
|
|
file_tags[row["file"]].add(row["tag"])
|
|
|
|
profiles = {}
|
|
for row in conn.execute(
|
|
"SELECT entity_name, file, security_sensitive, hot_path_candidate, optimization_candidate, network_surface, fragile_surface, risk_score, gain_score, evidence "
|
|
"FROM semantic_profiles"
|
|
).fetchall():
|
|
profiles[(row["entity_name"], row["file"])] = row
|
|
|
|
incoming = defaultdict(int)
|
|
outgoing = defaultdict(int)
|
|
for row in conn.execute(
|
|
"SELECT caller_symbol, caller_file, callee_symbol, callee_file FROM call_edges"
|
|
).fetchall():
|
|
outgoing[(row["caller_symbol"], row["caller_file"])] += 1
|
|
incoming[(row["callee_symbol"], row["callee_file"])] += 1
|
|
|
|
loop_re = re.compile(r'\bfor\s*\(|\bwhile\s*\(|\bdo\b')
|
|
branch_re = re.compile(r'\bif\s*\(|\bswitch\s*\(|\?')
|
|
|
|
for row in rows:
|
|
symbol_name = _function_label(row["class_name"], row["function_name"])
|
|
key = (symbol_name, row["file"])
|
|
lines = _read_project_file_lines(row["project"], row["file"], file_cache)
|
|
body = ""
|
|
if lines:
|
|
body = _extract_function_body_text(lines, row["start_line"], row["end_line"])
|
|
sanitized = _strip_cpp_comments_and_strings(body)
|
|
|
|
tags = set(file_tags.get(row["file"], set()))
|
|
tags.update(entity_tags.get(key, set()))
|
|
profile = profiles.get(key)
|
|
line_span = row["end_line"] - row["start_line"] + 1
|
|
loop_count = len(loop_re.findall(sanitized))
|
|
branch_count = len(branch_re.findall(sanitized))
|
|
caller_count = incoming.get(key, 0)
|
|
callee_count = outgoing.get(key, 0)
|
|
lower_symbol = symbol_name.lower()
|
|
lower_file = row["file"].lower()
|
|
|
|
hot_path = int(
|
|
(profile["hot_path_candidate"] if profile else 0) or
|
|
bool(tags & HOT_PATH_TAGS) or
|
|
caller_count >= 3 or
|
|
(loop_count >= 2 and line_span >= 40)
|
|
)
|
|
ct_sensitive = int(
|
|
(profile["security_sensitive"] if profile else 0) or
|
|
bool(tags & SECURITY_TAGS) or
|
|
any(token in lower_symbol for token in ("auth", "login", "password", "secure", "warehouse", "token", "session"))
|
|
)
|
|
batchable = int(
|
|
any(token in lower_symbol for token in ("batch", "scan", "verify", "broadcast", "sendall", "foreach")) or
|
|
(loop_count >= 2 and caller_count >= 2)
|
|
)
|
|
gpu_candidate = int(
|
|
(batchable and any(token in lower_symbol for token in ("batch", "scan", "verify", "field", "scalar", "point", "hash"))) or
|
|
("cuda" in lower_file or lower_file.endswith(".cu"))
|
|
)
|
|
|
|
base_risk = profile["risk_score"] if profile else 0
|
|
base_gain = profile["gain_score"] if profile else 0
|
|
audit_coverage_score = audit_scores.get(row["file"], 0)
|
|
recent_commit_count, churn_score = history_metrics.get(row["file"], (0, 0))
|
|
change_frequency = recent_commit_count + churn_score
|
|
|
|
risk_score = base_risk + (4 if ct_sensitive else 0) + min(branch_count, 6) + (2 if tags & FRAGILE_TAGS else 0) + (1 if line_span > 120 else 0)
|
|
gain_score = base_gain + (4 if hot_path else 0) + min(loop_count * 2, 6) + min(caller_count, 6) + (2 if batchable else 0) + (2 if gpu_candidate else 0)
|
|
|
|
if risk_score >= 12:
|
|
risk_level = "critical"
|
|
elif risk_score >= 8:
|
|
risk_level = "high"
|
|
elif risk_score >= 4:
|
|
risk_level = "medium"
|
|
else:
|
|
risk_level = "low"
|
|
|
|
reasons = []
|
|
if hot_path:
|
|
reasons.append("hot-path heuristic matched")
|
|
if ct_sensitive:
|
|
reasons.append("security-sensitive symbol or file")
|
|
if batchable:
|
|
reasons.append("loop-heavy or batch-style workload")
|
|
if gpu_candidate:
|
|
reasons.append("candidate for parallel/offload review")
|
|
if caller_count >= 3:
|
|
reasons.append(f"high fan-in via {caller_count} callers")
|
|
if audit_coverage_score < 45:
|
|
reasons.append(f"low file coverage score {audit_coverage_score}")
|
|
if change_frequency >= 10:
|
|
reasons.append(f"recent churn signal {change_frequency}")
|
|
if not reasons:
|
|
reasons.append("baseline semantic metadata")
|
|
|
|
review_priority = gain_score * 4 + max(0, 60 - audit_coverage_score) // 5 + min(caller_count, 6) - (risk_score // 3)
|
|
summary = row["signature"] or file_summaries.get(row["file"], "") or symbol_name
|
|
|
|
conn.execute(
|
|
"INSERT OR REPLACE INTO symbol_metadata (symbol_name, file_path, project, class_name, summary, semantic_tags, hot_path, ct_sensitive, batchable, gpu_candidate, "
|
|
"risk_level, review_priority, risk_score, gain_score, audit_coverage_score, change_frequency, line_span, loop_count, branch_count, caller_count, callee_count, reasons) "
|
|
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
|
(
|
|
symbol_name,
|
|
row["file"],
|
|
row["project"],
|
|
row["class_name"],
|
|
summary[:240],
|
|
", ".join(sorted(tags))[:240],
|
|
hot_path,
|
|
ct_sensitive,
|
|
batchable,
|
|
gpu_candidate,
|
|
risk_level,
|
|
review_priority,
|
|
risk_score,
|
|
gain_score,
|
|
audit_coverage_score,
|
|
change_frequency,
|
|
line_span,
|
|
loop_count,
|
|
branch_count,
|
|
caller_count,
|
|
callee_count,
|
|
json.dumps(reasons[:8])
|
|
)
|
|
)
|
|
|
|
|
|
def scan_analysis_scores(conn):
|
|
"""Score symbols for proactive bottleneck and audit queue generation."""
|
|
rows = conn.execute(
|
|
"SELECT symbol_name, file_path, hot_path, ct_sensitive, batchable, gpu_candidate, risk_level, review_priority, risk_score, gain_score, "
|
|
"audit_coverage_score, change_frequency, line_span, loop_count, branch_count, caller_count, callee_count, reasons "
|
|
"FROM symbol_metadata"
|
|
).fetchall()
|
|
|
|
hot_name_tokens = ("mul", "verify", "scan", "hash", "point", "field", "batch", "move", "update", "send")
|
|
|
|
for row in rows:
|
|
lower_name = row["symbol_name"].lower()
|
|
hotness_score = (
|
|
(5 if row["hot_path"] else 0) +
|
|
min(row["loop_count"] * 2, 8) +
|
|
min(row["caller_count"], 8) +
|
|
(2 if row["line_span"] > 80 else 0) +
|
|
(3 if any(token in lower_name for token in hot_name_tokens) else 0)
|
|
)
|
|
complexity_score = row["branch_count"] + row["loop_count"] * 2 + (row["line_span"] // 40) + min(row["callee_count"], 6)
|
|
fanin_score = row["caller_count"]
|
|
fanout_score = row["callee_count"]
|
|
optimization_score = row["gain_score"] + (3 if row["batchable"] else 0) + (2 if row["hot_path"] else 0)
|
|
gpu_score = (4 if row["gpu_candidate"] else 0) + (2 if row["batchable"] else 0) + min(row["loop_count"], 4) - (1 if row["ct_sensitive"] else 0)
|
|
ct_risk_score = (
|
|
(5 if row["ct_sensitive"] else 0) +
|
|
(3 if row["risk_level"] == "critical" else 2 if row["risk_level"] == "high" else 1 if row["risk_level"] == "medium" else 0) +
|
|
(2 if row["audit_coverage_score"] < 40 else 0) +
|
|
(2 if complexity_score > 10 else 0)
|
|
)
|
|
audit_gap_score = max(0, 60 - row["audit_coverage_score"]) // 10
|
|
if row["hot_path"] and row["audit_coverage_score"] < 50:
|
|
audit_gap_score += 2
|
|
if row["ct_sensitive"] and row["audit_coverage_score"] < 60:
|
|
audit_gap_score += 2
|
|
|
|
perf_priority = hotness_score * 4 + fanin_score * 2 + gpu_score * 3
|
|
safe_priority = row["risk_score"] * 3 + ct_risk_score * 2 + audit_gap_score * 3
|
|
overall_priority = perf_priority + audit_gap_score * 3 - ct_risk_score * 2
|
|
|
|
reasons = []
|
|
try:
|
|
reasons.extend(json.loads(row["reasons"] or "[]"))
|
|
except Exception:
|
|
pass
|
|
if hotness_score >= 10:
|
|
reasons.append(f"hotness={hotness_score}")
|
|
if gpu_score >= 6:
|
|
reasons.append(f"gpu_score={gpu_score}")
|
|
if audit_gap_score >= 3:
|
|
reasons.append(f"audit_gap={audit_gap_score}")
|
|
if ct_risk_score >= 6:
|
|
reasons.append(f"ct_risk={ct_risk_score}")
|
|
if not reasons:
|
|
reasons.append("baseline analysis score")
|
|
|
|
conn.execute(
|
|
"INSERT OR REPLACE INTO analysis_scores (symbol_name, file_path, hotness_score, complexity_score, fanin_score, fanout_score, optimization_score, "
|
|
"gpu_score, ct_risk_score, audit_gap_score, perf_priority, safe_priority, overall_priority, reasons) "
|
|
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
|
(
|
|
row["symbol_name"],
|
|
row["file_path"],
|
|
hotness_score,
|
|
complexity_score,
|
|
fanin_score,
|
|
fanout_score,
|
|
optimization_score,
|
|
gpu_score,
|
|
ct_risk_score,
|
|
audit_gap_score,
|
|
perf_priority,
|
|
safe_priority,
|
|
overall_priority,
|
|
json.dumps(reasons[:10])
|
|
)
|
|
)
|
|
|
|
|
|
def scan_symbol_audit_coverage(conn):
|
|
"""Build symbol-level audit memory from tests, queues, and semantic evidence."""
|
|
queue_by_file = defaultdict(set)
|
|
for row in conn.execute("SELECT file, queue_type FROM review_queue").fetchall():
|
|
queue_by_file[row["file"]].add(row["queue_type"])
|
|
|
|
tests_by_symbol = defaultdict(lambda: {"count": 0, "types": set(), "tests": set()})
|
|
for row in conn.execute(
|
|
"SELECT target_file, function_name, class_name, mapping_type, test_file FROM test_function_map"
|
|
).fetchall():
|
|
symbol_name = _function_label(row["class_name"], row["function_name"])
|
|
key = (symbol_name, row["target_file"])
|
|
tests_by_symbol[key]["count"] += 1
|
|
tests_by_symbol[key]["types"].add(row["mapping_type"])
|
|
tests_by_symbol[key]["tests"].add(row["test_file"])
|
|
|
|
tag_rows = conn.execute("SELECT entity_name, file, tag FROM semantic_tags").fetchall()
|
|
tags_by_symbol = defaultdict(set)
|
|
for row in tag_rows:
|
|
tags_by_symbol[(row["entity_name"], row["file"])].add(row["tag"])
|
|
|
|
rows = conn.execute(
|
|
"SELECT symbol_name, file_path, risk_score, gain_score, audit_coverage_score, hot_path, ct_sensitive, reasons "
|
|
"FROM symbol_metadata"
|
|
).fetchall()
|
|
|
|
for row in rows:
|
|
key = (row["symbol_name"], row["file_path"])
|
|
test_info = tests_by_symbol.get(key, {"count": 0, "types": set(), "tests": set()})
|
|
covered_by_tests = 1 if test_info["count"] > 0 else 0
|
|
queue_types = queue_by_file.get(row["file_path"], set())
|
|
semantic_tags = tags_by_symbol.get(key, set())
|
|
|
|
audit_modules = set()
|
|
if row["ct_sensitive"]:
|
|
audit_modules.add("ct_review")
|
|
if row["hot_path"]:
|
|
audit_modules.add("perf_review")
|
|
if covered_by_tests:
|
|
audit_modules.add("unit_tests")
|
|
if "network-packet" in semantic_tags:
|
|
audit_modules.add("packet_surface")
|
|
if "security" in semantic_tags or "auth" in semantic_tags:
|
|
audit_modules.add("security_review")
|
|
|
|
coverage_score = row["audit_coverage_score"]
|
|
coverage_score += min(len(test_info["tests"]) * 10, 30)
|
|
coverage_score += 10 if "explicit_mention" in test_info["types"] else 0
|
|
coverage_score += 10 if row["ct_sensitive"] and "ct_review" in audit_modules else 0
|
|
coverage_score -= 10 if not covered_by_tests and row["hot_path"] else 0
|
|
coverage_score = max(0, min(100, coverage_score))
|
|
|
|
if coverage_score >= 75:
|
|
last_status = "covered"
|
|
elif coverage_score >= 45:
|
|
last_status = "partial"
|
|
else:
|
|
last_status = "gap"
|
|
|
|
historical_failures = len([queue for queue in queue_types if queue in {"audit_gap", "stability_hotspot", "untested_hotspot"}])
|
|
evidence = []
|
|
if covered_by_tests:
|
|
evidence.append(f"tests={len(test_info['tests'])}")
|
|
if test_info["types"]:
|
|
evidence.append(f"mappings={','.join(sorted(test_info['types']))}")
|
|
if queue_types:
|
|
evidence.append(f"queues={','.join(sorted(queue_types))}")
|
|
if audit_modules:
|
|
evidence.append(f"modules={','.join(sorted(audit_modules))}")
|
|
if not evidence:
|
|
evidence.append("no direct audit evidence")
|
|
|
|
conn.execute(
|
|
"INSERT OR REPLACE INTO symbol_audit_coverage (symbol_name, file_path, covered_by_tests, test_count, mapping_types, review_queue_types, audit_modules, coverage_score, last_status, historical_failures, evidence) "
|
|
"VALUES (?,?,?,?,?,?,?,?,?,?,?)",
|
|
(
|
|
row["symbol_name"],
|
|
row["file_path"],
|
|
covered_by_tests,
|
|
len(test_info["tests"]),
|
|
", ".join(sorted(test_info["types"])),
|
|
", ".join(sorted(queue_types)),
|
|
", ".join(sorted(audit_modules)),
|
|
coverage_score,
|
|
last_status,
|
|
historical_failures,
|
|
"; ".join(evidence)[:240]
|
|
)
|
|
)
|
|
|
|
|
|
def scan_ai_tasks(conn):
|
|
"""Generate a proactive AI task queue from bottlenecks and audit gaps."""
|
|
rows = conn.execute(
|
|
"SELECT b.symbol_name, b.file_path, b.overall_priority, b.perf_priority, b.safe_priority, b.hotness_score, b.gpu_score, b.ct_risk_score, b.audit_gap_score, "
|
|
"b.semantic_tags, b.summary, s.coverage_score, s.last_status, sm.batchable, sm.gpu_candidate, sm.ct_sensitive "
|
|
"FROM v_bottleneck_queue b "
|
|
"LEFT JOIN symbol_audit_coverage s ON s.symbol_name = b.symbol_name AND s.file_path = b.file_path "
|
|
"LEFT JOIN symbol_metadata sm ON sm.symbol_name = b.symbol_name AND sm.file_path = b.file_path "
|
|
"ORDER BY b.overall_priority DESC, b.hotness_score DESC"
|
|
).fetchall()
|
|
|
|
created = set()
|
|
for row in rows:
|
|
tasks = []
|
|
if row["overall_priority"] >= 45:
|
|
tasks.append((
|
|
"optimize",
|
|
row["overall_priority"],
|
|
f"Analyze {row['symbol_name']} as a performance bottleneck. Focus on loop structure, call fan-in, memory layout, and low-risk simplifications. Preserve gameplay behavior and constant-time constraints where applicable.",
|
|
f"priority={row['overall_priority']} hot={row['hotness_score']} summary={row['summary'] or row['symbol_name']}"
|
|
))
|
|
if row["audit_gap_score"] >= 3 or (row["last_status"] or "unknown") == "gap":
|
|
tasks.append((
|
|
"audit_expand",
|
|
max(row["safe_priority"], row["overall_priority"]),
|
|
f"Expand audit coverage for {row['symbol_name']}. Focus on edge cases, failure handling, missing tests, and crash-sensitive branches in {row['file_path']}.",
|
|
f"audit_gap={row['audit_gap_score']} coverage={row['coverage_score']} status={row['last_status']}"
|
|
))
|
|
if row["ct_sensitive"] and row["ct_risk_score"] >= 6:
|
|
tasks.append((
|
|
"ct_review",
|
|
row["safe_priority"] + 5,
|
|
f"Review {row['symbol_name']} for constant-time and security-sensitive behavior. Check control-flow divergence, secret-dependent branching, and correctness under edge conditions.",
|
|
f"ct_risk={row['ct_risk_score']} tags={row['semantic_tags'] or ''}"
|
|
))
|
|
if row["gpu_candidate"] and row["gpu_score"] >= 6:
|
|
tasks.append((
|
|
"gpu_candidate",
|
|
row["perf_priority"] + 3,
|
|
f"Evaluate {row['symbol_name']} as a GPU or parallel batch candidate. Focus on batchability, data independence, and acceptable risk for offload or job parallelism.",
|
|
f"gpu_score={row['gpu_score']} batchable={row['batchable']}"
|
|
))
|
|
|
|
for task_type, priority, prompt, rationale in tasks:
|
|
dedup = (task_type, row["symbol_name"], row["file_path"])
|
|
if dedup in created:
|
|
continue
|
|
created.add(dedup)
|
|
conn.execute(
|
|
"INSERT OR REPLACE INTO ai_tasks (task_type, symbol_name, file_path, prompt, status, priority, created_at, rationale) VALUES (?,?,?,?,?,?,?,?)",
|
|
(
|
|
task_type,
|
|
row["symbol_name"],
|
|
row["file_path"],
|
|
prompt,
|
|
"pending",
|
|
priority,
|
|
datetime.now(timezone.utc).isoformat(),
|
|
rationale[:240]
|
|
)
|
|
)
|
|
|
|
|
|
def scan_function_index(conn):
|
|
"""Scan source files for function definitions with exact line ranges."""
|
|
dirs = [(label, path) for label, path, _exts in SOURCE_DIRS]
|
|
|
|
count = 0
|
|
for project, base_dir in dirs:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in ("*.cpp", "*.h"):
|
|
for filepath in base_dir.rglob(ext):
|
|
try:
|
|
n = _scan_file_functions(conn, filepath, project, base_dir)
|
|
count += n
|
|
except Exception:
|
|
pass
|
|
return count
|
|
|
|
|
|
def _scan_file_functions(conn, filepath, project, base_dir=None):
|
|
"""Extract function definitions from a single file with line ranges."""
|
|
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
|
lines = f.readlines()
|
|
|
|
if not lines:
|
|
return 0
|
|
|
|
fname = _rel_name(filepath, base_dir) if base_dir else filepath.name
|
|
count = 0
|
|
|
|
# Preprocess: strip block/line comments using the language adapter
|
|
cleaned = LANG_ADAPTER.strip_block_comments_from_lines(lines)
|
|
|
|
# Get patterns from adapter
|
|
func_sig_re = LANG_ADAPTER.function_sig_pattern()
|
|
ctor_re = LANG_ADAPTER.constructor_pattern()
|
|
skip_names = LANG_ADAPTER.function_skip_names()
|
|
|
|
if not func_sig_re:
|
|
return 0
|
|
|
|
i = 0
|
|
while i < len(cleaned):
|
|
line = cleaned[i]
|
|
stripped = line.strip()
|
|
|
|
# Skip empty, preprocessor, pure comments
|
|
if not stripped or stripped.startswith('#'):
|
|
i += 1
|
|
continue
|
|
|
|
# Try to match function signature
|
|
m = func_sig_re.match(line)
|
|
class_name = None
|
|
func_name = None
|
|
|
|
if m:
|
|
# Determine class_name and func_name from adapter pattern groups
|
|
if m.lastindex and m.lastindex >= 2:
|
|
class_name = m.group(1) # e.g. ClassName:: prefix for C++
|
|
func_name = m.group(2)
|
|
elif m.lastindex == 1:
|
|
func_name = m.group(1)
|
|
else:
|
|
func_name = stripped.split('(')[0].split()[-1] if '(' in stripped else None
|
|
elif ctor_re:
|
|
m = ctor_re.match(line)
|
|
if m:
|
|
class_name = m.group(1) if m.lastindex and m.lastindex >= 1 else None
|
|
func_name = m.group(1) if m.lastindex == 1 else m.group(2) if m.lastindex >= 2 else None
|
|
else:
|
|
i += 1
|
|
continue
|
|
else:
|
|
i += 1
|
|
continue
|
|
|
|
if not func_name or func_name in skip_names:
|
|
i += 1
|
|
continue
|
|
|
|
# Find the opening scope marker (brace for C++/Java/etc., colon for Python)
|
|
scope_line = LANG_ADAPTER.find_opening_scope(cleaned, i)
|
|
if scope_line is None or scope_line - i > 15:
|
|
i += 1
|
|
continue
|
|
|
|
# For brace-based languages, check it's not a forward declaration
|
|
if LANG_ADAPTER.is_declaration_not_definition(cleaned, i, scope_line):
|
|
i += 1
|
|
continue
|
|
|
|
# Found a function definition — find the end of its scope
|
|
start_line = i + 1 # 1-based
|
|
end_line = LANG_ADAPTER.find_scope_end(cleaned, scope_line)
|
|
if end_line is None:
|
|
i += 1
|
|
continue
|
|
|
|
end_line_1based = end_line + 1 # convert to 1-based
|
|
|
|
# Build signature from original (non-cleaned) lines
|
|
sig_parts = []
|
|
for si in range(i, min(scope_line + 1, len(lines))):
|
|
sig_parts.append(lines[si].strip())
|
|
signature = ' '.join(sig_parts)
|
|
# Trim at opening scope marker
|
|
for trim_ch in ('{', ':'):
|
|
brace_idx = signature.find(trim_ch)
|
|
if brace_idx > 0:
|
|
signature = signature[:brace_idx].strip()
|
|
break
|
|
if len(signature) > 200:
|
|
signature = signature[:200] + "..."
|
|
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO function_index (file, function_name, class_name, start_line, end_line, signature, project) VALUES (?,?,?,?,?,?,?)",
|
|
(fname, func_name, class_name, start_line, end_line_1based, signature, project)
|
|
)
|
|
count += 1
|
|
except Exception:
|
|
pass
|
|
|
|
# Store function body in snippet cache
|
|
try:
|
|
body_lines = lines[start_line - 1:end_line_1based]
|
|
body_text = "".join(body_lines)
|
|
body_hash = hashlib.sha256(body_text.encode("utf-8", errors="ignore")).hexdigest()
|
|
body_line_count = end_line_1based - start_line + 1
|
|
conn.execute(
|
|
"INSERT INTO function_bodies (file, function_name, class_name, start_line, end_line, body, body_hash, line_count, project) VALUES (?,?,?,?,?,?,?,?,?)",
|
|
(fname, func_name, class_name, start_line, end_line_1based, body_text, body_hash, body_line_count, project)
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
# Skip past this function
|
|
i = end_line + 1
|
|
|
|
return count
|
|
|
|
|
|
def _find_opening_brace(lines, start):
|
|
"""Find the line containing the opening '{' of a function definition."""
|
|
paren_depth = 0
|
|
found_open_paren = False
|
|
for i in range(start, min(start + 20, len(lines))):
|
|
for ch in lines[i]:
|
|
if ch == '(':
|
|
paren_depth += 1
|
|
found_open_paren = True
|
|
elif ch == ')':
|
|
paren_depth -= 1
|
|
elif ch == '{' and found_open_paren and paren_depth <= 0:
|
|
return i
|
|
elif ch == ';' and found_open_paren and paren_depth <= 0:
|
|
return None
|
|
return None
|
|
|
|
|
|
def _find_function_end(lines, brace_line):
|
|
"""Find the closing '}' of a function by counting braces from the opening '{' line."""
|
|
depth = 0
|
|
for i in range(brace_line, len(lines)):
|
|
for ch in lines[i]:
|
|
if ch == '{':
|
|
depth += 1
|
|
elif ch == '}':
|
|
depth -= 1
|
|
if depth == 0:
|
|
return i
|
|
return None
|
|
|
|
|
|
def scan_edges(conn):
|
|
"""Scan for relationships between files: tests, extends, implements."""
|
|
# 1. Test edges: UnitTests/Tests/*.cpp → Game/*.cpp or Game/*.h
|
|
test_dir = REPO_ROOT / "UnitTests" / "Tests"
|
|
if test_dir.exists():
|
|
include_re = re.compile(r'//\s*(?:Tests?\s+for|Reproduce\s+from)\s+(\S+\.(?:h|cpp))', re.IGNORECASE)
|
|
# Also try filename-based matching: BloodCastleDefTests.cpp → BloodCastle
|
|
test_name_re = re.compile(r'^(\w+?)(?:Def)?Tests?\.cpp$', re.IGNORECASE)
|
|
for filepath in test_dir.glob("*.cpp"):
|
|
test_file = _rel_name(filepath, test_dir)
|
|
targets_found = set()
|
|
# Check for explicit references in comments
|
|
try:
|
|
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
|
for lineno, line in enumerate(f, 1):
|
|
if lineno > 30:
|
|
break
|
|
m = include_re.search(line)
|
|
if m:
|
|
targets_found.add(m.group(1))
|
|
except Exception:
|
|
pass
|
|
|
|
# Filename-based matching
|
|
m = test_name_re.match(test_file)
|
|
if m:
|
|
base = m.group(1)
|
|
# Look for matching Game files
|
|
for candidate_ext in ('.cpp', '.h'):
|
|
candidate = GAME_DIR / (base + candidate_ext)
|
|
if candidate.exists():
|
|
targets_found.add(base + candidate_ext)
|
|
# Also try with "Def" suffix for definition headers
|
|
candidate2 = GAME_DIR / (base + "Def" + candidate_ext)
|
|
if candidate2.exists():
|
|
targets_found.add(base + "Def" + candidate_ext)
|
|
|
|
for target in targets_found:
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO edges (source, target, edge_type, detail) VALUES (?,?,?,?)",
|
|
(test_file, target, "tests", f"Unit test file")
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
# 2. Class hierarchy edges from classes table
|
|
rows = conn.execute("SELECT name, parent_class, header FROM classes WHERE parent_class IS NOT NULL AND parent_class != ''").fetchall()
|
|
for row in rows:
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO edges (source, target, edge_type, detail) VALUES (?,?,?,?)",
|
|
(row[0], row[1], "extends", f"in {row[2]}")
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def build_fts_index(conn):
|
|
"""Rebuild full-text search index from all tables."""
|
|
conn.execute("DELETE FROM fts_index")
|
|
|
|
# Singletons
|
|
for row in conn.execute("SELECT macro, class_name, header, category, description FROM singletons"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("singleton", f"{row[0]} {row[1]}", row[2], row[3] or "", row[4] or ""))
|
|
|
|
# Events
|
|
for row in conn.execute("SELECT name, cpp_file, singleton_macro, description FROM events"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("event", f"{row[0]} {row[2] or ''}", row[1] or "", "event", row[3] or ""))
|
|
|
|
# AI handlers
|
|
for row in conn.execute("SELECT file, target, description FROM ai_handlers"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("ai", row[1], row[0], "ai", row[2] or ""))
|
|
|
|
# Player files
|
|
for row in conn.execute("SELECT file, domain, key_methods, description FROM player_files"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("player_file", f"{row[1]} {row[2] or ''}", row[0], "player", row[3] or ""))
|
|
|
|
# Constants
|
|
for row in conn.execute("SELECT name, value, header, category, description FROM constants"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("constant", f"{row[0]}={row[1]}", row[2], row[3] or "", row[4] or ""))
|
|
|
|
# Packet handlers
|
|
for row in conn.execute("SELECT headcode_name, handler_method, source_file, handler_type, description FROM packet_handlers"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("handler", f"{row[0]} {row[1]}", row[2], row[3], row[4] or ""))
|
|
|
|
# Config files
|
|
for row in conn.execute("SELECT path, description FROM config_files"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("config", row[0], row[0], "config", row[1] or ""))
|
|
|
|
# Enums
|
|
for row in conn.execute("SELECT name, file, values_preview, category FROM enums"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("enum", row[0], row[1], row[3] or "", row[2] or ""))
|
|
|
|
# Structs
|
|
for row in conn.execute("SELECT name, file, fields_preview, category FROM structs"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("struct", row[0], row[1], row[3] or "", row[2] or ""))
|
|
|
|
# Classes (hierarchy)
|
|
for row in conn.execute("SELECT name, header, parent_class, project FROM classes"):
|
|
desc = f"extends {row[2]}" if row[2] else "base class"
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("class", row[0], row[1] or "", row[3] or "", desc))
|
|
|
|
# Methods
|
|
for row in conn.execute("SELECT class_name, method_name, file FROM methods"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("method", f"{row[0]}::{row[1]}", row[2], "method", ""))
|
|
|
|
# DB tables
|
|
seen_tables = set()
|
|
for row in conn.execute("SELECT DISTINCT table_name, source_file, query_type FROM db_tables"):
|
|
key = row[0]
|
|
if key not in seen_tables:
|
|
seen_tables.add(key)
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("db_table", row[0], row[1], "database", f"Used with {row[2] or 'query'}"))
|
|
|
|
# TODOs (only add summary per file, not every single one)
|
|
for row in conn.execute("SELECT file, todo_type, COUNT(*) as cnt FROM todos GROUP BY file, todo_type"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("todo", f"{row[1]}x{row[2]}", row[0], "todo", f"{row[2]} {row[1]} comments"))
|
|
|
|
# Prepared statements
|
|
for row in conn.execute("SELECT query_name, sql_text, connection_type, source_file FROM prepared_statements"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("prepared_stmt", row[0], row[3], "database", row[1][:100]))
|
|
|
|
# Config keys (deduplicated)
|
|
seen_keys = set()
|
|
for row in conn.execute("SELECT key_name, getter_type, source_file FROM config_keys"):
|
|
if row[0] not in seen_keys:
|
|
seen_keys.add(row[0])
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("config_key", row[0], row[2], "config", f"Read via {row[1]}"))
|
|
|
|
# Defines
|
|
for row in conn.execute("SELECT name, value, file, category FROM defines"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("define", f"{row[0]}={row[1]}", row[2], row[3] or "", ""))
|
|
|
|
# Leak risks
|
|
for row in conn.execute("SELECT file, project, risk_score, sample_lines FROM leak_risks WHERE risk_score > 0"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("leak_risk", f"risk={row[2]}", row[0], row[1], (row[3] or "")[:200]))
|
|
|
|
# NULL risks
|
|
for row in conn.execute("SELECT file, function_call, pointer_var, risk_type, context FROM null_risks"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("null_risk", row[1], row[0], row[3], (row[4] or "")[:200]))
|
|
|
|
# Raw pointers
|
|
for row in conn.execute("SELECT file, class_name, member_type, member_name FROM raw_pointers"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("raw_pointer", f"{row[1]}::{row[3]}", row[0], row[2], ""))
|
|
|
|
# Unsafe casts
|
|
for row in conn.execute("SELECT file, cast_expr, context FROM unsafe_casts"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("unsafe_cast", row[1], row[0], "", (row[2] or "")[:200]))
|
|
|
|
# Dead methods
|
|
for row in conn.execute("SELECT class_name, method_name, header_file FROM dead_methods"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("dead_method", f"{row[0]}::{row[1]}", row[2], "", ""))
|
|
|
|
# Infinite loop risks
|
|
for row in conn.execute("SELECT file, line, risk_type, severity, expression FROM infinite_loop_risks"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("loop_risk", f"{row[2]}:{row[4]}", row[0], row[3], f"L{row[1]} {row[2]}"))
|
|
|
|
# Duplicate blocks
|
|
for row in conn.execute("SELECT file_a, file_b, line_a, line_b, preview FROM duplicate_blocks"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("duplicate", row[4] or "", f"{row[0]}:{row[2]}", f"{row[1]}:{row[3]}", ""))
|
|
|
|
# File summaries
|
|
for row in conn.execute("SELECT file, project, summary, category FROM file_summaries"):
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("file_summary", row[0], row[0], row[3] or "", row[2] or ""))
|
|
|
|
# Research assets
|
|
for row in conn.execute("SELECT path, file_name, asset_type, title, summary, symbol_refs, protocol_refs, notes FROM research_assets"):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
(
|
|
"research_asset",
|
|
f"{row[1]} {row[3] or ''}",
|
|
row[0],
|
|
row[2] or "",
|
|
_trim_text(f"{row[4] or ''} symbols={row[5] or ''} protocols={row[6] or ''} {row[7] or ''}", 220),
|
|
),
|
|
)
|
|
|
|
# Research mentions
|
|
for row in conn.execute("SELECT asset_path, symbol, mention_type, context FROM research_mentions"):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("research_mention", row[1], row[0], row[2], row[3] or ""),
|
|
)
|
|
|
|
# Function index
|
|
for row in conn.execute("SELECT file, function_name, class_name, start_line, end_line, signature, project FROM function_index"):
|
|
label = f"{row[2]}::{row[1]}" if row[2] else row[1]
|
|
conn.execute("INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("function", label, row[0], row[6] or "", f"L{row[3]}-{row[4]} {(row[5] or '')[:100]}"))
|
|
|
|
# Semantic tags
|
|
for row in conn.execute("SELECT entity_type, entity_name, file, tag, confidence, evidence FROM semantic_tags"):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("semantic_tag", f"{row[3]} {row[1]}", row[2], row[0], f"confidence={row[4]} {row[5] or ''}")
|
|
)
|
|
|
|
# Semantic profiles
|
|
for row in conn.execute(
|
|
"SELECT entity_type, entity_name, file, risk_score, gain_score, security_sensitive, "
|
|
"hot_path_candidate, optimization_candidate, evidence FROM semantic_profiles"
|
|
):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
(
|
|
"semantic_profile",
|
|
f"{row[1]} risk={row[3]} gain={row[4]}",
|
|
row[2],
|
|
row[0],
|
|
f"security={row[5]} hot={row[6]} opt={row[7]} {row[8] or ''}"
|
|
)
|
|
)
|
|
|
|
# Audit coverage
|
|
for row in conn.execute(
|
|
"SELECT file, project, coverage_score, unit_test_refs, crash_risk_count, null_risk_count, notes FROM audit_coverage"
|
|
):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
(
|
|
"audit_coverage",
|
|
f"{row[0]} coverage={row[2]}",
|
|
row[0],
|
|
row[1] or "",
|
|
f"tests={row[3]} crash={row[4]} null={row[5]} {row[6] or ''}"
|
|
)
|
|
)
|
|
|
|
# History metrics
|
|
for row in conn.execute(
|
|
"SELECT file, commit_count, recent_commit_count, bugfix_commits, perf_commits, audit_commits, churn_score FROM history_metrics"
|
|
):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
(
|
|
"history_metrics",
|
|
f"{row[0]} churn={row[6]}",
|
|
row[0],
|
|
"history",
|
|
f"commits={row[1]} recent={row[2]} fixes={row[3]} perf={row[4]} audit={row[5]}"
|
|
)
|
|
)
|
|
|
|
# Review queue
|
|
for row in conn.execute("SELECT file, queue_type, priority_score, rationale FROM review_queue"):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("review_queue", f"{row[1]} priority={row[2]}", row[0], row[1], row[3] or "")
|
|
)
|
|
|
|
# Ownership metrics
|
|
for row in conn.execute(
|
|
"SELECT file, primary_author, primary_author_share, author_count, bus_factor_risk FROM ownership_metrics"
|
|
):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
(
|
|
"ownership_metrics",
|
|
f"{row[0]} owner={row[1] or 'unknown'}",
|
|
row[0],
|
|
"ownership",
|
|
f"share={row[2]} authors={row[3]} bus_factor_risk={row[4]}"
|
|
)
|
|
)
|
|
|
|
# Test function map
|
|
for row in conn.execute(
|
|
"SELECT test_file, target_file, function_name, class_name, mapping_type FROM test_function_map"
|
|
):
|
|
label = f"{row[3]}::{row[2]}" if row[3] else row[2]
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
("test_function_map", label, row[1], row[4], f"test={row[0]}")
|
|
)
|
|
|
|
# Call graph
|
|
for row in conn.execute(
|
|
"SELECT caller_symbol, caller_file, callee_symbol, callee_file, confidence, call_count, evidence FROM call_edges"
|
|
):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
(
|
|
"call_edge",
|
|
f"{row[0]} -> {row[2]}",
|
|
row[1],
|
|
"call-graph",
|
|
f"callee={row[3]} confidence={row[4]} count={row[5]} {row[6] or ''}"
|
|
)
|
|
)
|
|
|
|
# Symbol metadata
|
|
for row in conn.execute(
|
|
"SELECT symbol_name, file_path, semantic_tags, hot_path, ct_sensitive, batchable, gpu_candidate, risk_level, review_priority, reasons FROM symbol_metadata"
|
|
):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
(
|
|
"symbol_metadata",
|
|
row[0],
|
|
row[1],
|
|
row[7],
|
|
f"tags={row[2] or ''} hot={row[3]} ct={row[4]} batch={row[5]} gpu={row[6]} priority={row[8]} {row[9] or ''}"
|
|
)
|
|
)
|
|
|
|
# Analysis scores
|
|
for row in conn.execute(
|
|
"SELECT symbol_name, file_path, hotness_score, complexity_score, fanin_score, gpu_score, ct_risk_score, audit_gap_score, overall_priority, reasons FROM analysis_scores"
|
|
):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
(
|
|
"analysis_score",
|
|
f"{row[0]} priority={row[8]}",
|
|
row[1],
|
|
"bottleneck",
|
|
f"hot={row[2]} complexity={row[3]} fanin={row[4]} gpu={row[5]} ct={row[6]} gap={row[7]} {row[9] or ''}"
|
|
)
|
|
)
|
|
|
|
# Symbol audit coverage
|
|
for row in conn.execute(
|
|
"SELECT symbol_name, file_path, coverage_score, last_status, test_count, mapping_types, audit_modules, evidence FROM symbol_audit_coverage"
|
|
):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
(
|
|
"symbol_audit",
|
|
f"{row[0]} coverage={row[2]}",
|
|
row[1],
|
|
row[3],
|
|
f"tests={row[4]} mappings={row[5] or ''} modules={row[6] or ''} {row[7] or ''}"
|
|
)
|
|
)
|
|
|
|
# AI tasks
|
|
for row in conn.execute(
|
|
"SELECT task_type, symbol_name, file_path, status, priority, rationale FROM ai_tasks"
|
|
):
|
|
conn.execute(
|
|
"INSERT INTO fts_index (entity_type, name, file, category, description) VALUES (?,?,?,?,?)",
|
|
(
|
|
"ai_task",
|
|
f"{row[0]} {row[1]}",
|
|
row[2],
|
|
row[3],
|
|
f"priority={row[4]} {row[5] or ''}"
|
|
)
|
|
)
|
|
|
|
|
|
# ============================================================
|
|
# BUILD COMMAND
|
|
# ============================================================
|
|
|
|
def build():
|
|
"""Build the complete source graph database."""
|
|
print("[*] Creating database...")
|
|
conn = create_db()
|
|
|
|
print("[*] Recording graph metadata...")
|
|
populate_graph_metadata(conn)
|
|
|
|
if SEED_DATA:
|
|
print("[*] Populating seed data from config...")
|
|
populate_seed_data(conn)
|
|
else:
|
|
print("[*] Populating singletons (120+)...")
|
|
populate_singletons(conn)
|
|
print("[*] Populating Player files (28)...")
|
|
populate_player_files(conn)
|
|
print("[*] Populating events (35)...")
|
|
populate_events(conn)
|
|
print("[*] Populating AI handlers (43)...")
|
|
populate_ai_handlers(conn)
|
|
print("[*] Populating inventory scripts (12)...")
|
|
populate_inventory_scripts(conn)
|
|
print("[*] Populating constants (35+)...")
|
|
populate_constants(conn)
|
|
print("[*] Populating config files (20)...")
|
|
populate_config_files(conn)
|
|
print("[*] Populating packet handlers (40+)...")
|
|
populate_packet_handlers(conn)
|
|
|
|
print("[*] Scanning source files...")
|
|
scan_files(conn)
|
|
|
|
print("[*] Scanning #include dependencies...")
|
|
scan_includes(conn)
|
|
|
|
print("[*] Scanning singleton usage patterns...")
|
|
scan_singleton_usage(conn)
|
|
|
|
print("[*] Scanning enums...")
|
|
scan_enums(conn)
|
|
|
|
print("[*] Scanning structs/packets...")
|
|
scan_structs(conn)
|
|
|
|
print("[*] Scanning class hierarchy...")
|
|
scan_classes(conn)
|
|
|
|
print("[*] Scanning methods...")
|
|
scan_methods(conn)
|
|
|
|
print("[*] Scanning TODO/FIXME/HACK comments...")
|
|
scan_todos(conn)
|
|
|
|
print("[*] Scanning database table usage...")
|
|
scan_db_tables(conn)
|
|
|
|
print("[*] Scanning prepared statements...")
|
|
scan_prepared_statements(conn)
|
|
|
|
print("[*] Scanning config keys...")
|
|
scan_config_keys(conn)
|
|
|
|
print("[*] Scanning #define macros...")
|
|
scan_defines(conn)
|
|
|
|
print("[*] Counting file lines...")
|
|
scan_file_lines(conn)
|
|
|
|
print("[*] Analyzing memory leak risks...")
|
|
scan_leak_risks(conn)
|
|
|
|
print("[*] Scanning NULL-check risks...")
|
|
scan_null_risks(conn)
|
|
|
|
print("[*] Scanning raw pointer members...")
|
|
scan_raw_pointers(conn)
|
|
|
|
print("[*] Scanning C-style casts...")
|
|
scan_unsafe_casts(conn)
|
|
|
|
print("[*] Scanning crash risk patterns...")
|
|
scan_crash_risks(conn)
|
|
|
|
print("[*] Scanning infinite loop risks...")
|
|
scan_infinite_loop_risks(conn)
|
|
|
|
print("[*] Detecting dead methods...")
|
|
scan_dead_methods(conn)
|
|
|
|
print("[*] Detecting duplicate code blocks...")
|
|
scan_duplicate_blocks(conn)
|
|
|
|
print("[*] Generating file summaries...")
|
|
scan_file_summaries(conn)
|
|
|
|
print("[*] Indexing ReversingResearch assets...")
|
|
scan_research_assets(conn)
|
|
|
|
print("[*] Scanning function definitions (line ranges)...")
|
|
func_count = scan_function_index(conn)
|
|
print(f" Found {func_count} function definitions")
|
|
|
|
print("[*] Scanning edges (tests, extends)...")
|
|
scan_edges(conn)
|
|
|
|
print("[*] Scanning heuristic call graph...")
|
|
scan_call_edges(conn)
|
|
|
|
print("[*] Deriving semantic tags...")
|
|
scan_semantic_tags(conn)
|
|
|
|
print("[*] Aggregating semantic profiles...")
|
|
scan_semantic_profiles(conn)
|
|
|
|
print("[*] Aggregating audit coverage...")
|
|
scan_audit_coverage(conn)
|
|
|
|
print("[*] Aggregating git history metrics...")
|
|
scan_history_metrics(conn)
|
|
|
|
print("[*] Aggregating ownership metrics...")
|
|
scan_ownership_metrics(conn)
|
|
|
|
print("[*] Mapping tests to functions...")
|
|
scan_test_function_map(conn)
|
|
|
|
print("[*] Aggregating symbol metadata...")
|
|
scan_symbol_metadata(conn)
|
|
|
|
print("[*] Computing analysis scores...")
|
|
scan_analysis_scores(conn)
|
|
|
|
print("[*] Building review queue...")
|
|
scan_review_queue(conn)
|
|
|
|
print("[*] Building symbol audit coverage...")
|
|
scan_symbol_audit_coverage(conn)
|
|
|
|
print("[*] Generating AI task queue...")
|
|
scan_ai_tasks(conn)
|
|
|
|
print("[*] Building full-text search index...")
|
|
build_fts_index(conn)
|
|
|
|
print("[*] Updating file hashes...")
|
|
_update_file_hashes(conn)
|
|
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
db_size = DB_PATH.stat().st_size / 1024
|
|
print(f"\n[+] Source graph built: {DB_PATH}")
|
|
print(f"[+] Database size: {db_size:.0f} KB")
|
|
stats_cmd(show_header=False)
|
|
|
|
|
|
def build_incremental():
|
|
"""Incremental build: skip if no files changed, full rebuild with preserve if changed."""
|
|
if not DB_PATH.exists():
|
|
print("[!] No existing database. Running full build...")
|
|
build()
|
|
return
|
|
|
|
conn = sqlite3.connect(str(DB_PATH))
|
|
conn.row_factory = sqlite3.Row
|
|
|
|
# Check if file_hashes table exists
|
|
tables = {r[0] for r in conn.execute(
|
|
"SELECT name FROM sqlite_master WHERE type='table'"
|
|
).fetchall()}
|
|
if "file_hashes" not in tables:
|
|
print("[!] No file_hashes table. Running full build with preserve...")
|
|
conn.close()
|
|
_full_build_with_preserve()
|
|
return
|
|
|
|
print("[*] Computing changed files...")
|
|
changed_files = _compute_changed_files(conn)
|
|
conn.close()
|
|
|
|
if not changed_files:
|
|
print("[+] No files changed. Nothing to do.")
|
|
return
|
|
|
|
print(f"[*] {len(changed_files)} files changed. Rebuilding with preserve...")
|
|
_full_build_with_preserve()
|
|
|
|
|
|
def _full_build_with_preserve():
|
|
"""Full build that preserves persistent tables."""
|
|
conn = create_db(preserve_persistent=True)
|
|
populate_graph_metadata(conn)
|
|
|
|
if SEED_DATA:
|
|
populate_seed_data(conn)
|
|
else:
|
|
populate_singletons(conn)
|
|
populate_player_files(conn)
|
|
populate_events(conn)
|
|
populate_ai_handlers(conn)
|
|
populate_inventory_scripts(conn)
|
|
populate_constants(conn)
|
|
populate_config_files(conn)
|
|
populate_packet_handlers(conn)
|
|
|
|
# Run all scanners (same as build())
|
|
scan_files(conn)
|
|
scan_includes(conn)
|
|
scan_singleton_usage(conn)
|
|
scan_enums(conn)
|
|
scan_structs(conn)
|
|
scan_classes(conn)
|
|
scan_methods(conn)
|
|
scan_todos(conn)
|
|
scan_db_tables(conn)
|
|
scan_prepared_statements(conn)
|
|
scan_config_keys(conn)
|
|
scan_defines(conn)
|
|
scan_file_lines(conn)
|
|
scan_leak_risks(conn)
|
|
scan_null_risks(conn)
|
|
scan_raw_pointers(conn)
|
|
scan_unsafe_casts(conn)
|
|
scan_crash_risks(conn)
|
|
scan_infinite_loop_risks(conn)
|
|
scan_dead_methods(conn)
|
|
scan_duplicate_blocks(conn)
|
|
scan_file_summaries(conn)
|
|
scan_research_assets(conn)
|
|
scan_function_index(conn)
|
|
scan_edges(conn)
|
|
scan_call_edges(conn)
|
|
|
|
_rebuild_aggregation_tables(conn)
|
|
_update_file_hashes(conn)
|
|
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
db_size = DB_PATH.stat().st_size / 1024
|
|
print(f"\n[+] Full build (with preserve) complete: {DB_PATH}")
|
|
print(f"[+] Database size: {db_size:.0f} KB")
|
|
|
|
|
|
def _rebuild_aggregation_tables(conn):
|
|
"""Rebuild tables that aggregate across files (always safe to fully rebuild)."""
|
|
# Clear aggregation tables
|
|
agg_tables = [
|
|
"dead_methods", "duplicate_blocks", "semantic_tags", "semantic_profiles",
|
|
"audit_coverage", "history_metrics", "review_queue", "ownership_metrics",
|
|
"test_function_map", "symbol_metadata", "analysis_scores",
|
|
"symbol_audit_coverage", "ai_tasks", "edges",
|
|
]
|
|
tables = {r[0] for r in conn.execute(
|
|
"SELECT name FROM sqlite_master WHERE type='table'"
|
|
).fetchall()}
|
|
for t in agg_tables:
|
|
if t in tables:
|
|
conn.execute(f"DELETE FROM {t}")
|
|
conn.commit()
|
|
|
|
scan_dead_methods(conn)
|
|
scan_duplicate_blocks(conn)
|
|
scan_edges(conn)
|
|
scan_semantic_tags(conn)
|
|
scan_semantic_profiles(conn)
|
|
scan_audit_coverage(conn)
|
|
scan_history_metrics(conn)
|
|
scan_ownership_metrics(conn)
|
|
scan_test_function_map(conn)
|
|
scan_symbol_metadata(conn)
|
|
scan_analysis_scores(conn)
|
|
scan_review_queue(conn)
|
|
scan_symbol_audit_coverage(conn)
|
|
scan_ai_tasks(conn)
|
|
build_fts_index(conn)
|
|
|
|
|
|
def _compute_changed_files(conn):
|
|
"""Compare current files against stored hashes, return set of changed file names."""
|
|
changed = set()
|
|
|
|
# Load existing hashes (keyed by "label/filename")
|
|
stored = {}
|
|
try:
|
|
for row in conn.execute("SELECT file_path, content_hash, mtime_ns, size_bytes FROM file_hashes"):
|
|
stored[row["file_path"]] = (row["content_hash"], row["mtime_ns"], row["size_bytes"])
|
|
except Exception:
|
|
return changed
|
|
|
|
# Check all source files
|
|
current_keys = set()
|
|
for label, base_dir, exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in exts:
|
|
for f in base_dir.rglob(ext):
|
|
key = f"{label}/{_rel_name(f, base_dir)}"
|
|
current_keys.add(key)
|
|
try:
|
|
st = f.stat()
|
|
mtime_ns = int(st.st_mtime_ns)
|
|
size_bytes = st.st_size
|
|
except OSError:
|
|
changed.add(_rel_name(f, base_dir))
|
|
continue
|
|
|
|
if key in stored:
|
|
old_hash, old_mtime, old_size = stored[key]
|
|
if mtime_ns == old_mtime and size_bytes == old_size:
|
|
continue # Fast path: unchanged
|
|
# Size or mtime changed, check content hash
|
|
try:
|
|
content_hash = hashlib.sha256(f.read_bytes()).hexdigest()
|
|
except OSError:
|
|
changed.add(_rel_name(f, base_dir))
|
|
continue
|
|
if content_hash != old_hash:
|
|
changed.add(_rel_name(f, base_dir))
|
|
else:
|
|
changed.add(_rel_name(f, base_dir)) # New file
|
|
|
|
# Check for deleted files
|
|
for key in stored:
|
|
if key not in current_keys:
|
|
# Extract filename from "label/filename"
|
|
fname = key.split("/", 1)[-1] if "/" in key else key
|
|
changed.add(fname)
|
|
|
|
return changed
|
|
|
|
|
|
def _update_file_hashes(conn):
|
|
"""Update file_hashes table with current file state."""
|
|
conn.execute("DELETE FROM file_hashes")
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
|
|
for label, base_dir, exts in SOURCE_DIRS:
|
|
if not base_dir.exists():
|
|
continue
|
|
for ext in exts:
|
|
for f in base_dir.rglob(ext):
|
|
try:
|
|
st = f.stat()
|
|
content_hash = hashlib.sha256(f.read_bytes()).hexdigest()
|
|
key = f"{label}/{_rel_name(f, base_dir)}"
|
|
conn.execute(
|
|
"INSERT OR REPLACE INTO file_hashes (file_path, content_hash, mtime_ns, size_bytes, last_scan_at) VALUES (?,?,?,?,?)",
|
|
(key, content_hash, int(st.st_mtime_ns), st.st_size, now)
|
|
)
|
|
except OSError:
|
|
pass
|
|
conn.commit()
|
|
|
|
|
|
# ============================================================
|
|
# QUERY COMMANDS
|
|
# ============================================================
|
|
|
|
def get_conn():
|
|
if not DB_PATH.exists():
|
|
print("[!] Database not found. Run: python source_graph.py build")
|
|
sys.exit(1)
|
|
conn = sqlite3.connect(str(DB_PATH))
|
|
conn.row_factory = sqlite3.Row
|
|
return conn
|
|
|
|
|
|
def print_table(rows, columns):
|
|
"""Simple formatted table output."""
|
|
if not rows:
|
|
print(" (no results)")
|
|
return
|
|
widths = [len(c) for c in columns]
|
|
for row in rows:
|
|
for i, col in enumerate(columns):
|
|
val = str(row[col] or "")
|
|
widths[i] = max(widths[i], len(val))
|
|
|
|
header = " | ".join(c.ljust(widths[i]) for i, c in enumerate(columns))
|
|
sep = "-+-".join("-" * widths[i] for i in range(len(columns)))
|
|
print(f" {header}")
|
|
print(f" {sep}")
|
|
for row in rows:
|
|
line = " | ".join(str(row[col] or "").ljust(widths[i]) for i, col in enumerate(columns))
|
|
print(f" {line}")
|
|
print(f"\n ({len(rows)} results)")
|
|
|
|
|
|
def _compact_text(value, limit=96):
|
|
text = str(value or "").strip().replace("\n", " ")
|
|
text = re.sub(r"\s+", " ", text)
|
|
if len(text) <= limit:
|
|
return text
|
|
return text[: max(0, limit - 3)].rstrip() + "..."
|
|
|
|
|
|
def _print_compact_section(title, items):
|
|
if not items:
|
|
return
|
|
print(f"\n === {title} ===")
|
|
for item in items:
|
|
print(f" - {item}")
|
|
|
|
|
|
def _parse_budget_arg(raw_value, default_value):
|
|
try:
|
|
parsed = int(raw_value)
|
|
except (TypeError, ValueError):
|
|
return default_value
|
|
return max(20, min(parsed, 400))
|
|
|
|
|
|
def _file_snapshot_row(conn, file_name):
|
|
coverage = conn.execute(
|
|
"SELECT coverage_score, crash_risk_count, null_risk_count FROM audit_coverage WHERE file = ?",
|
|
(file_name,),
|
|
).fetchone()
|
|
history = conn.execute(
|
|
"SELECT churn_score, recent_commit_count FROM history_metrics WHERE file = ?",
|
|
(file_name,),
|
|
).fetchone()
|
|
tested = conn.execute(
|
|
"SELECT COUNT(DISTINCT test_file) FROM test_function_map WHERE target_file = ?",
|
|
(file_name,),
|
|
).fetchone()[0]
|
|
callers = conn.execute(
|
|
"SELECT COUNT(*) FROM call_edges WHERE callee_file = ?",
|
|
(file_name,),
|
|
).fetchone()[0]
|
|
symbols = conn.execute(
|
|
"SELECT COUNT(*) FROM symbol_metadata WHERE file_path = ?",
|
|
(file_name,),
|
|
).fetchone()[0]
|
|
return {
|
|
"file": file_name,
|
|
"coverage_score": coverage["coverage_score"] if coverage else 0,
|
|
"crash_risks": coverage["crash_risk_count"] if coverage else 0,
|
|
"null_risks": coverage["null_risk_count"] if coverage else 0,
|
|
"churn": history["churn_score"] if history else 0,
|
|
"recent_commits": history["recent_commit_count"] if history else 0,
|
|
"tests": tested,
|
|
"callers": callers,
|
|
"symbols": symbols,
|
|
}
|
|
|
|
|
|
def _compact_file_snapshot(conn, file_name):
|
|
row = _file_snapshot_row(conn, file_name)
|
|
return (
|
|
f"{row['file']} | cov={row['coverage_score']} callers={row['callers']} "
|
|
f"tests={row['tests']} churn={row['churn']} risks={row['crash_risks'] + row['null_risks']} "
|
|
f"symbols={row['symbols']}"
|
|
)
|
|
|
|
|
|
def _iter_candidate_symbols(conn, term, candidate_files, limit=16):
|
|
pattern = f"%{term}%"
|
|
params = [pattern, pattern, pattern, pattern]
|
|
file_clause = ""
|
|
if candidate_files:
|
|
placeholders = _sql_in(candidate_files)
|
|
file_clause = f" OR file_path IN ({placeholders})"
|
|
params.extend(candidate_files)
|
|
return conn.execute(
|
|
"SELECT symbol_name, file_path, semantic_tags, hot_path, ct_sensitive, batchable, gpu_candidate, "
|
|
"review_priority, risk_score, gain_score, audit_coverage_score "
|
|
"FROM symbol_metadata WHERE symbol_name LIKE ? OR file_path LIKE ? OR semantic_tags LIKE ? OR summary LIKE ?"
|
|
f"{file_clause} ORDER BY "
|
|
"CASE "
|
|
"WHEN symbol_name LIKE ? THEN 0 "
|
|
"WHEN file_path LIKE ? THEN 1 "
|
|
"WHEN semantic_tags LIKE ? THEN 2 "
|
|
"WHEN summary LIKE ? THEN 3 "
|
|
"ELSE 4 END, "
|
|
"review_priority DESC, gain_score DESC, symbol_name "
|
|
f"LIMIT {limit}",
|
|
params + [pattern, pattern, pattern, pattern],
|
|
).fetchall()
|
|
|
|
|
|
def _build_neighbor_file_scores(conn, candidate_files, limit=12):
|
|
if not candidate_files:
|
|
return []
|
|
placeholders = _sql_in(candidate_files)
|
|
dep_rows = conn.execute(
|
|
f"SELECT source_file, target_file FROM dependencies WHERE source_file IN ({placeholders}) OR target_file IN ({placeholders})",
|
|
candidate_files + candidate_files,
|
|
).fetchall()
|
|
call_rows = conn.execute(
|
|
f"SELECT caller_file, callee_file, call_count FROM call_edges WHERE caller_file IN ({placeholders}) OR callee_file IN ({placeholders})",
|
|
candidate_files + candidate_files,
|
|
).fetchall()
|
|
|
|
scores = defaultdict(lambda: {"dep_edges": 0, "call_edges": 0, "call_weight": 0})
|
|
candidate_set = set(candidate_files)
|
|
|
|
for row in dep_rows:
|
|
for name in (row["source_file"], row["target_file"]):
|
|
if name and name not in candidate_set:
|
|
scores[name]["dep_edges"] += 1
|
|
|
|
for row in call_rows:
|
|
for name in (row["caller_file"], row["callee_file"]):
|
|
if name and name not in candidate_set:
|
|
scores[name]["call_edges"] += 1
|
|
scores[name]["call_weight"] += row["call_count"] or 0
|
|
|
|
ranked = sorted(
|
|
scores.items(),
|
|
key=lambda item: (item[1]["call_weight"], item[1]["call_edges"], item[1]["dep_edges"], item[0]),
|
|
reverse=True,
|
|
)
|
|
return ranked[:limit]
|
|
|
|
|
|
def _file_focus_penalty(file_name):
|
|
if not file_name:
|
|
return 100
|
|
if file_name.startswith(("src/", "include/", "ufsecp/", "cpu/", "gpu/", "opencl/")):
|
|
return 0
|
|
if file_name.startswith(("apps/", "examples/", "benchmarks/")):
|
|
return 1
|
|
if file_name.startswith(("tests/", "test_", "docs/")):
|
|
return 3
|
|
if file_name.startswith(("shaders/", "esp32_test/")):
|
|
return 4
|
|
return 2
|
|
|
|
|
|
def _is_core_file(file_name):
|
|
if not file_name:
|
|
return False
|
|
return file_name.startswith(("src/", "include/", "ufsecp/", "cpu/", "gpu/", "opencl/"))
|
|
|
|
|
|
def _select_candidate_files(conn, term, limit=12, core_only=False):
|
|
candidate_files = _collect_candidate_files(conn, term, limit=max(limit * 2, 12 if core_only else limit))
|
|
if not core_only:
|
|
return candidate_files[:limit]
|
|
core_files = [file_name for file_name in candidate_files if _is_core_file(file_name)]
|
|
return (core_files or candidate_files)[:limit]
|
|
|
|
|
|
def _parse_compact_args(argv, default_budget):
|
|
args = [arg for arg in argv if arg]
|
|
core_only = False
|
|
filtered = []
|
|
for arg in args:
|
|
if arg == "--core":
|
|
core_only = True
|
|
continue
|
|
filtered.append(arg)
|
|
budget = default_budget
|
|
if filtered and filtered[-1].isdigit():
|
|
budget = filtered.pop()
|
|
return " ".join(filtered), budget, core_only
|
|
|
|
|
|
def _sql_in(values):
|
|
return ",".join("?" for _ in values)
|
|
|
|
|
|
def _collect_candidate_files(conn, term, limit=12):
|
|
pattern = f"%{term}%"
|
|
queries = [
|
|
("SELECT DISTINCT file FROM function_index WHERE function_name LIKE ? OR class_name LIKE ? OR file LIKE ? LIMIT 12", (pattern, pattern, pattern)),
|
|
("SELECT DISTINCT cpp_file AS file FROM classes WHERE name LIKE ? OR cpp_file LIKE ? LIMIT 8", (pattern, pattern)),
|
|
("SELECT DISTINCT header AS file FROM classes WHERE name LIKE ? OR header LIKE ? LIMIT 8", (pattern, pattern)),
|
|
("SELECT DISTINCT source_file AS file FROM packet_handlers WHERE headcode_name LIKE ? OR handler_method LIKE ? OR description LIKE ? LIMIT 8", (pattern, pattern, pattern)),
|
|
("SELECT DISTINCT source_file AS file FROM config_keys WHERE key_name LIKE ? OR source_file LIKE ? LIMIT 8", (pattern, pattern)),
|
|
("SELECT DISTINCT target_file AS file FROM test_function_map WHERE target_file LIKE ? OR function_name LIKE ? OR class_name LIKE ? LIMIT 8", (pattern, pattern, pattern)),
|
|
("SELECT DISTINCT caller_file AS file FROM call_edges WHERE caller_symbol LIKE ? OR callee_symbol LIKE ? OR caller_file LIKE ? OR callee_file LIKE ? LIMIT 8", (pattern, pattern, pattern, pattern)),
|
|
("SELECT DISTINCT callee_file AS file FROM call_edges WHERE caller_symbol LIKE ? OR callee_symbol LIKE ? OR caller_file LIKE ? OR callee_file LIKE ? LIMIT 8", (pattern, pattern, pattern, pattern)),
|
|
]
|
|
results = []
|
|
seen = set()
|
|
for query_rank, (query, params) in enumerate(queries):
|
|
for row in conn.execute(query, params).fetchall():
|
|
file_name = row["file"]
|
|
if not file_name or file_name in seen:
|
|
continue
|
|
seen.add(file_name)
|
|
direct_penalty = 0 if term.lower() in file_name.lower() else 1
|
|
results.append((direct_penalty, query_rank, _file_focus_penalty(file_name), file_name))
|
|
ranked = sorted(results, key=lambda item: (item[0], item[1], item[2], item[3]))
|
|
return [file_name for _, _, _, file_name in ranked[:limit]]
|
|
|
|
|
|
def _research_matches(conn, term, limit=20):
|
|
pattern = f"%{term}%"
|
|
return conn.execute(
|
|
"SELECT asset_path, symbol, mention_type, context FROM research_mentions "
|
|
"WHERE symbol LIKE ? OR context LIKE ? OR asset_path LIKE ? "
|
|
"ORDER BY CASE mention_type WHEN 'protocol' THEN 0 WHEN 'symbol' THEN 1 ELSE 2 END, symbol "
|
|
"LIMIT ?",
|
|
(pattern, pattern, pattern, limit),
|
|
).fetchall()
|
|
|
|
|
|
def find_cmd(term):
|
|
"""Search everything using FTS."""
|
|
conn = get_conn()
|
|
# FTS5 needs simple conn without row_factory for proper fetch
|
|
conn2 = sqlite3.connect(str(DB_PATH))
|
|
rows = conn2.execute(
|
|
"SELECT entity_type, name, file, category, description FROM fts_index WHERE fts_index MATCH ? ORDER BY rank LIMIT 30",
|
|
(term,)
|
|
).fetchall()
|
|
if not rows:
|
|
# Fallback to LIKE search
|
|
pattern = f"%{term}%"
|
|
rows = conn2.execute(
|
|
"SELECT entity_type, name, file, category, description FROM fts_index WHERE name LIKE ? OR description LIKE ? OR file LIKE ? LIMIT 30",
|
|
(pattern, pattern, pattern)
|
|
).fetchall()
|
|
conn2.close()
|
|
cols = ["entity_type", "name", "file", "category", "description"]
|
|
print(f"\n Search: '{term}'")
|
|
if not rows:
|
|
print(" (no results)")
|
|
return
|
|
# Manual table print for tuple rows
|
|
widths = [len(c) for c in cols]
|
|
for row in rows:
|
|
for i in range(len(cols)):
|
|
widths[i] = max(widths[i], len(str(row[i] or "")))
|
|
header = " | ".join(cols[i].ljust(widths[i]) for i in range(len(cols)))
|
|
sep = "-+-".join("-" * widths[i] for i in range(len(cols)))
|
|
print(f" {header}")
|
|
print(f" {sep}")
|
|
for row in rows:
|
|
line = " | ".join(str(row[i] or "").ljust(widths[i]) for i in range(len(cols)))
|
|
print(f" {line}")
|
|
print(f"\n ({len(rows)} results)")
|
|
|
|
|
|
def singleton_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT macro, class_name, header, project, category, description FROM singletons WHERE macro LIKE ? OR class_name LIKE ?",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Singletons matching '{name}':")
|
|
print_table(rows, ["macro", "class_name", "header", "project", "category", "description"])
|
|
|
|
|
|
def file_cmd(pattern):
|
|
conn = get_conn()
|
|
like = f"%{pattern}%"
|
|
rows = conn.execute(
|
|
"SELECT path, project, category FROM files WHERE path LIKE ?",
|
|
(like,)
|
|
).fetchall()
|
|
print(f"\n Files matching '{pattern}':")
|
|
print_table(rows, ["path", "project", "category"])
|
|
|
|
|
|
def handler_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT headcode_name, headcode_value, headcode_hex, handler_method, source_file, handler_type, description FROM packet_handlers WHERE headcode_name LIKE ? OR handler_method LIKE ? OR description LIKE ?",
|
|
(pattern, pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Handlers matching '{name}':")
|
|
print_table(rows, ["headcode_name", "headcode_hex", "handler_method", "source_file", "handler_type", "description"])
|
|
|
|
|
|
def event_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT name, singleton_macro, cpp_file, header_file, def_file, ai_file, description FROM events WHERE name LIKE ? OR description LIKE ?",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Events matching '{name}':")
|
|
print_table(rows, ["name", "singleton_macro", "cpp_file", "header_file", "def_file", "ai_file", "description"])
|
|
|
|
|
|
def method_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT class_name, method_name, file, category, description FROM methods WHERE method_name LIKE ? OR description LIKE ?",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if not rows:
|
|
# Try player_files key_methods
|
|
rows = conn.execute(
|
|
"SELECT 'Player' as class_name, domain as method_name, file, 'player' as category, key_methods as description FROM player_files WHERE key_methods LIKE ? OR domain LIKE ?",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Methods matching '{name}':")
|
|
print_table(rows, ["class_name", "method_name", "file", "category", "description"])
|
|
|
|
|
|
def player_cmd(domain):
|
|
conn = get_conn()
|
|
pattern = f"%{domain}%"
|
|
rows = conn.execute(
|
|
"SELECT file, domain, key_methods, description FROM player_files WHERE domain LIKE ? OR description LIKE ? OR key_methods LIKE ?",
|
|
(pattern, pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Player files matching '{domain}':")
|
|
print_table(rows, ["file", "domain", "key_methods", "description"])
|
|
|
|
|
|
def ai_cmd(target):
|
|
conn = get_conn()
|
|
pattern = f"%{target}%"
|
|
rows = conn.execute(
|
|
"SELECT file, target, description FROM ai_handlers WHERE file LIKE ? OR target LIKE ? OR description LIKE ?",
|
|
(pattern, pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n AI handlers matching '{target}':")
|
|
print_table(rows, ["file", "target", "description"])
|
|
|
|
|
|
def const_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT name, value, header, category, description FROM constants WHERE name LIKE ? OR description LIKE ?",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Constants matching '{name}':")
|
|
print_table(rows, ["name", "value", "header", "category", "description"])
|
|
|
|
|
|
def config_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT path, description FROM config_files WHERE path LIKE ? OR description LIKE ?",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Config files matching '{name}':")
|
|
print_table(rows, ["path", "description"])
|
|
|
|
|
|
def deps_cmd(cls):
|
|
conn = get_conn()
|
|
pattern = f"%{cls}%"
|
|
print(f"\n === Files that USE '{cls}' ===")
|
|
rows = conn.execute(
|
|
"SELECT source_file, target_file, dep_type FROM dependencies WHERE target_file LIKE ? ORDER BY dep_type, source_file",
|
|
(pattern,)
|
|
).fetchall()
|
|
print_table(rows, ["source_file", "target_file", "dep_type"])
|
|
|
|
print(f"\n === Files that '{cls}' DEPENDS ON ===")
|
|
rows2 = conn.execute(
|
|
"SELECT source_file, target_file, dep_type FROM dependencies WHERE source_file LIKE ? ORDER BY dep_type, target_file",
|
|
(pattern,)
|
|
).fetchall()
|
|
print_table(rows2, ["source_file", "target_file", "dep_type"])
|
|
|
|
|
|
def enum_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT name, file, line, value_count, values_preview, category FROM enums WHERE name LIKE ? OR values_preview LIKE ?",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Enums matching '{name}':")
|
|
print_table(rows, ["name", "file", "line", "value_count", "values_preview", "category"])
|
|
|
|
|
|
def struct_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT name, file, line, field_count, is_packed, fields_preview, category FROM structs WHERE name LIKE ? OR fields_preview LIKE ?",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Structs matching '{name}':")
|
|
print_table(rows, ["name", "file", "line", "field_count", "is_packed", "fields_preview", "category"])
|
|
|
|
|
|
def class_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT name, header, cpp_file, parent_class, project FROM classes WHERE name LIKE ? OR parent_class LIKE ?",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Classes matching '{name}':")
|
|
print_table(rows, ["name", "header", "cpp_file", "parent_class", "project"])
|
|
for row in rows:
|
|
class_name = row["name"]
|
|
children = conn.execute(
|
|
"SELECT name, header FROM classes WHERE parent_class = ?",
|
|
(class_name,)
|
|
).fetchall()
|
|
if children:
|
|
print(f"\n Children of {class_name}:")
|
|
print_table(children, ["name", "header"])
|
|
|
|
|
|
def todo_cmd(filter_text=None):
|
|
conn = get_conn()
|
|
if filter_text:
|
|
pattern = f"%{filter_text}%"
|
|
rows = conn.execute(
|
|
"SELECT file, line, todo_type, text, project FROM todos WHERE text LIKE ? OR file LIKE ? OR todo_type LIKE ? ORDER BY todo_type, file, line",
|
|
(pattern, pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n TODOs matching '{filter_text}':")
|
|
print_table(rows, ["file", "line", "todo_type", "text", "project"])
|
|
else:
|
|
print("\n === TODO/FIXME Summary ===")
|
|
summary = conn.execute(
|
|
"SELECT todo_type, COUNT(*) as cnt FROM todos GROUP BY todo_type ORDER BY cnt DESC"
|
|
).fetchall()
|
|
print_table(summary, ["todo_type", "cnt"])
|
|
print("\n === Top 20 files with most TODOs ===")
|
|
rows = conn.execute(
|
|
"SELECT file, COUNT(*) as cnt, GROUP_CONCAT(DISTINCT todo_type) as types FROM todos GROUP BY file ORDER BY cnt DESC LIMIT 20"
|
|
).fetchall()
|
|
print_table(rows, ["file", "cnt", "types"])
|
|
|
|
|
|
def dbtable_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT table_name, source_file, line, query_type, context FROM db_tables WHERE table_name LIKE ? ORDER BY table_name, source_file",
|
|
(pattern,)
|
|
).fetchall()
|
|
print(f"\n Database table refs matching '{name}':")
|
|
print_table(rows, ["table_name", "source_file", "line", "query_type", "context"])
|
|
|
|
|
|
def leaks_cmd():
|
|
conn = get_conn()
|
|
print("\n === Memory Leak Risk Analysis (Top 30) ===")
|
|
rows = conn.execute(
|
|
"SELECT file, project, new_count, delete_count, make_unique_count, make_shared_count, risk_score FROM leak_risks ORDER BY risk_score DESC LIMIT 30"
|
|
).fetchall()
|
|
print_table(rows, ["file", "project", "new_count", "delete_count", "make_unique_count", "make_shared_count", "risk_score"])
|
|
total_risk = conn.execute("SELECT SUM(risk_score), COUNT(*) FROM leak_risks WHERE risk_score > 0").fetchone()
|
|
if total_risk[0]:
|
|
print(f"\n Total risk score: {total_risk[0]} across {total_risk[1]} files")
|
|
print(" (risk = new - delete - smart_ptr; higher = more likely leak)")
|
|
|
|
|
|
def nullrisks_cmd(file_filter=None):
|
|
conn = get_conn()
|
|
if file_filter:
|
|
pattern = f"%{file_filter}%"
|
|
rows = conn.execute(
|
|
"SELECT file, line, function_call, pointer_var, risk_type, context FROM null_risks WHERE file LIKE ? ORDER BY file, line",
|
|
(pattern,)
|
|
).fetchall()
|
|
print(f"\n NULL-check risks in '{file_filter}':")
|
|
else:
|
|
print("\n === NULL-Check Risk Summary ===")
|
|
summary = conn.execute(
|
|
"SELECT function_call, risk_type, COUNT(*) as cnt FROM null_risks GROUP BY function_call, risk_type ORDER BY cnt DESC LIMIT 20"
|
|
).fetchall()
|
|
print_table(summary, ["function_call", "risk_type", "cnt"])
|
|
print("\n === Top 20 Files with Most NULL Risks ===")
|
|
rows = conn.execute(
|
|
"SELECT file, COUNT(*) as cnt FROM null_risks GROUP BY file ORDER BY cnt DESC LIMIT 20"
|
|
).fetchall()
|
|
print_table(rows, ["file", "cnt"])
|
|
return
|
|
print_table(rows, ["file", "line", "function_call", "pointer_var", "risk_type", "context"])
|
|
|
|
|
|
def rawptrs_cmd(file_filter=None):
|
|
conn = get_conn()
|
|
if file_filter:
|
|
pattern = f"%{file_filter}%"
|
|
rows = conn.execute(
|
|
"SELECT file, line, class_name, member_type, member_name FROM raw_pointers WHERE file LIKE ? OR class_name LIKE ? ORDER BY file, line",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Raw pointer members in '{file_filter}':")
|
|
else:
|
|
print("\n === Raw Pointer Members Summary ===")
|
|
rows = conn.execute(
|
|
"SELECT class_name, COUNT(*) as cnt FROM raw_pointers GROUP BY class_name ORDER BY cnt DESC LIMIT 25"
|
|
).fetchall()
|
|
print_table(rows, ["class_name", "cnt"])
|
|
total = conn.execute("SELECT COUNT(*) FROM raw_pointers").fetchone()[0]
|
|
print(f"\n Total: {total} raw pointer members (RAII migration candidates)")
|
|
return
|
|
print_table(rows, ["file", "line", "class_name", "member_type", "member_name"])
|
|
|
|
|
|
def casts_cmd(file_filter=None):
|
|
conn = get_conn()
|
|
if file_filter:
|
|
pattern = f"%{file_filter}%"
|
|
rows = conn.execute(
|
|
"SELECT file, line, cast_expr, context FROM unsafe_casts WHERE file LIKE ? ORDER BY file, line",
|
|
(pattern,)
|
|
).fetchall()
|
|
print(f"\n C-style casts in '{file_filter}':")
|
|
else:
|
|
print("\n === C-Style Cast Summary ===")
|
|
rows = conn.execute(
|
|
"SELECT file, COUNT(*) as cnt FROM unsafe_casts GROUP BY file ORDER BY cnt DESC LIMIT 25"
|
|
).fetchall()
|
|
print_table(rows, ["file", "cnt"])
|
|
total = conn.execute("SELECT COUNT(*) FROM unsafe_casts").fetchone()[0]
|
|
print(f"\n Total: {total} C-style casts (static_cast migration candidates)")
|
|
return
|
|
print_table(rows, ["file", "line", "cast_expr", "context"])
|
|
|
|
|
|
def crashes_cmd(file_filter=None):
|
|
conn = get_conn()
|
|
if file_filter:
|
|
pattern = f"%{file_filter}%"
|
|
rows = conn.execute(
|
|
"SELECT file, line, risk_type, severity, expression, context FROM crash_risks WHERE file LIKE ? ORDER BY CASE severity WHEN 'critical' THEN 0 WHEN 'high' THEN 1 ELSE 2 END, file, line",
|
|
(pattern,)
|
|
).fetchall()
|
|
print(f"\n Crash risks in '{file_filter}':")
|
|
else:
|
|
print("\n === Crash Risk Pattern Summary ===")
|
|
summary = conn.execute(
|
|
"SELECT risk_type, severity, COUNT(*) as cnt FROM crash_risks GROUP BY risk_type, severity ORDER BY CASE severity WHEN 'critical' THEN 0 WHEN 'high' THEN 1 ELSE 2 END, cnt DESC"
|
|
).fetchall()
|
|
print_table(summary, ["risk_type", "severity", "cnt"])
|
|
print("\n === Top 20 Files with Most Crash Risks ===")
|
|
by_file = conn.execute(
|
|
"SELECT file, COUNT(*) as cnt, SUM(CASE WHEN severity='critical' THEN 1 ELSE 0 END) as critical FROM crash_risks GROUP BY file ORDER BY critical DESC, cnt DESC LIMIT 20"
|
|
).fetchall()
|
|
print_table(by_file, ["file", "cnt", "critical"])
|
|
total = conn.execute("SELECT COUNT(*) FROM crash_risks").fetchone()[0]
|
|
crit = conn.execute("SELECT COUNT(*) FROM crash_risks WHERE severity='critical'").fetchone()[0]
|
|
print(f"\n Total: {total} crash risks ({crit} critical)")
|
|
return
|
|
print_table(rows, ["file", "line", "risk_type", "severity", "expression", "context"])
|
|
|
|
|
|
def deadmethods_cmd():
|
|
conn = get_conn()
|
|
print("\n === Dead Methods (declared in .h, never referenced in .cpp) ===")
|
|
rows = conn.execute(
|
|
"SELECT class_name, method_name, header_file, header_line, ref_count FROM dead_methods ORDER BY class_name, method_name LIMIT 50"
|
|
).fetchall()
|
|
print_table(rows, ["class_name", "method_name", "header_file", "header_line", "ref_count"])
|
|
total = conn.execute("SELECT COUNT(*) FROM dead_methods").fetchone()[0]
|
|
if total > 50:
|
|
print(f"\n ... and {total - 50} more. Use: py tools/source_graph.py sql \"SELECT * FROM dead_methods\"")
|
|
|
|
|
|
def looprisks_cmd(file_filter=None):
|
|
conn = get_conn()
|
|
if file_filter:
|
|
pattern = f"%{file_filter}%"
|
|
rows = conn.execute(
|
|
"SELECT file, line, risk_type, severity, expression, context FROM infinite_loop_risks "
|
|
"WHERE file LIKE ? ORDER BY CASE severity WHEN 'critical' THEN 0 WHEN 'high' THEN 1 ELSE 2 END, file, line",
|
|
(pattern,)
|
|
).fetchall()
|
|
print(f"\n Infinite loop risks in '{file_filter}':")
|
|
else:
|
|
print("\n === Infinite Loop Risk Summary ===")
|
|
summary = conn.execute(
|
|
"SELECT risk_type, severity, COUNT(*) as cnt FROM infinite_loop_risks "
|
|
"GROUP BY risk_type, severity ORDER BY CASE severity WHEN 'critical' THEN 0 WHEN 'high' THEN 1 ELSE 2 END, cnt DESC"
|
|
).fetchall()
|
|
print_table(summary, ["risk_type", "severity", "cnt"])
|
|
print("\n === Top 20 Files with Loop Risks ===")
|
|
by_file = conn.execute(
|
|
"SELECT file, COUNT(*) as cnt, SUM(CASE WHEN severity='critical' THEN 1 ELSE 0 END) as critical "
|
|
"FROM infinite_loop_risks GROUP BY file ORDER BY critical DESC, cnt DESC LIMIT 20"
|
|
).fetchall()
|
|
print_table(by_file, ["file", "cnt", "critical"])
|
|
total = conn.execute("SELECT COUNT(*) FROM infinite_loop_risks").fetchone()[0]
|
|
crit = conn.execute("SELECT COUNT(*) FROM infinite_loop_risks WHERE severity='critical'").fetchone()[0]
|
|
print(f"\n Total: {total} loop risks ({crit} critical)")
|
|
return
|
|
print_table(rows, ["file", "line", "risk_type", "severity", "expression", "context"])
|
|
|
|
|
|
def duplicates_cmd():
|
|
conn = get_conn()
|
|
print("\n === Duplicate Code Blocks (5+ identical lines across files) ===")
|
|
rows = conn.execute(
|
|
"SELECT file_a, line_a, file_b, line_b, line_count, preview FROM duplicate_blocks ORDER BY file_a, line_a LIMIT 40"
|
|
).fetchall()
|
|
print_table(rows, ["file_a", "line_a", "file_b", "line_b", "line_count", "preview"])
|
|
total = conn.execute("SELECT COUNT(DISTINCT block_hash) FROM duplicate_blocks").fetchone()[0]
|
|
total_instances = conn.execute("SELECT COUNT(*) FROM duplicate_blocks").fetchone()[0]
|
|
print(f"\n {total} unique duplicate blocks, {total_instances} cross-file pairs")
|
|
print(" (candidates for extraction into shared functions/utilities)")
|
|
|
|
|
|
def context_cmd(name):
|
|
"""Unified context query: summary + functions + deps + rdeps + tests + edges for a file."""
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
|
|
# 1. File summary
|
|
summary = conn.execute(
|
|
"SELECT file, project, summary, category FROM file_summaries WHERE file LIKE ?",
|
|
(pattern,)
|
|
).fetchall()
|
|
if summary:
|
|
print(f"\n === File Summary ===")
|
|
print_table(summary, ["file", "project", "summary", "category"])
|
|
else:
|
|
print(f"\n (no summary for '{name}')")
|
|
|
|
# 2. Functions with line ranges
|
|
funcs = conn.execute(
|
|
"SELECT function_name, class_name, start_line, end_line, signature FROM function_index WHERE file LIKE ? ORDER BY start_line",
|
|
(pattern,)
|
|
).fetchall()
|
|
if funcs:
|
|
print(f"\n === Functions ({len(funcs)}) ===")
|
|
print_table(funcs, ["function_name", "class_name", "start_line", "end_line", "signature"])
|
|
|
|
symbols = conn.execute(
|
|
"SELECT symbol_name, hot_path, ct_sensitive, batchable, gpu_candidate, risk_level, review_priority "
|
|
"FROM symbol_metadata WHERE file_path LIKE ? ORDER BY review_priority DESC, symbol_name LIMIT 25",
|
|
(pattern,)
|
|
).fetchall()
|
|
if symbols:
|
|
print(f"\n === Symbol Metadata ===")
|
|
print_table(symbols, ["symbol_name", "hot_path", "ct_sensitive", "batchable", "gpu_candidate", "risk_level", "review_priority"])
|
|
|
|
calls = conn.execute(
|
|
"SELECT caller_symbol, callee_symbol, confidence, call_count FROM call_edges "
|
|
"WHERE caller_file LIKE ? OR callee_file LIKE ? ORDER BY confidence DESC, call_count DESC LIMIT 25",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if calls:
|
|
print(f"\n === Call Edges ===")
|
|
print_table(calls, ["caller_symbol", "callee_symbol", "confidence", "call_count"])
|
|
|
|
# 3. Dependencies (what this file includes/uses)
|
|
deps = conn.execute(
|
|
"SELECT target_file, dep_type FROM dependencies WHERE source_file LIKE ? ORDER BY dep_type, target_file",
|
|
(pattern,)
|
|
).fetchall()
|
|
if deps:
|
|
print(f"\n === Depends On ({len(deps)}) ===")
|
|
print_table(deps, ["target_file", "dep_type"])
|
|
|
|
# 4. Reverse dependencies (who includes/uses this file)
|
|
rdeps = conn.execute(
|
|
"SELECT source_file, dep_type FROM dependencies WHERE target_file LIKE ? ORDER BY dep_type, source_file",
|
|
(pattern,)
|
|
).fetchall()
|
|
if rdeps:
|
|
print(f"\n === Used By ({len(rdeps)}) ===")
|
|
print_table(rdeps, ["source_file", "dep_type"])
|
|
|
|
# 5. Edges (tests, extends, etc.)
|
|
edges_out = conn.execute(
|
|
"SELECT target, edge_type, detail FROM edges WHERE source LIKE ? ORDER BY edge_type",
|
|
(pattern,)
|
|
).fetchall()
|
|
edges_in = conn.execute(
|
|
"SELECT source, edge_type, detail FROM edges WHERE target LIKE ? ORDER BY edge_type",
|
|
(pattern,)
|
|
).fetchall()
|
|
if edges_out:
|
|
print(f"\n === Outgoing Edges ===")
|
|
print_table(edges_out, ["target", "edge_type", "detail"])
|
|
if edges_in:
|
|
print(f"\n === Incoming Edges ===")
|
|
print_table(edges_in, ["source", "edge_type", "detail"])
|
|
|
|
# 6. Singletons defined in this file
|
|
singletons = conn.execute(
|
|
"SELECT macro, class_name, description FROM singletons WHERE header LIKE ?",
|
|
(pattern,)
|
|
).fetchall()
|
|
if singletons:
|
|
print(f"\n === Singletons ===")
|
|
print_table(singletons, ["macro", "class_name", "description"])
|
|
|
|
# 7. Enums in this file
|
|
enums = conn.execute(
|
|
"SELECT name, value_count, values_preview FROM enums WHERE file LIKE ?",
|
|
(pattern,)
|
|
).fetchall()
|
|
if enums:
|
|
print(f"\n === Enums ===")
|
|
print_table(enums, ["name", "value_count", "values_preview"])
|
|
|
|
# 8. Structs in this file
|
|
structs = conn.execute(
|
|
"SELECT name, field_count, is_packed, fields_preview FROM structs WHERE file LIKE ?",
|
|
(pattern,)
|
|
).fetchall()
|
|
if structs:
|
|
print(f"\n === Structs ===")
|
|
print_table(structs, ["name", "field_count", "is_packed", "fields_preview"])
|
|
|
|
if not summary and not funcs:
|
|
print(f"\n No context found for '{name}'. Try a more specific filename.")
|
|
|
|
|
|
def func_cmd(name):
|
|
"""Find function by name with exact line ranges."""
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT file, function_name, class_name, start_line, end_line, signature, project FROM function_index "
|
|
"WHERE function_name LIKE ? OR class_name LIKE ? OR signature LIKE ? ORDER BY file, start_line",
|
|
(pattern, pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Functions matching '{name}':")
|
|
print_table(rows, ["file", "function_name", "class_name", "start_line", "end_line", "signature", "project"])
|
|
|
|
meta = conn.execute(
|
|
"SELECT symbol_name, file_path, hot_path, ct_sensitive, batchable, gpu_candidate, risk_level, review_priority, risk_score, gain_score "
|
|
"FROM symbol_metadata WHERE symbol_name LIKE ? OR file_path LIKE ? ORDER BY review_priority DESC LIMIT 40",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if meta:
|
|
print(f"\n === Symbol Metadata ===")
|
|
print_table(meta, ["symbol_name", "file_path", "hot_path", "ct_sensitive", "batchable", "gpu_candidate", "risk_level", "review_priority", "risk_score", "gain_score"])
|
|
|
|
|
|
def gaps_cmd():
|
|
"""Find documentation and coverage gaps in the codebase."""
|
|
conn = get_conn()
|
|
|
|
# 1. Files without summaries
|
|
print("\n === Files Without Summaries ===")
|
|
rows = conn.execute(
|
|
"SELECT f.path, f.project FROM files f "
|
|
"LEFT JOIN file_summaries fs ON f.path = fs.file "
|
|
"WHERE fs.id IS NULL AND (f.path LIKE '%.cpp' OR f.path LIKE '%.h') "
|
|
"ORDER BY f.project, f.path LIMIT 30"
|
|
).fetchall()
|
|
print_table(rows, ["path", "project"])
|
|
|
|
# 2. Large files without function index entries
|
|
print("\n === Large Files Missing Function Index (500+ lines) ===")
|
|
rows = conn.execute(
|
|
"SELECT fl.file, fl.project, fl.line_count FROM file_lines fl "
|
|
"LEFT JOIN function_index fi ON fl.file = fi.file "
|
|
"WHERE fi.id IS NULL AND fl.line_count >= 500 "
|
|
"ORDER BY fl.line_count DESC LIMIT 20"
|
|
).fetchall()
|
|
print_table(rows, ["file", "project", "line_count"])
|
|
|
|
# 3. Files with no tests
|
|
print("\n === Source Files Without Test Coverage ===")
|
|
rows = conn.execute(
|
|
"SELECT f.path, f.project FROM files f "
|
|
"LEFT JOIN edges e ON f.path = e.target AND e.edge_type = 'tests' "
|
|
"WHERE e.id IS NULL AND f.path LIKE '%.cpp' AND f.project = 'Game' "
|
|
"ORDER BY f.path LIMIT 30"
|
|
).fetchall()
|
|
print_table(rows, ["path", "project"])
|
|
|
|
# 4. Stats
|
|
total_files = conn.execute("SELECT COUNT(*) FROM files WHERE path LIKE '%.cpp' OR path LIKE '%.h'").fetchone()[0]
|
|
summarized = conn.execute("SELECT COUNT(*) FROM file_summaries").fetchone()[0]
|
|
indexed_files = conn.execute("SELECT COUNT(DISTINCT file) FROM function_index").fetchone()[0]
|
|
tested_files = conn.execute("SELECT COUNT(DISTINCT target) FROM edges WHERE edge_type = 'tests'").fetchone()[0]
|
|
print(f"\n Coverage: {summarized}/{total_files} summarized, {indexed_files}/{total_files} function-indexed, {tested_files}/{total_files} tested")
|
|
|
|
|
|
def summary_cmd():
|
|
"""Show project-level overview with key statistics."""
|
|
conn = get_conn()
|
|
print("\n === Project Summary ===")
|
|
|
|
meta = conn.execute(
|
|
"SELECT schema_version, extractor_version, graph_build_revision, built_at FROM graph_metadata ORDER BY id DESC LIMIT 1"
|
|
).fetchone()
|
|
if meta:
|
|
print(f"\n Graph Build: schema={meta['schema_version']} extractor={meta['extractor_version']} built_at={meta['built_at']}")
|
|
if meta["graph_build_revision"]:
|
|
print(f" Revision: {meta['graph_build_revision']}")
|
|
|
|
# Total code
|
|
totals = conn.execute(
|
|
"SELECT project, SUM(line_count) as total_lines, COUNT(*) as files FROM file_lines GROUP BY project ORDER BY total_lines DESC"
|
|
).fetchall()
|
|
print_table(totals, ["project", "total_lines", "files"])
|
|
|
|
total_lines = conn.execute("SELECT SUM(line_count) FROM file_lines").fetchone()[0] or 0
|
|
print(f"\n Total: {total_lines:,} lines of code")
|
|
|
|
# Key counts
|
|
counts = {
|
|
"Singletons": conn.execute("SELECT COUNT(*) FROM singletons").fetchone()[0],
|
|
"Classes": conn.execute("SELECT COUNT(*) FROM classes").fetchone()[0],
|
|
"Methods (headers)": conn.execute("SELECT COUNT(*) FROM methods").fetchone()[0],
|
|
"Functions (indexed)": conn.execute("SELECT COUNT(*) FROM function_index").fetchone()[0],
|
|
"Semantic Tags": conn.execute("SELECT COUNT(*) FROM semantic_tags").fetchone()[0],
|
|
"Semantic Profiles": conn.execute("SELECT COUNT(*) FROM semantic_profiles").fetchone()[0],
|
|
"Audit Coverage Rows": conn.execute("SELECT COUNT(*) FROM audit_coverage").fetchone()[0],
|
|
"History Metrics": conn.execute("SELECT COUNT(*) FROM history_metrics").fetchone()[0],
|
|
"Review Queue Items": conn.execute("SELECT COUNT(*) FROM review_queue").fetchone()[0],
|
|
"Ownership Metrics": conn.execute("SELECT COUNT(*) FROM ownership_metrics").fetchone()[0],
|
|
"Test Mappings": conn.execute("SELECT COUNT(*) FROM test_function_map").fetchone()[0],
|
|
"Call Edges": conn.execute("SELECT COUNT(*) FROM call_edges").fetchone()[0],
|
|
"Symbol Metadata": conn.execute("SELECT COUNT(*) FROM symbol_metadata").fetchone()[0],
|
|
"Analysis Scores": conn.execute("SELECT COUNT(*) FROM analysis_scores").fetchone()[0],
|
|
"Symbol Audit Rows": conn.execute("SELECT COUNT(*) FROM symbol_audit_coverage").fetchone()[0],
|
|
"AI Tasks": conn.execute("SELECT COUNT(*) FROM ai_tasks").fetchone()[0],
|
|
"Research Assets": conn.execute("SELECT COUNT(*) FROM research_assets").fetchone()[0],
|
|
"Research Mentions": conn.execute("SELECT COUNT(*) FROM research_mentions").fetchone()[0],
|
|
"Packet Handlers": conn.execute("SELECT COUNT(*) FROM packet_handlers").fetchone()[0],
|
|
"Events": conn.execute("SELECT COUNT(*) FROM events").fetchone()[0],
|
|
"Enums": conn.execute("SELECT COUNT(*) FROM enums").fetchone()[0],
|
|
"Structs": conn.execute("SELECT COUNT(*) FROM structs").fetchone()[0],
|
|
"DB Tables Referenced": conn.execute("SELECT COUNT(DISTINCT table_name) FROM db_tables").fetchone()[0],
|
|
"Prepared Statements": conn.execute("SELECT COUNT(*) FROM prepared_statements").fetchone()[0],
|
|
"#define Macros": conn.execute("SELECT COUNT(*) FROM defines").fetchone()[0],
|
|
}
|
|
print("\n === Key Metrics ===")
|
|
for label, count in counts.items():
|
|
print(f" {label:.<30} {count}")
|
|
|
|
# Quality
|
|
print("\n === Code Quality ===")
|
|
quality = {
|
|
"Crash Risks (critical)": conn.execute("SELECT COUNT(*) FROM crash_risks WHERE severity='critical'").fetchone()[0],
|
|
"NULL-Check Risks": conn.execute("SELECT COUNT(*) FROM null_risks").fetchone()[0],
|
|
"Memory Leak Risk Files": conn.execute("SELECT COUNT(*) FROM leak_risks WHERE risk_score > 0").fetchone()[0],
|
|
"Dead Methods": conn.execute("SELECT COUNT(*) FROM dead_methods").fetchone()[0],
|
|
"Duplicate Blocks": conn.execute("SELECT COUNT(DISTINCT block_hash) FROM duplicate_blocks").fetchone()[0],
|
|
"TODO/FIXME Comments": conn.execute("SELECT COUNT(*) FROM todos").fetchone()[0],
|
|
"Low Coverage Files (<40)": conn.execute("SELECT COUNT(*) FROM audit_coverage WHERE coverage_score < 40").fetchone()[0],
|
|
"High Churn Files (>=20)": conn.execute("SELECT COUNT(*) FROM history_metrics WHERE churn_score >= 20").fetchone()[0],
|
|
"Review Queue Items": conn.execute("SELECT COUNT(*) FROM review_queue").fetchone()[0],
|
|
"Ownership Risk Files (>=5)": conn.execute("SELECT COUNT(*) FROM ownership_metrics WHERE bus_factor_risk >= 5").fetchone()[0],
|
|
"Untested Function Maps": conn.execute("SELECT COUNT(*) FROM review_queue WHERE queue_type='untested_hotspot'").fetchone()[0],
|
|
}
|
|
for label, count in quality.items():
|
|
print(f" {label:.<30} {count}")
|
|
|
|
|
|
def query_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT query_name, sql_text, connection_type, source_file, line FROM prepared_statements WHERE query_name LIKE ? OR sql_text LIKE ?",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Prepared statements matching '{name}':")
|
|
print_table(rows, ["query_name", "sql_text", "connection_type", "source_file", "line"])
|
|
|
|
|
|
def tags_cmd(term):
|
|
conn = get_conn()
|
|
pattern = f"%{term}%"
|
|
rows = conn.execute(
|
|
"SELECT tag, entity_type, entity_name, file, line, confidence, evidence "
|
|
"FROM semantic_tags "
|
|
"WHERE tag LIKE ? OR entity_name LIKE ? OR file LIKE ? OR evidence LIKE ? "
|
|
"ORDER BY confidence DESC, tag, entity_type, file, line "
|
|
"LIMIT 80",
|
|
(pattern, pattern, pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Semantic tags matching '{term}':")
|
|
print_table(rows, ["tag", "entity_type", "entity_name", "file", "line", "confidence", "evidence"])
|
|
|
|
|
|
def hotspots_cmd(term=None):
|
|
conn = get_conn()
|
|
params = []
|
|
query = (
|
|
"SELECT entity_type, entity_name, file, line, risk_score, gain_score, "
|
|
"security_sensitive, hot_path_candidate, optimization_candidate, network_surface, fragile_surface, test_surface "
|
|
"FROM semantic_profiles"
|
|
)
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
query += (
|
|
" WHERE entity_name LIKE ? OR file LIKE ? OR evidence LIKE ? "
|
|
"OR EXISTS (SELECT 1 FROM semantic_tags st WHERE st.file = semantic_profiles.file "
|
|
"AND st.entity_name = semantic_profiles.entity_name AND st.tag LIKE ?)"
|
|
)
|
|
params = [pattern, pattern, pattern, pattern]
|
|
query += " ORDER BY gain_score DESC, risk_score DESC, entity_type, file LIMIT 80"
|
|
rows = conn.execute(query, params).fetchall()
|
|
title = f"'{term}'" if term else "all"
|
|
print(f"\n Semantic hotspots for {title}:")
|
|
print_table(
|
|
rows,
|
|
[
|
|
"entity_type", "entity_name", "file", "line", "risk_score", "gain_score",
|
|
"security_sensitive", "hot_path_candidate", "optimization_candidate",
|
|
"network_surface", "fragile_surface", "test_surface"
|
|
]
|
|
)
|
|
|
|
|
|
def coverage_cmd(term=None):
|
|
conn = get_conn()
|
|
params = []
|
|
query = (
|
|
"SELECT file, project, coverage_score, unit_test_refs, has_summary, has_function_index, "
|
|
"crash_risk_count, null_risk_count, duplicate_pair_count, semantic_risk_max, semantic_gain_max, notes "
|
|
"FROM audit_coverage"
|
|
)
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
query += " WHERE file LIKE ? OR project LIKE ? OR notes LIKE ?"
|
|
params = [pattern, pattern, pattern]
|
|
query += " ORDER BY coverage_score ASC, semantic_risk_max DESC, file LIMIT 80"
|
|
rows = conn.execute(query, params).fetchall()
|
|
title = f"'{term}'" if term else "all"
|
|
print(f"\n Coverage signals for {title}:")
|
|
print_table(
|
|
rows,
|
|
[
|
|
"file", "project", "coverage_score", "unit_test_refs", "has_summary", "has_function_index",
|
|
"crash_risk_count", "null_risk_count", "duplicate_pair_count", "semantic_risk_max",
|
|
"semantic_gain_max", "notes"
|
|
]
|
|
)
|
|
|
|
|
|
def churn_cmd(term=None):
|
|
conn = get_conn()
|
|
params = []
|
|
query = (
|
|
"SELECT file, commit_count, recent_commit_count, unique_authors, bugfix_commits, perf_commits, "
|
|
"audit_commits, last_commit_date, churn_score FROM history_metrics"
|
|
)
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
query += " WHERE file LIKE ?"
|
|
params = [pattern]
|
|
query += " ORDER BY churn_score DESC, recent_commit_count DESC, file LIMIT 80"
|
|
rows = conn.execute(query, params).fetchall()
|
|
title = f"'{term}'" if term else "all"
|
|
print(f"\n Churn/history signals for {title}:")
|
|
print_table(
|
|
rows,
|
|
["file", "commit_count", "recent_commit_count", "unique_authors", "bugfix_commits", "perf_commits", "audit_commits", "last_commit_date", "churn_score"]
|
|
)
|
|
|
|
|
|
def reviewqueue_cmd(term=None):
|
|
conn = get_conn()
|
|
params = []
|
|
query = "SELECT file, queue_type, priority_score, rationale FROM review_queue"
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
query += " WHERE file LIKE ? OR queue_type LIKE ? OR rationale LIKE ?"
|
|
params = [pattern, pattern, pattern]
|
|
query += " ORDER BY priority_score DESC, queue_type, file LIMIT 120"
|
|
rows = conn.execute(query, params).fetchall()
|
|
title = f"'{term}'" if term else "all"
|
|
print(f"\n Review queue for {title}:")
|
|
print_table(rows, ["file", "queue_type", "priority_score", "rationale"])
|
|
|
|
|
|
def ownership_cmd(term=None):
|
|
conn = get_conn()
|
|
params = []
|
|
query = (
|
|
"SELECT file, primary_author, primary_author_share, author_count, blamed_lines, "
|
|
"most_recent_line_date, bus_factor_risk FROM ownership_metrics"
|
|
)
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
query += " WHERE file LIKE ? OR primary_author LIKE ?"
|
|
params = [pattern, pattern]
|
|
query += " ORDER BY bus_factor_risk DESC, primary_author_share DESC, file LIMIT 80"
|
|
rows = conn.execute(query, params).fetchall()
|
|
title = f"'{term}'" if term else "all"
|
|
print(f"\n Ownership signals for {title}:")
|
|
print_table(
|
|
rows,
|
|
["file", "primary_author", "primary_author_share", "author_count", "blamed_lines", "most_recent_line_date", "bus_factor_risk"]
|
|
)
|
|
|
|
|
|
def testmap_cmd(term=None):
|
|
conn = get_conn()
|
|
params = []
|
|
query = (
|
|
"SELECT test_file, target_file, function_name, class_name, mapping_type FROM test_function_map"
|
|
)
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
query += " WHERE test_file LIKE ? OR target_file LIKE ? OR function_name LIKE ? OR class_name LIKE ?"
|
|
params = [pattern, pattern, pattern, pattern]
|
|
query += " ORDER BY target_file, mapping_type, class_name, function_name LIMIT 120"
|
|
rows = conn.execute(query, params).fetchall()
|
|
title = f"'{term}'" if term else "all"
|
|
print(f"\n Test-function mapping for {title}:")
|
|
print_table(rows, ["test_file", "target_file", "function_name", "class_name", "mapping_type"])
|
|
|
|
|
|
def calls_cmd(term):
|
|
conn = get_conn()
|
|
pattern = f"%{term}%"
|
|
rows = conn.execute(
|
|
"SELECT caller_symbol, caller_file, callee_symbol, callee_file, confidence, call_count, evidence "
|
|
"FROM call_edges WHERE caller_symbol LIKE ? OR caller_file LIKE ? OR callee_symbol LIKE ? OR callee_file LIKE ? "
|
|
"ORDER BY confidence DESC, call_count DESC, caller_symbol, callee_symbol LIMIT 120",
|
|
(pattern, pattern, pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Call graph edges matching '{term}':")
|
|
print_table(rows, ["caller_symbol", "caller_file", "callee_symbol", "callee_file", "confidence", "call_count", "evidence"])
|
|
|
|
|
|
def focus_cmd(term, budget=80, core_only=False):
|
|
"""Compact ranked snapshot optimized for low-token agent context."""
|
|
conn = get_conn()
|
|
budget = _parse_budget_arg(budget, 80)
|
|
candidate_files = _select_candidate_files(conn, term, limit=6, core_only=core_only)
|
|
pattern = f"%{term}%"
|
|
mode_suffix = ", core-only" if core_only else ""
|
|
print(f"\n === Focus for '{term}' (budget={budget}{mode_suffix}) ===")
|
|
|
|
sections = []
|
|
line_budget = budget
|
|
|
|
def add(title, items, max_items):
|
|
nonlocal line_budget
|
|
if not items or line_budget <= 0:
|
|
return
|
|
trimmed = list(items[: min(max_items, line_budget)])
|
|
if trimmed:
|
|
sections.append((title, trimmed))
|
|
line_budget -= len(trimmed)
|
|
|
|
if candidate_files:
|
|
file_items = [_compact_file_snapshot(conn, file_name) for file_name in candidate_files]
|
|
add("Primary Files", file_items, 6)
|
|
|
|
symbol_rows = _iter_candidate_symbols(conn, term, candidate_files, limit=10)
|
|
if core_only:
|
|
filtered_symbol_rows = [row for row in symbol_rows if _is_core_file(row["file_path"])]
|
|
if filtered_symbol_rows:
|
|
symbol_rows = filtered_symbol_rows
|
|
symbol_items = []
|
|
for row in symbol_rows:
|
|
tags = _compact_text(row["semantic_tags"], 36)
|
|
flags = []
|
|
if row["hot_path"]:
|
|
flags.append("hot")
|
|
if row["ct_sensitive"]:
|
|
flags.append("ct")
|
|
if row["batchable"]:
|
|
flags.append("batch")
|
|
if row["gpu_candidate"]:
|
|
flags.append("gpu")
|
|
flag_text = f" [{' '.join(flags)}]" if flags else ""
|
|
symbol_items.append(
|
|
f"{row['symbol_name']} @ {row['file_path']} | prio={row['review_priority']} gain={row['gain_score']} risk={row['risk_score']} cov={row['audit_coverage_score']}{flag_text} tags={tags}"
|
|
)
|
|
add("Top Symbols", symbol_items, 8)
|
|
|
|
call_rows = conn.execute(
|
|
"SELECT caller_symbol, caller_file, callee_symbol, callee_file, confidence, call_count "
|
|
"FROM call_edges WHERE caller_symbol LIKE ? OR callee_symbol LIKE ? OR caller_file LIKE ? OR callee_file LIKE ? "
|
|
"ORDER BY confidence DESC, call_count DESC LIMIT 8",
|
|
(pattern, pattern, pattern, pattern),
|
|
).fetchall()
|
|
if not call_rows and candidate_files:
|
|
placeholders = _sql_in(candidate_files)
|
|
call_rows = conn.execute(
|
|
f"SELECT caller_symbol, caller_file, callee_symbol, callee_file, confidence, call_count FROM call_edges "
|
|
f"WHERE caller_file IN ({placeholders}) OR callee_file IN ({placeholders}) "
|
|
f"ORDER BY confidence DESC, call_count DESC LIMIT 8",
|
|
candidate_files + candidate_files,
|
|
).fetchall()
|
|
call_items = [
|
|
f"{row['caller_symbol']} -> {row['callee_symbol']} | {row['caller_file']} -> {row['callee_file']} | conf={row['confidence']} calls={row['call_count']}"
|
|
for row in call_rows
|
|
]
|
|
add("Call Frontier", call_items, 8)
|
|
|
|
risk_items = []
|
|
if candidate_files:
|
|
placeholders = _sql_in(candidate_files)
|
|
crash_rows = conn.execute(
|
|
f"SELECT file, line, risk_type, severity, expression FROM crash_risks WHERE file IN ({placeholders}) "
|
|
"ORDER BY CASE severity WHEN 'critical' THEN 0 WHEN 'high' THEN 1 ELSE 2 END, line LIMIT 6",
|
|
candidate_files,
|
|
).fetchall()
|
|
null_rows = conn.execute(
|
|
f"SELECT file, line, function_call, risk_type FROM null_risks WHERE file IN ({placeholders}) LIMIT 4",
|
|
candidate_files,
|
|
).fetchall()
|
|
for row in crash_rows:
|
|
risk_items.append(
|
|
f"[{row['severity']}] {row['file']}:{row['line']} {row['risk_type']} :: {_compact_text(row['expression'], 64)}"
|
|
)
|
|
for row in null_rows:
|
|
risk_items.append(
|
|
f"[null] {row['file']}:{row['line']} {row['function_call']} ({row['risk_type']})"
|
|
)
|
|
add("Top Risks", risk_items, 8)
|
|
|
|
test_items = []
|
|
if candidate_files:
|
|
placeholders = _sql_in(candidate_files)
|
|
tests = conn.execute(
|
|
f"SELECT DISTINCT test_file, target_file, function_name, mapping_type FROM test_function_map WHERE target_file IN ({placeholders}) "
|
|
"ORDER BY target_file, test_file LIMIT 8",
|
|
candidate_files,
|
|
).fetchall()
|
|
for row in tests:
|
|
test_items.append(
|
|
f"{row['test_file']} -> {row['target_file']} :: {row['function_name']} ({row['mapping_type']})"
|
|
)
|
|
add("Tests", test_items, 8)
|
|
|
|
research_items = [
|
|
f"{row['asset_path']} :: {row['symbol']} [{row['mention_type']}] {_compact_text(row['context'], 60)}"
|
|
for row in _research_matches(conn, term, limit=5)
|
|
]
|
|
add("Research", research_items, 5)
|
|
|
|
if not sections:
|
|
print(" (no focus results)")
|
|
return
|
|
|
|
for title, items in sections:
|
|
_print_compact_section(title, items)
|
|
|
|
|
|
def slice_cmd(term, budget=120, core_only=False):
|
|
"""Minimal implementation slice: entry files, hot symbols, neighbors, and tests."""
|
|
conn = get_conn()
|
|
budget = _parse_budget_arg(budget, 120)
|
|
pattern = f"%{term}%"
|
|
candidate_files = _select_candidate_files(conn, term, limit=4, core_only=core_only)
|
|
if not candidate_files:
|
|
print(f"\n No slice found for '{term}'.")
|
|
return
|
|
|
|
mode_suffix = ", core-only" if core_only else ""
|
|
print(f"\n === Slice for '{term}' (budget={budget}{mode_suffix}) ===")
|
|
sections = []
|
|
line_budget = budget
|
|
|
|
def add(title, items, max_items):
|
|
nonlocal line_budget
|
|
if not items or line_budget <= 0:
|
|
return
|
|
trimmed = list(items[: min(max_items, line_budget)])
|
|
if trimmed:
|
|
sections.append((title, trimmed))
|
|
line_budget -= len(trimmed)
|
|
|
|
add("Entry Files", [_compact_file_snapshot(conn, file_name) for file_name in candidate_files], 4)
|
|
|
|
func_items = []
|
|
placeholders = _sql_in(candidate_files)
|
|
func_rows = conn.execute(
|
|
f"SELECT fi.file, fi.function_name, fi.class_name, fi.start_line, fi.end_line, sm.review_priority, sm.risk_score, sm.gain_score, sm.ct_sensitive, sm.hot_path "
|
|
f"FROM function_index fi LEFT JOIN symbol_metadata sm ON sm.symbol_name = fi.function_name AND sm.file_path = fi.file "
|
|
f"WHERE fi.file IN ({placeholders}) ORDER BY "
|
|
"CASE WHEN fi.function_name LIKE ? THEN 0 WHEN fi.class_name LIKE ? THEN 1 ELSE 2 END, "
|
|
"COALESCE(sm.review_priority, 0) DESC, fi.start_line LIMIT 14",
|
|
candidate_files + [pattern, pattern],
|
|
).fetchall()
|
|
for row in func_rows:
|
|
label = _function_label(row["class_name"], row["function_name"])
|
|
flags = []
|
|
if row["hot_path"]:
|
|
flags.append("hot")
|
|
if row["ct_sensitive"]:
|
|
flags.append("ct")
|
|
flag_text = f" [{' '.join(flags)}]" if flags else ""
|
|
func_items.append(
|
|
f"{label} @ {row['file']}:{row['start_line']}-{row['end_line']} | prio={row['review_priority'] or 0} gain={row['gain_score'] or 0} risk={row['risk_score'] or 0}{flag_text}"
|
|
)
|
|
add("Entry Functions", func_items, 12)
|
|
|
|
symbol_items = []
|
|
symbol_rows = _iter_candidate_symbols(conn, term, candidate_files, limit=10)
|
|
if core_only:
|
|
filtered_symbol_rows = [row for row in symbol_rows if _is_core_file(row["file_path"])]
|
|
if filtered_symbol_rows:
|
|
symbol_rows = filtered_symbol_rows
|
|
for row in symbol_rows:
|
|
symbol_items.append(
|
|
f"{row['symbol_name']} @ {row['file_path']} | prio={row['review_priority']} gain={row['gain_score']} risk={row['risk_score']} cov={row['audit_coverage_score']}"
|
|
)
|
|
add("Entry Symbols", symbol_items, 10)
|
|
|
|
neighbor_items = []
|
|
for file_name, score in _build_neighbor_file_scores(conn, candidate_files, limit=10):
|
|
neighbor_items.append(
|
|
f"{file_name} | call_edges={score['call_edges']} dep_edges={score['dep_edges']} call_weight={score['call_weight']}"
|
|
)
|
|
add("Neighbor Files", neighbor_items, 10)
|
|
|
|
edge_rows = conn.execute(
|
|
f"SELECT source_file, target_file, dep_type FROM dependencies WHERE source_file IN ({placeholders}) OR target_file IN ({placeholders}) "
|
|
"ORDER BY dep_type, source_file, target_file LIMIT 10",
|
|
candidate_files + candidate_files,
|
|
).fetchall()
|
|
edge_items = [f"{row['source_file']} -> {row['target_file']} ({row['dep_type']})" for row in edge_rows]
|
|
add("Dependency Edges", edge_items, 10)
|
|
|
|
call_rows = conn.execute(
|
|
f"SELECT caller_symbol, caller_file, callee_symbol, callee_file, confidence, call_count FROM call_edges "
|
|
f"WHERE caller_file IN ({placeholders}) OR callee_file IN ({placeholders}) "
|
|
f"ORDER BY confidence DESC, call_count DESC LIMIT 10",
|
|
candidate_files + candidate_files,
|
|
).fetchall()
|
|
call_items = [
|
|
f"{row['caller_symbol']} -> {row['callee_symbol']} | {row['caller_file']} -> {row['callee_file']} | conf={row['confidence']} calls={row['call_count']}"
|
|
for row in call_rows
|
|
]
|
|
add("Call Edges", call_items, 10)
|
|
|
|
test_rows = conn.execute(
|
|
f"SELECT DISTINCT test_file, target_file, function_name, mapping_type FROM test_function_map WHERE target_file IN ({placeholders}) "
|
|
"ORDER BY target_file, mapping_type, test_file LIMIT 8",
|
|
candidate_files,
|
|
).fetchall()
|
|
test_items = [
|
|
f"{row['test_file']} -> {row['target_file']} :: {row['function_name']} ({row['mapping_type']})"
|
|
for row in test_rows
|
|
]
|
|
add("Tests", test_items, 8)
|
|
|
|
if not sections:
|
|
print(" (no slice results)")
|
|
return
|
|
|
|
for title, items in sections:
|
|
_print_compact_section(title, items)
|
|
|
|
|
|
def trace_cmd(term):
|
|
"""Build a compact trace: direct matches, call edges, configs, tests, and research refs."""
|
|
conn = get_conn()
|
|
pattern = f"%{term}%"
|
|
print(f"\n === Trace for '{term}' ===")
|
|
|
|
direct_functions = conn.execute(
|
|
"SELECT file, function_name, class_name, start_line, end_line "
|
|
"FROM function_index WHERE function_name LIKE ? OR class_name LIKE ? OR file LIKE ? "
|
|
"ORDER BY file, start_line LIMIT 20",
|
|
(pattern, pattern, pattern),
|
|
).fetchall()
|
|
if direct_functions:
|
|
print("\n Direct function matches:")
|
|
print_table(direct_functions, ["file", "function_name", "class_name", "start_line", "end_line"])
|
|
|
|
direct_handlers = conn.execute(
|
|
"SELECT headcode_name, handler_method, source_file, handler_type "
|
|
"FROM packet_handlers WHERE headcode_name LIKE ? OR handler_method LIKE ? OR description LIKE ? "
|
|
"ORDER BY source_file, headcode_name LIMIT 16",
|
|
(pattern, pattern, pattern),
|
|
).fetchall()
|
|
if direct_handlers:
|
|
print("\n Direct packet handlers:")
|
|
print_table(direct_handlers, ["headcode_name", "handler_method", "source_file", "handler_type"])
|
|
|
|
direct_configs = conn.execute(
|
|
"SELECT path, description FROM config_files WHERE path LIKE ? OR description LIKE ? ORDER BY path LIMIT 12",
|
|
(pattern, pattern),
|
|
).fetchall()
|
|
if direct_configs:
|
|
print("\n Direct config/runtime assets:")
|
|
print_table(direct_configs, ["path", "description"])
|
|
|
|
candidate_files = _collect_candidate_files(conn, term, limit=10)
|
|
if candidate_files:
|
|
placeholders = _sql_in(candidate_files)
|
|
print("\n Candidate files:")
|
|
file_rows = [{"file": file_name} for file_name in candidate_files]
|
|
print_table(file_rows, ["file"])
|
|
|
|
outgoing = conn.execute(
|
|
f"SELECT caller_symbol, caller_file, callee_symbol, callee_file, confidence, call_count "
|
|
f"FROM call_edges WHERE caller_file IN ({placeholders}) "
|
|
f"ORDER BY confidence DESC, call_count DESC LIMIT 24",
|
|
candidate_files,
|
|
).fetchall()
|
|
if outgoing:
|
|
print("\n Outgoing calls from candidate files:")
|
|
print_table(outgoing, ["caller_symbol", "caller_file", "callee_symbol", "callee_file", "confidence", "call_count"])
|
|
|
|
incoming = conn.execute(
|
|
f"SELECT caller_symbol, caller_file, callee_symbol, callee_file, confidence, call_count "
|
|
f"FROM call_edges WHERE callee_file IN ({placeholders}) "
|
|
f"ORDER BY confidence DESC, call_count DESC LIMIT 24",
|
|
candidate_files,
|
|
).fetchall()
|
|
if incoming:
|
|
print("\n Incoming calls into candidate files:")
|
|
print_table(incoming, ["caller_symbol", "caller_file", "callee_symbol", "callee_file", "confidence", "call_count"])
|
|
|
|
deps = conn.execute(
|
|
f"SELECT source_file, target_file, dep_type FROM dependencies "
|
|
f"WHERE source_file IN ({placeholders}) OR target_file IN ({placeholders}) "
|
|
f"ORDER BY dep_type, source_file, target_file LIMIT 30",
|
|
candidate_files + candidate_files,
|
|
).fetchall()
|
|
if deps:
|
|
print("\n File dependencies around the trace:")
|
|
print_table(deps, ["source_file", "target_file", "dep_type"])
|
|
|
|
tests = conn.execute(
|
|
f"SELECT test_file, target_file, function_name, class_name, mapping_type FROM test_function_map "
|
|
f"WHERE target_file IN ({placeholders}) "
|
|
f"ORDER BY target_file, mapping_type, class_name, function_name LIMIT 30",
|
|
candidate_files,
|
|
).fetchall()
|
|
if tests:
|
|
print("\n Related unit tests:")
|
|
print_table(tests, ["test_file", "target_file", "function_name", "class_name", "mapping_type"])
|
|
|
|
research = _research_matches(conn, term, limit=18)
|
|
if research:
|
|
print("\n ReversingResearch cross-reference:")
|
|
print_table(research, ["asset_path", "symbol", "mention_type", "context"])
|
|
|
|
if not direct_functions and not direct_handlers and not direct_configs and not candidate_files and not research:
|
|
print(" (no trace results)")
|
|
|
|
|
|
def impact_cmd(term):
|
|
"""Aggregate change-impact signals for likely affected files."""
|
|
conn = get_conn()
|
|
candidate_files = _collect_candidate_files(conn, term, limit=12)
|
|
if not candidate_files:
|
|
print(f"\n No impacted files found for '{term}'.")
|
|
return
|
|
|
|
print(f"\n === Impact for '{term}' ===")
|
|
rows = []
|
|
for file_name in candidate_files:
|
|
coverage = conn.execute(
|
|
"SELECT coverage_score, unit_test_refs, crash_risk_count, null_risk_count FROM audit_coverage WHERE file = ?",
|
|
(file_name,),
|
|
).fetchone()
|
|
history = conn.execute(
|
|
"SELECT churn_score, recent_commit_count, bugfix_commits FROM history_metrics WHERE file = ?",
|
|
(file_name,),
|
|
).fetchone()
|
|
ownership = conn.execute(
|
|
"SELECT primary_author, primary_author_share, bus_factor_risk FROM ownership_metrics WHERE file = ?",
|
|
(file_name,),
|
|
).fetchone()
|
|
tested = conn.execute(
|
|
"SELECT COUNT(DISTINCT test_file) FROM test_function_map WHERE target_file = ?",
|
|
(file_name,),
|
|
).fetchone()[0]
|
|
callers = conn.execute(
|
|
"SELECT COUNT(*) FROM call_edges WHERE callee_file = ?",
|
|
(file_name,),
|
|
).fetchone()[0]
|
|
callees = conn.execute(
|
|
"SELECT COUNT(*) FROM call_edges WHERE caller_file = ?",
|
|
(file_name,),
|
|
).fetchone()[0]
|
|
review_items = conn.execute(
|
|
"SELECT COUNT(*) FROM review_queue WHERE file = ?",
|
|
(file_name,),
|
|
).fetchone()[0]
|
|
rows.append(
|
|
{
|
|
"file": file_name,
|
|
"coverage_score": coverage["coverage_score"] if coverage else 0,
|
|
"tests": tested,
|
|
"callers": callers,
|
|
"callees": callees,
|
|
"crash_risks": coverage["crash_risk_count"] if coverage else 0,
|
|
"null_risks": coverage["null_risk_count"] if coverage else 0,
|
|
"churn": history["churn_score"] if history else 0,
|
|
"recent_commits": history["recent_commit_count"] if history else 0,
|
|
"bus_factor_risk": ownership["bus_factor_risk"] if ownership else 0,
|
|
"owner": ownership["primary_author"] if ownership else "",
|
|
"review_items": review_items,
|
|
}
|
|
)
|
|
|
|
rows.sort(
|
|
key=lambda row: (
|
|
-row["callers"],
|
|
row["coverage_score"],
|
|
-row["crash_risks"],
|
|
-row["null_risks"],
|
|
-row["churn"],
|
|
)
|
|
)
|
|
print_table(
|
|
rows,
|
|
[
|
|
"file",
|
|
"coverage_score",
|
|
"tests",
|
|
"callers",
|
|
"callees",
|
|
"crash_risks",
|
|
"null_risks",
|
|
"churn",
|
|
"recent_commits",
|
|
"bus_factor_risk",
|
|
"owner",
|
|
"review_items",
|
|
],
|
|
)
|
|
|
|
placeholders = _sql_in(candidate_files)
|
|
tests = conn.execute(
|
|
f"SELECT DISTINCT test_file, target_file FROM test_function_map "
|
|
f"WHERE target_file IN ({placeholders}) ORDER BY target_file, test_file LIMIT 24",
|
|
candidate_files,
|
|
).fetchall()
|
|
if tests:
|
|
print("\n Impacted unit tests:")
|
|
print_table(tests, ["test_file", "target_file"])
|
|
|
|
callers = conn.execute(
|
|
f"SELECT caller_symbol, caller_file, callee_symbol, callee_file, confidence "
|
|
f"FROM call_edges WHERE callee_file IN ({placeholders}) "
|
|
f"ORDER BY confidence DESC, call_count DESC LIMIT 24",
|
|
candidate_files,
|
|
).fetchall()
|
|
if callers:
|
|
print("\n High-confidence inbound callers:")
|
|
print_table(callers, ["caller_symbol", "caller_file", "callee_symbol", "callee_file", "confidence"])
|
|
|
|
research = _research_matches(conn, term, limit=12)
|
|
if research:
|
|
print("\n Related research notes:")
|
|
print_table(research, ["asset_path", "symbol", "mention_type", "context"])
|
|
|
|
|
|
def research_cmd(term=None):
|
|
"""Search research assets and extracted mentions from ReversingResearch."""
|
|
conn = get_conn()
|
|
if not term:
|
|
print("\n === ReversingResearch Summary ===")
|
|
assets = conn.execute(
|
|
"SELECT asset_type, COUNT(*) AS count, SUM(size_bytes) AS total_bytes "
|
|
"FROM research_assets GROUP BY asset_type ORDER BY count DESC, asset_type"
|
|
).fetchall()
|
|
print_table(assets, ["asset_type", "count", "total_bytes"])
|
|
top_assets = conn.execute(
|
|
"SELECT path, asset_type, size_bytes, title, summary FROM research_assets "
|
|
"ORDER BY size_bytes DESC, path LIMIT 12"
|
|
).fetchall()
|
|
print("\n Largest assets:")
|
|
print_table(top_assets, ["path", "asset_type", "size_bytes", "title", "summary"])
|
|
return
|
|
|
|
pattern = f"%{term}%"
|
|
assets = conn.execute(
|
|
"SELECT path, asset_type, size_bytes, title, summary, symbol_refs, protocol_refs "
|
|
"FROM research_assets WHERE path LIKE ? OR file_name LIKE ? OR title LIKE ? OR summary LIKE ? "
|
|
"OR symbol_refs LIKE ? OR protocol_refs LIKE ? "
|
|
"ORDER BY CASE asset_type WHEN 'markdown' THEN 0 WHEN 'asm' THEN 1 ELSE 2 END, path LIMIT 20",
|
|
(pattern, pattern, pattern, pattern, pattern, pattern),
|
|
).fetchall()
|
|
mentions = _research_matches(conn, term, limit=30)
|
|
print(f"\n Research assets for '{term}':")
|
|
if assets:
|
|
print_table(assets, ["path", "asset_type", "size_bytes", "title", "summary", "symbol_refs", "protocol_refs"])
|
|
else:
|
|
print(" (no matching assets)")
|
|
if mentions:
|
|
print("\n Extracted research mentions:")
|
|
print_table(mentions, ["asset_path", "symbol", "mention_type", "context"])
|
|
|
|
|
|
def symbols_cmd(term=None):
|
|
conn = get_conn()
|
|
params = []
|
|
query = (
|
|
"SELECT symbol_name, file_path, project, semantic_tags, hot_path, ct_sensitive, batchable, gpu_candidate, risk_level, "
|
|
"review_priority, risk_score, gain_score, audit_coverage_score, change_frequency, caller_count, callee_count "
|
|
"FROM symbol_metadata"
|
|
)
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
query += " WHERE symbol_name LIKE ? OR file_path LIKE ? OR semantic_tags LIKE ? OR summary LIKE ?"
|
|
params = [pattern, pattern, pattern, pattern]
|
|
query += " ORDER BY review_priority DESC, gain_score DESC, symbol_name LIMIT 120"
|
|
rows = conn.execute(query, params).fetchall()
|
|
title = f"'{term}'" if term else "all"
|
|
print(f"\n Symbol metadata for {title}:")
|
|
print_table(
|
|
rows,
|
|
[
|
|
"symbol_name", "file_path", "project", "semantic_tags", "hot_path", "ct_sensitive", "batchable",
|
|
"gpu_candidate", "risk_level", "review_priority", "risk_score", "gain_score",
|
|
"audit_coverage_score", "change_frequency", "caller_count", "callee_count"
|
|
]
|
|
)
|
|
|
|
|
|
def bottlenecks_cmd(term=None):
|
|
conn = get_conn()
|
|
params = []
|
|
query = (
|
|
"SELECT symbol_name, file_path, overall_priority, perf_priority, safe_priority, hotness_score, complexity_score, fanin_score, fanout_score, "
|
|
"gpu_score, ct_risk_score, audit_gap_score, risk_level, semantic_tags, summary "
|
|
"FROM v_bottleneck_queue"
|
|
)
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
query += " WHERE symbol_name LIKE ? OR file_path LIKE ? OR semantic_tags LIKE ? OR summary LIKE ?"
|
|
params = [pattern, pattern, pattern, pattern]
|
|
query += " ORDER BY overall_priority DESC, hotness_score DESC, symbol_name LIMIT 120"
|
|
rows = conn.execute(query, params).fetchall()
|
|
title = f"'{term}'" if term else "all"
|
|
print(f"\n Bottleneck queue for {title}:")
|
|
print_table(
|
|
rows,
|
|
[
|
|
"symbol_name", "file_path", "overall_priority", "perf_priority", "safe_priority", "hotness_score", "complexity_score",
|
|
"fanin_score", "fanout_score", "gpu_score", "ct_risk_score", "audit_gap_score", "risk_level", "semantic_tags", "summary"
|
|
]
|
|
)
|
|
|
|
|
|
def auditmap_cmd(term=None):
|
|
conn = get_conn()
|
|
params = []
|
|
query = (
|
|
"SELECT symbol_name, file_path, covered_by_tests, test_count, mapping_types, review_queue_types, audit_modules, coverage_score, last_status, historical_failures, evidence "
|
|
"FROM symbol_audit_coverage"
|
|
)
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
query += " WHERE symbol_name LIKE ? OR file_path LIKE ? OR review_queue_types LIKE ? OR audit_modules LIKE ? OR evidence LIKE ?"
|
|
params = [pattern, pattern, pattern, pattern, pattern]
|
|
query += " ORDER BY coverage_score ASC, historical_failures DESC, symbol_name LIMIT 120"
|
|
rows = conn.execute(query, params).fetchall()
|
|
title = f"'{term}'" if term else "all"
|
|
print(f"\n Symbol audit coverage for {title}:")
|
|
print_table(rows, ["symbol_name", "file_path", "covered_by_tests", "test_count", "mapping_types", "review_queue_types", "audit_modules", "coverage_score", "last_status", "historical_failures", "evidence"])
|
|
|
|
|
|
def aitasks_cmd(term=None):
|
|
conn = get_conn()
|
|
params = []
|
|
query = "SELECT task_type, symbol_name, file_path, status, priority, rationale, prompt FROM ai_tasks"
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
query += " WHERE task_type LIKE ? OR symbol_name LIKE ? OR file_path LIKE ? OR rationale LIKE ? OR prompt LIKE ?"
|
|
params = [pattern, pattern, pattern, pattern, pattern]
|
|
query += " ORDER BY priority DESC, task_type, symbol_name LIMIT 120"
|
|
rows = conn.execute(query, params).fetchall()
|
|
title = f"'{term}'" if term else "all"
|
|
print(f"\n AI task queue for {title}:")
|
|
print_table(rows, ["task_type", "symbol_name", "file_path", "status", "priority", "rationale", "prompt"])
|
|
|
|
|
|
def cfgkey_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT key_name, getter_type, source_file, line, context FROM config_keys WHERE key_name LIKE ? OR context LIKE ? ORDER BY key_name, source_file",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n Config keys matching '{name}':")
|
|
print_table(rows, ["key_name", "getter_type", "source_file", "line", "context"])
|
|
|
|
|
|
def define_cmd(name):
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT name, value, file, line, category FROM defines WHERE name LIKE ? OR value LIKE ? ORDER BY category, name",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
print(f"\n #defines matching '{name}':")
|
|
print_table(rows, ["name", "value", "file", "line", "category"])
|
|
|
|
|
|
def complexity_cmd():
|
|
conn = get_conn()
|
|
print("\n === Top 30 Largest Files (by line count) ===")
|
|
rows = conn.execute(
|
|
"SELECT file, project, line_count, category FROM file_lines ORDER BY line_count DESC LIMIT 30"
|
|
).fetchall()
|
|
print_table(rows, ["file", "project", "line_count", "category"])
|
|
# Summary
|
|
totals = conn.execute(
|
|
"SELECT project, SUM(line_count) as total, COUNT(*) as files, MAX(line_count) as biggest FROM file_lines GROUP BY project"
|
|
).fetchall()
|
|
print("\n === Summary by Project ===")
|
|
print_table(totals, ["project", "total", "files", "biggest"])
|
|
|
|
|
|
def stats_cmd(show_header=True):
|
|
conn = get_conn()
|
|
if show_header:
|
|
print()
|
|
tables = [
|
|
("graph_metadata", "Graph Metadata"),
|
|
("singletons", "Singletons"),
|
|
("events", "Events/Dungeons"),
|
|
("ai_handlers", "AI Handlers"),
|
|
("player_files", "Player Files"),
|
|
("packet_handlers", "Packet Handlers"),
|
|
("inventory_scripts", "Inventory Scripts"),
|
|
("constants", "Constants"),
|
|
("config_files", "Config Files"),
|
|
("enums", "Enums"),
|
|
("structs", "Structs/Packets"),
|
|
("classes", "Classes"),
|
|
("methods", "Methods"),
|
|
("todos", "TODOs/FIXMEs"),
|
|
("db_tables", "DB Table Refs"),
|
|
("prepared_statements", "Prepared Stmts"),
|
|
("config_keys", "Config Keys"),
|
|
("defines", "#define Macros"),
|
|
("file_lines", "File Line Counts"),
|
|
("leak_risks", "Leak Risk Files"),
|
|
("null_risks", "NULL-Check Risks"),
|
|
("raw_pointers", "Raw Ptr Members"),
|
|
("unsafe_casts", "C-Style Casts"),
|
|
("crash_risks", "Crash Risk Patterns"),
|
|
("infinite_loop_risks", "Infinite Loop Risks"),
|
|
("dead_methods", "Dead Methods"),
|
|
("duplicate_blocks", "Duplicate Blocks"),
|
|
("file_summaries", "File Summaries"),
|
|
("function_index", "Function Index"),
|
|
("semantic_tags", "Semantic Tags"),
|
|
("semantic_profiles", "Semantic Profiles"),
|
|
("audit_coverage", "Audit Coverage"),
|
|
("history_metrics", "History Metrics"),
|
|
("review_queue", "Review Queue"),
|
|
("ownership_metrics", "Ownership Metrics"),
|
|
("test_function_map", "Test Function Map"),
|
|
("call_edges", "Call Graph Edges"),
|
|
("symbol_metadata", "Symbol Metadata"),
|
|
("analysis_scores", "Analysis Scores"),
|
|
("symbol_audit_coverage", "Symbol Audit Coverage"),
|
|
("ai_tasks", "AI Tasks"),
|
|
("research_assets", "Research Assets"),
|
|
("research_mentions", "Research Mentions"),
|
|
("edges", "Edges/Relations"),
|
|
("files", "Source Files"),
|
|
("dependencies", "Dependencies"),
|
|
("fts_index", "FTS Index Entries"),
|
|
]
|
|
print(" === Source Graph Statistics ===")
|
|
for table, label in tables:
|
|
count = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
|
|
print(f" {label:.<30} {count}")
|
|
|
|
|
|
def validateai_cmd(base_path=None):
|
|
if base_path:
|
|
ai_dir = Path(base_path)
|
|
else:
|
|
ai_dir = RUNTIME_DATA_ROOT / "Monster" / "AI"
|
|
|
|
ai_dir = ai_dir.resolve()
|
|
automata_path = ai_dir / "MonsterAiAutomata.xml"
|
|
element_path = ai_dir / "MonsterAiElement.xml"
|
|
unit_path = ai_dir / "MonsterAiUnit.xml"
|
|
|
|
print(f"\n Validating Monster AI config in: {ai_dir}")
|
|
|
|
required = [automata_path, element_path, unit_path]
|
|
missing = [str(p) for p in required if not p.exists()]
|
|
if missing:
|
|
print("\n Missing required files:")
|
|
for path in missing:
|
|
print(f" - {path}")
|
|
return
|
|
|
|
supported_transition_types = {0, 1, 2, 4, 5, 6, 7, 8, 10, 11, 12}
|
|
supported_element_classes = {
|
|
1, 11, 12, 13, 14,
|
|
21, 22, 23,
|
|
31, 32,
|
|
41,
|
|
51, 52, 53,
|
|
61, 62, 64, 65, 66, 67, 68, 71
|
|
}
|
|
def parse_xml(path):
|
|
return ET.parse(path).getroot()
|
|
|
|
def line_number(path, needle):
|
|
try:
|
|
with open(path, "r", encoding="utf-8") as f:
|
|
for idx, line in enumerate(f, 1):
|
|
if needle in line:
|
|
return idx
|
|
except Exception:
|
|
return None
|
|
return None
|
|
|
|
root_automata = parse_xml(automata_path)
|
|
root_element = parse_xml(element_path)
|
|
root_unit = parse_xml(unit_path)
|
|
|
|
elements = {}
|
|
units = {}
|
|
issues = []
|
|
|
|
for data in root_element.findall("Data"):
|
|
elem_id = int(data.attrib.get("ID", "-1"))
|
|
elem_class = int(data.attrib.get("Class", "-1"))
|
|
elem_state = int(data.attrib.get("State", "-1"))
|
|
elements[elem_id] = {"class": elem_class, "state": elem_state, "name": data.attrib.get("Name", "")}
|
|
|
|
if elem_class not in supported_element_classes:
|
|
issues.append(("unsupported_element_class", line_number(element_path, f'ID="{elem_id}"'),
|
|
f"Element ID {elem_id} uses unsupported Class={elem_class}"))
|
|
|
|
for data in root_unit.findall("Data"):
|
|
unit_id = int(data.attrib.get("ID", "-1"))
|
|
units[unit_id] = dict(data.attrib)
|
|
|
|
for attr_name in ("Normal", "Move", "Attack", "Heal", "Avoid", "Help", "Special", "Event"):
|
|
raw = data.attrib.get(attr_name, "-1")
|
|
try:
|
|
ref = int(raw)
|
|
except ValueError:
|
|
issues.append(("invalid_unit_ref", line_number(unit_path, f'ID="{unit_id}"'),
|
|
f"Unit ID {unit_id} has non-integer {attr_name}='{raw}'"))
|
|
continue
|
|
|
|
if ref == -1:
|
|
continue
|
|
|
|
if ref not in elements:
|
|
issues.append(("orphan_element_ref", line_number(unit_path, f'ID="{unit_id}"'),
|
|
f"Unit ID {unit_id} references missing element {attr_name}={ref}"))
|
|
|
|
for data in root_automata.findall("Data"):
|
|
raw_id = data.attrib.get("ID", "-1")
|
|
current_state = data.attrib.get("CurrentState", "?")
|
|
next_state = data.attrib.get("NextState", "?")
|
|
priority = data.attrib.get("Priority", "?")
|
|
line = line_number(automata_path, f'ID="{raw_id}"')
|
|
|
|
if "TransitionType" not in data.attrib:
|
|
issues.append(("missing_transition_type", line,
|
|
f"Automata ID {raw_id} state {current_state}->{next_state} priority {priority} is missing TransitionType"))
|
|
continue
|
|
|
|
transition_type = int(data.attrib["TransitionType"])
|
|
if transition_type not in supported_transition_types:
|
|
issues.append(("unsupported_transition_type", line,
|
|
f"Automata ID {raw_id} state {current_state}->{next_state} priority {priority} uses unsupported TransitionType={transition_type}"))
|
|
|
|
if data.attrib.get("TransitionRate", "").count(" ") > 0:
|
|
issues.append(("suspicious_transition_rate", line,
|
|
f"Automata ID {raw_id} has suspicious TransitionRate formatting"))
|
|
|
|
print("\n === AI Validation Summary ===")
|
|
print(f" Elements ................. {len(elements)}")
|
|
print(f" Units .................... {len(units)}")
|
|
print(f" Issues ................... {len(issues)}")
|
|
|
|
if not issues:
|
|
print("\n No AI config issues found.")
|
|
return
|
|
|
|
grouped = defaultdict(list)
|
|
for issue_type, line, message in issues:
|
|
grouped[issue_type].append((line, message))
|
|
|
|
for issue_type in sorted(grouped.keys()):
|
|
print(f"\n [{issue_type}]")
|
|
for line, message in grouped[issue_type][:20]:
|
|
if line:
|
|
print(f" - line {line}: {message}")
|
|
else:
|
|
print(f" - {message}")
|
|
|
|
if len(issues) > 20:
|
|
print(f"\n Showing first {min(len(issues), 20)} issues per category.")
|
|
|
|
|
|
def sql_cmd(query):
|
|
conn = get_conn()
|
|
try:
|
|
rows = conn.execute(query).fetchall()
|
|
if rows:
|
|
columns = [desc[0] for desc in conn.execute(query).description]
|
|
# Convert to dicts for print_table
|
|
class DictRow:
|
|
def __init__(self, row, cols):
|
|
self._data = {c: row[i] for i, c in enumerate(cols)}
|
|
def __getitem__(self, key):
|
|
return self._data.get(key)
|
|
dict_rows = [DictRow(r, columns) for r in rows]
|
|
print_table(dict_rows, columns)
|
|
else:
|
|
print(" (no results)")
|
|
except Exception as e:
|
|
print(f" SQL Error: {e}")
|
|
|
|
|
|
# ============================================================
|
|
# NEW COMMANDS: body, summarize, decide, decisions, bundle, claudemd, init, export-config
|
|
# ============================================================
|
|
|
|
def body_cmd(name):
|
|
"""Print function body from snippet cache."""
|
|
conn = get_conn()
|
|
pattern = f"%{name}%"
|
|
rows = conn.execute(
|
|
"SELECT file, function_name, class_name, start_line, end_line, body, line_count, project "
|
|
"FROM function_bodies WHERE function_name LIKE ? OR class_name LIKE ? "
|
|
"ORDER BY file, start_line LIMIT 10",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if not rows:
|
|
print(f" No function bodies matching '{name}'")
|
|
return
|
|
for row in rows:
|
|
cls = f"{row['class_name']}::" if row['class_name'] else ""
|
|
print(f"\n === {cls}{row['function_name']} ({row['file']}:{row['start_line']}-{row['end_line']}, {row['line_count']} lines) ===")
|
|
print(row['body'])
|
|
|
|
|
|
def summarize_cmd(args):
|
|
"""Generate or show function summaries."""
|
|
conn = get_conn()
|
|
|
|
# Parse args
|
|
generate = "--generate" in args
|
|
pattern = None
|
|
for a in args:
|
|
if not a.startswith("--"):
|
|
pattern = a
|
|
break
|
|
|
|
if generate:
|
|
_generate_heuristic_summaries(conn, pattern)
|
|
return
|
|
|
|
if pattern:
|
|
like = f"%{pattern}%"
|
|
rows = conn.execute(
|
|
"SELECT file, function_name, class_name, start_line, end_line, summary, params, "
|
|
"return_type, side_effects, stale, generator FROM function_summaries "
|
|
"WHERE function_name LIKE ? OR class_name LIKE ? OR file LIKE ? "
|
|
"ORDER BY file, start_line LIMIT 30",
|
|
(like, like, like)
|
|
).fetchall()
|
|
if rows:
|
|
print(f"\n Function summaries matching '{pattern}':")
|
|
for row in rows:
|
|
cls = f"{row['class_name']}::" if row['class_name'] else ""
|
|
stale_mark = " [STALE]" if row['stale'] else ""
|
|
print(f" {cls}{row['function_name']} ({row['file']}:{row['start_line']}){stale_mark}")
|
|
if row['summary']:
|
|
print(f" {row['summary']}")
|
|
if row['params']:
|
|
print(f" params: {row['params']}")
|
|
if row['side_effects']:
|
|
print(f" side_effects: {row['side_effects']}")
|
|
print(f"\n ({len(rows)} results)")
|
|
else:
|
|
print(f" No summaries matching '{pattern}'. Run 'summarize --generate {pattern}' to create them.")
|
|
return
|
|
|
|
# Show summary statistics
|
|
total = conn.execute("SELECT COUNT(*) FROM function_summaries").fetchone()[0]
|
|
stale = conn.execute("SELECT COUNT(*) FROM function_summaries WHERE stale = 1").fetchone()[0]
|
|
bodies = conn.execute("SELECT COUNT(*) FROM function_bodies").fetchone()[0]
|
|
unsummarized = bodies - total
|
|
print(f"\n Function Summary Statistics:")
|
|
print(f" Total functions: {bodies}")
|
|
print(f" With summaries: {total}")
|
|
print(f" Stale summaries: {stale}")
|
|
print(f" Unsummarized: {unsummarized}")
|
|
|
|
|
|
def _generate_heuristic_summaries(conn, pattern=None):
|
|
"""Generate heuristic summaries from function signatures."""
|
|
query = """
|
|
SELECT fb.file, fb.function_name, fb.class_name, fb.start_line, fb.end_line,
|
|
fb.body_hash, fb.line_count, fi.signature
|
|
FROM function_bodies fb
|
|
JOIN function_index fi ON fb.file = fi.file AND fb.function_name = fi.function_name
|
|
AND fb.start_line = fi.start_line
|
|
LEFT JOIN function_summaries fs ON fb.file = fs.file AND fb.function_name = fs.function_name
|
|
AND fb.start_line = fs.start_line
|
|
WHERE fs.id IS NULL
|
|
"""
|
|
params = []
|
|
if pattern:
|
|
query += " AND (fb.function_name LIKE ? OR fb.class_name LIKE ?)"
|
|
params = [f"%{pattern}%", f"%{pattern}%"]
|
|
query += " ORDER BY fb.file, fb.start_line"
|
|
rows = conn.execute(query, params).fetchall()
|
|
|
|
count = 0
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
for row in rows:
|
|
sig = row["signature"] or ""
|
|
# Extract return type and params from signature
|
|
return_type = ""
|
|
params_str = ""
|
|
paren_idx = sig.find("(")
|
|
if paren_idx > 0:
|
|
prefix = sig[:paren_idx].strip()
|
|
parts = prefix.rsplit(None, 1)
|
|
if len(parts) > 1:
|
|
return_type = parts[0].strip()
|
|
# Extract params
|
|
close = sig.rfind(")")
|
|
if close > paren_idx:
|
|
params_str = sig[paren_idx + 1:close].strip()
|
|
|
|
# Build heuristic summary
|
|
cls = f"{row['class_name']}::" if row['class_name'] else ""
|
|
summary = f"{cls}{row['function_name']}"
|
|
if return_type:
|
|
summary = f"Returns {return_type}."
|
|
else:
|
|
summary = f"{row['function_name']} ({row['line_count']} lines)"
|
|
|
|
try:
|
|
conn.execute(
|
|
"INSERT OR IGNORE INTO function_summaries "
|
|
"(file, function_name, class_name, start_line, end_line, summary, params, "
|
|
"return_type, side_effects, body_hash, stale, generated_at, generator) "
|
|
"VALUES (?,?,?,?,?,?,?,?,?,?,0,?,?)",
|
|
(row["file"], row["function_name"], row["class_name"],
|
|
row["start_line"], row["end_line"], summary, params_str,
|
|
return_type, None, row["body_hash"], now, "heuristic")
|
|
)
|
|
count += 1
|
|
except Exception:
|
|
pass
|
|
|
|
conn.commit()
|
|
print(f" Generated {count} heuristic summaries")
|
|
|
|
|
|
def decide_cmd(args):
|
|
"""Log an architecture decision."""
|
|
conn = get_conn()
|
|
|
|
# Parse args
|
|
if "--supersede" in args:
|
|
# Supersede an existing decision
|
|
idx = args.index("--supersede")
|
|
if idx + 1 < len(args):
|
|
old_id = int(args[idx + 1])
|
|
new_text = " ".join(args[idx + 2:]) if idx + 2 < len(args) else ""
|
|
conn.execute("UPDATE decisions SET status = 'superseded' WHERE id = ?", (old_id,))
|
|
if new_text:
|
|
conn.execute(
|
|
"INSERT INTO decisions (created_at, decision, rationale, status) VALUES (?,?,?,?)",
|
|
(datetime.now(timezone.utc).isoformat(), new_text, f"Supersedes #{old_id}", "active")
|
|
)
|
|
conn.commit()
|
|
print(f" Decision #{old_id} superseded.")
|
|
return
|
|
|
|
decision_text = ""
|
|
file_name = None
|
|
func_name = None
|
|
rationale = ""
|
|
tags = ""
|
|
author = ""
|
|
|
|
i = 0
|
|
text_parts = []
|
|
while i < len(args):
|
|
if args[i] == "--file" and i + 1 < len(args):
|
|
file_name = args[i + 1]
|
|
i += 2
|
|
elif args[i] == "--func" and i + 1 < len(args):
|
|
func_name = args[i + 1]
|
|
i += 2
|
|
elif args[i] == "--why" and i + 1 < len(args):
|
|
rationale = args[i + 1]
|
|
i += 2
|
|
elif args[i] == "--tags" and i + 1 < len(args):
|
|
tags = args[i + 1]
|
|
i += 2
|
|
elif args[i] == "--author" and i + 1 < len(args):
|
|
author = args[i + 1]
|
|
i += 2
|
|
else:
|
|
text_parts.append(args[i])
|
|
i += 1
|
|
|
|
decision_text = " ".join(text_parts)
|
|
if not decision_text:
|
|
print(" Usage: decide \"<decision text>\" --why \"<rationale>\" [--file X] [--func Y] [--tags Z]")
|
|
return
|
|
|
|
conn.execute(
|
|
"INSERT INTO decisions (created_at, file, function_name, decision, rationale, alternatives, author, tags, status) "
|
|
"VALUES (?,?,?,?,?,?,?,?,?)",
|
|
(datetime.now(timezone.utc).isoformat(), file_name, func_name,
|
|
decision_text, rationale, None, author, tags, "active")
|
|
)
|
|
conn.commit()
|
|
print(f" Decision logged: {decision_text[:80]}")
|
|
|
|
|
|
def decisions_cmd(term=None):
|
|
"""Query decision log."""
|
|
conn = get_conn()
|
|
if term:
|
|
pattern = f"%{term}%"
|
|
rows = conn.execute(
|
|
"SELECT id, created_at, file, function_name, decision, rationale, tags, status "
|
|
"FROM decisions WHERE decision LIKE ? OR rationale LIKE ? OR file LIKE ? "
|
|
"OR function_name LIKE ? OR tags LIKE ? ORDER BY created_at DESC LIMIT 30",
|
|
(pattern, pattern, pattern, pattern, pattern)
|
|
).fetchall()
|
|
else:
|
|
rows = conn.execute(
|
|
"SELECT id, created_at, file, function_name, decision, rationale, tags, status "
|
|
"FROM decisions WHERE status = 'active' ORDER BY created_at DESC LIMIT 30"
|
|
).fetchall()
|
|
|
|
if not rows:
|
|
print(" No decisions found.")
|
|
return
|
|
|
|
print(f"\n Architecture Decisions:")
|
|
for row in rows:
|
|
status = f" [{row['status']}]" if row['status'] != 'active' else ""
|
|
file_info = f" ({row['file']}" if row['file'] else ""
|
|
if row['function_name']:
|
|
file_info += f"::{row['function_name']}"
|
|
if file_info:
|
|
file_info += ")"
|
|
print(f" #{row['id']} [{row['created_at'][:10]}]{status}{file_info}")
|
|
print(f" {row['decision']}")
|
|
if row['rationale']:
|
|
print(f" Why: {row['rationale']}")
|
|
if row['tags']:
|
|
print(f" Tags: {row['tags']}")
|
|
print()
|
|
print(f" ({len(rows)} decisions)")
|
|
|
|
|
|
def bundle_cmd(task_type, term, max_lines=2000):
|
|
"""Assemble minimal context bundle for a task."""
|
|
conn = get_conn()
|
|
sections = []
|
|
line_budget = max_lines
|
|
pattern = f"%{term}%"
|
|
|
|
def add_section(title, content, priority=5, budget=500):
|
|
nonlocal line_budget
|
|
if not content:
|
|
return
|
|
lines = content.strip().split("\n")
|
|
if len(lines) > budget:
|
|
lines = lines[:budget] + [f"... ({len(lines) - budget} more lines)"]
|
|
used = len(lines) + 2
|
|
if line_budget <= 0:
|
|
return
|
|
sections.append((priority, title, "\n".join(lines)))
|
|
line_budget -= used
|
|
|
|
# 1. File summary (always)
|
|
rows = conn.execute(
|
|
"SELECT file, summary FROM file_summaries WHERE file LIKE ? LIMIT 5", (pattern,)
|
|
).fetchall()
|
|
if rows:
|
|
content = "\n".join(f"{r['file']}: {r['summary']}" for r in rows)
|
|
add_section("File Summaries", content, priority=1, budget=100)
|
|
|
|
# 2. Target function bodies
|
|
bodies = conn.execute(
|
|
"SELECT file, function_name, class_name, start_line, end_line, body, line_count "
|
|
"FROM function_bodies WHERE function_name LIKE ? OR class_name LIKE ? "
|
|
"ORDER BY line_count ASC LIMIT 5",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if bodies:
|
|
parts = []
|
|
for b in bodies:
|
|
cls = f"{b['class_name']}::" if b['class_name'] else ""
|
|
parts.append(f"// {cls}{b['function_name']} ({b['file']}:{b['start_line']}-{b['end_line']})")
|
|
parts.append(b['body'])
|
|
add_section("Target Functions", "\n".join(parts), priority=1, budget=600)
|
|
|
|
# 3. Relevant decisions
|
|
decs = conn.execute(
|
|
"SELECT decision, rationale, file FROM decisions "
|
|
"WHERE status = 'active' AND (decision LIKE ? OR file LIKE ? OR function_name LIKE ?) "
|
|
"ORDER BY created_at DESC LIMIT 5",
|
|
(pattern, pattern, pattern)
|
|
).fetchall()
|
|
if decs:
|
|
content = "\n".join(f"- {d['decision']}" + (f" (Why: {d['rationale']})" if d['rationale'] else "") for d in decs)
|
|
add_section("Relevant Decisions", content, priority=2, budget=200)
|
|
|
|
# 4. Function summaries for context
|
|
sums = conn.execute(
|
|
"SELECT file, function_name, class_name, summary FROM function_summaries "
|
|
"WHERE (function_name LIKE ? OR class_name LIKE ? OR file LIKE ?) AND stale = 0 "
|
|
"LIMIT 15",
|
|
(pattern, pattern, pattern)
|
|
).fetchall()
|
|
if sums:
|
|
content = "\n".join(f"{s['function_name']}: {s['summary']}" for s in sums if s['summary'])
|
|
add_section("Function Summaries", content, priority=3, budget=200)
|
|
|
|
if task_type == "bugfix":
|
|
# Callers (1 hop)
|
|
callers = conn.execute(
|
|
"SELECT caller_symbol, caller_file, callee_symbol, confidence "
|
|
"FROM call_edges WHERE callee_symbol LIKE ? OR callee_file LIKE ? "
|
|
"ORDER BY confidence DESC LIMIT 10",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if callers:
|
|
content = "\n".join(f"{c['caller_file']}::{c['caller_symbol']} -> {c['callee_symbol']} (conf={c['confidence']})" for c in callers)
|
|
add_section("Callers (1-hop)", content, priority=3, budget=200)
|
|
|
|
# Crash/null risks
|
|
risks = conn.execute(
|
|
"SELECT file, line, risk_type, severity, expression FROM crash_risks "
|
|
"WHERE file LIKE ? ORDER BY severity LIMIT 10", (pattern,)
|
|
).fetchall()
|
|
null_risks = conn.execute(
|
|
"SELECT file, line, function_call, risk_type FROM null_risks "
|
|
"WHERE file LIKE ? LIMIT 10", (pattern,)
|
|
).fetchall()
|
|
if risks or null_risks:
|
|
parts = []
|
|
for r in risks:
|
|
parts.append(f"[{r['severity']}] {r['file']}:{r['line']} {r['risk_type']}: {r['expression']}")
|
|
for n in null_risks:
|
|
parts.append(f"[null] {n['file']}:{n['line']} {n['function_call']} ({n['risk_type']})")
|
|
add_section("Risk Analysis", "\n".join(parts), priority=2, budget=200)
|
|
|
|
# Related tests
|
|
tests = conn.execute(
|
|
"SELECT test_file, function_name, mapping_type FROM test_function_map "
|
|
"WHERE target_file LIKE ? OR function_name LIKE ? LIMIT 10",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if tests:
|
|
content = "\n".join(f"{t['test_file']} tests {t['function_name']} ({t['mapping_type']})" for t in tests)
|
|
add_section("Related Tests", content, priority=3, budget=200)
|
|
|
|
elif task_type == "feature":
|
|
# Related classes
|
|
classes = conn.execute(
|
|
"SELECT name, header, parent_class, project FROM classes "
|
|
"WHERE name LIKE ? OR parent_class LIKE ? LIMIT 10",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if classes:
|
|
content = "\n".join(f"{c['name']} extends {c['parent_class'] or 'none'} ({c['header']})" for c in classes)
|
|
add_section("Related Classes", content, priority=3, budget=200)
|
|
|
|
# Config keys
|
|
cfgkeys = conn.execute(
|
|
"SELECT key_name, getter_type, source_file, context FROM config_keys "
|
|
"WHERE source_file LIKE ? OR key_name LIKE ? LIMIT 10",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if cfgkeys:
|
|
content = "\n".join(f"{c['key_name']} ({c['getter_type']}) in {c['source_file']}" for c in cfgkeys)
|
|
add_section("Config Keys", content, priority=4, budget=150)
|
|
|
|
# Constants
|
|
consts = conn.execute(
|
|
"SELECT name, value, header, description FROM constants "
|
|
"WHERE name LIKE ? OR description LIKE ? LIMIT 10",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if consts:
|
|
content = "\n".join(f"{c['name']} = {c['value']} ({c['description']})" for c in consts)
|
|
add_section("Constants", content, priority=4, budget=100)
|
|
|
|
elif task_type == "refactor":
|
|
# Callers + callees (2 hop)
|
|
callers = conn.execute(
|
|
"SELECT DISTINCT caller_symbol, caller_file FROM call_edges "
|
|
"WHERE callee_symbol LIKE ? OR callee_file LIKE ? LIMIT 15",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
callees = conn.execute(
|
|
"SELECT DISTINCT callee_symbol, callee_file FROM call_edges "
|
|
"WHERE caller_symbol LIKE ? OR caller_file LIKE ? LIMIT 15",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if callers or callees:
|
|
parts = ["Callers:"] + [f" <- {c['caller_file']}::{c['caller_symbol']}" for c in callers]
|
|
parts += ["Callees:"] + [f" -> {c['callee_file']}::{c['callee_symbol']}" for c in callees]
|
|
add_section("Call Graph (2-hop)", "\n".join(parts), priority=2, budget=300)
|
|
|
|
# Dead methods
|
|
dead = conn.execute(
|
|
"SELECT class_name, method_name, header_file FROM dead_methods "
|
|
"WHERE class_name LIKE ? OR header_file LIKE ? LIMIT 10",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if dead:
|
|
content = "\n".join(f"{d['class_name']}::{d['method_name']} ({d['header_file']})" for d in dead)
|
|
add_section("Dead Methods", content, priority=4, budget=100)
|
|
|
|
# Duplicates
|
|
dupes = conn.execute(
|
|
"SELECT file_a, line_a, file_b, line_b, line_count, preview FROM duplicate_blocks "
|
|
"WHERE file_a LIKE ? OR file_b LIKE ? LIMIT 5",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if dupes:
|
|
content = "\n".join(f"{d['file_a']}:{d['line_a']} == {d['file_b']}:{d['line_b']} ({d['line_count']} lines)" for d in dupes)
|
|
add_section("Duplicate Code", content, priority=4, budget=100)
|
|
|
|
elif task_type == "audit":
|
|
coverage = conn.execute(
|
|
"SELECT file, coverage_score, unit_test_refs, crash_risk_count, null_risk_count, semantic_risk_max, notes "
|
|
"FROM audit_coverage WHERE file LIKE ? ORDER BY coverage_score ASC, semantic_risk_max DESC LIMIT 12",
|
|
(pattern,)
|
|
).fetchall()
|
|
if coverage:
|
|
content = "\n".join(
|
|
f"{row['file']}: cov={row['coverage_score']} tests={row['unit_test_refs']} crash={row['crash_risk_count']} null={row['null_risk_count']} semrisk={row['semantic_risk_max']}"
|
|
for row in coverage
|
|
)
|
|
add_section("Coverage Snapshot", content, priority=2, budget=180)
|
|
|
|
ct_symbols = conn.execute(
|
|
"SELECT symbol_name, file_path, review_priority, risk_score, audit_coverage_score FROM symbol_metadata "
|
|
"WHERE (symbol_name LIKE ? OR file_path LIKE ?) AND ct_sensitive = 1 "
|
|
"ORDER BY review_priority DESC, risk_score DESC LIMIT 12",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if ct_symbols:
|
|
content = "\n".join(
|
|
f"{row['symbol_name']} @ {row['file_path']} | prio={row['review_priority']} risk={row['risk_score']} cov={row['audit_coverage_score']}"
|
|
for row in ct_symbols
|
|
)
|
|
add_section("CT-Sensitive Symbols", content, priority=2, budget=160)
|
|
|
|
review = conn.execute(
|
|
"SELECT file, queue_type, priority_score, rationale FROM review_queue WHERE file LIKE ? OR rationale LIKE ? "
|
|
"ORDER BY priority_score DESC LIMIT 12",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if review:
|
|
content = "\n".join(
|
|
f"{row['file']} [{row['queue_type']}] prio={row['priority_score']} :: {row['rationale']}"
|
|
for row in review
|
|
)
|
|
add_section("Review Queue", content, priority=3, budget=160)
|
|
|
|
elif task_type == "optimize":
|
|
bottlenecks = conn.execute(
|
|
"SELECT symbol_name, file_path, overall_priority, perf_priority, hotness_score, gpu_score, audit_gap_score, summary "
|
|
"FROM v_bottleneck_queue WHERE symbol_name LIKE ? OR file_path LIKE ? OR summary LIKE ? "
|
|
"ORDER BY overall_priority DESC, perf_priority DESC LIMIT 12",
|
|
(pattern, pattern, pattern)
|
|
).fetchall()
|
|
if bottlenecks:
|
|
content = "\n".join(
|
|
f"{row['symbol_name']} @ {row['file_path']} | overall={row['overall_priority']} perf={row['perf_priority']} hot={row['hotness_score']} gpu={row['gpu_score']} gap={row['audit_gap_score']}"
|
|
for row in bottlenecks
|
|
)
|
|
add_section("Bottleneck Queue", content, priority=2, budget=180)
|
|
|
|
hotspots = conn.execute(
|
|
"SELECT entity_name, file, gain_score, risk_score, hot_path_candidate, optimization_candidate, fragile_surface "
|
|
"FROM semantic_profiles WHERE entity_name LIKE ? OR file LIKE ? "
|
|
"ORDER BY gain_score DESC, risk_score DESC LIMIT 12",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if hotspots:
|
|
content = "\n".join(
|
|
f"{row['entity_name']} @ {row['file']} | gain={row['gain_score']} risk={row['risk_score']} hot={row['hot_path_candidate']} opt={row['optimization_candidate']} fragile={row['fragile_surface']}"
|
|
for row in hotspots
|
|
)
|
|
add_section("Optimization Hotspots", content, priority=3, budget=160)
|
|
|
|
elif task_type == "explore":
|
|
candidate_files = _collect_candidate_files(conn, term, limit=8)
|
|
if candidate_files:
|
|
content = "\n".join(_compact_file_snapshot(conn, file_name) for file_name in candidate_files)
|
|
add_section("Primary Files", content, priority=2, budget=120)
|
|
|
|
top_symbols = _iter_candidate_symbols(conn, term, candidate_files, limit=12)
|
|
if top_symbols:
|
|
content = "\n".join(
|
|
f"{row['symbol_name']} @ {row['file_path']} | prio={row['review_priority']} gain={row['gain_score']} risk={row['risk_score']} cov={row['audit_coverage_score']}"
|
|
for row in top_symbols
|
|
)
|
|
add_section("Top Symbols", content, priority=2, budget=160)
|
|
|
|
neighbor_files = _build_neighbor_file_scores(conn, candidate_files, limit=10)
|
|
if neighbor_files:
|
|
content = "\n".join(
|
|
f"{file_name} | call_edges={score['call_edges']} dep_edges={score['dep_edges']} call_weight={score['call_weight']}"
|
|
for file_name, score in neighbor_files
|
|
)
|
|
add_section("Neighbor Files", content, priority=3, budget=140)
|
|
|
|
else:
|
|
print(f" Unsupported bundle type '{task_type}'. Use bugfix, feature, refactor, audit, optimize, or explore.")
|
|
return
|
|
|
|
# Dependencies
|
|
deps = conn.execute(
|
|
"SELECT source_file, target_file, dep_type FROM dependencies "
|
|
"WHERE source_file LIKE ? OR target_file LIKE ? LIMIT 15",
|
|
(pattern, pattern)
|
|
).fetchall()
|
|
if deps:
|
|
content = "\n".join(f"{d['source_file']} -> {d['target_file']} ({d['dep_type']})" for d in deps)
|
|
add_section("Dependencies", content, priority=5, budget=200)
|
|
|
|
# Output
|
|
if not sections:
|
|
print(f" No context found for '{term}'. Try a different search term.")
|
|
return
|
|
|
|
sections.sort(key=lambda x: x[0])
|
|
print(f"# Context Bundle: {task_type} — {term}")
|
|
print(f"# Budget: {max_lines} lines\n")
|
|
for _, title, content in sections:
|
|
print(f"## {title}")
|
|
print(content)
|
|
print()
|
|
|
|
|
|
def claudemd_cmd():
|
|
"""Generate CLAUDE.md project documentation from database."""
|
|
conn = get_conn()
|
|
|
|
project_name = "Project"
|
|
if CONFIG_PATH:
|
|
try:
|
|
cfg = _parse_toml(CONFIG_PATH)
|
|
project_name = cfg.get("project", {}).get("name", REPO_ROOT.name)
|
|
except Exception:
|
|
project_name = REPO_ROOT.name
|
|
else:
|
|
project_name = REPO_ROOT.name
|
|
|
|
# Stats
|
|
file_count = conn.execute("SELECT COUNT(*) FROM files").fetchone()[0]
|
|
total_lines = conn.execute("SELECT COALESCE(SUM(line_count), 0) FROM file_lines").fetchone()[0]
|
|
func_count = conn.execute("SELECT COUNT(*) FROM function_index").fetchone()[0]
|
|
class_count = conn.execute("SELECT COUNT(*) FROM classes").fetchone()[0]
|
|
singleton_count = conn.execute("SELECT COUNT(*) FROM singletons").fetchone()[0]
|
|
|
|
print(f"# {project_name} — AI Context Guide")
|
|
print(f"\n## Project Overview")
|
|
print(f"- **{file_count}** source files, **{total_lines:,}** lines of code")
|
|
print(f"- **{func_count}** functions, **{class_count}** classes, **{singleton_count}** singletons")
|
|
|
|
# Source dirs
|
|
print(f"- Source directories: {', '.join(label for label, _, _ in SOURCE_DIRS)}")
|
|
print()
|
|
|
|
# Key Singletons
|
|
singletons = conn.execute(
|
|
"SELECT macro, class_name, header, category, description FROM singletons "
|
|
"ORDER BY category, macro LIMIT 30"
|
|
).fetchall()
|
|
if singletons:
|
|
print("## Key Singletons")
|
|
current_cat = None
|
|
for s in singletons:
|
|
if s['category'] != current_cat:
|
|
current_cat = s['category']
|
|
print(f"\n### {current_cat or 'other'}")
|
|
print(f"- `{s['macro']}` — {s['description'] or s['class_name']} (`{s['header']}`)")
|
|
print()
|
|
|
|
# File map by category
|
|
cats = conn.execute(
|
|
"SELECT category, COUNT(*) as cnt FROM files GROUP BY category ORDER BY cnt DESC"
|
|
).fetchall()
|
|
if cats:
|
|
print("## Source File Categories")
|
|
for c in cats:
|
|
print(f"- **{c['category'] or 'uncategorized'}**: {c['cnt']} files")
|
|
print()
|
|
|
|
# Naming conventions from category rules
|
|
print("## Naming Conventions")
|
|
for rule in CATEGORY_RULES[:15]:
|
|
match = rule['match']
|
|
pattern = rule['pattern']
|
|
category = rule['category']
|
|
source = f" (in {rule['source_dir']})" if rule.get('source_dir') else ""
|
|
if match == "prefix":
|
|
print(f"- Files starting with `{pattern}` → **{category}**{source}")
|
|
elif match == "suffix":
|
|
print(f"- Files ending with `{pattern}` → **{category}**{source}")
|
|
elif match == "contains":
|
|
print(f"- Files containing `{pattern}` → **{category}**{source}")
|
|
print()
|
|
|
|
# Key constants
|
|
consts = conn.execute(
|
|
"SELECT name, value, description FROM constants WHERE category = 'limits' ORDER BY name LIMIT 15"
|
|
).fetchall()
|
|
if consts:
|
|
print("## Key Constants")
|
|
for c in consts:
|
|
print(f"- `{c['name']}` = {c['value']} — {c['description']}")
|
|
print()
|
|
|
|
# Known issues
|
|
crash_count = conn.execute("SELECT COUNT(*) FROM crash_risks").fetchone()[0]
|
|
null_count = conn.execute("SELECT COUNT(*) FROM null_risks").fetchone()[0]
|
|
dead_count = conn.execute("SELECT COUNT(*) FROM dead_methods").fetchone()[0]
|
|
todo_count = conn.execute("SELECT COUNT(*) FROM todos").fetchone()[0]
|
|
leak_count = conn.execute("SELECT COUNT(*) FROM leak_risks WHERE risk_score > 0").fetchone()[0]
|
|
|
|
print("## Known Issues")
|
|
print(f"- {crash_count} crash risks")
|
|
print(f"- {null_count} unchecked pointer dereferences")
|
|
print(f"- {dead_count} dead methods")
|
|
print(f"- {leak_count} potential memory leaks")
|
|
print(f"- {todo_count} TODO/FIXME comments")
|
|
print()
|
|
|
|
# Usage guide
|
|
print("## Using source_graph.py")
|
|
print("```bash")
|
|
print("python source_graph.py find <term> # Full-text search")
|
|
print("python source_graph.py func <name> # Find function with line ranges")
|
|
print("python source_graph.py body <name> # Print function source code")
|
|
print("python source_graph.py context <file> # Unified context for a file")
|
|
print("python source_graph.py focus <term> [budget] [--core] # Compact ranked snapshot")
|
|
print("python source_graph.py slice <term> [budget] [--core] # Minimal dependency/call slice")
|
|
print("python source_graph.py trace <term> # Build minimal code trace")
|
|
print("python source_graph.py bundle bugfix <term> # Assemble bugfix context")
|
|
print("python source_graph.py bundle feature <term># Assemble feature context")
|
|
print("python source_graph.py bundle audit <term> # Assemble audit context")
|
|
print("python source_graph.py bundle optimize <term># Assemble optimization context")
|
|
print("python source_graph.py impact <term> # Show change impact analysis")
|
|
print("python source_graph.py decide \"<text>\" --why \"<reason>\" # Log decision")
|
|
print("python source_graph.py decisions [term] # Query decision log")
|
|
print("python source_graph.py build --incremental # Fast rebuild (changed files only)")
|
|
print("```")
|
|
|
|
|
|
def init_cmd():
|
|
"""Generate a starter source_graph.toml by scanning the project."""
|
|
config_path = SCRIPT_DIR / "source_graph.toml"
|
|
if config_path.exists():
|
|
print(f"[!] Config already exists: {config_path}")
|
|
print(" Delete it first to regenerate.")
|
|
return
|
|
|
|
lines = []
|
|
lines.append("[project]")
|
|
lines.append(f'name = "{REPO_ROOT.name}"')
|
|
lines.append('language = "cpp"')
|
|
lines.append("")
|
|
|
|
# Detect source dirs
|
|
for subdir in sorted(REPO_ROOT.iterdir()):
|
|
if not subdir.is_dir():
|
|
continue
|
|
if subdir.name.startswith(".") or subdir.name in ("tools", "build", "bin", "obj", "Debug", "Release", "x64", ".vs", "packages"):
|
|
continue
|
|
cpp_count = len(list(subdir.glob("*.cpp"))) + len(list(subdir.glob("*.h")))
|
|
if cpp_count > 0:
|
|
lines.append("[[source_dirs]]")
|
|
lines.append(f'label = "{subdir.name}"')
|
|
lines.append(f'path = "{subdir.name}"')
|
|
lines.append('extensions = ["*.cpp", "*.h"]')
|
|
lines.append("")
|
|
|
|
# Category rules
|
|
lines.append("# File categorization rules")
|
|
for rule in _DEFAULT_CATEGORY_RULES[:10]:
|
|
lines.append("[[category_rules]]")
|
|
lines.append(f'match = "{rule["match"]}"')
|
|
lines.append(f'pattern = "{rule["pattern"]}"')
|
|
lines.append(f'category = "{rule["category"]}"')
|
|
if rule.get("source_dir"):
|
|
lines.append(f'source_dir = "{rule["source_dir"]}"')
|
|
lines.append("")
|
|
|
|
content = "\n".join(lines) + "\n"
|
|
with open(config_path, "w", encoding="utf-8") as f:
|
|
f.write(content)
|
|
print(f"[+] Generated: {config_path}")
|
|
print(f" Edit this file to add singletons, events, constants, etc.")
|
|
|
|
|
|
def export_config_cmd():
|
|
"""Export all hardcoded seed data to source_graph.toml format."""
|
|
lines = []
|
|
lines.append("[project]")
|
|
lines.append(f'name = "{REPO_ROOT.name}"')
|
|
lines.append('language = "cpp"')
|
|
lines.append("")
|
|
|
|
# Source dirs
|
|
for label, path, exts in SOURCE_DIRS:
|
|
lines.append("[[source_dirs]]")
|
|
lines.append(f'label = "{label}"')
|
|
try:
|
|
rel = path.relative_to(REPO_ROOT)
|
|
lines.append(f'path = "{str(rel).replace(chr(92), "/")}"')
|
|
except ValueError:
|
|
lines.append(f'path = "{str(path).replace(chr(92), "/")}"')
|
|
lines.append(f'extensions = {json.dumps(exts)}')
|
|
lines.append("")
|
|
|
|
# External paths
|
|
if EXTERNAL_PATHS:
|
|
lines.append("[external_paths]")
|
|
for key, val in EXTERNAL_PATHS.items():
|
|
lines.append(f'{key} = "{str(val).replace(chr(92), "/")}"')
|
|
lines.append("")
|
|
else:
|
|
lines.append("[external_paths]")
|
|
lines.append(f'runtime_data = "{str(RUNTIME_DATA_ROOT).replace(chr(92), "/")}"')
|
|
lines.append(f'research_dir = "ReversingResearch"')
|
|
lines.append("")
|
|
|
|
# Category rules
|
|
for rule in CATEGORY_RULES:
|
|
lines.append("[[category_rules]]")
|
|
lines.append(f'match = "{rule["match"]}"')
|
|
lines.append(f'pattern = "{rule["pattern"]}"')
|
|
lines.append(f'category = "{rule["category"]}"')
|
|
if rule.get("source_dir"):
|
|
lines.append(f'source_dir = "{rule["source_dir"]}"')
|
|
lines.append("")
|
|
|
|
# Export seed data from the DB if it exists
|
|
if DB_PATH.exists():
|
|
conn = get_conn()
|
|
|
|
# Singletons
|
|
rows = conn.execute("SELECT macro, class_name, header, project, category, description FROM singletons ORDER BY category, macro").fetchall()
|
|
for r in rows:
|
|
lines.append("[[singletons]]")
|
|
lines.append(f'macro = "{r["macro"]}"')
|
|
lines.append(f'class_name = "{r["class_name"]}"')
|
|
lines.append(f'header = "{r["header"]}"')
|
|
lines.append(f'project = "{r["project"]}"')
|
|
if r["category"]:
|
|
lines.append(f'category = "{r["category"]}"')
|
|
if r["description"]:
|
|
lines.append(f'description = "{r["description"]}"')
|
|
lines.append("")
|
|
|
|
# Player files
|
|
rows = conn.execute("SELECT file, domain, key_methods, description FROM player_files ORDER BY domain").fetchall()
|
|
for r in rows:
|
|
lines.append("[[player_files]]")
|
|
lines.append(f'file = "{r["file"]}"')
|
|
lines.append(f'domain = "{r["domain"]}"')
|
|
if r["key_methods"]:
|
|
lines.append(f'key_methods = "{r["key_methods"]}"')
|
|
if r["description"]:
|
|
lines.append(f'description = "{r["description"]}"')
|
|
lines.append("")
|
|
|
|
# Events
|
|
rows = conn.execute("SELECT name, singleton_macro, cpp_file, header_file, def_file, ai_file, description FROM events ORDER BY name").fetchall()
|
|
for r in rows:
|
|
lines.append("[[events]]")
|
|
lines.append(f'name = "{r["name"]}"')
|
|
for field in ("singleton_macro", "cpp_file", "header_file", "def_file", "ai_file", "description"):
|
|
if r[field]:
|
|
lines.append(f'{field} = "{r[field]}"')
|
|
lines.append("")
|
|
|
|
# AI handlers
|
|
rows = conn.execute("SELECT file, target, description FROM ai_handlers ORDER BY file").fetchall()
|
|
for r in rows:
|
|
lines.append("[[ai_handlers]]")
|
|
lines.append(f'file = "{r["file"]}"')
|
|
lines.append(f'target = "{r["target"]}"')
|
|
if r["description"]:
|
|
lines.append(f'description = "{r["description"]}"')
|
|
lines.append("")
|
|
|
|
# Inventory scripts
|
|
rows = conn.execute("SELECT file, description FROM inventory_scripts ORDER BY file").fetchall()
|
|
for r in rows:
|
|
lines.append("[[inventory_scripts]]")
|
|
lines.append(f'file = "{r["file"]}"')
|
|
if r["description"]:
|
|
lines.append(f'description = "{r["description"]}"')
|
|
lines.append("")
|
|
|
|
# Constants
|
|
rows = conn.execute("SELECT name, value, header, category, description FROM constants ORDER BY category, name").fetchall()
|
|
for r in rows:
|
|
lines.append("[[constants]]")
|
|
lines.append(f'name = "{r["name"]}"')
|
|
if r["value"]:
|
|
lines.append(f'value = "{r["value"]}"')
|
|
lines.append(f'header = "{r["header"]}"')
|
|
if r["category"]:
|
|
lines.append(f'category = "{r["category"]}"')
|
|
if r["description"]:
|
|
lines.append(f'description = "{r["description"]}"')
|
|
lines.append("")
|
|
|
|
# Config files
|
|
rows = conn.execute("SELECT path, description FROM config_files ORDER BY path").fetchall()
|
|
for r in rows:
|
|
lines.append("[[config_files]]")
|
|
lines.append(f'path = "{r["path"]}"')
|
|
if r["description"]:
|
|
lines.append(f'description = "{r["description"]}"')
|
|
lines.append("")
|
|
|
|
# Packet handlers
|
|
rows = conn.execute("SELECT headcode_name, headcode_value, headcode_hex, handler_method, source_file, handler_type, category, description FROM packet_handlers ORDER BY handler_type, headcode_value").fetchall()
|
|
for r in rows:
|
|
lines.append("[[packet_handlers]]")
|
|
lines.append(f'headcode_name = "{r["headcode_name"]}"')
|
|
if r["headcode_value"] is not None:
|
|
lines.append(f'headcode_value = {r["headcode_value"]}')
|
|
for field in ("headcode_hex", "handler_method", "source_file", "handler_type", "category", "description"):
|
|
if r[field]:
|
|
lines.append(f'{field} = "{r[field]}"')
|
|
lines.append("")
|
|
|
|
conn.close()
|
|
|
|
content = "\n".join(lines) + "\n"
|
|
out_path = SCRIPT_DIR / "source_graph.toml"
|
|
with open(out_path, "w", encoding="utf-8") as f:
|
|
f.write(content)
|
|
print(f"[+] Exported config to: {out_path}")
|
|
print(f" {len(lines)} lines written")
|
|
|
|
|
|
# ============================================================
|
|
# MAIN
|
|
# ============================================================
|
|
|
|
def main():
|
|
load_config()
|
|
|
|
if len(sys.argv) < 2:
|
|
print(__doc__)
|
|
return
|
|
|
|
cmd = sys.argv[1].lower()
|
|
|
|
if cmd == "build":
|
|
if "--incremental" in sys.argv or "-i" in sys.argv:
|
|
build_incremental()
|
|
else:
|
|
build()
|
|
elif cmd == "find" and len(sys.argv) > 2:
|
|
find_cmd(" ".join(sys.argv[2:]))
|
|
elif cmd == "singleton" and len(sys.argv) > 2:
|
|
singleton_cmd(sys.argv[2])
|
|
elif cmd == "file" and len(sys.argv) > 2:
|
|
file_cmd(sys.argv[2])
|
|
elif cmd == "handler" and len(sys.argv) > 2:
|
|
handler_cmd(sys.argv[2])
|
|
elif cmd == "event" and len(sys.argv) > 2:
|
|
event_cmd(sys.argv[2])
|
|
elif cmd == "method" and len(sys.argv) > 2:
|
|
method_cmd(sys.argv[2])
|
|
elif cmd == "player" and len(sys.argv) > 2:
|
|
player_cmd(sys.argv[2])
|
|
elif cmd == "ai" and len(sys.argv) > 2:
|
|
ai_cmd(sys.argv[2])
|
|
elif cmd == "const" and len(sys.argv) > 2:
|
|
const_cmd(sys.argv[2])
|
|
elif cmd == "config" and len(sys.argv) > 2:
|
|
config_cmd(sys.argv[2])
|
|
elif cmd == "deps" and len(sys.argv) > 2:
|
|
deps_cmd(sys.argv[2])
|
|
elif cmd == "enum" and len(sys.argv) > 2:
|
|
enum_cmd(sys.argv[2])
|
|
elif cmd == "struct" and len(sys.argv) > 2:
|
|
struct_cmd(sys.argv[2])
|
|
elif cmd == "class" and len(sys.argv) > 2:
|
|
class_cmd(sys.argv[2])
|
|
elif cmd == "todo":
|
|
todo_cmd(sys.argv[2] if len(sys.argv) > 2 else None)
|
|
elif cmd == "dbtable" and len(sys.argv) > 2:
|
|
dbtable_cmd(sys.argv[2])
|
|
elif cmd == "query" and len(sys.argv) > 2:
|
|
query_cmd(sys.argv[2])
|
|
elif cmd == "cfgkey" and len(sys.argv) > 2:
|
|
cfgkey_cmd(sys.argv[2])
|
|
elif cmd == "define" and len(sys.argv) > 2:
|
|
define_cmd(sys.argv[2])
|
|
elif cmd == "complexity":
|
|
complexity_cmd()
|
|
elif cmd == "leaks":
|
|
leaks_cmd()
|
|
elif cmd == "nullrisks":
|
|
nullrisks_cmd(sys.argv[2] if len(sys.argv) > 2 else None)
|
|
elif cmd == "rawptrs":
|
|
rawptrs_cmd(sys.argv[2] if len(sys.argv) > 2 else None)
|
|
elif cmd == "casts":
|
|
casts_cmd(sys.argv[2] if len(sys.argv) > 2 else None)
|
|
elif cmd == "crashes":
|
|
crashes_cmd(sys.argv[2] if len(sys.argv) > 2 else None)
|
|
elif cmd == "looprisks":
|
|
looprisks_cmd(sys.argv[2] if len(sys.argv) > 2 else None)
|
|
elif cmd == "deadmethods":
|
|
deadmethods_cmd()
|
|
elif cmd == "duplicates":
|
|
duplicates_cmd()
|
|
elif cmd == "context" and len(sys.argv) > 2:
|
|
context_cmd(sys.argv[2])
|
|
elif cmd == "func" and len(sys.argv) > 2:
|
|
func_cmd(sys.argv[2])
|
|
elif cmd == "gaps":
|
|
gaps_cmd()
|
|
elif cmd == "summary":
|
|
summary_cmd()
|
|
elif cmd == "stats":
|
|
stats_cmd()
|
|
elif cmd == "tags" and len(sys.argv) > 2:
|
|
tags_cmd(" ".join(sys.argv[2:]))
|
|
elif cmd == "hotspots":
|
|
hotspots_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "coverage":
|
|
coverage_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "churn":
|
|
churn_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "reviewqueue":
|
|
reviewqueue_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "ownership":
|
|
ownership_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "testmap":
|
|
testmap_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "calls" and len(sys.argv) > 2:
|
|
calls_cmd(" ".join(sys.argv[2:]))
|
|
elif cmd == "focus" and len(sys.argv) > 2:
|
|
term, budget, core_only = _parse_compact_args(sys.argv[2:], 80)
|
|
focus_cmd(term, budget, core_only=core_only)
|
|
elif cmd == "slice" and len(sys.argv) > 2:
|
|
term, budget, core_only = _parse_compact_args(sys.argv[2:], 120)
|
|
slice_cmd(term, budget, core_only=core_only)
|
|
elif cmd == "trace" and len(sys.argv) > 2:
|
|
trace_cmd(" ".join(sys.argv[2:]))
|
|
elif cmd == "impact" and len(sys.argv) > 2:
|
|
impact_cmd(" ".join(sys.argv[2:]))
|
|
elif cmd == "research":
|
|
research_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "symbols":
|
|
symbols_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "bottlenecks":
|
|
bottlenecks_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "auditmap":
|
|
auditmap_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "aitasks":
|
|
aitasks_cmd(" ".join(sys.argv[2:]) if len(sys.argv) > 2 else None)
|
|
elif cmd == "validateai":
|
|
validateai_cmd(sys.argv[2] if len(sys.argv) > 2 else None)
|
|
elif cmd == "sql" and len(sys.argv) > 2:
|
|
sql_cmd(" ".join(sys.argv[2:]))
|
|
elif cmd == "body" and len(sys.argv) > 2:
|
|
body_cmd(sys.argv[2])
|
|
elif cmd == "summarize":
|
|
summarize_cmd(sys.argv[2:])
|
|
elif cmd == "decide" and len(sys.argv) > 2:
|
|
decide_cmd(sys.argv[2:])
|
|
elif cmd == "decisions":
|
|
decisions_cmd(sys.argv[2] if len(sys.argv) > 2 else None)
|
|
elif cmd == "bundle" and len(sys.argv) > 3:
|
|
max_l = 2000
|
|
if "--max-lines" in sys.argv:
|
|
idx = sys.argv.index("--max-lines")
|
|
if idx + 1 < len(sys.argv):
|
|
max_l = int(sys.argv[idx + 1])
|
|
bundle_cmd(sys.argv[2], sys.argv[3], max_l)
|
|
elif cmd == "claudemd":
|
|
claudemd_cmd()
|
|
elif cmd == "init":
|
|
init_cmd()
|
|
elif cmd == "export-config":
|
|
export_config_cmd()
|
|
else:
|
|
print(__doc__)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|