From daac28feeafac0d7ac2ff8d6d72dffd6bc535ef6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 7 Jul 2025 15:42:09 +0000
Subject: [PATCH 1/2] Initial plan

From 6d05890f6e82b584051fcf18fd22b5531c3d675e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 7 Jul 2025 15:53:21 +0000
Subject: [PATCH 2/2] Fix 6 security vulnerabilities: replace pickle with JSON,
 MD5 with SHA-256, remove hardcoded credentials, secure subprocess calls

Co-authored-by: groupthinking <154503486+groupthinking@users.noreply.github.com>
---
 agents/unified_transport_layer.py    |  8 ++---
 connectors/mcp_debug_tool.py         |  4 ++-
 llm/continuous_learning_system.py    | 24 +++++++--------
 protocols/multimodal_llm_analyzer.py | 45 +++++++++++++++++++---------
 4 files changed, 50 insertions(+), 31 deletions(-)

diff --git a/agents/unified_transport_layer.py b/agents/unified_transport_layer.py
index cd24556..ac86e75 100644
--- a/agents/unified_transport_layer.py
+++ b/agents/unified_transport_layer.py
@@ -6,7 +6,7 @@
 from abc import ABC, abstractmethod
 import time
 import mmap
-import pickle
+import json
 from dataclasses import dataclass

 # Import our existing components
@@ -166,8 +166,8 @@ async def _shared_memory_transfer(
         self, pipe: MojoMessagePipe, payload: Dict
     ) -> Dict:
         """Shared memory transfer for large payloads"""
-        # Serialize to shared memory
-        serialized = pickle.dumps(payload)
+        # Serialize to shared memory using JSON (secure)
+        serialized = json.dumps(payload, default=str).encode('utf-8')

         if pipe.shared_memory:
             # Write to shared memory
@@ -198,7 +198,7 @@ async def _pipe_transfer(self, pipe: MojoMessagePipe, payload: Dict) -> Dict:
         return {
             "status": "delivered",
             "method": "pipe",
-            "serialized_size": len(pickle.dumps(payload)),
+            "serialized_size": len(json.dumps(payload, default=str).encode('utf-8')),
         }

     async def _handle_passing_transfer(
diff --git a/connectors/mcp_debug_tool.py b/connectors/mcp_debug_tool.py
index 929284a..11a8b9a 100644
--- a/connectors/mcp_debug_tool.py
+++ b/connectors/mcp_debug_tool.py
@@ -6,6 +6,7 @@
 import json
 import asyncio
 import traceback
+import os
 from datetime import datetime, timezone
 from typing import Dict, Any, Optional, List
 from dataclasses import dataclass, asdict
@@ -675,7 +676,8 @@ def _estimate_quantum_efficiency(self, code: str) -> str:
 async def example_usage():
     """Example usage of the MCP Debug Tool"""
     async with MCPDebugTool(
-        gcp_endpoint="https://your-gcp-api", auth_token="your-oauth-token"
+        gcp_endpoint=os.getenv("GCP_API_ENDPOINT", "https://api.example.com"),
+        auth_token=os.getenv("GCP_AUTH_TOKEN", "development-token")
     ) as debug_tool:

         # Debug quantum code
diff --git a/llm/continuous_learning_system.py b/llm/continuous_learning_system.py
index 80f5ee0..c501c18 100644
--- a/llm/continuous_learning_system.py
+++ b/llm/continuous_learning_system.py
@@ -25,7 +25,7 @@
 from datetime import datetime
 import numpy as np
 import hashlib
-import pickle
+import json
 from pathlib import Path

 # Import existing components
@@ -250,7 +250,7 @@ async def rollback_model(self, version_id: str) -> Dict[str, Any]:
         """
         try:
             # Find version in history
-            version_path = self.model_dir / f"{version_id}.pkl"
+            version_path = self.model_dir / f"{version_id}.json"

             if not version_path.exists():
                 return {
@@ -259,8 +259,8 @@ async def rollback_model(self, version_id: str) -> Dict[str, Any]:
                 }

             # Load the version
-            with open(version_path, "rb") as f:
-                model_data = pickle.load(f)
+            with open(version_path, "r") as f:
open(version_path, "r") as f: + model_data = json.load(f) # Set as current model self.current_model_version = model_data["version_info"] @@ -536,8 +536,8 @@ async def _create_model_version( }, training_data_size=self.training_stats["total_samples_processed"], quantum_optimized=self.quantum_connector.connected, - file_path=str(self.model_dir / f"{version_id}.pkl"), - checksum=hashlib.md5(version_id.encode()).hexdigest(), + file_path=str(self.model_dir / f"{version_id}.json"), + checksum=hashlib.sha256(version_id.encode()).hexdigest(), ) # Save model version @@ -547,8 +547,8 @@ async def _create_model_version( "model_state": "simulated_model_state", } - with open(version.file_path, "wb") as f: - pickle.dump(model_data, f) + with open(version.file_path, "w") as f: + json.dump(model_data, f, indent=2, default=str) # Update current version self.current_model_version = version @@ -590,14 +590,14 @@ async def _load_or_create_model(self): """Load existing model or create new one""" try: # Look for existing model versions - model_files = list(self.model_dir.glob("*.pkl")) + model_files = list(self.model_dir.glob("*.json")) if model_files: # Load latest version latest_file = max(model_files, key=lambda f: f.stat().st_mtime) - with open(latest_file, "rb") as f: - model_data = pickle.load(f) + with open(latest_file, "r") as f: + model_data = json.load(f) self.current_model_version = model_data["version_info"] logger.info( @@ -611,7 +611,7 @@ async def _load_or_create_model(self): performance_metrics={"accuracy": 0.8, "loss": 0.2}, training_data_size=0, quantum_optimized=False, - file_path=str(self.model_dir / "v1_initial.pkl"), + file_path=str(self.model_dir / "v1_initial.json"), checksum="initial", ) diff --git a/protocols/multimodal_llm_analyzer.py b/protocols/multimodal_llm_analyzer.py index 97e9238..c290dd1 100644 --- a/protocols/multimodal_llm_analyzer.py +++ b/protocols/multimodal_llm_analyzer.py @@ -6,7 +6,6 @@ from datetime import datetime from typing import Dict, List, Any import numpy as np -import random def task(): @@ -75,19 +74,35 @@ def _analyze_massive_user_collection() -> Dict[str, Any]: folder_name = os.path.basename(base_path) analysis["folders_scanned"].append(folder_name) - # Get total file count for this directory + # Get total file count for this directory using secure subprocess try: import subprocess - - result = subprocess.run( - ["find", base_path, "-type", "f"], - capture_output=True, - text=True, - ) - all_files = ( - result.stdout.strip().split("\n") if result.stdout.strip() else [] - ) - folder_file_count = len(all_files) + import shutil + + # Use absolute path for find command for security + find_path = shutil.which("find") + if not find_path: + # Fallback to Python implementation if find is not available + all_files = [] + for root, dirs, files in os.walk(base_path): + for file in files: + all_files.append(os.path.join(root, file)) + folder_file_count = len(all_files) + else: + # Validate and sanitize the base_path to prevent command injection + if not os.path.exists(base_path) or not os.path.isdir(base_path): + raise ValueError(f"Invalid directory path: {base_path}") + + result = subprocess.run( + [find_path, os.path.abspath(base_path), "-type", "f"], + capture_output=True, + text=True, + timeout=30, # Add timeout for security + ) + all_files = ( + result.stdout.strip().split("\n") if result.stdout.strip() else [] + ) + folder_file_count = len(all_files) analysis["directory_stats"][folder_name] = { "total_files": folder_file_count, @@ -95,11 +110,13 @@ def 
                 }
                 analysis["total_files"] += folder_file_count

-                # Use statistical sampling for massive datasets
+                # Use systematic sampling for massive datasets (deterministic)
                 if folder_file_count > 1000:
                     # Sample 5% or max 2000 files, whichever is smaller
                     sample_size = min(int(folder_file_count * 0.05), 2000)
-                    sampled_files = random.sample(all_files, sample_size)
+                    # Systematic sampling - take every nth file for reproducible results
+                    step = max(1, len(all_files) // sample_size)
+                    sampled_files = all_files[::step][:sample_size]
                     analysis["directory_stats"][folder_name][
                         "sample_analyzed"
                     ] = sample_size
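
Note (outside the patch): a minimal sketch, using a hypothetical payload, of the
pickle -> JSON swap applied in the hunks above. json.dumps(..., default=str)
stringifies any value JSON cannot encode natively, so unlike pickle the
round-trip is not type-preserving.

    import json
    from datetime import datetime, timezone

    # Hypothetical payload; the field names are illustrative only
    payload = {
        "message_id": 42,
        "created_at": datetime.now(timezone.utc),  # not JSON-native
    }

    # Same pattern as the unified_transport_layer.py change above
    serialized = json.dumps(payload, default=str).encode("utf-8")

    restored = json.loads(serialized.decode("utf-8"))
    print(type(restored["created_at"]))  # <class 'str'>, no longer a datetime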