Flow/examples/ml_python_only.fl at main · AnonymoDGH/Flow · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# Python: Machine Learning Benchmark - 1000 archivos
```python
import numpy as np
import time

# Number of synthetic files in the benchmark. The banners, the feature matrix
# and the label vector all derive from this single value so they cannot drift
# apart when the benchmark size is changed.
N_FILES = 1000


def _random_file_features():
    """Draw one synthetic file and return its five scaled features.

    The five random draws happen in a fixed order (size, age, access count,
    encryption flag, type) so that a given NumPy seed always produces the
    same row.

    Returns:
        list: [size in MB, age in years, access count / 100,
        encrypted flag (0 or 1), type code / 4].
    """
    file_size = np.random.randint(1000, 50000000)  # bytes; upper bound is exclusive
    file_age_days = np.random.randint(1, 1095)  # 1..1094 days, i.e. just under 3 years
    access_count = np.random.randint(0, 500)
    is_encrypted = 1 if np.random.random() > 0.85 else 0  # set for draws above 0.85
    file_type = np.random.choice([0, 1, 2, 3, 4])  # txt, pdf, jpg, doc, exe

    return [
        file_size / 1000000,  # scale bytes to MB
        file_age_days / 365,  # scale days to years
        access_count / 100,   # scale down the access count
        is_encrypted,
        file_type / 4,        # scale the type code into [0, 1]
    ]


print("=" * 60)
print(f"FLOW ML BENCHMARK - {N_FILES} ARCHIVOS")
print("=" * 60)

# Phase 1: data generation
print(f"\n[Fase 1] Generando {N_FILES} archivos falsos...")
start_total = time.time()  # reference point for the whole-run timing
start_gen = time.time()

np.random.seed(42)  # fixed seed so every run produces the same dataset
file_data = [_random_file_features() for _ in range(N_FILES)]

X = np.array(file_data)
# Labels are drawn independently of the features: 25% are marked suspicious (1).
y = np.random.choice([0, 1], size=N_FILES, p=[0.75, 0.25])

gen_time = time.time() - start_gen
print(f"[Fase 1] Completado en {gen_time:.3f}s")
print(f"         Datos generados: {X.shape}")
print(f"         Archivos sospechosos reales: {np.sum(y)}")

# Phase 2: model training
print("\n[Fase 2] Entrenando modelo de clasificación...")
# Start the training timer; train_time is later computed relative to this value.
start_train = time.time()

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z) of a scalar or array.

    The input is first limited to [-500, 500], which keeps the argument
    passed to np.exp bounded for very large magnitudes of z.
    """
    bounded = np.clip(z, -500, 500)
    return 1 / (1 + np.exp(-bounded))

# Initialise the parameters: one small random weight per feature column and a
# zero bias. The seed is reset so the starting weights are reproducible.
np.random.seed(123)
weights = np.random.randn(X.shape[1]) * 0.01
bias = 0
learning_rate = 0.1
epochs = 200

# Full-batch gradient descent: every epoch uses all samples for one update.
n_samples = len(y)
eps = 1e-10  # keeps the logarithms below away from log(0)
for epoch in range(epochs):
    z = X.dot(weights) + bias
    predictions = sigmoid(z)
    error = predictions - y

    # Averaged gradient step for the weights and the bias.
    weight_gradient = X.T.dot(error)
    weights -= learning_rate * weight_gradient / n_samples
    bias -= learning_rate * error.sum() / n_samples

    # Progress is reported only every 50th epoch.
    if epoch % 50 != 0:
        continue

    # Binary cross-entropy of the predictions made before this epoch's update.
    log_likelihood = y * np.log(predictions + eps) + (1 - y) * np.log(1 - predictions + eps)
    loss = -np.mean(log_likelihood)
    print(f"         Época {epoch}: Loss = {loss:.4f}")

train_time = time.time() - start_train
print(f"[Fase 2] Completado en {train_time:.3f}s")

# Phase 3: evaluation (scored on the same samples used for training)
print("\n[Fase 3] Evaluando modelo...")
start_eval = time.time()

# Score every sample with the trained parameters, then threshold at 0.5.
final_predictions = sigmoid(np.dot(X, weights) + bias)
predicted_classes = (final_predictions > 0.5).astype(int)

accuracy = np.mean(predicted_classes == y)

# Boolean masks for the predicted and the actual classes.
flagged = predicted_classes == 1
cleared = predicted_classes == 0
suspicious = y == 1
benign = y == 0

# Confusion-matrix cells.
true_positives = np.sum(flagged & suspicious)
false_positives = np.sum(flagged & benign)
true_negatives = np.sum(cleared & benign)
false_negatives = np.sum(cleared & suspicious)

# Each ratio falls back to 0 when its denominator is not positive.
predicted_positive = true_positives + false_positives
precision = 0
if predicted_positive > 0:
    precision = true_positives / predicted_positive

actual_positive = true_positives + false_negatives
recall = 0
if actual_positive > 0:
    recall = true_positives / actual_positive

f1_score = 0
if (precision + recall) > 0:
    f1_score = 2 * (precision * recall) / (precision + recall)

eval_time = time.time() - start_eval
total_time = time.time() - start_total

print(f"[Fase 3] Completado en {eval_time:.3f}s")

# Final results report.
# The file count is taken from the predictions instead of a literal 1000, so
# the report stays correct if the dataset size changes.
n_files = len(predicted_classes)
detected = np.sum(predicted_classes)
detection_rate = detected * 100 / n_files if n_files else 0.0

# time.time() can report zero elapsed time on a coarse clock; report an
# infinite rate in that case rather than raising ZeroDivisionError.
if total_time > 0:
    files_per_second = n_files / total_time
    ops_per_second = n_files * epochs / total_time
else:
    files_per_second = ops_per_second = float("inf")

print("\n" + "=" * 60)
print("RESULTADOS DEL BENCHMARK")
print("=" * 60)

print("\n[MÉTRICAS DEL MODELO]")
print(f"  Precisión (Accuracy):  {accuracy*100:.2f}%")
print(f"  Precisión (Precision): {precision*100:.2f}%")
print(f"  Recall:                {recall*100:.2f}%")
print(f"  F1-Score:              {f1_score:.3f}")

print("\n[MATRIZ DE CONFUSIÓN]")
print(f"  Verdaderos Positivos:  {true_positives}")
print(f"  Falsos Positivos:      {false_positives}")
print(f"  Verdaderos Negativos:  {true_negatives}")
print(f"  Falsos Negativos:      {false_negatives}")

print("\n[DETECCIÓN]")
print(f"  Archivos analizados:   {n_files}")
print(f"  Sospechosos detectados: {detected}")
print(f"  Tasa de detección:     {detection_rate:.1f}%")

print("\n[RENDIMIENTO]")
print(f"  Tiempo de generación:  {gen_time:.3f}s")
print(f"  Tiempo de entrenamiento: {train_time:.3f}s")
print(f"  Tiempo de evaluación:  {eval_time:.3f}s")
print(f"  Tiempo total:          {total_time:.3f}s")
# Both rates are relative to the total run time, not to the training time alone.
print(f"  Velocidad:             {files_per_second:.0f} archivos/segundo")
print(f"  Throughput:            {ops_per_second:.0f} operaciones/segundo")

print("\n" + "=" * 60)
print("BENCHMARK COMPLETADO EXITOSAMENTE")
print("=" * 60 + "\n")
```