djaloul committed on
Commit
84bef82
·
verified ·
1 Parent(s): 36a3cf7

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +16 -0
  2. false/.gitignore +2 -0
  3. false/.models/a2c_fraud_model.zip +3 -0
  4. false/.models/a2c_fraud_model2.zip +3 -0
  5. false/.models/dqn_fraud_model.zip +3 -0
  6. false/.models/dqn_fraud_model2.zip +3 -0
  7. false/Copy_of_💵Transaction_Fraud_Detection🕵️_♂️.ipynb +0 -0
  8. false/RL-1.pdf +3 -0
  9. false/RL2.0.ipynb +0 -0
  10. false/RL2.0_ATT.ipynb +0 -0
  11. false/RL2.0_ATT_Hybrid.ipynb +385 -0
  12. false/a2c_fraud_checkpoints/A2C_fraud_model_160000_steps.zip +3 -0
  13. false/a2c_fraud_checkpoints/A2C_fraud_model_240000_steps.zip +3 -0
  14. false/a2c_fraud_checkpoints/A2C_fraud_model_80000_steps.zip +3 -0
  15. false/a2c_fraud_tb/evaluation/a2c_eval_20250514-022104/events.out.tfevents.1747185664.archlinux.48104.8 +3 -0
  16. false/a2c_fraud_tb/evaluation/a2c_eval_20251201-002818/events.out.tfevents.1764545298.archlinux.67971.3 +3 -0
  17. false/a2c_fraud_tb/evaluation/a2c_eval_20251201-005711/events.out.tfevents.1764547031.archlinux.73972.3 +3 -0
  18. false/a2c_fraud_tb/evaluation/a2c_eval_20251201-112708/events.out.tfevents.1764584828.archlinux.5620.3 +3 -0
  19. false/a2c_fraud_tb/evaluation/a2c_eval_20251202-104925/events.out.tfevents.1764668965.archlinux.47939.3 +3 -0
  20. false/ablation_results.png +0 -0
  21. false/ablation_study/ablation-study-rl.ipynb +1 -0
  22. false/ablation_study/ablation.ipynb +514 -0
  23. false/attention_pooled_embeddings.pkl +3 -0
  24. false/custom_env.py +99 -0
  25. false/dqn_fraud_checkpoints/dqn_fraud_model_100000_steps.zip +3 -0
  26. false/dqn_fraud_checkpoints/dqn_fraud_model_10000_steps.zip +3 -0
  27. false/dqn_fraud_checkpoints/dqn_fraud_model_110000_steps.zip +3 -0
  28. false/dqn_fraud_checkpoints/dqn_fraud_model_160000_steps.zip +3 -0
  29. false/dqn_fraud_checkpoints/dqn_fraud_model_20000_steps.zip +3 -0
  30. false/dqn_fraud_checkpoints/dqn_fraud_model_240000_steps.zip +3 -0
  31. false/dqn_fraud_checkpoints/dqn_fraud_model_30000_steps.zip +3 -0
  32. false/dqn_fraud_checkpoints/dqn_fraud_model_40000_steps.zip +3 -0
  33. false/dqn_fraud_checkpoints/dqn_fraud_model_50000_steps.zip +3 -0
  34. false/dqn_fraud_checkpoints/dqn_fraud_model_60000_steps.zip +3 -0
  35. false/dqn_fraud_checkpoints/dqn_fraud_model_70000_steps.zip +3 -0
  36. false/dqn_fraud_checkpoints/dqn_fraud_model_80000_steps.zip +3 -0
  37. false/dqn_fraud_checkpoints/dqn_fraud_model_90000_steps.zip +3 -0
  38. false/dqn_fraud_tb/DQN_4/events.out.tfevents.1747182467.archlinux.48104.0 +3 -0
  39. false/dqn_fraud_tb/DQN_5/events.out.tfevents.1764545110.archlinux.67971.0 +3 -0
  40. false/dqn_fraud_tb/DQN_6/events.out.tfevents.1764546770.archlinux.73972.0 +3 -0
  41. false/dqn_fraud_tb/DQN_7/events.out.tfevents.1764584658.archlinux.5620.0 +3 -0
  42. false/dqn_fraud_tb/DQN_8/events.out.tfevents.1764668813.archlinux.47939.0 +3 -0
  43. false/dqn_fraud_tb/evaluation/eval_20250514-014902/events.out.tfevents.1747183742.archlinux.48104.3 +3 -0
  44. false/dqn_fraud_tb/evaluation/eval_20251201-002715/events.out.tfevents.1764545235.archlinux.67971.1 +3 -0
  45. false/dqn_fraud_tb/evaluation/eval_20251201-005435/events.out.tfevents.1764546875.archlinux.73972.1 +3 -0
  46. false/dqn_fraud_tb/evaluation/eval_20251201-112610/events.out.tfevents.1764584770.archlinux.5620.1 +3 -0
  47. false/dqn_fraud_tb/evaluation/eval_20251202-104820/events.out.tfevents.1764668900.archlinux.47939.1 +3 -0
  48. false/embeddings.pkl +3 -0
  49. false/fraud-detection-with-distilbert.ipynb +0 -0
  50. false/models/a2c_fraud_model.zip +3 -0
.gitattributes CHANGED
@@ -33,3 +33,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ false/RL-1.pdf filter=lfs diff=lfs merge=lfs -text
37
+ false/paper/LLM-Assisted[[:space:]]Fraud[[:space:]]Detection[[:space:]]with[[:space:]]Reinforcement[[:space:]]Learning/LLM-Assisted[[:space:]]Fraud[[:space:]]Detection[[:space:]]with[[:space:]]Reinforcement[[:space:]]Learning/Definitions/logo-mdpi.eps filter=lfs diff=lfs merge=lfs -text
38
+ false/paper/LLM-Assisted[[:space:]]Fraud[[:space:]]Detection[[:space:]]with[[:space:]]Reinforcement[[:space:]]Learning/LLM-Assisted[[:space:]]Fraud[[:space:]]Detection[[:space:]]with[[:space:]]Reinforcement[[:space:]]Learning/Fraud_detection.pdf filter=lfs diff=lfs merge=lfs -text
39
+ false/paper/LLM_Assisted_Fraud_Detection_with_Reinforcement_Learning-1.pdf filter=lfs diff=lfs merge=lfs -text
40
+ false/paper/RL.pdf filter=lfs diff=lfs merge=lfs -text
41
+ false/paper/Related[[:space:]]Work_[[:space:]]Dataset-Specific[[:space:]]Performance[[:space:]]Analysis.pdf filter=lfs diff=lfs merge=lfs -text
42
+ false/paper/images/ilovepdf_jpg-to-pdf/a2c_conf.pdf filter=lfs diff=lfs merge=lfs -text
43
+ false/paper/llm_v4_plots/images/cnfA.png filter=lfs diff=lfs merge=lfs -text
44
+ false/paper/llm_v4_plots/images/conf_dqn.png filter=lfs diff=lfs merge=lfs -text
45
+ false/paper/llm_v4_plots/images/eval_ent.png filter=lfs diff=lfs merge=lfs -text
46
+ false/paper/llm_v4_plots/images/image.png filter=lfs diff=lfs merge=lfs -text
47
+ false/paper/llm_v4_plots/images/lossdqn.png filter=lfs diff=lfs merge=lfs -text
48
+ false/paper/llm_v4_plots/images/mer.png filter=lfs diff=lfs merge=lfs -text
49
+ false/paper/llm_v4_plots/images/policy_loss.png filter=lfs diff=lfs merge=lfs -text
50
+ false/paper/llm_v4_plots/images/rwrd_dyn-eval.png filter=lfs diff=lfs merge=lfs -text
51
+ false/paper/llm_v4_plots-1.pdf filter=lfs diff=lfs merge=lfs -text
false/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ablation_study/creditcard.csv
2
+ ablation_study/paysim.csv
false/.models/a2c_fraud_model.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51aa5e6d4280c5cd785f7ee69eca18ad93f67b01b5ff091772cc8d2e6ec1198d
3
+ size 945552
false/.models/a2c_fraud_model2.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ab8ce945fb98fb960ece8efd2861e5fae7e0cf0c51f0ffd5f1bf00098410330
3
+ size 179946
false/.models/dqn_fraud_model.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3615ddbbc8cd6bef2488a2899161b600dbe528ab77eb65578ee6dde816584749
3
+ size 978197
false/.models/dqn_fraud_model2.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db510c265c5148c6a8bb3c19edeae6521164ec64aad48480bb311f655f7dc082
3
+ size 182934
false/Copy_of_💵Transaction_Fraud_Detection🕵️_♂️.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
false/RL-1.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad8cbe3f3f9fa029d950a3b62e0054f933e8b9f768c83be468054bd735201fd
3
+ size 2006409
false/RL2.0.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
false/RL2.0_ATT.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
false/RL2.0_ATT_Hybrid.ipynb ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# RL Agents for Fraud Detection (Hybrid: Embeddings + Structured Data)\n",
8
+ "\n",
9
+ "This notebook trains RL agents (PPO, A2C, DQN) on a hybrid dataset consisting of:\n",
10
+ "1. **Embeddings**: Attention-pooled embeddings from a DistilBERT model.\n",
11
+ "2. **Structured Data**: Original features from the CreditCard dataset (Time, V1-V28, Amount).\n",
12
+ "\n",
13
+ "The combined data goes through a preprocessing pipeline:\n",
14
+ "- **Scaling**: StandardScaler applied to the concatenated vector.\n",
15
+ "- **PCA**: Dimensionality reduction on the scaled combined vector.\n",
16
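+ "- **CTGAN**: Data augmentation that generates synthetic fraud samples, doubling the fraud class in the training set (this reduces, but does not fully remove, the class imbalance)."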
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "metadata": {},
23
+ "outputs": [],
24
+ "source": [
25
+ "# Install required packages\n",
26
+ "!pip install gymnasium numpy pandas torch stable-baselines3 scikit-learn matplotlib seaborn ctgan"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "import gymnasium as gym\n",
36
+ "from gymnasium import spaces\n",
37
+ "import numpy as np\n",
38
+ "import pandas as pd\n",
39
+ "import torch\n",
40
+ "import os\n",
41
+ "\n",
42
+ "from stable_baselines3 import PPO, A2C, DQN\n",
43
+ "from stable_baselines3.common.vec_env import DummyVecEnv\n",
44
+ "\n",
45
+ "from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score\n",
46
+ "from sklearn.model_selection import train_test_split\n",
47
+ "from sklearn.preprocessing import StandardScaler\n",
48
+ "from sklearn.decomposition import PCA\n",
49
+ "import matplotlib.pyplot as plt\n",
50
+ "import seaborn as sns\n",
51
+ "\n",
52
+ "# Set random seed\n",
53
+ "SEED = 42\n",
54
+ "np.random.seed(SEED)\n",
55
+ "torch.manual_seed(SEED)"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "markdown",
60
+ "metadata": {},
61
+ "source": [
62
+ "## 1. Data Loading and Alignment"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": null,
68
+ "id": "7f46057b",
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "# Load Embeddings\n",
73
+ "print(\"Loading embeddings...\")\n",
74
+ "pkl_data = pd.read_pickle(\"attention_pooled_embeddings.pkl\")\n",
75
+ "embeddings = pkl_data['embeddings']\n",
76
+ "labels = np.array(pkl_data['labels'])\n",
77
+ "uids = pkl_data['uids']\n",
78
+ "\n",
79
+ "print(f\"Embeddings shape: {embeddings.shape}\")\n",
80
+ "print(f\"Labels shape: {labels.shape}\")\n",
81
+ "print(f\"UIDs shape: {len(uids)}\")\n",
82
+ "\n",
83
+ "# Load Structured Data\n",
84
+ "print(\"\\nLoading structured data...\")\n",
85
+ "creditcard_df = pd.read_csv(\"ablation_study/creditcard.csv\")\n",
86
+ "\n",
87
+ "# Filter structured data to match the UIDs in the embeddings file\n",
88
+ "# Using uids to select the corresponding rows from the original dataset\n",
89
+ "structured_data = creditcard_df.loc[uids].copy()\n",
90
+ "\n",
91
+ "# Drop Class column to get features\n",
92
+ "structured_features = structured_data.drop('Class', axis=1).values\n",
93
+ "print(f\"Structured features shape: {structured_features.shape}\")\n",
94
+ "\n",
95
+ "# Concatenate Embeddings and Structured Data\n",
96
+ "# We use the labels from the pickle file as the ground truth\n",
97
+ "X = np.hstack([embeddings, structured_features])\n",
98
+ "y = labels\n",
99
+ "\n",
100
+ "print(f\"\\nCombined Data Shape: {X.shape}\")"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "markdown",
105
+ "metadata": {},
106
+ "source": [
107
+ "## 2. Data Preprocessing Pipeline\n",
108
+ "1. Split Train/Test\n",
109
+ "2. Scale (StandardScaler)\n",
110
+ "3. PCA\n",
111
+ "4. CTGAN Augmentation"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": null,
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": [
120
+ "# 1. Split\n",
121
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
122
+ " X, y, test_size=0.2, random_state=SEED, stratify=y\n",
123
+ ")\n",
124
+ "\n",
125
+ "print(f\"Train shape: {X_train.shape}\")\n",
126
+ "print(f\"Test shape: {X_test.shape}\")\n",
127
+ "\n",
128
+ "# 2. Scale\n",
129
+ "scaler = StandardScaler()\n",
130
+ "X_train_scaled = scaler.fit_transform(X_train)\n",
131
+ "X_test_scaled = scaler.transform(X_test)\n",
132
+ "\n",
133
+ "# 3. PCA\n",
134
+ "# We want to retain 99% variance, similar to the original notebook\n",
135
+ "pca = PCA(n_components=0.99, whiten=True)\n",
136
+ "X_train_pca = pca.fit_transform(X_train_scaled)\n",
137
+ "X_test_pca = pca.transform(X_test_scaled)\n",
138
+ "\n",
139
+ "print(f\"\\nPCA reduced dimensions from {X_train.shape[1]} to {X_train_pca.shape[1]}\")\n",
140
+ "\n",
141
+ "# 4. CTGAN Augmentation\n",
142
+ "from ctgan import CTGAN\n",
143
+ "\n",
144
+ "# Prepare data for CTGAN (only fraud samples from training set)\n",
145
+ "train_df_pca = pd.DataFrame(X_train_pca, columns=[f'pc{i}' for i in range(X_train_pca.shape[1])])\n",
146
+ "train_df_pca['label'] = y_train\n",
147
+ "\n",
148
+ "fraud_df = train_df_pca[train_df_pca['label'] == 1].drop('label', axis=1)\n",
149
+ "print(f\"Fraud samples for CTGAN: {len(fraud_df)}\")\n",
150
+ "\n",
151
+ "# Train CTGAN\n",
152
+ "ctgan = CTGAN(epochs=200, batch_size=64, pac=1, verbose=True)\n",
153
+ "ctgan.fit(fraud_df)\n",
154
+ "\n",
155
+ "# Generate synthetic samples\n",
156
+ "n_synthetic = len(fraud_df) # Double the fraud samples\n",
157
+ "synthetic_fraud = ctgan.sample(n_synthetic)\n",
158
+ "X_synthetic = synthetic_fraud.values.astype(np.float32)\n",
159
+ "y_synthetic = np.ones(n_synthetic, dtype=np.int64)\n",
160
+ "\n",
161
+ "# Augment Training Set\n",
162
+ "X_train_aug = np.vstack([X_train_pca, X_synthetic])\n",
163
+ "y_train_aug = np.concatenate([y_train, y_synthetic])\n",
164
+ "\n",
165
+ "print(f\"Augmented Train Shape: {X_train_aug.shape}\")"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "markdown",
170
+ "metadata": {},
171
+ "source": [
172
+ "## 3. RL Environment"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": null,
178
+ "metadata": {},
179
+ "outputs": [],
180
+ "source": [
181
+ "class FraudDetectionEnv(gym.Env):\n",
182
+ " \"\"\"\n",
183
+ " Custom Environment that follows gym interface.\n",
184
+ " \"\"\"\n",
185
+ " def __init__(self, features, labels, reward_config=None):\n",
186
+ " super(FraudDetectionEnv, self).__init__()\n",
187
+ " \n",
188
+ " self.features = features.astype(np.float32)\n",
189
+ " self.labels = labels.astype(np.int64)\n",
190
+ " self.n_samples = len(features)\n",
191
+ " self.input_dim = features.shape[1]\n",
192
+ " \n",
193
+ " # Define action and observation space\n",
194
+ " # Action: 0 (Not Fraud), 1 (Fraud)\n",
195
+ " self.action_space = spaces.Discrete(2)\n",
196
+ " \n",
197
+ " # Observation: Feature vector\n",
198
+ " self.observation_space = spaces.Box(\n",
199
+ " low=-np.inf, high=np.inf,\n",
200
+ " shape=(self.input_dim,),\n",
201
+ " dtype=np.float32\n",
202
+ " )\n",
203
+ " \n",
204
+ " if reward_config is None:\n",
205
+ " self.reward_config = {\n",
206
+ " 'TP': 10.0,\n",
207
+ " 'FP': -5.0,\n",
208
+ " 'FN': -20.0,\n",
209
+ " 'TN': 1.0\n",
210
+ " }\n",
211
+ " else:\n",
212
+ " self.reward_config = reward_config\n",
213
+ " \n",
214
+ " self.current_step = 0\n",
215
+ " self.indices = np.arange(self.n_samples)\n",
216
+ " \n",
217
+ " def reset(self, seed=None, options=None):\n",
218
+ " super().reset(seed=seed)\n",
219
+ " self.current_step = 0\n",
220
+ " np.random.shuffle(self.indices)\n",
221
+ " return self._get_obs(), {}\n",
222
+ " \n",
223
+ " def _get_obs(self):\n",
224
+ " idx = self.indices[self.current_step]\n",
225
+ " return self.features[idx]\n",
226
+ " \n",
227
+ " def step(self, action):\n",
228
+ " idx = self.indices[self.current_step]\n",
229
+ " true_label = self.labels[idx]\n",
230
+ " \n",
231
+ " reward = 0\n",
232
+ " if action == 1 and true_label == 1:\n",
233
+ " reward = self.reward_config['TP']\n",
234
+ " elif action == 1 and true_label == 0:\n",
235
+ " reward = self.reward_config['FP']\n",
236
+ " elif action == 0 and true_label == 1:\n",
237
+ " reward = self.reward_config['FN']\n",
238
+ " elif action == 0 and true_label == 0:\n",
239
+ " reward = self.reward_config['TN']\n",
240
+ " \n",
241
+ " self.current_step += 1\n",
242
+ " done = self.current_step >= self.n_samples\n",
243
+ " truncated = False\n",
244
+ " \n",
245
+ " info = {\n",
246
+ " 'true_label': true_label,\n",
247
+ " 'pred_label': action\n",
248
+ " }\n",
249
+ " \n",
250
+ " if not done:\n",
251
+ " next_obs = self._get_obs()\n",
252
+ " else:\n",
253
+ " next_obs = np.zeros(self.input_dim, dtype=np.float32)\n",
254
+ " \n",
255
+ " return next_obs, reward, done, truncated, info"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "markdown",
260
+ "metadata": {},
261
+ "source": [
262
+ "## 4. Training and Evaluation"
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": null,
268
+ "metadata": {},
269
+ "outputs": [],
270
+ "source": [
271
+ "# Define a learning rate schedule\n",
272
+ "def linear_schedule(initial_value):\n",
273
+ " def schedule(progress_remaining):\n",
274
+ " return progress_remaining * initial_value\n",
275
+ " return schedule\n",
276
+ "\n",
277
+ "def train_evaluate_agent(agent_name, X_train, y_train, X_test, y_test, total_timesteps=10000):\n",
278
+ " print(f\"\\nTraining {agent_name}...\")\n",
279
+ " \n",
280
+ " # Create env\n",
281
+ " env = DummyVecEnv([lambda: FraudDetectionEnv(X_train, y_train)])\n",
282
+ " \n",
283
+ " model = None\n",
284
+ " if agent_name == 'PPO':\n",
285
+ " # PPO was not in the reference notebook, using default parameters\n",
286
+ " model = PPO('MlpPolicy', env, verbose=0)\n",
287
+ " elif agent_name == 'A2C':\n",
288
+ " # Parameters from RL2.0_ATT.ipynb\n",
289
+ " model = A2C(\n",
290
+ " \"MlpPolicy\",\n",
291
+ " env,\n",
292
+ " learning_rate=1e-4,\n",
293
+ " gamma=0.99,\n",
294
+ " n_steps=5,\n",
295
+ " ent_coef=0.01,\n",
296
+ " vf_coef=0.5,\n",
297
+ " max_grad_norm=0.5,\n",
298
+ " verbose=0,\n",
299
+ " device=\"auto\"\n",
300
+ " )\n",
301
+ " elif agent_name == 'DQN':\n",
302
+ " # Parameters from RL2.0_ATT.ipynb\n",
303
+ " model = DQN(\n",
304
+ " \"MlpPolicy\",\n",
305
+ " env,\n",
306
+ " learning_rate=linear_schedule(1e-4),\n",
307
+ " buffer_size=100000,\n",
308
+ " learning_starts=1000,\n",
309
+ " batch_size=512,\n",
310
+ " gamma=0.99,\n",
311
+ " train_freq=1,\n",
312
+ " gradient_steps=1,\n",
313
+ " target_update_interval=500,\n",
314
+ " exploration_fraction=0.1,\n",
315
+ " exploration_initial_eps=1.0,\n",
316
+ " exploration_final_eps=0.05,\n",
317
+ " max_grad_norm=10,\n",
318
+ " verbose=0,\n",
319
+ " device=\"auto\"\n",
320
+ " )\n",
321
+ " \n",
322
+ " if model:\n",
323
+ " model.learn(total_timesteps=total_timesteps)\n",
324
+ " \n",
325
+ " # Evaluate\n",
326
+ " print(f\"Evaluating {agent_name}...\")\n",
327
+ " test_env = FraudDetectionEnv(X_test, y_test)\n",
328
+ " obs, _ = test_env.reset()\n",
329
+ " \n",
330
+ " y_true = []\n",
331
+ " y_pred = []\n",
332
+ " \n",
333
+ " done = False\n",
334
+ " while not done:\n",
335
+ " action, _ = model.predict(obs, deterministic=True)\n",
336
+ " obs, reward, done, truncated, info = test_env.step(action)\n",
337
+ " \n",
338
+ " if 'true_label' in info:\n",
339
+ " y_true.append(info['true_label'])\n",
340
+ " y_pred.append(action)\n",
341
+ " \n",
342
+ " # Metrics\n",
343
+ " print(f\"--- {agent_name} Results ---\")\n",
344
+ " print(f\"Accuracy: {accuracy_score(y_true, y_pred):.4f}\")\n",
345
+ " print(f\"Precision: {precision_score(y_true, y_pred):.4f}\")\n",
346
+ " print(f\"Recall: {recall_score(y_true, y_pred):.4f}\")\n",
347
+ " print(f\"F1 Score: {f1_score(y_true, y_pred):.4f}\")\n",
348
+ " \n",
349
+ " cm = confusion_matrix(y_true, y_pred)\n",
350
+ " sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')\n",
351
+ " plt.title(f'{agent_name} Confusion Matrix')\n",
352
+ " plt.ylabel('True Label')\n",
353
+ " plt.xlabel('Predicted Label')\n",
354
+ " plt.show()\n",
355
+ "\n",
356
+ "# Train Agents\n",
357
+ "# Note: PPO parameters are default as it was not present in the reference notebook.\n",
358
+ "train_evaluate_agent('PPO', X_train_aug, y_train_aug, X_test_pca, y_test)\n",
359
+ "train_evaluate_agent('A2C', X_train_aug, y_train_aug, X_test_pca, y_test)\n",
360
+ "train_evaluate_agent('DQN', X_train_aug, y_train_aug, X_test_pca, y_test)"
361
+ ]
362
+ }
363
+ ],
364
+ "metadata": {
365
+ "kernelspec": {
366
+ "display_name": "Python 3",
367
+ "language": "python",
368
+ "name": "python3"
369
+ },
370
+ "language_info": {
371
+ "codemirror_mode": {
372
+ "name": "ipython",
373
+ "version": 3
374
+ },
375
+ "file_extension": ".py",
376
+ "mimetype": "text/x-python",
377
+ "name": "python",
378
+ "nbconvert_exporter": "python",
379
+ "pygments_lexer": "ipython3",
380
+ "version": "3.10.12"
381
+ }
382
+ },
383
+ "nbformat": 4,
384
+ "nbformat_minor": 5
385
+ }
false/a2c_fraud_checkpoints/A2C_fraud_model_160000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968858ebc2df1c6eb362683d84ab9156e90cc62a0e0853ead28ff28fc7cd127d
3
+ size 178290
false/a2c_fraud_checkpoints/A2C_fraud_model_240000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5c4befaeb86918c04e3492ae9076198cb68d8c32b7db39c78a344af91082c23
3
+ size 179283
false/a2c_fraud_checkpoints/A2C_fraud_model_80000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1a34eac4b517a168f82bf414a9615a0c5078de3fdd2571e851dfd78d47119b9
3
+ size 176965
false/a2c_fraud_tb/evaluation/a2c_eval_20250514-022104/events.out.tfevents.1747185664.archlinux.48104.8 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad67fdd03eb5488f6b5c6ba4789665dc1cfd654c7f566fad2807f1ef6016ee79
3
+ size 537183
false/a2c_fraud_tb/evaluation/a2c_eval_20251201-002818/events.out.tfevents.1764545298.archlinux.67971.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97798a3be0e1e43fac506671c69e924f9d9e1f2e9860e55ad4ef0a52d32e6e11
3
+ size 533948
false/a2c_fraud_tb/evaluation/a2c_eval_20251201-005711/events.out.tfevents.1764547031.archlinux.73972.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77e536eda7fe14f187583de227a558fb0eb7e86130e35f9d4252acccc30ef98a
3
+ size 569194
false/a2c_fraud_tb/evaluation/a2c_eval_20251201-112708/events.out.tfevents.1764584828.archlinux.5620.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17c1ab22456731651fe2390ecb07abd35ab2adad7b2d8e7b27040d6be2dd9869
3
+ size 538607
false/a2c_fraud_tb/evaluation/a2c_eval_20251202-104925/events.out.tfevents.1764668965.archlinux.47939.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a7d84b2987427b41c800629df236d190d430b224a9920deae61e56496baf74c
3
+ size 556888
false/ablation_results.png ADDED
false/ablation_study/ablation-study-rl.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.11.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"sourceId":1940,"sourceType":"datasetVersion","datasetId":1069},{"sourceId":23498,"sourceType":"datasetVersion","datasetId":310}],"dockerImageVersionId":31193,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Ablation Study: RL Agents for Fraud Detection\n\nThis notebook performs an ablation study to compare the performance of different Reinforcement Learning (RL) agents (PPO, A2C, DQN) on two fraud detection datasets (CreditCard and PaySim). \nWe also evaluate the impact of different preprocessing techniques: Raw Data, PCA, and CTGAN Data Augmentation.","metadata":{}},{"cell_type":"code","source":"!pip install ctgan shimmy","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-02T08:27:12.781130Z","iopub.execute_input":"2025-12-02T08:27:12.781489Z","iopub.status.idle":"2025-12-02T08:28:20.009166Z","shell.execute_reply.started":"2025-12-02T08:27:12.781458Z","shell.execute_reply":"2025-12-02T08:28:20.008483Z"}},"outputs":[{"name":"stdout","text":"Collecting ctgan\n Downloading ctgan-0.11.1-py3-none-any.whl.metadata (10 kB)\nRequirement already satisfied: shimmy in /usr/local/lib/python3.11/dist-packages (1.3.0)\nRequirement already satisfied: numpy>=1.23.3 in /usr/local/lib/python3.11/dist-packages (from ctgan) (1.26.4)\nRequirement already satisfied: pandas>=1.5.0 in /usr/local/lib/python3.11/dist-packages (from ctgan) (2.2.3)\nRequirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from ctgan) (2.6.0+cu124)\nRequirement already satisfied: 
tqdm<5,>=4.29 in /usr/local/lib/python3.11/dist-packages (from ctgan) (4.67.1)\nCollecting rdt>=1.14.0 (from ctgan)\n Downloading rdt-1.18.2-py3-none-any.whl.metadata (10 kB)\nRequirement already satisfied: gymnasium>=0.27.0 in /usr/local/lib/python3.11/dist-packages (from shimmy) (0.29.0)\nRequirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.27.0->shimmy) (3.1.2)\nRequirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.27.0->shimmy) (4.15.0)\nRequirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.27.0->shimmy) (0.0.4)\nRequirement already satisfied: mkl_fft in /usr/local/lib/python3.11/dist-packages (from numpy>=1.23.3->ctgan) (1.3.8)\nRequirement already satisfied: mkl_random in /usr/local/lib/python3.11/dist-packages (from numpy>=1.23.3->ctgan) (1.2.4)\nRequirement already satisfied: mkl_umath in /usr/local/lib/python3.11/dist-packages (from numpy>=1.23.3->ctgan) (0.1.1)\nRequirement already satisfied: mkl in /usr/local/lib/python3.11/dist-packages (from numpy>=1.23.3->ctgan) (2025.3.0)\nRequirement already satisfied: tbb4py in /usr/local/lib/python3.11/dist-packages (from numpy>=1.23.3->ctgan) (2022.3.0)\nRequirement already satisfied: mkl-service in /usr/local/lib/python3.11/dist-packages (from numpy>=1.23.3->ctgan) (2.4.1)\nRequirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.5.0->ctgan) (2.9.0.post0)\nRequirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.5.0->ctgan) (2025.2)\nRequirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.5.0->ctgan) (2025.2)\nRequirement already satisfied: scipy>=1.9.2 in /usr/local/lib/python3.11/dist-packages (from rdt>=1.14.0->ctgan) (1.15.3)\nRequirement already satisfied: 
scikit-learn>=1.1.3 in /usr/local/lib/python3.11/dist-packages (from rdt>=1.14.0->ctgan) (1.2.2)\nCollecting Faker!=37.11.0,>=17 (from rdt>=1.14.0->ctgan)\n Downloading faker-38.2.0-py3-none-any.whl.metadata (16 kB)\nRequirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->ctgan) (3.20.0)\nRequirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->ctgan) (3.5)\nRequirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->ctgan) (3.1.6)\nRequirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->ctgan) (2025.10.0)\nCollecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->ctgan)\n Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->ctgan)\n Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->ctgan)\n Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->ctgan)\n Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->ctgan)\n Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->ctgan)\n Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-curand-cu12==10.3.5.147 (from torch>=2.0.0->ctgan)\n Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nCollecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=2.0.0->ctgan)\n Downloading 
nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\nCollecting nvidia-cusparse-cu12==12.3.1.170 (from torch>=2.0.0->ctgan)\n Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\nRequirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->ctgan) (0.6.2)\nRequirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->ctgan) (2.21.5)\nRequirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->ctgan) (12.4.127)\nCollecting nvidia-nvjitlink-cu12==12.4.127 (from torch>=2.0.0->ctgan)\n Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\nRequirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->ctgan) (3.2.0)\nRequirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->ctgan) (1.13.1)\nRequirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.0.0->ctgan) (1.3.0)\nRequirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas>=1.5.0->ctgan) (1.17.0)\nRequirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=1.1.3->rdt>=1.14.0->ctgan) (1.5.2)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=1.1.3->rdt>=1.14.0->ctgan) (3.6.0)\nRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=2.0.0->ctgan) (3.0.3)\nRequirement already satisfied: onemkl-license==2025.3.0 in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.23.3->ctgan) (2025.3.0)\nRequirement already satisfied: 
intel-openmp<2026,>=2024 in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.23.3->ctgan) (2024.2.0)\nRequirement already satisfied: tbb==2022.* in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.23.3->ctgan) (2022.3.0)\nRequirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.11/dist-packages (from tbb==2022.*->mkl->numpy>=1.23.3->ctgan) (1.4.0)\nRequirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.11/dist-packages (from mkl_umath->numpy>=1.23.3->ctgan) (2024.2.0)\nRequirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.11/dist-packages (from intel-openmp<2026,>=2024->mkl->numpy>=1.23.3->ctgan) (2024.2.0)\nDownloading ctgan-0.11.1-py3-none-any.whl (25 kB)\nDownloading rdt-1.18.2-py3-none-any.whl (74 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.3/74.3 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m104.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m80.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n\u001b[2K 
\u001b[90m━━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m59.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m23.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m84.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hDownloading faker-38.2.0-py3-none-any.whl (2.0 MB)\n\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m81.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, Faker, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, rdt, ctgan\n Attempting uninstall: nvidia-nvjitlink-cu12\n Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n Attempting uninstall: nvidia-curand-cu12\n Found existing installation: nvidia-curand-cu12 10.3.6.82\n Uninstalling nvidia-curand-cu12-10.3.6.82:\n Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n Attempting uninstall: nvidia-cufft-cu12\n Found existing installation: nvidia-cufft-cu12 11.2.3.61\n Uninstalling nvidia-cufft-cu12-11.2.3.61:\n Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n Attempting uninstall: nvidia-cuda-runtime-cu12\n Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n Attempting uninstall: nvidia-cuda-nvrtc-cu12\n Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n Attempting uninstall: nvidia-cuda-cupti-cu12\n Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n Attempting uninstall: nvidia-cublas-cu12\n Found existing installation: nvidia-cublas-cu12 12.5.3.2\n Uninstalling nvidia-cublas-cu12-12.5.3.2:\n Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n Attempting uninstall: nvidia-cusparse-cu12\n Found existing installation: nvidia-cusparse-cu12 
12.5.1.3\n Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n Attempting uninstall: nvidia-cudnn-cu12\n Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n Attempting uninstall: nvidia-cusolver-cu12\n Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\nlibcugraph-cu12 25.6.0 requires libraft-cu12==25.6.*, but you have libraft-cu12 25.2.0 which is incompatible.\npylibcugraph-cu12 25.6.0 requires pylibraft-cu12==25.6.*, but you have pylibraft-cu12 25.2.0 which is incompatible.\npylibcugraph-cu12 25.6.0 requires rmm-cu12==25.6.*, but you have rmm-cu12 25.2.0 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed Faker-38.2.0 ctgan-0.11.1 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 rdt-1.18.2\n","output_type":"stream"}],"execution_count":4},{"cell_type":"code","source":"import gymnasium as gym\nfrom gymnasium import spaces\nimport numpy as np\nimport pandas as pd\nimport torch\nimport os\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler, OneHotEncoder\nfrom sklearn.decomposition import PCA\nfrom sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score\nfrom sklearn.compose import 
ColumnTransformer\n\nfrom stable_baselines3 import PPO, A2C, DQN\nfrom stable_baselines3.common.vec_env import DummyVecEnv\nfrom ctgan import CTGAN\n\n# Set random seed for reproducibility\nSEED = 42\nnp.random.seed(SEED)\ntorch.manual_seed(SEED)\n\n# Define a learning rate schedule\ndef linear_schedule(initial_value):\n def schedule(progress_remaining):\n return progress_remaining * initial_value\n return schedule\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-02T08:42:44.030136Z","iopub.execute_input":"2025-12-02T08:42:44.030514Z","iopub.status.idle":"2025-12-02T08:42:44.194310Z","shell.execute_reply.started":"2025-12-02T08:42:44.030482Z","shell.execute_reply":"2025-12-02T08:42:44.193597Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/rdt/transformers/utils.py:18: DeprecationWarning: module 'sre_parse' is deprecated\n import sre_parse # isort:skip\n","output_type":"stream"}],"execution_count":5},{"cell_type":"markdown","source":"## 1. 
Custom RL Environment","metadata":{}},{"cell_type":"code","source":"class FraudDetectionEnv(gym.Env):\n \"\"\"\n A custom Gym environment for Fraud Detection.\n State: Feature vector of a transaction.\n Action: 0 (Declare Not Fraud), 1 (Declare Fraud).\n Reward: Based on correctly/incorrectly classifying fraud vs non-fraud.\n \"\"\"\n def __init__(self, features: np.ndarray, labels: np.ndarray, reward_config: dict = None):\n super().__init__()\n \n if reward_config is None:\n # Default reward configuration favoring recall of fraud\n self.reward_config = {\n 'TP': 10.0,\n 'FP': -5.0,\n 'FN': -20.0,\n 'TN': 1.0\n}\n\n else:\n self.reward_config = reward_config\n\n self.features = features.astype(np.float32)\n self.labels = labels.astype(np.int64)\n\n self.num_instances = self.features.shape[0]\n self.feature_dim = self.features.shape[1]\n\n # Action Space: Discrete(2) -> 0 for Not Fraud, 1 for Fraud\n self.action_space = spaces.Discrete(2)\n\n # Observation Space: Box(low, high, shape, dtype)\n self.observation_space = spaces.Box(\n low=-np.inf, high=np.inf, \n shape=(self.feature_dim,), \n dtype=np.float32\n )\n\n self._current_index = 0\n self._order = np.arange(self.num_instances)\n np.random.shuffle(self._order)\n\n def step(self, action: int):\n if self._current_index >= self.num_instances:\n return self.observation_space.sample() * 0, 0, True, False, {}\n\n actual_index = self._order[self._current_index]\n true_label = self.labels[actual_index]\n\n # Calculate Reward\n reward = 0\n if action == 1 and true_label == 1:\n reward = self.reward_config['TP']\n elif action == 1 and true_label == 0:\n reward = self.reward_config['FP']\n elif action == 0 and true_label == 1:\n reward = self.reward_config['FN']\n elif action == 0 and true_label == 0:\n reward = self.reward_config['TN']\n\n self._current_index += 1\n done = self._current_index >= self.num_instances\n truncated = False\n\n next_observation = np.zeros(self.feature_dim, dtype=np.float32)\n if not done:\n 
next_observation = self.features[self._order[self._current_index]]\n\n info = {\n 'true_label': true_label,\n 'predicted_action': action,\n 'is_done': done\n }\n\n return next_observation, reward, done, truncated, info\n\n def reset(self, seed=None, options=None):\n super().reset(seed=seed)\n self._current_index = 0\n self._order = np.arange(self.num_instances)\n np.random.shuffle(self._order)\n \n initial_observation = self.features[self._order[self._current_index]]\n return initial_observation, {}","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-02T08:42:44.195742Z","iopub.execute_input":"2025-12-02T08:42:44.195998Z","iopub.status.idle":"2025-12-02T08:42:44.205640Z","shell.execute_reply.started":"2025-12-02T08:42:44.195983Z","shell.execute_reply":"2025-12-02T08:42:44.204948Z"}},"outputs":[],"execution_count":6},{"cell_type":"markdown","source":"## 2. Data Loading and Preprocessing","metadata":{}},{"cell_type":"code","source":"def load_creditcard_data(path=\"creditcard.csv\"):\n print(\"Loading CreditCard dataset...\")\n df = pd.read_csv(path)\n \n # 1. Balance Data (1:5 Fraud to Non-Fraud)\n fraud = df[df['Class'] == 1]\n non_fraud = df[df['Class'] == 0]\n \n # Undersample non-fraud\n n_fraud = len(fraud)\n n_non_fraud = n_fraud * 5\n \n if len(non_fraud) > n_non_fraud:\n non_fraud = non_fraud.sample(n=n_non_fraud, random_state=SEED)\n \n balanced_df = pd.concat([fraud, non_fraud]).sample(frac=1, random_state=SEED).reset_index(drop=True)\n print(f\"Balanced CreditCard Data: {len(fraud)} Fraud, {len(non_fraud)} Non-Fraud\")\n\n # 2. Split\n X = balanced_df.drop('Class', axis=1).values\n y = balanced_df['Class'].values\n \n X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED, stratify=y)\n \n # 3. 
Scale (Fit on Train, Transform Test)\n # Scale Time (0) and Amount (29)\n def scale_columns(X_tr, X_te, indices):\n for i in indices:\n scaler_i = StandardScaler()\n X_tr[:, i] = scaler_i.fit_transform(X_tr[:, i].reshape(-1, 1)).flatten()\n X_te[:, i] = scaler_i.transform(X_te[:, i].reshape(-1, 1)).flatten()\n return X_tr, X_te\n\n if X_train.shape[1] == 30:\n X_train, X_test = scale_columns(X_train, X_test, [0, 29])\n \n return X_train, X_test, y_train, y_test\n\ndef load_paysim_data(path=\"paysim.csv\"):\n print(\"Loading PaySim dataset...\")\n df = pd.read_csv(path)\n \n # Drop unnecessary columns\n df = df.drop(['nameOrig', 'nameDest', 'isFlaggedFraud'], axis=1)\n \n # Rename 'isFraud' to 'Class' for consistency\n df = df.rename(columns={'isFraud': 'Class'})\n \n # One-hot encode 'type'\n df = pd.get_dummies(df, columns=['type'], drop_first=True)\n \n # 1. Balance Data (1:5 Fraud to Non-Fraud)\n fraud = df[df['Class'] == 1]\n non_fraud = df[df['Class'] == 0]\n \n # Undersample non-fraud\n n_fraud = len(fraud)\n n_non_fraud = n_fraud * 5\n \n if len(non_fraud) > n_non_fraud:\n non_fraud = non_fraud.sample(n=n_non_fraud, random_state=SEED)\n \n balanced_df = pd.concat([fraud, non_fraud]).sample(frac=1, random_state=SEED).reset_index(drop=True)\n print(f\"Balanced PaySim Data: {len(fraud)} Fraud, {len(non_fraud)} Non-Fraud\")\n \n # 2. Split\n X = balanced_df.drop('Class', axis=1).values.astype(np.float32)\n y = balanced_df['Class'].values.astype(np.int64)\n \n X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED, stratify=y)\n \n # 3. 
Scale\n scaler = StandardScaler()\n X_train = scaler.fit_transform(X_train)\n X_test = scaler.transform(X_test)\n \n return X_train, X_test, y_train, y_test\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-02T08:42:44.206516Z","iopub.execute_input":"2025-12-02T08:42:44.206783Z","iopub.status.idle":"2025-12-02T08:42:44.223599Z","shell.execute_reply.started":"2025-12-02T08:42:44.206757Z","shell.execute_reply":"2025-12-02T08:42:44.222946Z"}},"outputs":[],"execution_count":7},{"cell_type":"markdown","source":"## 3. Preprocessing Techniques (PCA & CTGAN)","metadata":{}},{"cell_type":"code","source":"def apply_pca_ctgan(X_train, y_train, X_test, n_components=0.99, epochs=200):\n print(f\"Applying PCA (n_components={n_components}) + CTGAN (epochs={epochs})...\")\n \n # 1. Apply PCA\n pca = PCA(n_components=n_components)\n X_train_pca = pca.fit_transform(X_train)\n X_test_pca = pca.transform(X_test)\n print(f\"PCA reduced dimensions from {X_train.shape[1]} to {X_train_pca.shape[1]}\")\n \n # 2. 
Apply CTGAN on PCA-transformed data\n # Combine X and y for CTGAN\n df_train = pd.DataFrame(X_train_pca, columns=[f'f{i}' for i in range(X_train_pca.shape[1])])\n df_train['label'] = y_train\n \n # Filter fraud samples\n fraud_df = df_train[df_train['label'] == 1].drop('label', axis=1)\n \n if len(fraud_df) == 0:\n print(\"No fraud samples found for CTGAN!\")\n return X_train_pca, y_train, X_test_pca\n \n # Train CTGAN\n ctgan = CTGAN(epochs=epochs, batch_size=64, pac=1, verbose=True)\n ctgan.fit(fraud_df)\n \n # Generate synthetic samples to balance the dataset (or double the fraud count)\n n_synthetic = len(fraud_df) \n synthetic_fraud = ctgan.sample(n_synthetic)\n \n X_synthetic = synthetic_fraud.values\n y_synthetic = np.ones(n_synthetic)\n \n X_aug = np.vstack([X_train_pca, X_synthetic])\n y_aug = np.concatenate([y_train, y_synthetic])\n \n print(f\"Augmented training set from {len(X_train_pca)} to {len(X_aug)} samples.\")\n return X_aug, y_aug, X_test_pca\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-02T08:42:44.249293Z","iopub.execute_input":"2025-12-02T08:42:44.249852Z","iopub.status.idle":"2025-12-02T08:42:44.255758Z","shell.execute_reply.started":"2025-12-02T08:42:44.249833Z","shell.execute_reply":"2025-12-02T08:42:44.255071Z"}},"outputs":[],"execution_count":8},{"cell_type":"markdown","source":"## 4. 
Experiment Loop","metadata":{}},{"cell_type":"code","source":"def train_and_evaluate(agent_name, X_train, y_train, X_test, y_test, total_timesteps=10000):\n # Create Environment\n env = DummyVecEnv([lambda: FraudDetectionEnv(X_train, y_train)])\n \n # Initialize Agent\n if agent_name == 'PPO':\n # Default PPO parameters as none were specified in reference\n model = PPO('MlpPolicy', env, verbose=0)\n elif agent_name == 'A2C':\n model = A2C(\n \"MlpPolicy\",\n env,\n learning_rate=1e-4,\n gamma=0.99,\n n_steps=5,\n ent_coef=0.01,\n vf_coef=0.5,\n max_grad_norm=0.5,\n verbose=0,\n device=\"auto\"\n )\n elif agent_name == 'DQN':\n model = DQN(\n \"MlpPolicy\",\n env,\n learning_rate=linear_schedule(1e-4),\n buffer_size=100000,\n learning_starts=1000,\n batch_size=512,\n gamma=0.99,\n train_freq=1,\n gradient_steps=1,\n target_update_interval=500,\n exploration_fraction=0.1,\n exploration_initial_eps=1.0,\n exploration_final_eps=0.05,\n max_grad_norm=10,\n verbose=0,\n device=\"auto\"\n )\n else:\n raise ValueError(f\"Unknown agent: {agent_name}\")\n \n # Train\n model.learn(total_timesteps=total_timesteps)\n \n # Evaluate\n # We'll use the environment logic to step through test set\n test_env = FraudDetectionEnv(X_test, y_test)\n obs, _ = test_env.reset()\n \n y_pred = []\n y_true = []\n \n done = False\n while not done:\n action, _ = model.predict(obs, deterministic=True)\n obs, reward, done, truncated, info = test_env.step(action)\n \n if 'true_label' in info:\n y_true.append(info['true_label'])\n y_pred.append(action)\n \n # Metrics\n precision = precision_score(y_true, y_pred, zero_division=0)\n recall = recall_score(y_true, y_pred, zero_division=0)\n f1 = f1_score(y_true, y_pred, zero_division=0)\n \n return precision, recall, 
f1\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-02T08:42:44.278425Z","iopub.execute_input":"2025-12-02T08:42:44.278595Z","iopub.status.idle":"2025-12-02T08:42:44.285248Z","shell.execute_reply.started":"2025-12-02T08:42:44.278581Z","shell.execute_reply":"2025-12-02T08:42:44.284588Z"}},"outputs":[],"execution_count":9},{"cell_type":"code","source":"# Configuration\ndatasets = ['CreditCard', 'PaySim']\npreprocessing_methods = ['Raw', 'PCA_CTGAN']\nagents = ['PPO', 'A2C', 'DQN']\n\nresults = []\n\nfor dataset_name in datasets:\n print(f\"\\n=== Processing Dataset: {dataset_name} ===\")\n \n # Load Data\n if dataset_name == 'CreditCard':\n X_train, X_test, y_train, y_test = load_creditcard_data(\"/kaggle/input/creditcardfraud/creditcard.csv\")\n else:\n X_train, X_test, y_train, y_test = load_paysim_data(\"/kaggle/input/paysim1/PS_20174392719_1491204439457_log.csv\")\n \n # Subsample PaySim for speed if it's too large (optional, but recommended for quick ablation)\n # PaySim is huge, so let's take a subset for this study\n if len(X_train) > 500000:\n print(\"Subsampling PaySim to 500k samples for speed...\")\n indices = np.random.choice(len(X_train), 500000, replace=False)\n X_train = X_train[indices]\n y_train = y_train[indices]\n # Also subsample test\n test_indices = np.random.choice(len(X_test), 100000, replace=False)\n X_test = X_test[test_indices]\n y_test = y_test[test_indices]\n\n # Store original copies\n X_train_orig, X_test_orig = X_train.copy(), X_test.copy()\n y_train_orig = y_train.copy()\n\n for prep in preprocessing_methods:\n print(f\"\\n--- Preprocessing: {prep} ---\")\n \n # Reset data\n X_curr_train, X_curr_test = X_train_orig.copy(), X_test_orig.copy()\n y_curr_train = y_train_orig.copy()\n \n # Apply Preprocessing\n if prep == 'PCA_CTGAN':\n X_curr_train, y_curr_train, X_curr_test = apply_pca_ctgan(X_curr_train, y_curr_train, X_curr_test, epochs=200)\n \n for agent in agents:\n print(f\"Training {agent}...\")\n try:\n 
prec, rec, f1 = train_and_evaluate(agent, X_curr_train, y_curr_train, X_curr_test, y_test, total_timesteps=5000)\n print(f\"Result: F1={f1:.4f} (Prec={prec:.4f}, Rec={rec:.4f})\")\n \n results.append({\n 'Dataset': dataset_name,\n 'Preprocessing': prep,\n 'Agent': agent,\n 'Precision': prec,\n 'Recall': rec,\n 'F1': f1\n })\n except Exception as e:\n print(f\"Failed to train {agent}: {e}\")\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-02T08:42:44.311506Z","iopub.execute_input":"2025-12-02T08:42:44.311705Z","iopub.status.idle":"2025-12-02T08:52:32.225896Z","shell.execute_reply.started":"2025-12-02T08:42:44.311690Z","shell.execute_reply":"2025-12-02T08:52:32.225198Z"}},"outputs":[{"name":"stdout","text":"\n=== Processing Dataset: CreditCard ===\nLoading CreditCard dataset...\nBalanced CreditCard Data: 492 Fraud, 2460 Non-Fraud\n\n--- Preprocessing: Raw ---\nTraining PPO...\nResult: F1=0.7667 (Prec=0.6479, Rec=0.9388)\nTraining A2C...\nResult: F1=0.6460 (Prec=0.4870, Rec=0.9592)\nTraining DQN...\nResult: F1=0.9355 (Prec=0.9886, Rec=0.8878)\n\n--- Preprocessing: PCA_CTGAN ---\nApplying PCA (n_components=0.99) + CTGAN (epochs=200)...\nPCA reduced dimensions from 30 to 24\n","output_type":"stream"},{"name":"stderr","text":"Gen. (-1.22) | Discrim. 
(-0.19): 100%|██████████| 200/200 [00:38<00:00, 5.26it/s]\n","output_type":"stream"},{"name":"stdout","text":"Augmented training set from 2361 to 2755 samples.\nTraining PPO...\nResult: F1=0.8708 (Prec=0.8198, Rec=0.9286)\nTraining A2C...\nResult: F1=0.9175 (Prec=0.9271, Rec=0.9082)\nTraining DQN...\nResult: F1=0.8911 (Prec=0.8654, Rec=0.9184)\n\n=== Processing Dataset: PaySim ===\nLoading PaySim dataset...\nBalanced PaySim Data: 8213 Fraud, 41065 Non-Fraud\n\n--- Preprocessing: Raw ---\nTraining PPO...\nResult: F1=0.6854 (Prec=0.5757, Rec=0.8466)\nTraining A2C...\nResult: F1=0.6718 (Prec=0.5834, Rec=0.7918)\nTraining DQN...\nResult: F1=0.7020 (Prec=0.6672, Rec=0.7407)\n\n--- Preprocessing: PCA_CTGAN ---\nApplying PCA (n_components=0.99) + CTGAN (epochs=200)...\nPCA reduced dimensions from 10 to 8\n","output_type":"stream"},{"name":"stderr","text":"Gen. (-0.05) | Discrim. (-0.19): 100%|██████████| 200/200 [05:42<00:00, 1.71s/it]\n","output_type":"stream"},{"name":"stdout","text":"Augmented training set from 39422 to 45992 samples.\nTraining PPO...\nResult: F1=0.5086 (Prec=0.3453, Rec=0.9647)\nTraining A2C...\nResult: F1=0.5243 (Prec=0.3649, Rec=0.9306)\nTraining DQN...\nResult: F1=0.7055 (Prec=0.6381, Rec=0.7888)\n","output_type":"stream"}],"execution_count":10},{"cell_type":"markdown","source":"## 5. 
Results Visualization","metadata":{}},{"cell_type":"code","source":"# Create DataFrame from results sorted by F1 score\nresults_df = pd.DataFrame(results)\nsorted_results_df = results_df.sort_values(by='F1', ascending=False)\n\nprint(\"\\n=== Final Results ===\")\n(results_df)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-02T08:57:53.271031Z","iopub.execute_input":"2025-12-02T08:57:53.271521Z","iopub.status.idle":"2025-12-02T08:57:53.289311Z","shell.execute_reply.started":"2025-12-02T08:57:53.271497Z","shell.execute_reply":"2025-12-02T08:57:53.288745Z"}},"outputs":[{"name":"stdout","text":"\n=== Final Results ===\n","output_type":"stream"},{"execution_count":13,"output_type":"execute_result","data":{"text/plain":" Dataset Preprocessing Agent Precision Recall F1\n0 CreditCard Raw PPO 0.647887 0.938776 0.766667\n1 CreditCard Raw A2C 0.487047 0.959184 0.646048\n2 CreditCard Raw DQN 0.988636 0.887755 0.935484\n3 CreditCard PCA_CTGAN PPO 0.819820 0.928571 0.870813\n4 CreditCard PCA_CTGAN A2C 0.927083 0.908163 0.917526\n5 CreditCard PCA_CTGAN DQN 0.865385 0.918367 0.891089\n6 PaySim Raw PPO 0.575745 0.846622 0.685390\n7 PaySim Raw A2C 0.583408 0.791844 0.671831\n8 PaySim Raw DQN 0.667215 0.740718 0.702048\n9 PaySim PCA_CTGAN PPO 0.345316 0.964699 0.508583\n10 PaySim PCA_CTGAN A2C 0.364916 0.930615 0.524259\n11 PaySim PCA_CTGAN DQN 0.638109 0.788801 0.705498","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Dataset</th>\n <th>Preprocessing</th>\n <th>Agent</th>\n <th>Precision</th>\n <th>Recall</th>\n <th>F1</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>CreditCard</td>\n <td>Raw</td>\n <td>PPO</td>\n <td>0.647887</td>\n <td>0.938776</td>\n <td>0.766667</td>\n 
</tr>\n <tr>\n <th>1</th>\n <td>CreditCard</td>\n <td>Raw</td>\n <td>A2C</td>\n <td>0.487047</td>\n <td>0.959184</td>\n <td>0.646048</td>\n </tr>\n <tr>\n <th>2</th>\n <td>CreditCard</td>\n <td>Raw</td>\n <td>DQN</td>\n <td>0.988636</td>\n <td>0.887755</td>\n <td>0.935484</td>\n </tr>\n <tr>\n <th>3</th>\n <td>CreditCard</td>\n <td>PCA_CTGAN</td>\n <td>PPO</td>\n <td>0.819820</td>\n <td>0.928571</td>\n <td>0.870813</td>\n </tr>\n <tr>\n <th>4</th>\n <td>CreditCard</td>\n <td>PCA_CTGAN</td>\n <td>A2C</td>\n <td>0.927083</td>\n <td>0.908163</td>\n <td>0.917526</td>\n </tr>\n <tr>\n <th>5</th>\n <td>CreditCard</td>\n <td>PCA_CTGAN</td>\n <td>DQN</td>\n <td>0.865385</td>\n <td>0.918367</td>\n <td>0.891089</td>\n </tr>\n <tr>\n <th>6</th>\n <td>PaySim</td>\n <td>Raw</td>\n <td>PPO</td>\n <td>0.575745</td>\n <td>0.846622</td>\n <td>0.685390</td>\n </tr>\n <tr>\n <th>7</th>\n <td>PaySim</td>\n <td>Raw</td>\n <td>A2C</td>\n <td>0.583408</td>\n <td>0.791844</td>\n <td>0.671831</td>\n </tr>\n <tr>\n <th>8</th>\n <td>PaySim</td>\n <td>Raw</td>\n <td>DQN</td>\n <td>0.667215</td>\n <td>0.740718</td>\n <td>0.702048</td>\n </tr>\n <tr>\n <th>9</th>\n <td>PaySim</td>\n <td>PCA_CTGAN</td>\n <td>PPO</td>\n <td>0.345316</td>\n <td>0.964699</td>\n <td>0.508583</td>\n </tr>\n <tr>\n <th>10</th>\n <td>PaySim</td>\n <td>PCA_CTGAN</td>\n <td>A2C</td>\n <td>0.364916</td>\n <td>0.930615</td>\n <td>0.524259</td>\n </tr>\n <tr>\n <th>11</th>\n <td>PaySim</td>\n <td>PCA_CTGAN</td>\n <td>DQN</td>\n <td>0.638109</td>\n <td>0.788801</td>\n <td>0.705498</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}],"execution_count":13},{"cell_type":"code","source":"# Plotting\nplt.figure(figsize=(14, 6))\n\nplt.subplot(1, 2, 1)\nsns.barplot(data=results_df[results_df['Dataset'] == 'CreditCard'], x='Agent', y='F1', hue='Preprocessing')\nplt.title('CreditCard Dataset - F1 Score')\nplt.ylim(0, 1)\n\nplt.subplot(1, 2, 2)\nsns.barplot(data=results_df[results_df['Dataset'] == 'PaySim'], x='Agent', y='F1', 
hue='Preprocessing')\nplt.title('PaySim Dataset - F1 Score')\nplt.ylim(0, 1)\n\nplt.tight_layout()\nplt.show()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-02T08:58:30.402840Z","iopub.execute_input":"2025-12-02T08:58:30.403082Z","iopub.status.idle":"2025-12-02T08:58:30.735267Z","shell.execute_reply.started":"2025-12-02T08:58:30.403066Z","shell.execute_reply":"2025-12-02T08:58:30.734548Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1498: DeprecationWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, pd.CategoricalDtype) instead\n if pd.api.types.is_categorical_dtype(vector):\n/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1498: DeprecationWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, pd.CategoricalDtype) instead\n if pd.api.types.is_categorical_dtype(vector):\n/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1498: DeprecationWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, pd.CategoricalDtype) instead\n if pd.api.types.is_categorical_dtype(vector):\n/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1498: DeprecationWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, pd.CategoricalDtype) instead\n if pd.api.types.is_categorical_dtype(vector):\n/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1498: DeprecationWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, pd.CategoricalDtype) instead\n if pd.api.types.is_categorical_dtype(vector):\n/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1498: DeprecationWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, pd.CategoricalDtype) instead\n if pd.api.types.is_categorical_dtype(vector):\n/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1498: DeprecationWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, pd.CategoricalDtype) instead\n if pd.api.types.is_categorical_dtype(vector):\n/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1498: DeprecationWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, pd.CategoricalDtype) instead\n if pd.api.types.is_categorical_dtype(vector):\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<Figure size 1400x600 with 2 Axes>","image/png":"\n"},"metadata":{}}],"execution_count":14}]}
false/ablation_study/ablation.ipynb ADDED
@@ -0,0 +1,514 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Ablation Study: RL Agents for Fraud Detection\n",
8
+ "\n",
9
+ "This notebook performs an ablation study to compare the performance of different Reinforcement Learning (RL) agents (PPO, A2C, DQN) on two fraud detection datasets (CreditCard and PaySim). \n",
10
+ "We also evaluate the impact of preprocessing by comparing two pipelines: the raw (scaled) features, and PCA followed by CTGAN data augmentation of the fraud class."
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 1,
16
+ "metadata": {},
17
+ "outputs": [
18
+ {
19
+ "data": {
20
+ "text/plain": [
21
+ "<torch._C.Generator at 0x7fcfdd72ae10>"
22
+ ]
23
+ },
24
+ "execution_count": 1,
25
+ "metadata": {},
26
+ "output_type": "execute_result"
27
+ }
28
+ ],
29
+ "source": [
30
+ "import gymnasium as gym\n",
31
+ "from gymnasium import spaces\n",
32
+ "import numpy as np\n",
33
+ "import pandas as pd\n",
34
+ "import torch\n",
35
+ "import os\n",
36
+ "import matplotlib.pyplot as plt\n",
37
+ "import seaborn as sns\n",
38
+ "from sklearn.model_selection import train_test_split\n",
39
+ "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
40
+ "from sklearn.decomposition import PCA\n",
41
+ "from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score\n",
42
+ "from sklearn.compose import ColumnTransformer\n",
43
+ "\n",
44
+ "from stable_baselines3 import PPO, A2C, DQN\n",
45
+ "from stable_baselines3.common.vec_env import DummyVecEnv\n",
46
+ "from ctgan import CTGAN\n",
47
+ "\n",
48
+ "# Set random seed for reproducibility\n",
49
+ "SEED = 42\n",
50
+ "np.random.seed(SEED)\n",
51
+ "torch.manual_seed(SEED)\n",
52
+ "\n",
53
+ "# Define a learning rate schedule\n",
54
+ "def linear_schedule(initial_value):\n",
55
+ " def schedule(progress_remaining):\n",
56
+ " return progress_remaining * initial_value\n",
57
+ " return schedule\n"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "markdown",
62
+ "metadata": {},
63
+ "source": [
64
+ "## 1. Custom RL Environment"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 2,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "class FraudDetectionEnv(gym.Env):\n",
74
+ " \"\"\"\n",
75
+ " A custom Gym environment for Fraud Detection.\n",
76
+ " State: Feature vector of a transaction.\n",
77
+ " Action: 0 (Declare Not Fraud), 1 (Declare Fraud).\n",
78
+ " Reward: Based on correctly/incorrectly classifying fraud vs non-fraud.\n",
79
+ " \"\"\"\n",
80
+ " def __init__(self, features: np.ndarray, labels: np.ndarray, reward_config: dict = None):\n",
81
+ " super().__init__()\n",
82
+ " \n",
83
+ " if reward_config is None:\n",
84
+ " # Default reward configuration favoring recall of fraud\n",
85
+ " self.reward_config = {\n",
86
+ " 'TP': 10.0,\n",
87
+ " 'FP': -5.0,\n",
88
+ " 'FN': -20.0,\n",
89
+ " 'TN': 1.0\n",
90
+ "}\n",
91
+ "\n",
92
+ " else:\n",
93
+ " self.reward_config = reward_config\n",
94
+ "\n",
95
+ " self.features = features.astype(np.float32)\n",
96
+ " self.labels = labels.astype(np.int64)\n",
97
+ "\n",
98
+ " self.num_instances = self.features.shape[0]\n",
99
+ " self.feature_dim = self.features.shape[1]\n",
100
+ "\n",
101
+ " # Action Space: Discrete(2) -> 0 for Not Fraud, 1 for Fraud\n",
102
+ " self.action_space = spaces.Discrete(2)\n",
103
+ "\n",
104
+ " # Observation Space: Box(low, high, shape, dtype)\n",
105
+ " self.observation_space = spaces.Box(\n",
106
+ " low=-np.inf, high=np.inf, \n",
107
+ " shape=(self.feature_dim,), \n",
108
+ " dtype=np.float32\n",
109
+ " )\n",
110
+ "\n",
111
+ " self._current_index = 0\n",
112
+ " self._order = np.arange(self.num_instances)\n",
113
+ " np.random.shuffle(self._order)\n",
114
+ "\n",
115
+ " def step(self, action: int):\n",
116
+ " if self._current_index >= self.num_instances:\n",
117
+ " return self.observation_space.sample() * 0, 0, True, False, {}\n",
118
+ "\n",
119
+ " actual_index = self._order[self._current_index]\n",
120
+ " true_label = self.labels[actual_index]\n",
121
+ "\n",
122
+ " # Calculate Reward\n",
123
+ " reward = 0\n",
124
+ " if action == 1 and true_label == 1:\n",
125
+ " reward = self.reward_config['TP']\n",
126
+ " elif action == 1 and true_label == 0:\n",
127
+ " reward = self.reward_config['FP']\n",
128
+ " elif action == 0 and true_label == 1:\n",
129
+ " reward = self.reward_config['FN']\n",
130
+ " elif action == 0 and true_label == 0:\n",
131
+ " reward = self.reward_config['TN']\n",
132
+ "\n",
133
+ " self._current_index += 1\n",
134
+ " done = self._current_index >= self.num_instances\n",
135
+ " truncated = False\n",
136
+ "\n",
137
+ " next_observation = np.zeros(self.feature_dim, dtype=np.float32)\n",
138
+ " if not done:\n",
139
+ " next_observation = self.features[self._order[self._current_index]]\n",
140
+ "\n",
141
+ " info = {\n",
142
+ " 'true_label': true_label,\n",
143
+ " 'predicted_action': action,\n",
144
+ " 'is_done': done\n",
145
+ " }\n",
146
+ "\n",
147
+ " return next_observation, reward, done, truncated, info\n",
148
+ "\n",
149
+ " def reset(self, seed=None, options=None):\n",
150
+ " super().reset(seed=seed)\n",
151
+ " self._current_index = 0\n",
152
+ " self._order = np.arange(self.num_instances)\n",
153
+ " np.random.shuffle(self._order)\n",
154
+ " \n",
155
+ " initial_observation = self.features[self._order[self._current_index]]\n",
156
+ " return initial_observation, {}"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "markdown",
161
+ "metadata": {},
162
+ "source": [
163
+ "## 2. Data Loading and Preprocessing"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "execution_count": 3,
169
+ "metadata": {},
170
+ "outputs": [],
171
+ "source": [
172
+ "def load_creditcard_data(path=\"creditcard.csv\"):\n",
173
+ " print(\"Loading CreditCard dataset...\")\n",
174
+ " df = pd.read_csv(path)\n",
175
+ " \n",
176
+ " # 1. Balance Data (1:5 Fraud to Non-Fraud)\n",
177
+ " fraud = df[df['Class'] == 1]\n",
178
+ " non_fraud = df[df['Class'] == 0]\n",
179
+ " \n",
180
+ " # Undersample non-fraud\n",
181
+ " n_fraud = len(fraud)\n",
182
+ " n_non_fraud = n_fraud * 5\n",
183
+ " \n",
184
+ " if len(non_fraud) > n_non_fraud:\n",
185
+ " non_fraud = non_fraud.sample(n=n_non_fraud, random_state=SEED)\n",
186
+ " \n",
187
+ " balanced_df = pd.concat([fraud, non_fraud]).sample(frac=1, random_state=SEED).reset_index(drop=True)\n",
188
+ " print(f\"Balanced CreditCard Data: {len(fraud)} Fraud, {len(non_fraud)} Non-Fraud\")\n",
189
+ "\n",
190
+ " # 2. Split\n",
191
+ " X = balanced_df.drop('Class', axis=1).values\n",
192
+ " y = balanced_df['Class'].values\n",
193
+ " \n",
194
+ " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED, stratify=y)\n",
195
+ " \n",
196
+ " # 3. Scale (Fit on Train, Transform Test)\n",
197
+ " # Scale Time (0) and Amount (29)\n",
198
+ " def scale_columns(X_tr, X_te, indices):\n",
199
+ " for i in indices:\n",
200
+ " scaler_i = StandardScaler()\n",
201
+ " X_tr[:, i] = scaler_i.fit_transform(X_tr[:, i].reshape(-1, 1)).flatten()\n",
202
+ " X_te[:, i] = scaler_i.transform(X_te[:, i].reshape(-1, 1)).flatten()\n",
203
+ " return X_tr, X_te\n",
204
+ "\n",
205
+ " if X_train.shape[1] == 30:\n",
206
+ " X_train, X_test = scale_columns(X_train, X_test, [0, 29])\n",
207
+ " \n",
208
+ " return X_train, X_test, y_train, y_test\n",
209
+ "\n",
210
+ "def load_paysim_data(path=\"paysim.csv\"):\n",
211
+ " print(\"Loading PaySim dataset...\")\n",
212
+ " df = pd.read_csv(path)\n",
213
+ " \n",
214
+ " # Drop unnecessary columns\n",
215
+ " df = df.drop(['nameOrig', 'nameDest', 'isFlaggedFraud'], axis=1)\n",
216
+ " \n",
217
+ " # Rename 'isFraud' to 'Class' for consistency\n",
218
+ " df = df.rename(columns={'isFraud': 'Class'})\n",
219
+ " \n",
220
+ " # One-hot encode 'type'\n",
221
+ " df = pd.get_dummies(df, columns=['type'], drop_first=True)\n",
222
+ " \n",
223
+ " # 1. Balance Data (1:5 Fraud to Non-Fraud)\n",
224
+ " fraud = df[df['Class'] == 1]\n",
225
+ " non_fraud = df[df['Class'] == 0]\n",
226
+ " \n",
227
+ " # Undersample non-fraud\n",
228
+ " n_fraud = len(fraud)\n",
229
+ " n_non_fraud = n_fraud * 5\n",
230
+ " \n",
231
+ " if len(non_fraud) > n_non_fraud:\n",
232
+ " non_fraud = non_fraud.sample(n=n_non_fraud, random_state=SEED)\n",
233
+ " \n",
234
+ " balanced_df = pd.concat([fraud, non_fraud]).sample(frac=1, random_state=SEED).reset_index(drop=True)\n",
235
+ " print(f\"Balanced PaySim Data: {len(fraud)} Fraud, {len(non_fraud)} Non-Fraud\")\n",
236
+ " \n",
237
+ " # 2. Split\n",
238
+ " X = balanced_df.drop('Class', axis=1).values.astype(np.float32)\n",
239
+ " y = balanced_df['Class'].values.astype(np.int64)\n",
240
+ " \n",
241
+ " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED, stratify=y)\n",
242
+ " \n",
243
+ " # 3. Scale\n",
244
+ " scaler = StandardScaler()\n",
245
+ " X_train = scaler.fit_transform(X_train)\n",
246
+ " X_test = scaler.transform(X_test)\n",
247
+ " \n",
248
+ " return X_train, X_test, y_train, y_test\n"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "markdown",
253
+ "metadata": {},
254
+ "source": [
255
+ "## 3. Preprocessing Techniques (PCA & CTGAN)"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": 4,
261
+ "metadata": {},
262
+ "outputs": [],
263
+ "source": [
264
+ "def apply_pca_ctgan(X_train, y_train, X_test, n_components=0.99, epochs=200):\n",
265
+ " print(f\"Applying PCA (n_components={n_components}) + CTGAN (epochs={epochs})...\")\n",
266
+ " \n",
267
+ " # 1. Apply PCA\n",
268
+ " pca = PCA(n_components=n_components)\n",
269
+ " X_train_pca = pca.fit_transform(X_train)\n",
270
+ " X_test_pca = pca.transform(X_test)\n",
271
+ " print(f\"PCA reduced dimensions from {X_train.shape[1]} to {X_train_pca.shape[1]}\")\n",
272
+ " \n",
273
+ " # 2. Apply CTGAN on PCA-transformed data\n",
274
+ " # Combine X and y for CTGAN\n",
275
+ " df_train = pd.DataFrame(X_train_pca, columns=[f'f{i}' for i in range(X_train_pca.shape[1])])\n",
276
+ " df_train['label'] = y_train\n",
277
+ " \n",
278
+ " # Filter fraud samples\n",
279
+ " fraud_df = df_train[df_train['label'] == 1].drop('label', axis=1)\n",
280
+ " \n",
281
+ " if len(fraud_df) == 0:\n",
282
+ " print(\"No fraud samples found for CTGAN!\")\n",
283
+ " return X_train_pca, y_train, X_test_pca\n",
284
+ " \n",
285
+ " # Train CTGAN\n",
286
+ " ctgan = CTGAN(epochs=epochs, batch_size=64, pac=1, verbose=True)\n",
287
+ " ctgan.fit(fraud_df)\n",
288
+ " \n",
289
+ " # Generate one synthetic fraud sample per real fraud sample (doubles the fraud count; does not necessarily balance the classes)\n",
290
+ " n_synthetic = len(fraud_df) \n",
291
+ " synthetic_fraud = ctgan.sample(n_synthetic)\n",
292
+ " \n",
293
+ " X_synthetic = synthetic_fraud.values\n",
294
+ " y_synthetic = np.ones(n_synthetic)\n",
295
+ " \n",
296
+ " X_aug = np.vstack([X_train_pca, X_synthetic])\n",
297
+ " y_aug = np.concatenate([y_train, y_synthetic])\n",
298
+ " \n",
299
+ " print(f\"Augmented training set from {len(X_train_pca)} to {len(X_aug)} samples.\")\n",
300
+ " return X_aug, y_aug, X_test_pca\n"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "markdown",
305
+ "metadata": {},
306
+ "source": [
307
+ "## 4. Experiment Loop"
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "execution_count": 6,
313
+ "metadata": {},
314
+ "outputs": [],
315
+ "source": [
316
+ "def train_and_evaluate(agent_name, X_train, y_train, X_test, y_test, total_timesteps=10000):\n",
317
+ " # Create Environment\n",
318
+ " env = DummyVecEnv([lambda: FraudDetectionEnv(X_train, y_train)])\n",
319
+ " \n",
320
+ " # Initialize Agent\n",
321
+ " if agent_name == 'PPO':\n",
322
+ " # Default PPO parameters as none were specified in reference\n",
323
+ " model = PPO('MlpPolicy', env, verbose=0)\n",
324
+ " elif agent_name == 'A2C':\n",
325
+ " model = A2C(\n",
326
+ " \"MlpPolicy\",\n",
327
+ " env,\n",
328
+ " learning_rate=1e-4,\n",
329
+ " gamma=0.99,\n",
330
+ " n_steps=5,\n",
331
+ " ent_coef=0.01,\n",
332
+ " vf_coef=0.5,\n",
333
+ " max_grad_norm=0.5,\n",
334
+ " verbose=0,\n",
335
+ " device=\"auto\"\n",
336
+ " )\n",
337
+ " elif agent_name == 'DQN':\n",
338
+ " model = DQN(\n",
339
+ " \"MlpPolicy\",\n",
340
+ " env,\n",
341
+ " learning_rate=linear_schedule(1e-4),\n",
342
+ " buffer_size=100000,\n",
343
+ " learning_starts=1000,\n",
344
+ " batch_size=512,\n",
345
+ " gamma=0.99,\n",
346
+ " train_freq=1,\n",
347
+ " gradient_steps=1,\n",
348
+ " target_update_interval=500,\n",
349
+ " exploration_fraction=0.1,\n",
350
+ " exploration_initial_eps=1.0,\n",
351
+ " exploration_final_eps=0.05,\n",
352
+ " max_grad_norm=10,\n",
353
+ " verbose=0,\n",
354
+ " device=\"auto\"\n",
355
+ " )\n",
356
+ " else:\n",
357
+ " raise ValueError(f\"Unknown agent: {agent_name}\")\n",
358
+ " \n",
359
+ " # Train\n",
360
+ " model.learn(total_timesteps=total_timesteps)\n",
361
+ " \n",
362
+ " # Evaluate\n",
363
+ " # We'll use the environment logic to step through test set\n",
364
+ " test_env = FraudDetectionEnv(X_test, y_test)\n",
365
+ " obs, _ = test_env.reset()\n",
366
+ " \n",
367
+ " y_pred = []\n",
368
+ " y_true = []\n",
369
+ " \n",
370
+ " done = False\n",
371
+ " while not done:\n",
372
+ " action, _ = model.predict(obs, deterministic=True)\n",
373
+ " obs, reward, done, truncated, info = test_env.step(action)\n",
374
+ " \n",
375
+ " if 'true_label' in info:\n",
376
+ " y_true.append(info['true_label'])\n",
377
+ " y_pred.append(action)\n",
378
+ " \n",
379
+ " # Metrics\n",
380
+ " precision = precision_score(y_true, y_pred, zero_division=0)\n",
381
+ " recall = recall_score(y_true, y_pred, zero_division=0)\n",
382
+ " f1 = f1_score(y_true, y_pred, zero_division=0)\n",
383
+ " \n",
384
+ " return precision, recall, f1\n"
385
+ ]
386
+ },
387
+ {
388
+ "cell_type": "code",
389
+ "execution_count": null,
390
+ "metadata": {},
391
+ "outputs": [
392
+ {
393
+ "name": "stdout",
394
+ "output_type": "stream",
395
+ "text": [
396
+ "\n",
397
+ "=== Processing Dataset: CreditCard ===\n",
398
+ "Loading CreditCard dataset...\n",
399
+ "\n",
400
+ "--- Preprocessing: Raw ---\n",
401
+ "Training PPO...\n"
402
+ ]
403
+ }
404
+ ],
405
+ "source": [
406
+ "# Configuration\n",
407
+ "datasets = ['CreditCard', 'PaySim']\n",
408
+ "preprocessing_methods = ['Raw', 'PCA_CTGAN']\n",
409
+ "agents = ['PPO', 'A2C', 'DQN']\n",
410
+ "\n",
411
+ "results = []\n",
412
+ "\n",
413
+ "for dataset_name in datasets:\n",
414
+ " print(f\"\\n=== Processing Dataset: {dataset_name} ===\")\n",
415
+ " \n",
416
+ " # Load Data\n",
417
+ " if dataset_name == 'CreditCard':\n",
418
+ " X_train, X_test, y_train, y_test = load_creditcard_data(\"creditcard.csv\")\n",
419
+ " else:\n",
420
+ " X_train, X_test, y_train, y_test = load_paysim_data(\"paysim.csv\")\n",
421
+ " \n",
422
+ " # Store original copies\n",
423
+ " X_train_orig, X_test_orig = X_train.copy(), X_test.copy()\n",
424
+ " y_train_orig = y_train.copy()\n",
425
+ "\n",
426
+ " for prep in preprocessing_methods:\n",
427
+ " print(f\"\\n--- Preprocessing: {prep} ---\")\n",
428
+ " \n",
429
+ " # Reset data\n",
430
+ " X_curr_train, X_curr_test = X_train_orig.copy(), X_test_orig.copy()\n",
431
+ " y_curr_train = y_train_orig.copy()\n",
432
+ " \n",
433
+ " # Apply Preprocessing\n",
434
+ " if prep == 'PCA_CTGAN':\n",
435
+ " X_curr_train, y_curr_train, X_curr_test = apply_pca_ctgan(X_curr_train, y_curr_train, X_curr_test, epochs=200)\n",
436
+ " \n",
437
+ " for agent in agents:\n",
438
+ " print(f\"Training {agent}...\")\n",
439
+ " try:\n",
440
+ " prec, rec, f1 = train_and_evaluate(agent, X_curr_train, y_curr_train, X_curr_test, y_test, total_timesteps=5000)\n",
441
+ " print(f\"Result: F1={f1:.4f} (Prec={prec:.4f}, Rec={rec:.4f})\")\n",
442
+ " \n",
443
+ " results.append({\n",
444
+ " 'Dataset': dataset_name,\n",
445
+ " 'Preprocessing': prep,\n",
446
+ " 'Agent': agent,\n",
447
+ " 'Precision': prec,\n",
448
+ " 'Recall': rec,\n",
449
+ " 'F1': f1\n",
450
+ " })\n",
451
+ " except Exception as e:\n",
452
+ " print(f\"Failed to train {agent}: {e}\")\n"
453
+ ]
454
+ },
455
+ {
456
+ "cell_type": "markdown",
457
+ "metadata": {},
458
+ "source": [
459
+ "## 5. Results Visualization"
460
+ ]
461
+ },
462
+ {
463
+ "cell_type": "code",
464
+ "execution_count": null,
465
+ "metadata": {},
466
+ "outputs": [],
467
+ "source": [
468
+ "# Build the results DataFrame, plus a copy sorted by F1 score (descending)\n",
469
+ "results_df = pd.DataFrame(results, columns=['Dataset', 'Agent', 'Preprocessing', 'F1'])\n",
470
+ "sorted_results_df = results_df.sort_values(by='F1', ascending=False)\n",
471
+ "\n",
472
+ "print(\"\\n=== Final Results ===\")\n",
473
+ "print(results_df)\n",
474
+ "\n",
475
+ "# Plotting\n",
476
+ "plt.figure(figsize=(14, 6))\n",
477
+ "\n",
478
+ "plt.subplot(1, 2, 1)\n",
479
+ "sns.barplot(data=results_df[results_df['Dataset'] == 'CreditCard'], x='Agent', y='F1', hue='Preprocessing')\n",
480
+ "plt.title('CreditCard Dataset - F1 Score')\n",
481
+ "plt.ylim(0, 1)\n",
482
+ "\n",
483
+ "plt.subplot(1, 2, 2)\n",
484
+ "sns.barplot(data=results_df[results_df['Dataset'] == 'PaySim'], x='Agent', y='F1', hue='Preprocessing')\n",
485
+ "plt.title('PaySim Dataset - F1 Score')\n",
486
+ "plt.ylim(0, 1)\n",
487
+ "\n",
488
+ "plt.tight_layout()\n",
489
+ "plt.show()"
490
+ ]
491
+ }
492
+ ],
493
+ "metadata": {
494
+ "kernelspec": {
495
+ "display_name": ".pyvenv",
496
+ "language": "python",
497
+ "name": "python3"
498
+ },
499
+ "language_info": {
500
+ "codemirror_mode": {
501
+ "name": "ipython",
502
+ "version": 3
503
+ },
504
+ "file_extension": ".py",
505
+ "mimetype": "text/x-python",
506
+ "name": "python",
507
+ "nbconvert_exporter": "python",
508
+ "pygments_lexer": "ipython3",
509
+ "version": "3.13.7"
510
+ }
511
+ },
512
+ "nbformat": 4,
513
+ "nbformat_minor": 2
514
+ }
false/attention_pooled_embeddings.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcf22e6150629cf983dcc72493abfcdeaa9abbd1ec0ff4bd75c09a1003b88151
3
+ size 9083260
false/custom_env.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gymnasium as gym
import numpy as np
from gymnasium import spaces


class FraudDetectionEnv(gym.Env):
    """
    A custom Gym environment for Fraud Detection using embeddings.

    State: Embedding of a transaction (768-dimensional float32 vector).
    Action: 0 (Declare Not Fraud), 1 (Declare Fraud).
    Reward: Looked up in ``reward_config`` under 'TP', 'FP', 'FN' or 'TN'
        depending on the action and the true label; a missing key yields 0.

    One episode is a single pass over every instance in a shuffled order.
    """

    def __init__(self, embeddings: np.ndarray, labels: np.ndarray, reward_config: dict):
        """
        Args:
            embeddings: Array of shape (num_instances, 768).
            labels: Array with one label per instance (1 = fraud, 0 = not fraud).
            reward_config: Mapping with optional keys 'TP', 'FP', 'FN', 'TN'.

        Raises:
            AssertionError: If the embeddings are not 2-D, the number of
                embeddings and labels differ, or the embedding width is not 768.
        """
        super().__init__()

        # Ensure data consistency
        assert embeddings.ndim == 2, f"Embeddings must be a 2-D array, but got {embeddings.ndim} dimension(s)"
        assert embeddings.shape[0] == labels.shape[0], "Embeddings and labels must have the same number of instances."
        assert embeddings.shape[1] == 768, f"Embeddings must be 768-dimensional, but got {embeddings.shape[1]}"

        self.embeddings = embeddings.astype(np.float32)
        self.labels = labels.astype(np.int64)

        self.num_instances = self.embeddings.shape[0]
        self.reward_config = reward_config

        # Define action and observation space
        # Action Space: Discrete(2) -> 0 for Not Fraud, 1 for Fraud
        self.action_space = spaces.Discrete(2)

        # Observation Space: Box(low, high, shape, dtype) -> 768-dim vector
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(768,), dtype=np.float32)

        # Internal state
        self._current_index = 0
        self._order = np.arange(self.num_instances)
        # Initial shuffle uses NumPy's global RNG; reset() reshuffles with the
        # environment's own generator before each episode.
        np.random.shuffle(self._order)

    def step(self, action: int):
        """
        Score ``action`` against the current instance and advance to the next one.

        Returns:
            (next_observation, reward, done, truncated, info). ``truncated`` is
            always False. When the episode ends, ``next_observation`` is all zeros.
        """
        # Check if episode is done
        if self._current_index >= self.num_instances:
            print("Warning: step() called when episode is already done.")
            # Return dummy values: an all-zero observation, built directly
            # instead of sampling the space and multiplying by zero.
            dummy_observation = np.zeros(self.observation_space.shape, dtype=np.float32)
            return dummy_observation, 0.0, True, False, {}

        # Get current instance data based on shuffled order
        actual_index = self._order[self._current_index]
        current_embedding = self.embeddings[actual_index]
        true_label = self.labels[actual_index]

        # Determine reward; any action other than 0/1 leaves the reward at 0
        reward = 0
        if action == 1 and true_label == 1:
            reward = self.reward_config.get('TP', 0)
        elif action == 1 and true_label == 0:
            reward = self.reward_config.get('FP', 0)
        elif action == 0 and true_label == 1:
            reward = self.reward_config.get('FN', 0)
        elif action == 0 and true_label == 0:
            reward = self.reward_config.get('TN', 0)
        # Always hand back a float reward, whatever numeric type the config holds
        reward = float(reward)

        # Move to the next instance
        self._current_index += 1

        # Check if the episode is finished
        done = self._current_index >= self.num_instances
        truncated = False

        # Get the next observation
        next_observation = np.zeros_like(current_embedding, dtype=np.float32)  # Default for done state
        if not done:
            next_observation = self.embeddings[self._order[self._current_index]]

        info = {
            'true_label': true_label,
            'predicted_action': action,
            'instance_uid': actual_index,
            'is_done': done
        }

        return next_observation, reward, done, truncated, info

    def reset(self, seed=None, options=None):
        """
        Start a new episode: rewind to the first instance and reshuffle the order.

        Returns:
            (initial_observation, info), where ``info['instance_uid']`` is the
            index of the instance behind the returned observation.
        """
        super().reset(seed=seed)  # Handles seeding

        # Reset index and shuffle order for a new episode
        self._current_index = 0
        self._order = np.arange(self.num_instances)
        self.np_random.shuffle(self._order)  # Use the environment's random number generator

        # Get the first observation of the new episode
        initial_observation = self.embeddings[self._order[self._current_index]]

        info = {'instance_uid': self._order[self._current_index]}

        return initial_observation, info

    def close(self):
        # Optional: Implement cleanup
        pass
false/dqn_fraud_checkpoints/dqn_fraud_model_100000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:585c5b2b8027b09c2b64da89fba0d168044312fbc66d1208bf81f4cb757a1677
3
+ size 917517
false/dqn_fraud_checkpoints/dqn_fraud_model_10000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac24b960c596527b905ccee51d9e2f5809f85520c1d13a0135f8b3b6f716d2f4
3
+ size 915979
false/dqn_fraud_checkpoints/dqn_fraud_model_110000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fc33e3614958ac6455a1ef21551a548301a5820caf2374d4932292fc521fab1
3
+ size 917692
false/dqn_fraud_checkpoints/dqn_fraud_model_160000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df091f161c1c7e4b802f2aca0fc5ed2285b47250637f748ecd46277e479325f0
3
+ size 181274
false/dqn_fraud_checkpoints/dqn_fraud_model_20000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb022da5cc8d9e098a2cd7f18c4c869a5c9a82176df20f38ec42011cd5e6105a
3
+ size 916111
false/dqn_fraud_checkpoints/dqn_fraud_model_240000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:625e4ff26f05cec0be165ceed45e5719cdbee8f3eb758eb60d7ba06d3b63fb63
3
+ size 182271
false/dqn_fraud_checkpoints/dqn_fraud_model_30000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e3e1c7b46d356e97b2f6c6aa885ce0edd0cb3035425076dabb44c5d15ab5a7a
3
+ size 916276
false/dqn_fraud_checkpoints/dqn_fraud_model_40000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fabd12cba4197d7cb21562636653d0bba1ecd9cf3b8ea021806df5a41361cf22
3
+ size 916448
false/dqn_fraud_checkpoints/dqn_fraud_model_50000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8be26553b9e8b8f96ee2b7d5e88e62b78d7a2dfb4c0019790192e77d20d76409
3
+ size 916648
false/dqn_fraud_checkpoints/dqn_fraud_model_60000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c87078831ca9a1e05b70305a02916ed6e0deb00a4f60dd5c7506247fea00fcca
3
+ size 916817
false/dqn_fraud_checkpoints/dqn_fraud_model_70000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bea3ac686d75f6ee47527fa0e0745623b78e99c3d2e6abbd6587fc0d84987b1
3
+ size 916980
false/dqn_fraud_checkpoints/dqn_fraud_model_80000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9e11c031044237d777f31088021a35fa6cf1f103b6b961dccee6ce284155211
3
+ size 179951
false/dqn_fraud_checkpoints/dqn_fraud_model_90000_steps.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf6d054488ef6e3edf1016c06cb6bdeb53399ee08d57a1c931c9839534b2c989
3
+ size 917357
false/dqn_fraud_tb/DQN_4/events.out.tfevents.1747182467.archlinux.48104.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f87439c552fc69f40a847fafda6d5cb09a708cd67dc5e4b6060fdf1a2727787c
3
+ size 6148
false/dqn_fraud_tb/DQN_5/events.out.tfevents.1764545110.archlinux.67971.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1223f1afb9acd6b95baea0bc047561d7bb25d8591b14026bcc7cddcba07267e1
3
+ size 6148
false/dqn_fraud_tb/DQN_6/events.out.tfevents.1764546770.archlinux.73972.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:826cbeb2ca5a3bfd30c75fee2cd683f512e11a7b6340e952868fe789800f38a7
3
+ size 6148
false/dqn_fraud_tb/DQN_7/events.out.tfevents.1764584658.archlinux.5620.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64b7581c3b247c8e6add6398488433aef03712ce49f29267d4aa4d32f224bfc4
3
+ size 6148
false/dqn_fraud_tb/DQN_8/events.out.tfevents.1764668813.archlinux.47939.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96301031addb0f2f5e872f0266a1b348f8d6800013a9e1baedeae7743d9d48f6
3
+ size 6148
false/dqn_fraud_tb/evaluation/eval_20250514-014902/events.out.tfevents.1747183742.archlinux.48104.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e5d9747632cf39d7324fed3343bb8984ec5d5007a90ede736d21cbb1bda54a6
3
+ size 307820
false/dqn_fraud_tb/evaluation/eval_20251201-002715/events.out.tfevents.1764545235.archlinux.67971.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd4083078fa50c8f52f508982ac76662b624b93295919a71751d71c6c955f534
3
+ size 307902
false/dqn_fraud_tb/evaluation/eval_20251201-005435/events.out.tfevents.1764546875.archlinux.73972.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba41d0fa7f85af90fcbbd3cd79599f00a5fd225caecdc6a574f7833a933052eb
3
+ size 312770
false/dqn_fraud_tb/evaluation/eval_20251201-112610/events.out.tfevents.1764584770.archlinux.5620.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32e684588cbd59bcaa29bc43f01e98d297dc0e7b12e42d4416ce12200ea0aa8c
3
+ size 308804
false/dqn_fraud_tb/evaluation/eval_20251202-104820/events.out.tfevents.1764668900.archlinux.47939.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6079783a5ceb7bc66fd81adefe0d4ec007172daefa12dcbc85274050421c720c
3
+ size 315329
false/embeddings.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3e96b96a7c7f0610a88a48f8a26044e0d209c9054af7153d8d2f369bb1b5a8e
3
+ size 9083260
false/fraud-detection-with-distilbert.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
false/models/a2c_fraud_model.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc864a6872392233e3e6243724fbaee59513c5974b0db2b87d19d59614d93fef
3
+ size 945556