Spaces:
Sleeping
Sleeping
Commit
·
08f5009
1
Parent(s):
486af19
debug
Browse files
- requirements.txt +2 -1
- src/data.py +2 -9
requirements.txt
CHANGED
|
@@ -6,4 +6,5 @@ numpy
|
|
| 6 |
scikit-learn==1.7.1
|
| 7 |
joblib
|
| 8 |
tabulate
|
| 9 |
-
datasets
|
|
|
|
|
|
| 6 |
scikit-learn==1.7.1
|
| 7 |
joblib
|
| 8 |
tabulate
|
| 9 |
+
datasets
|
| 10 |
+
torch==2.8.0
|
src/data.py
CHANGED
|
@@ -6,22 +6,15 @@ As an input it takes a list of SMILES and it outputs a nested dictionary with
|
|
| 6 |
SMILES and target names as keys.
|
| 7 |
"""
|
| 8 |
|
| 9 |
-
import os
|
| 10 |
from typing import Iterable, Literal
|
| 11 |
|
| 12 |
import numpy as np
|
| 13 |
import torch
|
| 14 |
|
| 15 |
-
from sklearn.preprocessing import StandardScaler
|
| 16 |
-
from statsmodels.distributions.empirical_distribution import ECDF
|
| 17 |
-
|
| 18 |
-
from rdkit import Chem, DataStructs
|
| 19 |
-
from rdkit.Chem import Descriptors, rdFingerprintGenerator
|
| 20 |
-
from rdkit.Chem.rdchem import Mol
|
| 21 |
-
|
| 22 |
-
from .utils import USED_200_DESCR, Standardizer, load_pickle, write_pickle, KNOWN_DESCR
|
| 23 |
from .preprocess import normalize_features
|
| 24 |
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def get_descriptor_dataset(
|
| 27 |
data_path: str,
|
|
|
|
| 6 |
SMILES and target names as keys.
|
| 7 |
"""
|
| 8 |
|
|
|
|
| 9 |
from typing import Iterable, Literal
|
| 10 |
|
| 11 |
import numpy as np
|
| 12 |
import torch
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
from .preprocess import normalize_features
|
| 15 |
|
| 16 |
+
KNOWN_DESCR = ["ecfps", "rdkit_descr_quantiles", "maccs", "tox"]
|
| 17 |
+
|
| 18 |
|
| 19 |
def get_descriptor_dataset(
|
| 20 |
data_path: str,
|