antoniaebner committed on
Commit
08f5009
·
1 Parent(s): 486af19
Files changed (2) hide show
  1. requirements.txt +2 -1
  2. src/data.py +2 -9
requirements.txt CHANGED
@@ -6,4 +6,5 @@ numpy
6
  scikit-learn==1.7.1
7
  joblib
8
  tabulate
9
- datasets
 
 
6
  scikit-learn==1.7.1
7
  joblib
8
  tabulate
9
+ datasets
10
+ torch==2.8.0
src/data.py CHANGED
@@ -6,22 +6,15 @@ As an input it takes a list of SMILES and it outputs a nested dictionary with
6
  SMILES and target names as keys.
7
  """
8
 
9
- import os
10
  from typing import Iterable, Literal
11
 
12
  import numpy as np
13
  import torch
14
 
15
- from sklearn.preprocessing import StandardScaler
16
- from statsmodels.distributions.empirical_distribution import ECDF
17
-
18
- from rdkit import Chem, DataStructs
19
- from rdkit.Chem import Descriptors, rdFingerprintGenerator
20
- from rdkit.Chem.rdchem import Mol
21
-
22
- from .utils import USED_200_DESCR, Standardizer, load_pickle, write_pickle, KNOWN_DESCR
23
  from .preprocess import normalize_features
24
 
 
 
25
 
26
  def get_descriptor_dataset(
27
  data_path: str,
 
6
  SMILES and target names as keys.
7
  """
8
 
 
9
  from typing import Iterable, Literal
10
 
11
  import numpy as np
12
  import torch
13
 
 
 
 
 
 
 
 
 
14
  from .preprocess import normalize_features
15
 
16
+ KNOWN_DESCR = ["ecfps", "rdkit_descr_quantiles", "maccs", "tox"]
17
+
18
 
19
  def get_descriptor_dataset(
20
  data_path: str,