admin committed on
Commit babad42 · 1 Parent(s): db92ea3
Files changed (1)
  1. README.md +72 -0
README.md CHANGED
@@ -7,8 +7,80 @@ The demucs model in the ICASSP 2024 Cadenza Challenge is an innovative sound sep
 
  ## Usage
  ```python
+ import torch
+ import torchaudio
+ from typing import Callable
+ from functools import partial
+ from dataclasses import dataclass
  from modelscope import snapshot_download
+ from torchaudio.models import hdemucs_high
+
+ @dataclass
+ class SourceSeparationBundle:
+     """Dataclass that bundles components for performing source separation.
+
+     Example
+         >>> import torchaudio
+         >>> from torchaudio.pipelines import CONVTASNET_BASE_LIBRI2MIX
+         >>> import torch
+         >>>
+         >>> # Build the separation model.
+         >>> model = CONVTASNET_BASE_LIBRI2MIX.get_model()
+         >>> 100%|███████████████████████████████|19.1M/19.1M [00:04<00:00, 4.93MB/s]
+         >>>
+         >>> # Instantiate the test set of Libri2Mix dataset.
+         >>> dataset = torchaudio.datasets.LibriMix("/home/datasets/", subset="test")
+         >>>
+         >>> # Apply source separation on mixture audio.
+         >>> for i, data in enumerate(dataset):
+         >>>     sample_rate, mixture, clean_sources = data
+         >>>     # Make sure the shape of input suits the model requirement.
+         >>>     mixture = mixture.reshape(1, 1, -1)
+         >>>     estimated_sources = model(mixture)
+         >>>     score = si_snr_pit(estimated_sources, clean_sources)  # for demonstration
+         >>>     print(f"Si-SNR score is : {score}.")
+         >>>     break
+         >>> Si-SNR score is : 16.24.
+         >>>
+     """
+
+     _model_path: str
+     _model_factory_func: Callable[[], torch.nn.Module]
+     _sample_rate: int
+
+     @property
+     def sample_rate(self) -> int:
+         """Sample rate of the audio that the model is trained on.
+
+         :type: int
+         """
+         return self._sample_rate
+
+     def get_model(self) -> torch.nn.Module:
+         """Construct the model and load the pretrained weight."""
+         model = self._model_factory_func()
+         path = torchaudio.utils.download_asset(self._model_path)
+         state_dict = torch.load(path)
+         model.load_state_dict(state_dict)
+         model.eval()
+         return model
+
  model_dir = snapshot_download('monetjoe/hdemucs_high_musdbhq')
+ HDEMUCS_HIGH_MUSDB = SourceSeparationBundle(
+     _model_path=f"{model_dir}/hdemucs_high_musdbhq_only.pt",
+     _model_factory_func=partial(
+         hdemucs_high, sources=["drums", "bass", "other", "vocals"]
+     ),
+     _sample_rate=44100,
+ )
+ HDEMUCS_HIGH_MUSDB.__doc__ = """Pre-trained music source separation pipeline with
+ *Hybrid Demucs* :cite:`defossez2021hybrid` trained on the training set of MUSDB-HQ :cite:`MUSDB18HQ`.
+
+ The model is constructed by :func:`~torchaudio.models.hdemucs_high`.
+ Training was performed in the original HDemucs repository `here <https://github.com/facebookresearch/demucs/>`__.
+
+ Please refer to :class:`SourceSeparationBundle` for usage instructions.
+ """
  ```
 
  ## Maintenance
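
For context, here is a minimal sketch of how the `HDEMUCS_HIGH_MUSDB` bundle defined in the README above could be used to separate a mixture. It is not part of the commit: the input file name, the resampling step, and the stem-to-name mapping are illustrative assumptions; only `get_model()`, `sample_rate`, and the four-source output follow from the bundle definition.

```python
import torch
import torchaudio

# Load the pretrained Hybrid Demucs model via the bundle defined in the README.
model = HDEMUCS_HIGH_MUSDB.get_model()

# "mixture.wav" is a placeholder path; the model expects stereo audio at 44.1 kHz.
waveform, sample_rate = torchaudio.load("mixture.wav")
if sample_rate != HDEMUCS_HIGH_MUSDB.sample_rate:
    waveform = torchaudio.functional.resample(
        waveform, sample_rate, HDEMUCS_HIGH_MUSDB.sample_rate
    )

# Forward pass over a (batch, channels, time) tensor; the output has shape
# (batch, num_sources, channels, time), with sources ordered as in the bundle.
with torch.no_grad():
    sources = model(waveform.unsqueeze(0)).squeeze(0)

# Map each separated stem to its name and write it out (names assumed from the bundle).
for name, stem in zip(["drums", "bass", "other", "vocals"], sources):
    torchaudio.save(f"{name}.wav", stem, HDEMUCS_HIGH_MUSDB.sample_rate)
```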