Simplified colab for training VST models.

jesseengel · Magenta Team · commit 761d6e3ed131 · 2022-04-18T16:04:54.000-07:00
PiperOrigin-RevId: 442655762
diff --git a/ddsp/colab/demos/README.md b/ddsp/colab/demos/README.md
@@ -10,3 +10,6 @@ Here are colab notebooks for demonstrating neat things you can do with DDSP.
 
 *   [pitch_detection](https://colab.research.google.com/github/magenta/ddsp/blob/main/ddsp/colab/demos/pitch_detection.ipynb):
     Demonstration of self-supervised pitch detection models from [2020 ICML Workshop paper](https://openreview.net/forum?id=RlVTYWhsky7).
+
+*   [Train_VST](https://colab.research.google.com/github/magenta/ddsp/blob/main/ddsp/colab/demos/Train_VST.ipynb):
+    Simplified training colab for the real-time audio plugin (WIP).
diff --git a/ddsp/colab/demos/Train_VST.ipynb b/ddsp/colab/demos/Train_VST.ipynb
@@ -0,0 +1,318 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "VxPuPR0j5Gs7"
+      },
+      "outputs": [],
+      "source": [
+        "# ------------------------------------------------------------------------------\n",
+        "#     Copyright 2022 Google LLC. All Rights Reserved.\n",
+        "#\n",
+        "#     Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+        "#     you may not use this file except in compliance with the License.\n",
+        "#     You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "#     Unless required by applicable law or agreed to in writing, software\n",
+        "#     distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+        "#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+        "#     See the License for the specific language governing permissions and\n",
+        "#     limitations under the License.\n",
+        "# ------------------------------------------------------------------------------\n",
+        "\n",
+        "#@title Train your own DDSP-VST Model\n",
+        "#@markdown Just press the &#9654;&#65039; button!\n",
+        "\n",
+        "#@markdown \u003cbr/\u003e Custom models can train on as little as 10 minutes of audio (`.wav` or `.mp3`). Best results come from \"monophonic\" (only one note at a time) audio from a single recording session (same mic, same reverb).\n",
+        "\n",
+        "#@markdown Training typically takes ~2-3 hours with free Colab, and less than an hour with ColabPro.\n",
+        "\n",
+        "\n",
+        "#@markdown We recommend using Google Drive for training, so that your audio loads faster and your model is saved during training. Just create a folder in your Drive with your audio files in it, and select that folder. If you don't use Drive, you can still upload audio through the browser (slower) and download the final trained model.\n",
+        "\n",
+        "#@markdown Colab often kicks people off after ~12 hours, but hopefully that shouldn't be a problem.\n",
+        "\n",
+        "#@markdown After training, it should automatically export and download your model as `{my_name}.tflite`, which you can use by dropping it into the VST custom models folder. If it doesn't download automatically, you can find the file in the `ddsp-training-{date-time}` folder, either on this page (click the &#128193; icon on the left) or in the folder you selected from your Drive.\n",
+        "\n",
+        "\n",
+        "#@markdown \u003cbr/\u003e \u003cbr/\u003e\n",
+        "#@markdown Name your model!\n",
+        "Name = 'MyInstrument' #@param {type:\"string\"}\n",
+        "Name = Name.replace(' ', '_')\n",
+        "\n",
+        "#@markdown \u003cbr/\u003e\n",
+        "#@markdown Use Google Drive for training?\n",
+        "Google_Drive = True #@param {type:\"boolean\"}\n",
+        "\n",
+        "\n",
+        "\n",
+        "#@markdown \u003cbr/\u003e \u003cbr/\u003e\n",
+        "#@markdown ### Advanced Options\n",
+        "\n",
+        "#@markdown \u003ca href=\"https://colab.research.google.com/github/magenta/ddsp/blob/main/ddsp/colab/demos/Train_VST.ipynb\" target=\"_parent\"\u003e\u003cimg src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/\u003e\u003c/a\u003e\n",
+        "\n",
+        "Training_Steps = 30000 #@param {type:\"integer\"}\n",
+        "\n",
+        "#@markdown \u003cbr/\u003e\n",
+        "#@markdown Ignore previous checkpoints and start a fresh run\n",
+        "\n",
+        "Ignore_Previous = False #@param {type:\"boolean\"}\n",
+        "\n",
+        "\n",
+        "# Sample_Rate = '16kHz'  #@param ['16kHz', '32kHz', '48kHz']\n",
+        "# Sample_Rate = {'16kHz': 16000, '32kHz': 32000, '48kHz': 48000}[Sample_Rate]\n",
+        "# Model_Gin_File = 'models/vst/vst.gin'\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "# ------------------------------------------------------------------------------\n",
+        "# Install\n",
+        "# ------------------------------------------------------------------------------\n",
+        "print('Installing DDSP...')\n",
+        "print('This should take about 2 minutes...')\n",
+        "!pip install -U ddsp[data_preparation]==3.3.4 \u0026\u003e /dev/null\n",
+        "!pip install ipyfilechooser \u0026\u003e /dev/null\n",
+        "\n",
+        "\n",
+        "# ------------------------------------------------------------------------------\n",
+        "# Imports\n",
+        "# ------------------------------------------------------------------------------\n",
+        "print('Importing Libraries...')\n",
+        "print()\n",
+        "import datetime\n",
+        "import glob\n",
+        "import os\n",
+        "import shutil\n",
+        "\n",
+        "from ddsp import spectral_ops\n",
+        "from ddsp.colab import colab_utils\n",
+        "import ddsp.training\n",
+        "import gin\n",
+        "from google.colab import drive\n",
+        "from ipyfilechooser import FileChooser\n",
+        "import pydub\n",
+        "from matplotlib import pyplot as plt\n",
+        "import numpy as np\n",
+        "import tensorflow as tf\n",
+        "\n",
+        "from ddsp.training.data_preparation.prepare_tfrecord_lib import _load_audio_as_array as load_audio\n",
+        "\n",
+        "\n",
+        "# ------------------------------------------------------------------------------\n",
+        "# Functions\n",
+        "# ------------------------------------------------------------------------------\n",
+        "def directory_has_files(target_dir):\n",
+        "  \"\"\"Returns True if the glob `target_dir/*` matches at least one path.\"\"\"\n",
+        "  matches = glob.glob(os.path.join(target_dir, '*'))\n",
+        "  return len(matches) \u003e 0\n",
+        "\n",
+        "\n",
+        "def get_audio_files(drive_dir, audio_dir):\n",
+        "  \"\"\"Collects the training audio and copies it into `audio_dir`.\n",
+        "\n",
+        "  Args:\n",
+        "    drive_dir: Folder searched for `*.mp3` and `*.wav` files. If empty, the\n",
+        "      file list comes from `colab_utils.upload()` instead.\n",
+        "    audio_dir: Folder that receives the copies (expected to exist already).\n",
+        "      Spaces in the copied file names are replaced with underscores.\n",
+        "\n",
+        "  Raises:\n",
+        "    FileNotFoundError: If `drive_dir` is given but holds no MP3 or WAV files.\n",
+        "  \"\"\"\n",
+        "  if not drive_dir:\n",
+        "    audio_paths, _ = colab_utils.upload()\n",
+        "  else:\n",
+        "    audio_paths = []\n",
+        "    for extension in ('*.mp3', '*.wav'):\n",
+        "      audio_paths += glob.glob(os.path.join(drive_dir, extension))\n",
+        "    if not audio_paths:\n",
+        "      raise FileNotFoundError(\"Sorry, it seems that there aren't any MP3 or \"\n",
+        "                              f\"WAV files in your folder ({drive_dir}). Try \"\n",
+        "                              \"running again and choose a different folder.\")\n",
+        "\n",
+        "  # Copy every file under a space-free name.\n",
+        "  for src in audio_paths:\n",
+        "    safe_name = os.path.basename(src).replace(' ', '_')\n",
+        "    target = os.path.join(audio_dir, safe_name)\n",
+        "    print(f'Copying {src} to {target}')\n",
+        "    shutil.copy(src, target)\n",
+        "\n",
+        "\n",
+        "def prepare_dataset(audio_dir,\n",
+        "                    data_dir,\n",
+        "                    sample_rate=16000,\n",
+        "                    frame_rate=50,\n",
+        "                    example_secs=4.0,\n",
+        "                    hop_secs=1.0,\n",
+        "                    viterbi=True,\n",
+        "                    center=True):\n",
+        "  \"\"\"Builds TFRecord shards from `audio_dir` unless `data_dir` has files.\n",
+        "\n",
+        "  Runs the `ddsp_prepare_tfrecord` command-line tool, asking for 10 shards\n",
+        "  written as `data_dir/train.tfrecord`. The tool's output is discarded.\n",
+        "\n",
+        "  Args:\n",
+        "    audio_dir: Folder whose contents (`audio_dir/*`) are the input audio.\n",
+        "    data_dir: Output folder; if it already contains anything, nothing is done.\n",
+        "    sample_rate: Passed to the tool's `--sample_rate` flag.\n",
+        "    frame_rate: Passed to the tool's `--frame_rate` flag.\n",
+        "    example_secs: Passed to the tool's `--example_secs` flag.\n",
+        "    hop_secs: Passed to the tool's `--hop_secs` flag.\n",
+        "    viterbi: Passed to the tool's `--viterbi` flag.\n",
+        "    center: Passed to the tool's `--center` flag.\n",
+        "  \"\"\"\n",
+        "  # Guard: reuse whatever is already in the data folder.\n",
+        "  if directory_has_files(data_dir):\n",
+        "    print(f'Dataset already exists in `{data_dir}`')\n",
+        "    return\n",
+        "\n",
+        "  print(f'Preparing new dataset from `{audio_dir}`')\n",
+        "  print()\n",
+        "  print('Creating dataset...')\n",
+        "  print('This usually takes around 2-3 minutes for each minute of audio')\n",
+        "  print('(10 minutes of training audio -\u003e 20-30 minutes)')\n",
+        "\n",
+        "  audio_filepattern = os.path.join(audio_dir, '*')\n",
+        "  !ddsp_prepare_tfrecord \\\n",
+        "  --input_audio_filepatterns=$audio_filepattern \\\n",
+        "  --output_tfrecord_path=$data_dir/train.tfrecord \\\n",
+        "  --num_shards=10 \\\n",
+        "  --sample_rate=$sample_rate \\\n",
+        "  --frame_rate=$frame_rate \\\n",
+        "  --example_secs=$example_secs \\\n",
+        "  --hop_secs=$hop_secs \\\n",
+        "  --viterbi=$viterbi \\\n",
+        "  --center=$center \\\n",
+        "  --alsologtostderr \u0026\u003e /dev/null\n",
+        "\n",
+        "\n",
+        "def train(model_dir, data_dir, steps=30000):\n",
+        "  \"\"\"Trains a model by invoking the `ddsp_run` command-line tool.\n",
+        "\n",
+        "  Uses the `models/vst/vst.gin` and `datasets/tfrecord.gin` configs, with a\n",
+        "  batch size of 16, a save every 300 steps, and 3 checkpoints kept.\n",
+        "\n",
+        "  Args:\n",
+        "    model_dir: Folder passed as `--save_dir`.\n",
+        "    data_dir: Folder holding the `train.tfrecord*` shards to read.\n",
+        "    steps: Value given to `train_util.train.num_steps`.\n",
+        "  \"\"\"\n",
+        "  file_pattern = os.path.join(data_dir, 'train.tfrecord*')\n",
+        "  !ddsp_run \\\n",
+        "  --mode=train \\\n",
+        "  --save_dir=\"$model_dir\" \\\n",
+        "  --gin_file=models/vst/vst.gin \\\n",
+        "  --gin_file=datasets/tfrecord.gin \\\n",
+        "  --gin_param=\"TFRecordProvider.file_pattern='$file_pattern'\" \\\n",
+        "  --gin_param=\"TFRecordProvider.centered=True\" \\\n",
+        "  --gin_param=\"TFRecordProvider.frame_rate=50\" \\\n",
+        "  --gin_param=\"batch_size=16\" \\\n",
+        "  --gin_param=\"train_util.train.num_steps=$steps\" \\\n",
+        "  --gin_param=\"train_util.train.steps_per_save=300\" \\\n",
+        "  --gin_param=\"trainers.Trainer.checkpoints_to_keep=3\"\n",
+        "\n",
+        "  # --gin_param=\"train.data_provider=@ExperimentalDataProvider()\" \\\n",
+        "  # --gin_param=\"ExperimentalRecordProvider.data_dir='$data_dir'\" \\\n",
+        "  # --gin_param=\"ExperimentalRecordProvider.sample_rate=16000\" \\\n",
+        "  # --gin_param=\"ExperimentalRecordProvider.frame_rate=50\" \\\n",
+        "\n",
+        "\n",
+        "def launch_tensorboard(save_dir):\n",
+        "  \"\"\"Reloads the TensorBoard notebook extension and starts it on `save_dir`.\"\"\"\n",
+        "  %reload_ext tensorboard\n",
+        "  import tensorboard as tb\n",
+        "  logdir_flag = f'--logdir \"{save_dir}\"'\n",
+        "  tb.notebook.start(logdir_flag)\n",
+        "\n",
+        "\n",
+        "def reset_state(data_dir, audio_dir, model_dir):\n",
+        "  \"\"\"Empties the scratch folders and makes sure every folder exists.\n",
+        "\n",
+        "  `data_dir` and `audio_dir` are each deleted (if present) and recreated\n",
+        "  empty, independently of one another, so files left over from an earlier\n",
+        "  run cannot end up in a new dataset. `model_dir` is only created, never\n",
+        "  cleared.\n",
+        "\n",
+        "  Python file APIs are used instead of shell commands so that paths that\n",
+        "  contain spaces are treated as a single path.\n",
+        "\n",
+        "  Args:\n",
+        "    data_dir: Scratch folder for the prepared dataset.\n",
+        "    audio_dir: Scratch folder for the copied audio files.\n",
+        "    model_dir: Folder for training output; created if missing.\n",
+        "  \"\"\"\n",
+        "  for scratch_dir in (data_dir, audio_dir):\n",
+        "    if os.path.isdir(scratch_dir):\n",
+        "      shutil.rmtree(scratch_dir)\n",
+        "    os.makedirs(scratch_dir, exist_ok=True)\n",
+        "  os.makedirs(model_dir, exist_ok=True)\n",
+        "\n",
+        "\n",
+        "def export_and_download(model_dir, model_name=Name):\n",
+        "  \"\"\"Exports the trained model to TFLite and starts a browser download.\n",
+        "\n",
+        "  Runs the `ddsp_export` command-line tool on `model_dir`, writing into\n",
+        "  `model_dir/export`, then copies `export/tflite/model.tflite` to\n",
+        "  `model_dir/{model_name}.tflite` and downloads that copy.\n",
+        "\n",
+        "  Args:\n",
+        "    model_dir: Folder with the trained model; also receives the named copy.\n",
+        "    model_name: Base name of the downloaded `.tflite` file. The default is\n",
+        "      the value of the `Name` form field when this function was defined.\n",
+        "\n",
+        "  Raises:\n",
+        "    FileNotFoundError: If the export did not produce `model.tflite`.\n",
+        "  \"\"\"\n",
+        "  export_path = os.path.join(model_dir, 'export')\n",
+        "\n",
+        "  # Quote the paths so a folder name with spaces stays a single argument.\n",
+        "  !ddsp_export \\\n",
+        "  --model_path=\"$model_dir\" \\\n",
+        "  --save_dir=\"$export_path\" \\\n",
+        "  --inference_model=vst_stateless_predict_controls \\\n",
+        "  --tflite \\\n",
+        "  --notfjs\n",
+        "\n",
+        "  # Just copy the tflite model. shutil.copy is safe for paths with spaces\n",
+        "  # and fails loudly if the exported file is missing.\n",
+        "  tflite_fp = os.path.join(export_path, 'tflite', 'model.tflite')\n",
+        "  my_model = os.path.join(model_dir, f'{model_name}.tflite')\n",
+        "  shutil.copy(tflite_fp, my_model)\n",
+        "  print('Export Complete! Downloading...')\n",
+        "  print(f'You can also find your model at {my_model}')\n",
+        "  colab_utils.download(my_model)\n",
+        "\n",
+        "\n",
+        "def get_model_dir(base_dir):\n",
+        "  \"\"\"Chooses the training folder: the latest existing run or a new one.\n",
+        "\n",
+        "  Args:\n",
+        "    base_dir: Folder searched for `ddsp-training-*` entries, and the parent\n",
+        "      of any newly named folder.\n",
+        "\n",
+        "  Returns:\n",
+        "    The most recent existing `ddsp-training-*` path, or, when none exists or\n",
+        "    the global `Ignore_Previous` is set, a new\n",
+        "    `ddsp-training-{YYYY-MM-DD-HHMM}` path. The folder is not created here.\n",
+        "  \"\"\"\n",
+        "  base_str = 'ddsp-training'\n",
+        "  dirs = tf.io.gfile.glob(os.path.join(base_dir, f'{base_str}-*'))\n",
+        "  if dirs and not Ignore_Previous:\n",
+        "    # The timestamp suffix orders chronologically as plain text, so take the\n",
+        "    # maximum explicitly instead of relying on the order glob returns.\n",
+        "    return max(dirs)\n",
+        "  now = datetime.datetime.now().strftime('%Y-%m-%d-%H%M')\n",
+        "  return os.path.join(base_dir, f'{base_str}-{now}')\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "def run_training(drive_dir=''):\n",
+        "  \"\"\"Runs the whole pipeline: setup, dataset, training, then export.\n",
+        "\n",
+        "  Args:\n",
+        "    drive_dir: Folder that holds the audio files and under which the model\n",
+        "      folder is placed. If empty, audio is requested via upload and the\n",
+        "      model folder path is relative to the working directory.\n",
+        "  \"\"\"\n",
+        "  # Setup: data and audio are kept locally, the model goes under `drive_dir`.\n",
+        "  data_dir, audio_dir = 'data/', 'audio/'\n",
+        "  model_dir = get_model_dir(drive_dir)\n",
+        "  reset_state(data_dir, audio_dir, model_dir)\n",
+        "\n",
+        "  # Dataset.\n",
+        "  get_audio_files(drive_dir, audio_dir)\n",
+        "  prepare_dataset(audio_dir, data_dir)\n",
+        "\n",
+        "  # Train.\n",
+        "  print()\n",
+        "  print('Training...')\n",
+        "  train(model_dir, data_dir, steps=Training_Steps)\n",
+        "\n",
+        "  # Export.\n",
+        "  print()\n",
+        "  print('Exporting model...')\n",
+        "  export_and_download(model_dir)\n",
+        "\n",
+        "\n",
+        "def run(Google_Drive=True):\n",
+        "  \"\"\"Starts the workflow, optionally picking the audio folder from Drive.\n",
+        "\n",
+        "  Args:\n",
+        "    Google_Drive: If True, mounts Drive at `gdrive` and displays a folder\n",
+        "      chooser; training starts from the chooser's callback once a folder is\n",
+        "      selected. If False, training starts right away with no Drive folder.\n",
+        "  \"\"\"\n",
+        "  if not Google_Drive:\n",
+        "    print('Skipping Drive Setup...')\n",
+        "    print('Upload Audio Manually...')\n",
+        "    run_training(drive_dir='')\n",
+        "    return\n",
+        "\n",
+        "  print('Mounting Google Drive...')\n",
+        "  drive.mount('gdrive', force_remount=True, timeout_ms=10000)\n",
+        "\n",
+        "  def run_after_select(chooser):\n",
+        "    run_training(drive_dir=chooser.selected_path)\n",
+        "\n",
+        "  fc = FileChooser('gdrive/MyDrive')\n",
+        "  fc.show_only_dirs = True\n",
+        "  fc.title = '\u003cb\u003ePick a folder with the audio files for training...\u003c/b\u003e'\n",
+        "  fc.register_callback(run_after_select)\n",
+        "  display(fc)\n",
+        "\n",
+        "\n",
+        "run(Google_Drive)\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "collapsed_sections": [],
+      "name": "Train_VST.ipynb",
+      "private_outputs": true,
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
diff --git a/ddsp/version.py b/ddsp/version.py
@@ -18,4 +18,4 @@
 pulling in all the dependencies in __init__.py.
 """
 
-__version__ = '3.3.4'
+__version__ = '3.3.5'