v1.1.0

martinpacesa · web-flow · commit 13cf70759006 · 2024-10-14T13:59:55.000+02:00
New update fixes problems with installation and compatibility:
- updated to python 3.10
- updated biopython to >1.8
- added CUDA version dependence for the installation script to choose correct jaxlib version
- merged all package requirements into a single conda command
- updated Readme
- fixed relative paths for running the scripts to enable execution from anywhere
diff --git a/README.md b/README.md
@@ -10,19 +10,23 @@ First you need to clone this repository. Replace **[install_folder]** with the p
 
 `git clone https://github.com/martinpacesa/BindCraft [install_folder]`
 
-The navigate into your install folder using *cd* and run the installation code. In *pkg_manager* specify whether you are using 'mamba' or 'conda', if left blank it will use 'conda' by default.
+Then navigate into your install folder using *cd* and run the installation code. BindCraft requires a CUDA-compatible Nvidia graphics card to run. In the *cuda* setting, please specify the CUDA version compatible with your graphics card, for example '11.8'. If unsure, leave blank but it's possible that the installation might select the wrong version, which will lead to errors. In *pkg_manager* specify whether you are using 'mamba' or 'conda', if left blank it will use 'conda' by default. 
 
-`bash install_bindcraft.sh --pkg_manager 'conda'`
+`bash install_bindcraft.sh --cuda '12.4' --pkg_manager 'conda'`
 
 ## Google Colab
 <a href="https://colab.research.google.com/github/martinpacesa/BindCraft/blob/main/notebooks/BindCraft.ipynb">
   <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
 </a> <br />
 We prepared a convenient google colab notebook to test the bindcraft code functionalities. However, as the pipeline requires significant amount of GPU memory to run for larger target+binder complexes, we highly recommend to run it using a local installation and at least 32 Gb of GPU memory.
 
+**Always try to trim the input target PDB to the smallest size possible! It will significantly speed up the binder generation and minimise the GPU memory requirements.**
+
+**Be ready to run at least a few hundred trajectories to see some accepted binders, for difficult targets it might even be a few thousand.**
+
 
 ## Running the script locally and explanation of settings
-To run the script locally, first you need to configure your target .json file in the *target_settings* folder. In the json file are the following settings:
+To run the script locally, first you need to configure your target .json file in the *settings_target* folder. In the json file are the following settings:
 
 ```
 design_path         -> path where to save designs and statistics
@@ -35,16 +39,16 @@ number_of_final_designs   -> how many designs that pass all filters to aim for,
 ```
 Then run the binder design script:
 
-`sbatch bindcraft.slurm --settings 'path/to/settings_target/.json' --filters 'path/to/settings_filters/.json' --advanced 'path/to/settings_advanced/.json'`
+`sbatch ./bindcraft.slurm --settings './settings_target/PDL1.json' --filters './settings_filters/default_filters.json' --advanced './settings_advanced/4stage_multimer.json'`
 
 The *settings* flag should point to your target .json which you set above. The *filters* flag points to the json where the design filters are specified (default is ./filters/default_filters.json). The *advanced* flag points to your advanced settings (default is ./advanced_settings/4stage_multimer.json). If you leave out the filters and advanced settings flags it will automatically point to the defaults.
 
 Alternatively, if your machine does not support SLURM, you can run the code directly by activating the environment in conda and running the python code:
 
 ```
 conda activate BindCraft
-cd /path/to/install/folder/
-python -u /work/lpdi/users/mpacesa/Pipelines/BindCraft/bindcraft.py --settings 'path/to/settings_target/.json' --filters 'path/to/settings_filters/.json' --advanced 'path/to/settings_advanced/.json'
+cd /path/to/bindcraft/folder/
+python -u ./bindcraft.py --settings './settings_target/PDL1.json' --filters './settings_filters/default_filters.json' --advanced './settings_advanced/4stage_multimer.json'
 ```
 
 **We recommend to generate at least a 100 final designs passing all filters, then order the top 5-20 for experimental characterisation.** If high affinity binders are required, it is better to screen more, as the ipTM metric used for ranking is not a good predictor for affinity, but has been shown to be a good binary predictor of binding. 
diff --git a/bindcraft.py b/bindcraft.py
@@ -34,9 +34,10 @@
 design_models, prediction_models, multimer_validation = load_af2_models(advanced_settings["use_multimer_design"])
 
 ### set package settings
-advanced_settings["af_params_dir"] = os.path.realpath('')
-advanced_settings["dssp_path"] = os.path.join(os.path.realpath(''), 'functions/dssp')
-advanced_settings["dalphaball_path"] = os.path.join(os.path.realpath(''), 'functions/DAlphaBall.gcc')
+bindcraft_folder = os.path.dirname(os.path.realpath(__file__))
+advanced_settings["af_params_dir"] = bindcraft_folder
+advanced_settings["dssp_path"] = os.path.join(bindcraft_folder, 'functions/dssp')
+advanced_settings["dalphaball_path"] = os.path.join(bindcraft_folder, 'functions/DAlphaBall.gcc')
 
 ### generate directories, design path names can be found within the function
 design_paths = generate_directories(target_settings["design_path"])
diff --git a/bindcraft.slurm b/bindcraft.slurm
@@ -15,6 +15,8 @@ conda activate BindCraft
 # alternatively you can source the environment directly
 #source /path/to/mambaforge/bin/activate /path/to/mambaforge/envs/BindCraft
 
+# Get the directory where the bindcraft script is located
+SCRIPT_DIR=$(dirname "$0")
 
 # Parsing command line options
 SETTINGS=""
@@ -37,4 +39,4 @@ echo "Running the BindCraft pipeline"
 echo "Running binder design for target ${SETTINGS}"
 echo "Design settings used: ${ADVANCED}"
 echo "Filtering designs based on ${FILTERS}"
-python -u ./bindcraft.py --settings "${SETTINGS}" --filters "${FILTERS}" --advanced "${ADVANCED}"
+python -u "${SCRIPT_DIR}/bindcraft.py" --settings "${SETTINGS}" --filters "${FILTERS}" --advanced "${ADVANCED}"
diff --git a/functions/__init__.py b/functions/__init__.py
@@ -13,7 +13,7 @@
 from .biopython_utils import *
 from .generic_utils import *
 
-# set slurm environment modules and suppress warnings
+# suppress warnings
 #os.environ["SLURM_STEP_NODELIST"] = os.environ["SLURM_NODELIST"]
 warnings.simplefilter(action='ignore', category=FutureWarning)
 warnings.simplefilter(action='ignore', category=DeprecationWarning)
diff --git a/functions/biopython_utils.py b/functions/biopython_utils.py
@@ -8,7 +8,7 @@
 from collections import defaultdict
 from scipy.spatial import cKDTree
 from Bio import BiopythonWarning
-from Bio.PDB import PDBParser, DSSP, Selection, Polypeptide, PDBIO, Select, Chain,  Superimposer
+from Bio.PDB import PDBParser, DSSP, Selection, Polypeptide, PDBIO, Select, Chain, Superimposer
 from Bio.SeqUtils.ProtParam import ProteinAnalysis
 from Bio.PDB.Selection import unfold_entities
 from Bio.PDB.Polypeptide import is_aa
@@ -126,6 +126,13 @@ def calculate_clash_score(pdb_file, threshold=2.4, only_ca=False):
 
     return len(valid_pairs)
 
+three_to_one_map = {
+    'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
+    'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
+    'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
+    'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y'
+}
+
 # identify interacting residues at the binder interface
 def hotspot_residues(trajectory_pdb, binder_chain="B", atom_distance_cutoff=4.0):
     # Parse the PDB file
@@ -154,8 +161,10 @@ def hotspot_residues(trajectory_pdb, binder_chain="B", atom_distance_cutoff=4.0)
     for binder_idx, close_indices in enumerate(pairs):
         binder_residue = binder_atoms[binder_idx].get_parent()
         binder_resname = binder_residue.get_resname()
-        if binder_resname in Polypeptide.standard_aa_names:
-            aa_single_letter = Polypeptide.three_to_one(binder_resname)
+
+        # Convert three-letter code to single-letter code using the manual dictionary
+        if binder_resname in three_to_one_map:
+            aa_single_letter = three_to_one_map[binder_resname]
             for close_idx in close_indices:
                 target_residue = target_atoms[close_idx].get_parent()
                 interacting_residues[binder_residue.id[1]] = aa_single_letter
diff --git a/install_bindcraft.sh b/install_bindcraft.sh
@@ -3,10 +3,11 @@
 ################## specify conda/mamba folder, and installation folder for git repositories, and whether to use mamba or $pkg_manager
 # Default value for pkg_manager
 pkg_manager='conda'
+cuda=''
 
 # Define the short and long options
-OPTIONS=p:
-LONGOPTIONS=pkg_manager:
+OPTIONS=p:c:
+LONGOPTIONS=pkg_manager:,cuda:
 
 # Parse the command-line options
 PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTIONS --name "$0" -- "$@")
@@ -19,6 +20,10 @@ while true; do
       pkg_manager="$2"
       shift 2
       ;;
+    -c|--cuda)
+      cuda="$2"
+      shift 2
+      ;;
     --)
       shift
       break
@@ -30,6 +35,10 @@ while true; do
   esac
 done
 
+# Example usage of the parsed variables
+echo "Package manager: $pkg_manager"
+echo "CUDA version (if provided): $cuda"
+
 ############################################################################################################
 ############################################################################################################
 ################## initialisation
@@ -40,26 +49,26 @@ install_dir=$(pwd)
 
 ### BindCraft install
 printf "Installing BindCraft environment\n"
-$pkg_manager create --name BindCraft python=3.9 -y
-conda activate BindCraft
+$pkg_manager create --name BindCraft python=3.10 -y
+CONDA_BASE=$(conda info --base)
+source ${CONDA_BASE}/bin/activate ${CONDA_BASE}/envs/BindCraft
+printf "BindCraft environment activated at ${CONDA_BASE}/envs/BindCraft"
 
-# install helpful packages
-$pkg_manager install pandas numpy biopython==1.79 scipy"<1.13.0" pdbfixer seaborn tqdm jupyter ffmpeg -y
+# install required packages
+if [ -n "$cuda" ]; then
+    CONDA_OVERRIDE_CUDA="$cuda" $pkg_manager install pip pandas matplotlib numpy"<2.0.0" biopython scipy pdbfixer seaborn tqdm jupyter ffmpeg pyrosetta fsspec py3dmol chex dm-haiku dm-tree joblib ml-collections immutabledict optax jaxlib=*=*cuda* jax cuda-nvcc cudnn -c conda-forge -c anaconda -c nvidia  --channel https://conda.graylab.jhu.edu -y
+else
+    $pkg_manager install pip pandas matplotlib numpy"<2.0.0" biopython scipy pdbfixer seaborn tqdm jupyter ffmpeg pyrosetta fsspec py3dmol chex dm-haiku dm-tree joblib ml-collections immutabledict optax jaxlib=*=*cuda* jax cuda-nvcc cudnn -c conda-forge -c anaconda -c nvidia  --channel https://conda.graylab.jhu.edu -y
+fi
 
 # install ColabDesign
-pip install git+https://github.com/sokrypton/ColabDesign.git
-pip install --upgrade "jax[cuda]" -f https://storage.googleapis.com/jax-releases/jax_releases.htm
-pip install matplotlib==3.7.1
-
-# install PyRosetta
-$pkg_manager install pyrosetta --channel https://conda.graylab.jhu.edu -y
+pip3 install git+https://github.com/sokrypton/ColabDesign.git --no-deps
 
 # Download AlphaFold2 weights
 mkdir -p ${install_dir}/params/
 cd ${install_dir}/params/
 wget -P ${install_dir}/params/ https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar
 tar -xvf ${install_dir}/params/alphafold_params_2022-12-06.tar
-rm ${install_dir}/params/alphafold_params_2022-12-06.tar
 
 # chmod executables
 chmod +x ${install_dir}/functions/dssp
@@ -79,6 +88,6 @@ printf "$pkg_manager cleaned up\n"
 ################## finish script
 t=$SECONDS 
 printf "Finished setting up BindCraft environment\n"
-printf "Activate environment using command: \"conda activate BindCraft\""
+printf "Activate environment using command: \"$pkg_manager activate BindCraft\""
 printf "\n"
-printf "Installation took $(($t / 3600)) hours, $((($t / 60) % 60)) minutes and $(($t % 60)) seconds."
+printf "Installation took $(($t / 3600)) hours, $((($t / 60) % 60)) minutes and $(($t % 60)) seconds."
diff --git a/notebooks/BindCraft.ipynb b/notebooks/BindCraft.ipynb
@@ -59,7 +59,6 @@
             "Resolving deltas: 100% (26/26), done.\n",
             "Installing ColabDesign\n",
             "Installing PyRosetta\n",
-            "Downgrading BioPython\n",
             "BindCraft installation is finished, ready to run!\n",
             "CPU times: user 1.86 s, sys: 244 ms, total: 2.11 s\n",
             "Wall time: 2min 10s\n"
@@ -97,10 +96,6 @@
         "    import pyrosettacolabsetup\n",
         "    pyrosettacolabsetup.install_pyrosetta(serialization=True, cache_wheel_on_google_drive=False)\n",
         "\n",
-        "  print(\"Downgrading BioPython\")\n",
-        "  os.system(\"pip uninstall -y biopython\")\n",
-        "  os.system(\"pip install biopython==1.79\")\n",
-        "\n",
         "  # download params\n",
         "  if not os.path.isfile(\"bindcraft/params/done.txt\"):\n",
         "    print(\"downloading AlphaFold params\")\n",
diff --git a/settings_advanced/4stage_multimer_betasheet.json b/settings_advanced/4stage_multimer_betasheet.json
@@ -57,6 +57,6 @@
     "save_trajectory_pickle": false,
     "max_trajectories": false,
     "enable_rejection_check": true,
-    "acceptance_rate": 0.05,
+    "acceptance_rate": 0.01,
     "start_monitoring": 50
 }
diff --git a/settings_advanced/4stage_multimer_peptides.json b/settings_advanced/4stage_multimer_peptides.json
@@ -57,6 +57,6 @@
     "save_trajectory_pickle": false,
     "max_trajectories": false,
     "enable_rejection_check": true,
-    "acceptance_rate": 0.05,
+    "acceptance_rate": 0.01,
     "start_monitoring": 50
 }
diff --git a/settings_target/PDL1.json b/settings_target/PDL1.json
@@ -6,4 +6,4 @@
     "target_hotspot_residues": "56",
     "lengths": [65, 150],
     "number_of_final_designs": 100
-}
+}

Original file line number	Diff line number	Diff line change
`@@ -57,6 +57,6 @@`
`57`	`57`	`"save_trajectory_pickle": false,`
`58`	`58`	`"max_trajectories": false,`
`59`	`59`	`"enable_rejection_check": true,`
`60`		`- "acceptance_rate": 0.05,`
	`60`	`+ "acceptance_rate": 0.01,`
`61`	`61`	`"start_monitoring": 50`
`62`	`62`	`}`