From cf9d108eb30af2f5c655cbd440fea861eaf1a918 Mon Sep 17 00:00:00 2001
From: Natasha Pavlovikj <npavlovikj2@unl.edu>
Date: Mon, 5 Feb 2024 18:34:10 -0600
Subject: [PATCH] Add scratch to Trinity example

---
 .../de_novo_assembly_tools/trinity/_index.md  |  4 ++
 .../running_trinity_in_multiple_steps.md      | 43 +++++++++++++++++--
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/content/applications/app_specific/bioinformatics_tools/de_novo_assembly_tools/trinity/_index.md b/content/applications/app_specific/bioinformatics_tools/de_novo_assembly_tools/trinity/_index.md
index 6e19c01b..4d912274 100644
--- a/content/applications/app_specific/bioinformatics_tools/de_novo_assembly_tools/trinity/_index.md
+++ b/content/applications/app_specific/bioinformatics_tools/de_novo_assembly_tools/trinity/_index.md
@@ -14,6 +14,10 @@ $ Trinity --seqType [fa|fq] --max_memory <maximum_memory> --left input_reads_pai
 {{< /highlight >}}
 where **input_reads_pair_1.[fa|fq]** and **input_reads_pair_2.[fa|fq]** are the input paired-end files of sequence reads in fasta/fastq format, and **--seqType** is the type of these input reads. The option **--max_memory** specifies the maximum memory to use with Trinity.
 
+{{% notice info %}}
+**Trinity produces many intermediate files, which can strain the file system. To avoid any issues, please copy all the input data to the faster local storage called "scratch", write the output to "scratch", and finally copy all the needed output files from "scratch" to /work. Using "scratch" in this way can greatly improve performance! Note that the "scratch" directories are unique per job and are deleted when the job finishes, so copy the needed output files back before the job ends.**
+{{% /notice %}}
+
 Additional Trinity **options** can be found in the Trinity website, or by typing:
 {{< highlight bash >}}
 $ Trinity
diff --git a/content/applications/app_specific/bioinformatics_tools/de_novo_assembly_tools/trinity/running_trinity_in_multiple_steps.md b/content/applications/app_specific/bioinformatics_tools/de_novo_assembly_tools/trinity/running_trinity_in_multiple_steps.md
index 22fa8383..75137592 100644
--- a/content/applications/app_specific/bioinformatics_tools/de_novo_assembly_tools/trinity/running_trinity_in_multiple_steps.md
+++ b/content/applications/app_specific/bioinformatics_tools/de_novo_assembly_tools/trinity/running_trinity_in_multiple_steps.md
@@ -7,6 +7,10 @@ weight = "10"
 
 ## Running Trinity with Paired-End fastq data with 8 CPUs and 100GB of RAM
 
+{{% notice info %}}
+**Trinity produces many intermediate files, which can strain the file system. To avoid any issues, please copy all the input data to the faster local storage called "scratch", write the output to "scratch", and finally copy all the needed output files from "scratch" to /work. Using "scratch" in this way can greatly improve performance! Note that the "scratch" directories are unique per job and are deleted when the job finishes, so copy the needed output files back before the job ends.**
+{{% /notice %}}
+
 The first step of running Trinity is to run Trinity with the option **--no_run_inchworm**:
 {{% panel header="`trinity_step1.submit`"%}}
 {{< highlight bash >}}
@@ -21,7 +25,14 @@ The first step of running Trinity is to run Trinity with the option **--no_run_i
 
 module load trinity
 
-Trinity --seqType fq --max_memory 100G --left input_reads_pair_1.fastq --right input_reads_pair_2.fastq --SS_lib_type FR --output trinity_out/ --CPU $SLURM_NTASKS_PER_NODE --no_run_inchworm
+# copy input data to /scratch
+cp input_reads_pair_1.fastq /scratch
+cp input_reads_pair_2.fastq /scratch 
+
+Trinity --seqType fq --max_memory 100G --left /scratch/input_reads_pair_1.fastq --right /scratch/input_reads_pair_2.fastq --SS_lib_type FR --output /scratch/trinity_out/ --CPU $SLURM_NTASKS_PER_NODE --no_run_inchworm
+
+# copy the output back to the current directory
+cp -r /scratch/trinity_out/ .
 {{< /highlight >}}
 {{% /panel %}}
 
@@ -40,7 +51,15 @@ The second step of running Trinity is to run Trinity with the option **--no_run_
 
 module load trinity
 
-Trinity --seqType fq --max_memory 100G --left input_reads_pair_1.fastq --right input_reads_pair_2.fastq --SS_lib_type FR --output trinity_out/ --CPU $SLURM_NTASKS_PER_NODE --no_run_chrysalis
+# copy input data and the trinity_out directory from the previous step to /scratch
+cp input_reads_pair_1.fastq /scratch
+cp input_reads_pair_2.fastq /scratch
+cp -r trinity_out /scratch/
+
+Trinity --seqType fq --max_memory 100G --left /scratch/input_reads_pair_1.fastq --right /scratch/input_reads_pair_2.fastq --SS_lib_type FR --output /scratch/trinity_out/ --CPU $SLURM_NTASKS_PER_NODE --no_run_chrysalis
+
+# copy the output back to the current directory
+cp -r /scratch/trinity_out/ .
 {{< /highlight >}}
 {{% /panel %}}
 
@@ -59,7 +78,15 @@ The third step of running Trinity is to run Trinity with the option **--no_distr
 
 module load trinity
 
-Trinity --seqType fq --max_memory 100G --left input_reads_pair_1.fastq --right input_reads_pair_2.fastq --SS_lib_type FR --output trinity_out/ --CPU $SLURM_NTASKS_PER_NODE --no_distributed_trinity_exec
+# copy input data and the trinity_out directory from the previous step to /scratch
+cp input_reads_pair_1.fastq /scratch
+cp input_reads_pair_2.fastq /scratch
+cp -r trinity_out /scratch/
+
+Trinity --seqType fq --max_memory 100G --left /scratch/input_reads_pair_1.fastq --right /scratch/input_reads_pair_2.fastq --SS_lib_type FR --output /scratch/trinity_out/ --CPU $SLURM_NTASKS_PER_NODE --no_distributed_trinity_exec
+
+# copy the output back to the current directory
+cp -r /scratch/trinity_out/ .
 {{< /highlight >}}
 {{% /panel %}}
 
@@ -78,7 +105,15 @@ The fourth step of running Trinity is to run Trinity without any additional opti
 
 module load trinity
 
-Trinity --seqType fq --max_memory 100G --left input_reads_pair_1.fastq --right input_reads_pair_2.fastq --SS_lib_type FR --output trinity_out/ --CPU $SLURM_NTASKS_PER_NODE
+# copy input data and the trinity_out directory from the previous step to /scratch
+cp input_reads_pair_1.fastq /scratch
+cp input_reads_pair_2.fastq /scratch
+cp -r trinity_out /scratch/
+
+Trinity --seqType fq --max_memory 100G --left /scratch/input_reads_pair_1.fastq --right /scratch/input_reads_pair_2.fastq --SS_lib_type FR --output /scratch/trinity_out/ --CPU $SLURM_NTASKS_PER_NODE
+
+# copy the output back to the current directory
+cp -r /scratch/trinity_out/ .
 {{< /highlight >}}
 {{% /panel %}}
 
-- 
GitLab