@prefix this: <https://w3id.org/np/RABFywdSqQHkQNX_xve_9J11i1NTIFmmomu_LvG6-JPsE> .
@prefix sub: <https://w3id.org/np/RABFywdSqQHkQNX_xve_9J11i1NTIFmmomu_LvG6-JPsE/> .
@prefix np: <http://www.nanopub.org/nschema#> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix pav: <http://purl.org/pav/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix npx: <http://purl.org/nanopub/x/> .
# Head graph: declares the resource `this:` to be a np:Nanopublication and
# links it to the three named graphs that make it up.
sub:Head {
  # Assertion graph (sub:assertion) is linked here via np:hasAssertion.
  this: np:hasAssertion sub:assertion ;
    # Provenance graph; its definition is not visible in this part of the file.
    np:hasProvenance sub:provenance ;
    # Publication-info graph; its definition is not visible in this part of the file.
    np:hasPublicationInfo sub:pubinfo ;
    # Type statement marking `this:` as a nanopublication.
    a np:Nanopublication .
}
sub:assertion {
  <https://about.workflowhub.eu> <http://schema.org/name> "WorkflowHub" ;
    <http://schema.org/url> "https://about.workflowhub.eu/" ;
    a <http://schema.org/Organization> .
  <https://about.workflowhub.eu/Workflow-RO-Crate> <http://schema.org/name> "Workflow RO-Crate Profile" ;
    <http://schema.org/version> "0.2.0" ;
    a <http://schema.org/CreativeWork> .
  <https://orcid.org/0000-0003-2388-0744> <http://schema.org/name> "Małgorzata Wolniewicz" ;
    a <http://xmlns.com/foaf/0.1/Agent> .
  <https://ror.org/https://doi.org/10.1093/bioinformatics/bts480> <http://schema.org/identifier> "https://doi.org/10.1093/bioinformatics/bts480" ;
    <http://schema.org/name> "Snakemake" ;
    <http://schema.org/url> "https://snakemake.readthedocs.io/" ;
    a <http://schema.org/ComputerLanguage> .
  <https://ror.org/https://orcid.org/0000-0003-3876-6581> <http://schema.org/identifier> "https://orcid.org/0000-0003-3876-6581" ;
    <http://schema.org/name> "Agata Kilar" ;
    a <http://schema.org/Person> .
  <https://ror.org/https://w3id.org/cwl/v1.0/> <http://schema.org/alternateName> "CWL" ;
    <http://schema.org/identifier> "https://w3id.org/cwl/v1.0/" ;
    <http://schema.org/name> "Common Workflow Language" ;
    <http://schema.org/url> "https://www.commonwl.org/" ;
    a <http://schema.org/ComputerLanguage> .
  <https://w3id.org/ro-id/005c8422-6fce-4a35-862d-7f2ee4d9c4f5> <http://schema.org/name> "analysis" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.0936454849498327" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.7" .
  <https://w3id.org/ro-id/05648ad5-fcc8-4f24-882a-0bf77f727e52> <http://schema.org/name> "NCBI database" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "6.079027355623101" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.0" .
  <https://w3id.org/ro-id/0735a772-38d8-45e7-a3d2-e20f62056146> <http://schema.org/name> "directory from the file" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "2.8875379939209727" ;
    <https://w3id.org/ro/terms/earth-science#score> "1.9" .
  <https://w3id.org/ro-id/0f678463-a81a-47a0-a669-079e20fa0125> <http://schema.org/name> "name" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "5.1003344481605355" ;
    <https://w3id.org/ro/terms/earth-science#score> "6.1" .
  <https://w3id.org/ro-id/14d92214-4a88-46fb-b596-4f2e99bfcc37> <http://schema.org/name> "GERONIMO is a bioinformatics pipeline designed to conduct high-throughput homology searches of structural genes using covariance models." ;
    a <https://w3id.org/ro/terms/earth-science#Sentence> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "27.215189873417724" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.3" .
  <https://w3id.org/ro-id/15a63617-f9a8-4753-8830-af3a1bac7b4d> <http://schema.org/name> "pipeline" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "8.646003262642742" ;
    <https://w3id.org/ro/terms/earth-science#score> "5.3" .
  <https://w3id.org/ro-id/17b944ac-f8bb-403d-bddf-98d90bd13b83> <http://schema.org/name> "genome" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "10.114192495921698" ;
    <https://w3id.org/ro/terms/earth-science#score> "6.2" .
  <https://w3id.org/ro-id/1c684234-851e-4998-a0d2-41c66b7a20d5> <http://schema.org/name> "computer science" ;
    a <https://w3id.org/contentdesc#Domain> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "58.58585858585859" ;
    <https://w3id.org/ro/terms/earth-science#score> "29.0" .
  <https://w3id.org/ro-id/285fc200-65ce-4175-afc6-da0c325375f0> <http://schema.org/name> "documentation file" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "2.8875379939209727" ;
    <https://w3id.org/ro/terms/earth-science#score> "1.9" .
  <https://w3id.org/ro-id/28bfade8-878b-4c72-8135-4078fbd09e42> <http://schema.org/name> "covariance" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "8.646003262642742" ;
    <https://w3id.org/ro/terms/earth-science#score> "5.3" .
  <https://w3id.org/ro-id/2c76f1de-2578-4660-8926-16dbd0f88985> <http://schema.org/name> "covariance model" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "2.7355623100303954" ;
    <https://w3id.org/ro/terms/earth-science#score> "1.8" .
  <https://w3id.org/ro-id/359d5480-f589-47f7-aaae-ff4c8aeb9a7a> <http://schema.org/name> "database" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "7.34094616639478" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.5" .
  <https://w3id.org/ro-id/3a8d21f9-3481-47d7-925e-2c129cf76f8d> <http://schema.org/name> "installation of Geronimo dependency" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "2.8875379939209727" ;
    <https://w3id.org/ro/terms/earth-science#score> "1.9" .
  <https://w3id.org/ro-id/3f7102cd-996b-4e5b-b498-525210a18b3a> <http://schema.org/name> "Geronimo" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "4.893964110929853" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.0" .
  <https://w3id.org/ro-id/3f7be379-7d08-4ee4-b1b9-aa17032ec971> <http://schema.org/name> "atmospheric sciences" ;
    a <https://w3id.org/ro/terms/earth-science#FieldOfResearch> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "100.0" ;
    <https://w3id.org/ro/terms/earth-science#score> "0.9349703192710876" .
  <https://w3id.org/ro-id/4b31f4de-1524-4cef-b8b3-72a240d63dd0> <http://schema.org/name> "The" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "6.035889070146819" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.7" .
  <https://w3id.org/ro-id/4fae7e6b-9f40-4a7c-b2aa-e85c1a97e8ec> <http://schema.org/name> "IT-computer sciences" ;
    a <https://w3id.org/ro/terms/earth-science#IPTC> ;
    <https://w3id.org/ro/terms/earth-science#path> "Science and technology/Technology and engineering/IT-computer sciences" .
  <https://w3id.org/ro-id/54c71f37-c543-46a8-84b3-aaa8ae32fb6f> <http://schema.org/name> "gene family" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "2.759197324414716" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.3" .
  <https://w3id.org/ro-id/58d0583c-0a1a-4ce8-83ee-3620189c1ddd> <http://schema.org/name> "software" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "4.1806020066889635" ;
    <https://w3id.org/ro/terms/earth-science#score> "5.0" .
  <https://w3id.org/ro-id/5b68f8b8-d94d-44ed-b47c-9b30492dd501> <http://schema.org/name> "mathematical and computer sciences" ;
    a <https://w3id.org/ro/terms/earth-science#NASA> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "100.0" ;
    <https://w3id.org/ro/terms/earth-science#score> "0.41168108582496643" .
  <https://w3id.org/ro-id/5d9ce283-c305-46ae-99e6-e81b9acf9018> <http://schema.org/name> "RNA" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "8.156606851549755" ;
    <https://w3id.org/ro/terms/earth-science#score> "5.0" .
  <https://w3id.org/ro-id/5f76202b-8605-4866-9750-2ae44e74cf74> <http://schema.org/name> "GERONIMO installation" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.0395136778115504" ;
    <https://w3id.org/ro/terms/earth-science#score> "2.0" .
  <https://w3id.org/ro-id/612078aa-2703-4c7b-843e-0b683ce23f9c> <http://schema.org/name> "Genetics" ;
    a <https://w3id.org/ro/terms/earth-science#IPTC> ;
    <https://w3id.org/ro/terms/earth-science#path> "Science and technology/Natural science/Biology/Genetics" .
  <https://w3id.org/ro-id/630cd87c-2fd2-4a70-bbad-e85002d3cc15> <http://schema.org/name> "GERONIMO pipeline" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "6.231003039513677" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.1" .
  <https://w3id.org/ro-id/73687a3c-6aaa-429a-9d13-7d4b854f803e> <http://schema.org/name> "Oil and gas - upstream activities" ;
    a <https://w3id.org/ro/terms/earth-science#IPTC> ;
    <https://w3id.org/ro/terms/earth-science#path> "Economy, business and finance/Economic sector/Energy and resource/Oil and gas - upstream activities" .
  <https://w3id.org/ro-id/7589da61-b04f-425f-83c1-6dbbedd2d27a> <http://schema.org/name> "GERONIMO repository" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.0395136778115504" ;
    <https://w3id.org/ro/terms/earth-science#score> "2.0" .
  <https://w3id.org/ro-id/76e5ce8b-2407-4d11-adba-17e295f4b1bd> <http://schema.org/name> "summary file" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.4954407294832825" ;
    <https://w3id.org/ro/terms/earth-science#score> "2.3" .
  <https://w3id.org/ro-id/7bffe30b-5a6e-4262-80b0-701944edc9f6> <http://schema.org/name> "storage space" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.5953177257525084" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.3" .
  <https://w3id.org/ro-id/7f74e75a-8601-49da-a852-353b1a3994f2> <http://schema.org/name> "license" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "4.431438127090301" ;
    <https://w3id.org/ro/terms/earth-science#score> "5.3" .
  <https://w3id.org/ro-id/82d5b0b6-0398-46cd-9cc2-8af27f478fc0> <http://schema.org/name> "database" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "2.9264214046822743" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.5" .
  <https://w3id.org/ro-id/83fc731e-3aa1-43c7-9cf5-ab6740c3633d> <http://schema.org/name> "question" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "4.933110367892977" ;
    <https://w3id.org/ro/terms/earth-science#score> "5.9" .
  <https://w3id.org/ro-id/869e48d4-c589-4db1-af1e-0a2a00da300b> <http://schema.org/name> "directory structure" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "15.653495440729484" ;
    <https://w3id.org/ro/terms/earth-science#score> "10.3" .
  <https://w3id.org/ro-id/87d45696-5e29-461e-b6bf-bc8fcc2a033f> <http://schema.org/name> """You can safely remove the following:
- `GERONIMO/results`
- `GERONIMO/database`
- `GERONIMO/taxonomy`
- `GERONIMO/temp`
- `.create_genome_list.touch`
- `list_of_genomes.txt`""" ;
    a <https://w3id.org/ro/terms/earth-science#Sentence> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "30.37974683544304" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.8" .
  <https://w3id.org/ro-id/8865ad4d-7c4a-46a5-a12a-89213e7e14bd> <http://schema.org/name> "By default, the GERONIMO pipeline conducts high-throughput searches of homology sequences in downloaded genomes utilizing covariance models." ;
    a <https://w3id.org/ro/terms/earth-science#Sentence> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "21.518987341772153" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.4" .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/#enrichment_service-account-enrichment> <http://schema.org/name> "service-account-enrichment" ;
    a <http://xmlns.com/foaf/0.1/Agent> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/> pav:importedBy <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/author> <https://ror.org/https://orcid.org/0000-0003-3876-6581> ;
    <http://schema.org/contentSize> 1715468 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/ros/892d05bf-da84-4be9-9ab7-60eba5f73c70/crate/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:04.643799+00:00" ;
    <http://schema.org/dateModified> "2024-03-05 12:23:14.455859+00:00" ;
    <http://schema.org/datePublished> "2023-09-08 12:04:04.643799+00:00" ;
    <http://schema.org/description> """# GERONIMO

## Introduction
GERONIMO is a bioinformatics pipeline designed to conduct high-throughput homology searches of structural genes using covariance models. These models are based on the alignment of sequences and the consensus of secondary structures. The pipeline is built using Snakemake, a workflow management tool that allows for the reproducible execution of analyses on various computational platforms. 

The idea for developing GERONIMO emerged from a comprehensive search for [telomerase RNA in lower plants] and was subsequently refined through an [expanded search of telomerase RNA across Insecta]. GERONIMO can test hundreds of genomes and ensures the stability and reproducibility of the analyses performed.


[telomerase RNA in lower plants]: https://doi.org/10.1093/nar/gkab545
[expanded search of telomerase RNA across Insecta]: https://doi.org/10.1093/nar/gkac1202

## Scope
The GERONIMO tool utilises covariance models (CMs) to conduct homology searches of RNA sequences across a wide range of gene families in a broad evolutionary context. Specifically, it can be utilised to:

* Detect RNA sequences that share a common evolutionary ancestor
* Identify and align orthologous RNA sequences among closely related species, as well as paralogous sequences within a single species
* Identify conserved non-coding RNAs in a genome, and extract upstream genomic regions to characterise potential promoter regions. 
It is important to note that GERONIMO is a computational tool, and as such, it is intended to be run on a computer with a small amount of data. Appropriate computational infrastructure is necessary for analysing hundreds of genomes.

Although GERONIMO was primarily designed for Telomerase RNA identification, its functionality extends to include the detection and alignment of other RNA gene families, including **rRNA**, **tRNA**, **snRNA**, **miRNA**, and **lncRNA**. This can aid in identifying paralogs and orthologs across different species that may carry specific functions, making it useful for phylogenetic analyses. 

It is crucial to remember that some gene families may exhibit similar characteristics but different functions. Therefore, analysing the data and functional annotation after conducting the search is essential to characterise the sequences properly.

## Pipeline overview


By default, the GERONIMO pipeline conducts high-throughput searches of homology sequences in downloaded genomes utilizing covariance models. If a significant similarity is detected between the model and genome sequence, the pipeline extracts the upstream region, making it convenient to identify the promoter of the discovered gene. In brief, the pipeline:
- Compiles a list of genomes using the NCBI's [Entrez] database based on a specified query, *e.g. \"Rhodophyta\"[Organism]*
- Downloads and decompresses the requested genomes using *rsync* and *gunzip*, respectively
- *Optionally*, generates a covariance model based on a provided alignment using [Infernal]
- Conducts searches among the genomes using the covariance model [Infernal]
- Supplements genome information with taxonomy data using [rentrez]
- Expands the significant hits sequence by extracting upstream genomic regions using [*blastcmd*]
- Compiles the results, organizes them into a tabular format, and generates a visual summary of the performed analysis.

[Entrez]: https://www.ncbi.nlm.nih.gov/books/NBK179288/
[Infernal]: http://eddylab.org/infernal/
[rentrez]: https://github.com/ropensci/rentrez
[*blastcmd*]: https://www.ncbi.nlm.nih.gov/books/NBK569853/

## Quick start
The GERONIMO is available as a `snakemake pipeline` running on Linux and Windows operating systems.

### Windows 10
Instal Linux on Windows 10 (WSL) according to [instructions], which bottling down to opening PowerShell or Windows Command Prompt in *administrator mode* and pasting the following:
```shell
wsl --install
wsl.exe --install UBUNTU
```
Then restart the machine and follow the instructions for setting up the Linux environment.

[instructions]: https://learn.microsoft.com/en-us/windows/wsl/install

### Linux:
#### Check whether the conda is installed:
```shell
conda -V
```
&gt; GERONIMO was tested on conda 23.3.1
#### 1) If you do not have installed `conda`, please install `miniconda`
Please follow the instructions for installing [miniconda]

[miniconda]: https://conda.io/projects/conda/en/stable/user-guide/install/linux.html

#### 2) Continue with installing `mamba` (recommended but optional)
```shell
conda install -n base -c conda-forge mamba
```
#### 3) Install `snakemake`
```shell
conda activate base
mamba create -p env_snakemake -c conda-forge -c bioconda snakemake
mamba activate env_snakemake
snakemake --help
```
In case of complications, please check the section `Questions &amp; Answers` below or follow the [official documentation] for troubleshooting.

[official documentation]: https://snakemake.readthedocs.io/en/stable/getting_started/installation.html

### Clone the GERONIMO repository
Go to the path in which you want to run the analysis and clone the repository:
```shell
cd
git clone https://github.com/amkilar/GERONIMO.git
```

### Run sample analysis to ensure GERONIMO installation was successful
All files are prepared for the sample analysis as a default. Please execute the line below:
```shell
snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx
```

This will prompt GERONIMO to quickly scan all modules, verifying the correct setup of the pipeline without executing any analysis.
You should see the message `Building DAG of jobs...`, followed by `Nothing to be done (all requested files are present and up to date).`, when successfully completed.

If you want to run the sample analysis fully, please remove the folder `results` from the GERONIMO directory and execute GERONIMO again with:

`snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx`

&gt; You might consider allowing more cores to speed up the analysis, which might take up to several hours.

#### You might want to clean `GERONIMO/` directory from the files produced by the example analysis. You can safely remove the following:
- `GERONIMO/results`
- `GERONIMO/database`
- `GERONIMO/taxonomy`
- `GERONIMO/temp`
- `.create_genome_list.touch`
- `list_of_genomes.txt`

## Setup the inputs

### 1) Prepare the `covariance models`:

#### Browse the collection of available `covariance models` at [Rfam] (*You can find the covariance model in the tab `Curation`.*) 
Paste the covariance model to the folder `GERONIMO/models` and ensure its name follows the convention: `cov_model_`

[Rfam]: https://rfam.org/

#### **OR**

#### Prepare your own `covariance model` using [LocARNA]
1. Paste or upload your sequences to the web server and download the `.stk` file with the alignment result. 
 
    &gt; *Please note that the `.stk` file format is crucial for the analysis, containing sequence alignment and secondary structure consensus.*
   
    &gt; The LocARNA web service allows you to align 30 sequences at once - if you need to align more sequences, please use the standalone version available [here] 
    &gt; After installation run:
    ```shell
    mlocarna my_fasta_sequences.fasta
    ```
 
2. Paste the `.stk` alignment file to the folder `GERONIMO/model_to_build` and ensure its name follows the convention: `.stk`

   &gt; Please check the example `heterotrichea.stk` format in `GERONIMO/models_to_built` for reference
   

[LocARNA]: http://rna.informatik.uni-freiburg.de/LocARNA/Input.jsp
[here]: http://www.bioinf.uni-freiburg.de/Software/LocARNA/


### 2) Adjust the `config.yaml` file
Please adjust the analysis specifications, as in the following example:

&gt; - database: ' [Organism]' (in case of difficulties with defining the database query, please follow the instructions below)
&gt; - extract_genomic_region-length:   (here you can determine how long the upstream genomic region should be extracted; tested for 200)
&gt; - models: [\"\", \"\"] (here specify the names of models that should be used to perform analysis)
&gt;   
&gt;   *Here you can also insert the name of the covariance model you want to build with GERONIMO - just be sure you placed `.stk` file in `GERONIMO/models_to_build` before starting analysis*
&gt; - CPU_for_model_building:  (specify the number of available CPUs devoted to the process of building model (cannot exceed the CPU number allowed to snakemake with `--cores`)
&gt;
&gt;   *You might ignore this parameter when you do not need to create a new covariance model*


Keep in mind that the covariance models and alignments must be present in the respective GERONIMO folders.

### 3) Remove folder `results`, which contains example analysis output
### 4) **Please ensure you have enough storage capacity to download all the requested genomes (in the `GERONIMO/` directory)**

## Run GERONIMO
```shell
mamba activate env_snakemake
cd ~/GERONIMO
snakemake -s GERONIMO.sm --cores  --use-conda results/summary_table.xlsx
```
 
## Example results

### Outputs characterisation

#### A) Summary table
The Excel table contains the results arranged by taxonomy information and hit significance. The specific columns include:
* family, organism_name, class, order, phylum (taxonomy context)
* GCA_id - corresponds to the genome assembly in the *NCBI database*
* model - describes which covariance model identified the result
* label - follows the *Infernal* convention of categorizing hits
* number - the counter of the result
* e_value - indicates the significance level of the hit
* HIT_sequence - the exact HIT sequence found by *Infernal*, which corresponds to the covariance model
* HIT_ID - describes in which part of the genome assembly the hit was found, which may help publish novel sequences
* extended_genomic_region - upstream sequence, which may contain a possible promoter sequence
* secondary_structure - the secondary structure consensus of the covariance model


#### B) Significant Hits Distribution Across Taxonomy Families
The plot provides an overview of the number of genomes in which at least one significant hit was identified, grouped by family. The bold black line corresponds to the number of genomes present in each family, helping to minimize bias regarding unequal data representation across the taxonomy.


#### C) Hits Distribution in Genomes Across Families
The heatmap provides information about the most significant hits from the genome, identified by a specific covariance model. Genomes are grouped by families (on the right). Hits are classified into three categories based on their e-values. Generally, these categories correspond to hit classifications (\"HIT,\" \"MAYBE,\" \"NO HIT\"). The \"HIT\" category is further divided to distinguish between highly significant hits and moderately significant ones.



### GERONIMO directory structure

The GERONIMO directory structure is designed to produce files in a highly structured manner, ensuring clear insight and facilitating the analysis of results. During a successful run, GERONIMO produces the following folders:
* `/database` - which contains genome assemblies that were downloaded from the *NCBI database* and grouped in subfolders
* `/taxonomy` - where taxonomy information is gathered and stored in the form of tables
* `/results` - the main folder containing all produced results:
  * `/infernal_raw` - contains the raw results produced by *Infernal*
  * `/infernal` - contains restructured results of *Infernal* in table format
  * `/cmdBLAST` - contains results of *cmdblast*, which extracts the extended genomic region
  * `/summary` - contains summary files that join results from *Infernal*, *cmdblast*, and attach taxonomy context
  * `/plots` - contains two types of summary plots
* `/temp` - folder contains the information necessary to download genome assemblies from *NCBI database*

* `/env` - stores instructions for dependency installation
* `/models` - where calibrated covariance models can be pasted, *for example, from the Rfam database*
* `/modes_to_built` - where multiple alignments in *.stk* format can be pasted
* `/scripts` - contains developed scripts that perform results structurization

#### The example GERONIMO directory structure:

```shell
GERONIMO
├── database
│   ├── GCA_000091205.1_ASM9120v1_genomic
│   ├── GCA_000341285.1_ASM34128v1_genomic
│   ├── GCA_000350225.2_ASM35022v2_genomic
│   └── ...
├── env
├── models
├── model_to_build
├── results
│   ├── cmdBLAST
│   │   ├── MRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   └── ...
│   │   ├── SRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   └── ...
│   │   ├── ...
│   ├── infernal
│   │   ├── MRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   │   ├── SRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   ├── plots
│   ├── raw_infernal
│   │   ├── MRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   │   ├── SRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   └── summary
│       ├── GCA_000091205.1_ASM9120v1_genomic
│       ├── GCA_000341285.1_ASM34128v1_genomic
│       ├── GCA_000350225.2_ASM35022v2_genomic
│       ├── ...
├── scripts
├── taxonomy
└── temp
```

## GERONIMO applicability

### Expanding the evolutionary context
To add new genomes or database queries to an existing analysis, please follow the instructions:
1) Rename the `list_of_genomes.txt` file to `previous_list_of_genomes.txt` or any other preferred name.
2) Modify the `config.yaml` file by replacing the previous database query with the new one.
3) Delete:
   - `summary_table.xlsx`, `part_summary_table.csv`, `summary_table_models.xlsx` files located in the `GERONIMO\\results` directory
   - `.create_genome_list.touch` file
5) Run GERONIMO to calculate new results using the command:
     ```shell
     snakemake -s GERONIMO.sm --cores  --use-conda results/summary_table.xlsx
     ```
7) Once the new results are generated, reviewing them before merging them with the original results is recommended.
8) Copy the contents of the `previous_list_of_genomes.txt` file and paste them into the current `list_of_genomes.txt`.
9) Delete:
   - `summary_table.xlsx` located in the `GERONIMO\\results` directory
   - `.create_genome_list.touch` file
10) Run GERONIMO to merge the results from both analyses using the command:
    ```shell
      snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx
    ```

### Incorporating new covariance models into existing analysis
1) Copy the new covariance model to `GERONIMO/models`
2) Modify the `config.yaml` file by adding the name of the new model to the line `models: [...]`
3) Run GERONIMO to see the updated analysis outcome

### Building a new covariance model
With GERONIMO, building a new covariance model from multiple sequence alignment in the `.stk` format is possible.

To do so, simply paste `.stk` file to `GERONIMO/models_to_build` and paste the name of the new covariance  model to `config.yaml` file to the line `models: [\"\"]`

and run GERONIMO.


## Questions &amp; Answers

### How to specify the database query?
- Visit the [NCBI Assemblies] website. 
- Follow the instruction on the graphic below:

[NCBI Assemblies]: https://www.ncbi.nlm.nih.gov/assembly/?term=

### WSL: problem with creating `snakemake_env`
In the case of an error similar to the one below:
&gt; CondaError: Unable to create prefix directory '/mnt/c/Windows/system32/env_snakemake'.
&gt; Check that you have sufficient permissions. 
 
You might try to delete the cache with: `rm -r ~/.cache/` and try again.

### When `snakemake` does not seem to be installed properly
In the case of the following error:
&gt; Command 'snakemake' not found ...

Check whether the `env_snakemake` is activated.
&gt; It should result in a change from (base) to (env_snakemake) before your login name in the command line window.

If you still see `(base)` before your login name, please try to activate the environment with conda:
`conda activate env_snakemake`


Please note that you might need to specify the full path to the `env_snakemake`, like /home/your user name/env_snakemake

### How to browse GERONIMO results obtained in WSL?
You can easily access the results obtained on WSL from your Windows environment by opening `File Explorer` and pasting the following line into the search bar: `\\\\wsl.localhost\\Ubuntu\\home\\`. This will reveal a folder with your username, as specified during the configuration of your Ubuntu system. To locate the GERONIMO results, simply navigate to the folder with your username and then to the `home` folder. (`\\\\wsl.localhost\\Ubuntu\\home\\\\home\\GERONIMO`)

### GERONIMO occupies a lot of storage space
Through genome downloads, GERONIMO can potentially consume storage space, rapidly leading to a shortage. Currently, downloading genomes is an essential step for optimal GERONIMO performance.

Regrettably, if the analysis is rerun without the `/database` folder, it will result in the need to redownload genomes, which is a highly time-consuming process.

Nevertheless, if you do not intend to repeat the analysis and have no requirement for additional genomes or models, you are welcome to retain your results tables and plots while removing the remaining files.

It is strongly advised against using local machines for extensive analyses. If you lack access to external storage space, it is recommended to divide the analysis into smaller segments, which can be later merged, as explained in the section titled `Expanding the evolutionary context`.

Considering this limitation, I am currently working on implementing a solution that will help circumvent the need for redundant genome downloads without compromising GERONIMO performance in the future.

You might consider deleting the `.snakemake` folder to free up storage space. However, please note that deleting this folder will require the reinstallation of GERONIMO dependencies when the analysis is rerun.

## License
Copyright (c) 2023 Agata M. Kilar

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the \"Software\"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

## Contact
mgr inż. Agata Magdalena Kilar, PhD (agata.kilar@gmail.com)

""" , """# GERONIMO

## Introduction
GERONIMO is a bioinformatics pipeline designed to conduct high-throughput homology searches of structural genes using covariance models. These models are based on the alignment of sequences and the consensus of secondary structures. The pipeline is built using Snakemake, a workflow management tool that allows for the reproducible execution of analyses on various computational platforms. 

The idea for developing GERONIMO emerged from a comprehensive search for [telomerase RNA in lower plants] and was subsequently refined through an [expanded search of telomerase RNA across Insecta]. GERONIMO can test hundreds of genomes and ensures the stability and reproducibility of the analyses performed.


[telomerase RNA in lower plants]: https://doi.org/10.1093/nar/gkab545
[expanded search of telomerase RNA across Insecta]: https://doi.org/10.1093/nar/gkac1202

## Scope
The GERONIMO tool utilises covariance models (CMs) to conduct homology searches of RNA sequences across a wide range of gene families in a broad evolutionary context. Specifically, it can be utilised to:

* Detect RNA sequences that share a common evolutionary ancestor
* Identify and align orthologous RNA sequences among closely related species, as well as paralogous sequences within a single species
* Identify conserved non-coding RNAs in a genome, and extract upstream genomic regions to characterise potential promoter regions.

It is important to note that GERONIMO is a computational tool, and as such, it is intended to be run on a computer with a small amount of data. Appropriate computational infrastructure is necessary for analysing hundreds of genomes.

Although GERONIMO was primarily designed for Telomerase RNA identification, its functionality extends to include the detection and alignment of other RNA gene families, including **rRNA**, **tRNA**, **snRNA**, **miRNA**, and **lncRNA**. This can aid in identifying paralogs and orthologs across different species that may carry specific functions, making it useful for phylogenetic analyses. 

It is crucial to remember that some gene families may exhibit similar characteristics but different functions. Therefore, analysing the data and functional annotation after conducting the search is essential to characterise the sequences properly.

## Pipeline overview


By default, the GERONIMO pipeline conducts high-throughput searches of homologous sequences in downloaded genomes utilizing covariance models. If a significant similarity is detected between the model and genome sequence, the pipeline extracts the upstream region, making it convenient to identify the promoter of the discovered gene. In brief, the pipeline:
- Compiles a list of genomes using the NCBI's [Entrez] database based on a specified query, *e.g. \"Rhodophyta\"[Organism]*
- Downloads and decompresses the requested genomes using *rsync* and *gunzip*, respectively
- *Optionally*, generates a covariance model based on a provided alignment using [Infernal]
- Conducts searches among the genomes using the covariance model [Infernal]
- Supplements genome information with taxonomy data using [rentrez]
- Expands the significant hits sequence by extracting upstream genomic regions using [*blastcmd*]
- Compiles the results, organizes them into a tabular format, and generates a visual summary of the performed analysis.

[Entrez]: https://www.ncbi.nlm.nih.gov/books/NBK179288/
[Infernal]: http://eddylab.org/infernal/
[rentrez]: https://github.com/ropensci/rentrez
[*blastcmd*]: https://www.ncbi.nlm.nih.gov/books/NBK569853/

## Quick start
GERONIMO is available as a `snakemake pipeline` running on Linux and Windows operating systems.

### Windows 10
Install Linux on Windows 10 (WSL) according to the [instructions], which boil down to opening PowerShell or Windows Command Prompt in *administrator mode* and pasting the following:
```shell
wsl --install
wsl.exe --install UBUNTU
```
Then restart the machine and follow the instructions for setting up the Linux environment.

[instructions]: https://learn.microsoft.com/en-us/windows/wsl/install

### Linux:
#### Check whether the conda is installed:
```shell
conda -V
```
> GERONIMO was tested on conda 23.3.1
#### 1) If you do not have `conda` installed, please install `miniconda`
Please follow the instructions for installing [miniconda]

[miniconda]: https://conda.io/projects/conda/en/stable/user-guide/install/linux.html

#### 2) Continue with installing `mamba` (recommended but optional)
```shell
conda install -n base -c conda-forge mamba
```
#### 3) Install `snakemake`
```shell
conda activate base
mamba create -p env_snakemake -c conda-forge -c bioconda snakemake
mamba activate env_snakemake
snakemake --help
```
In case of complications, please check the section `Questions & Answers` below or follow the [official documentation] for troubleshooting.

[official documentation]: https://snakemake.readthedocs.io/en/stable/getting_started/installation.html

### Clone the GERONIMO repository
Go to the path in which you want to run the analysis and clone the repository:
```shell
cd
git clone https://github.com/amkilar/GERONIMO.git
```

### Run sample analysis to ensure GERONIMO installation was successful
All files are prepared for the sample analysis as a default. Please execute the line below:
```shell
snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx
```

This will prompt GERONIMO to quickly scan all modules, verifying the correct setup of the pipeline without executing any analysis.
You should see the message `Building DAG of jobs...`, followed by `Nothing to be done (all requested files are present and up to date).`, when successfully completed.

If you want to run the sample analysis fully, please remove the folder `results` from the GERONIMO directory and execute GERONIMO again with:

`snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx`

> You might consider allowing more cores to speed up the analysis, which might take up to several hours.

#### You might want to clean `GERONIMO/` directory from the files produced by the example analysis. You can safely remove the following:
- `GERONIMO/results`
- `GERONIMO/database`
- `GERONIMO/taxonomy`
- `GERONIMO/temp`
- `.create_genome_list.touch`
- `list_of_genomes.txt`

## Setup the inputs

### 1) Prepare the `covariance models`:

#### Browse the collection of available `covariance models` at [Rfam] (*You can find the covariance model in the tab `Curation`.*) 
Paste the covariance model to the folder `GERONIMO/models` and ensure its name follows the convention: `cov_model_<NAME>` (for example, `cov_model_U4`)

[Rfam]: https://rfam.org/

#### **OR**

#### Prepare your own `covariance model` using [LocARNA]
1. Paste or upload your sequences to the web server and download the `.stk` file with the alignment result. 
 
    > *Please note that the `.stk` file format is crucial for the analysis, containing sequence alignment and secondary structure consensus.*
   
    > The LocARNA web service allows you to align 30 sequences at once - if you need to align more sequences, please use the standalone version available [here] 
    > After installation run:
    ```shell
    mlocarna my_fasta_sequences.fasta
    ```
 
2. Paste the `.stk` alignment file to the folder `GERONIMO/models_to_build` and ensure its name follows the convention: `<NAME>.stk`

   > Please check the example `heterotrichea.stk` format in `GERONIMO/models_to_build` for reference
   

[LocARNA]: http://rna.informatik.uni-freiburg.de/LocARNA/Input.jsp
[here]: http://www.bioinf.uni-freiburg.de/Software/LocARNA/


### 2) Adjust the `config.yaml` file
Please adjust the analysis specifications, as in the following example:

> - database: `'<QUERY> [Organism]'` (in case of difficulties with defining the database query, please follow the instructions below)
> - extract_genomic_region-length: `<LENGTH>` (here you can determine how long the upstream genomic region should be extracted; tested for 200)
> - models: `[\"<MODEL_1>\", \"<MODEL_2>\"]` (here specify the names of models that should be used to perform analysis)
>   
>   *Here you can also insert the name of the covariance model you want to build with GERONIMO - just be sure you placed `.stk` file in `GERONIMO/models_to_build` before starting analysis*
> - CPU_for_model_building: `<NUMBER>` (specify the number of available CPUs devoted to the process of building model; it cannot exceed the CPU number allowed to snakemake with `--cores`)
>
>   *You might ignore this parameter when you do not need to create a new covariance model*


Keep in mind that the covariance models and alignments must be present in the respective GERONIMO folders.

### 3) Remove folder `results`, which contains example analysis output
### 4) **Please ensure you have enough storage capacity to download all the requested genomes (in the `GERONIMO/` directory)**

## Run GERONIMO
```shell
mamba activate env_snakemake
cd ~/GERONIMO
snakemake -s GERONIMO.sm --cores  --use-conda results/summary_table.xlsx
```
 
## Example results

### Outputs characterisation

#### A) Summary table
The Excel table contains the results arranged by taxonomy information and hit significance. The specific columns include:
* family, organism_name, class, order, phylum (taxonomy context)
* GCA_id - corresponds to the genome assembly in the *NCBI database*
* model - describes which covariance model identified the result
* label - follows the *Infernal* convention of categorizing hits
* number - the counter of the result
* e_value - indicates the significance level of the hit
* HIT_sequence - the exact HIT sequence found by *Infernal*, which corresponds to the covariance model
* HIT_ID - describes in which part of the genome assembly the hit was found, which may help publish novel sequences
* extended_genomic_region - upstream sequence, which may contain a possible promoter sequence
* secondary_structure - the secondary structure consensus of the covariance model


#### B) Significant Hits Distribution Across Taxonomy Families
The plot provides an overview of the number of genomes in which at least one significant hit was identified, grouped by family. The bold black line corresponds to the number of genomes present in each family, helping to minimize bias regarding unequal data representation across the taxonomy.


#### C) Hits Distribution in Genomes Across Families
The heatmap provides information about the most significant hits from the genome, identified by a specific covariance model. Genomes are grouped by families (on the right). Hits are classified into three categories based on their e-values. Generally, these categories correspond to hit classifications (\"HIT,\" \"MAYBE,\" \"NO HIT\"). The \"HIT\" category is further divided to distinguish between highly significant hits and moderately significant ones.



### GERONIMO directory structure

The GERONIMO directory structure is designed to produce files in a highly structured manner, ensuring clear insight and facilitating the analysis of results. During a successful run, GERONIMO produces the following folders:
* `/database` - which contains genome assemblies that were downloaded from the *NCBI database* and grouped in subfolders
* `/taxonomy` - where taxonomy information is gathered and stored in the form of tables
* `/results` - the main folder containing all produced results:
  * `/infernal_raw` - contains the raw results produced by *Infernal*
  * `/infernal` - contains restructured results of *Infernal* in table format
  * `/cmdBLAST` - contains results of *cmdblast*, which extracts the extended genomic region
  * `/summary` - contains summary files that join results from *Infernal*, *cmdblast*, and attach taxonomy context
  * `/plots` - contains two types of summary plots
* `/temp` - folder contains the information necessary to download genome assemblies from *NCBI database*

* `/env` - stores instructions for dependency installation
* `/models` - where calibrated covariance models can be pasted, *for example, from the Rfam database*
* `/models_to_build` - where multiple alignments in *.stk* format can be pasted
* `/scripts` - contains developed scripts that perform results structurization

#### The example GERONIMO directory structure:

```shell
GERONIMO
├── database
│   ├── GCA_000091205.1_ASM9120v1_genomic
│   ├── GCA_000341285.1_ASM34128v1_genomic
│   ├── GCA_000350225.2_ASM35022v2_genomic
│   └── ...
├── env
├── models
├── models_to_build
├── results
│   ├── cmdBLAST
│   │   ├── MRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   └── ...
│   │   ├── SRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   └── ...
│   │   ├── ...
│   ├── infernal
│   │   ├── MRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   │   ├── SRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   ├── plots
│   ├── raw_infernal
│   │   ├── MRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   │   ├── SRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   └── summary
│       ├── GCA_000091205.1_ASM9120v1_genomic
│       ├── GCA_000341285.1_ASM34128v1_genomic
│       ├── GCA_000350225.2_ASM35022v2_genomic
│       ├── ...
├── scripts
├── taxonomy
└── temp
```

## GERONIMO applicability

### Expanding the evolutionary context
To add new genomes or database queries to an existing analysis, please follow the instructions:
1) Rename the `list_of_genomes.txt` file to `previous_list_of_genomes.txt` or any other preferred name.
2) Modify the `config.yaml` file by replacing the previous database query with the new one.
3) Delete:
   - `summary_table.xlsx`, `part_summary_table.csv`, `summary_table_models.xlsx` files located in the `GERONIMO/results` directory
   - `.create_genome_list.touch` file
4) Run GERONIMO to calculate new results using the command:
     ```shell
     snakemake -s GERONIMO.sm --cores  --use-conda results/summary_table.xlsx
     ```
5) Once the new results are generated, reviewing them before merging them with the original results is recommended.
6) Copy the contents of the `previous_list_of_genomes.txt` file and paste them into the current `list_of_genomes.txt`.
7) Delete:
   - `summary_table.xlsx` located in the `GERONIMO/results` directory
   - `.create_genome_list.touch` file
8) Run GERONIMO to merge the results from both analyses using the command:
    ```shell
      snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx
    ```

### Incorporating new covariance models into existing analysis
1) Copy the new covariance model to `GERONIMO/models`
2) Modify the `config.yaml` file by adding the name of the new model to the line `models: [...]`
3) Run GERONIMO to see the updated analysis outcome

### Building a new covariance model
With GERONIMO, building a new covariance model from multiple sequence alignment in the `.stk` format is possible.

To do so, simply paste the `.stk` file into `GERONIMO/models_to_build` and add the name of the new covariance model to the `models: [\"\"]` line of the `config.yaml` file

and run GERONIMO.


## Questions & Answers

### How to specify the database query?
- Visit the [NCBI Assemblies] website. 
- Follow the instruction on the graphic below:

[NCBI Assemblies]: https://www.ncbi.nlm.nih.gov/assembly/?term=

### WSL: problem with creating `env_snakemake`
In the case of an error similar to the one below:
> CondaError: Unable to create prefix directory '/mnt/c/Windows/system32/env_snakemake'.
> Check that you have sufficient permissions. 
 
You might try to delete the cache with: `rm -r ~/.cache/` and try again.

### When `snakemake` does not seem to be installed properly
In the case of the following error:
> Command 'snakemake' not found ...

Check whether the `env_snakemake` is activated.
> It should result in a change from (base) to (env_snakemake) before your login name in the command line window.

If you still see `(base)` before your login name, please try to activate the environment with conda:
`conda activate env_snakemake`


Please note that you might need to specify the full path to the `env_snakemake`, like /home/your user name/env_snakemake

### How to browse GERONIMO results obtained in WSL?
You can easily access the results obtained on WSL from your Windows environment by opening `File Explorer` and pasting the following line into the search bar: `\\\\wsl.localhost\\Ubuntu\\home\\`. This will reveal a folder with your username, as specified during the configuration of your Ubuntu system. To locate the GERONIMO results, simply navigate to the folder with your username and then to the `home` folder (`\\\\wsl.localhost\\Ubuntu\\home\\<username>\\home\\GERONIMO`).

### GERONIMO occupies a lot of storage space
Through genome downloads, GERONIMO can potentially consume storage space, rapidly leading to a shortage. Currently, downloading genomes is an essential step for optimal GERONIMO performance.

Regrettably, if the analysis is rerun without the `/database` folder, it will result in the need to redownload genomes, which is a highly time-consuming process.

Nevertheless, if you do not intend to repeat the analysis and have no requirement for additional genomes or models, you are welcome to retain your results tables and plots while removing the remaining files.

Using local machines for extensive analyses is strongly discouraged. If you lack access to external storage space, it is recommended to divide the analysis into smaller segments, which can be later merged, as explained in the section titled `Expanding the evolutionary context`.

Considering this limitation, I am currently working on implementing a solution that will help circumvent the need for redundant genome downloads without compromising GERONIMO performance in the future.

You might consider deleting the `.snakemake` folder to free up storage space. However, please note that deleting this folder will require the reinstallation of GERONIMO dependencies when the analysis is rerun.

## License
Copyright (c) 2023 Agata M. Kilar

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the \"Software\"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

## Contact
mgr inż. Agata Magdalena Kilar, PhD (agata.kilar@gmail.com)""" ;
    <http://schema.org/encodingFormat> "application/ld+json" ;
    <http://schema.org/hasPart> <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/30957f04-50f4-4104-bb55-bfa5a2163f9c> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/4eea1967-7e3b-4f7f-8127-1f899c6d9167> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/53b4f5cd-4fc6-4fc1-8e8e-44be979b31cf> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/cd20e31e-53ae-4b2a-9333-1ec4bfe1b779> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/e2e22957-5f34-4552-9620-4f7cd4fd64e3> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/083e698f-333d-4c5d-9610-cc48cdad9e4d> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/17bc9250-5ec3-4696-a1e2-2d8fbfab6736> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/1c28c455-7dac-4035-8898-511615255bcb> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/34d6227c-e493-4c4d-a841-67685c68be6d> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/8cebbd0d-c2fc-4812-a092-5965ce747c14> ;
    <http://schema.org/identifier> "https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70" ;
    <http://schema.org/isBasedOn> "https://github.com/amkilar/GERONIMO.git" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/mainEntity> "GERONIMO.sm" ;
    <http://schema.org/name> "Research Object Crate for GERONIMO" ;
    <http://schema.org/url> "https://workflowhub.eu/workflows/547/ro_crate?version=1" ;
    a <http://purl.org/wf4ever/ro#ResearchObject> , <http://purl.org/wf4ever/roevo#LiveRO> , <http://schema.org/Dataset> ;
    <https://w3id.org/contentdesc#Domain> "https://w3id.org/ro-id/1c684234-851e-4998-a0d2-41c66b7a20d5" , "https://w3id.org/ro-id/a15f9eff-f256-4663-87fd-6ab855cd5941" , "https://w3id.org/ro-id/ad9304c3-ccf5-4d0f-8ca0-9f4da0cab28e" ;
    <https://w3id.org/ro/terms/earth-science#Concept> "https://w3id.org/ro-id/005c8422-6fce-4a35-862d-7f2ee4d9c4f5" , "https://w3id.org/ro-id/0f678463-a81a-47a0-a669-079e20fa0125" , "https://w3id.org/ro-id/54c71f37-c543-46a8-84b3-aaa8ae32fb6f" , "https://w3id.org/ro-id/58d0583c-0a1a-4ce8-83ee-3620189c1ddd" , "https://w3id.org/ro-id/7bffe30b-5a6e-4262-80b0-701944edc9f6" , "https://w3id.org/ro-id/7f74e75a-8601-49da-a852-353b1a3994f2" , "https://w3id.org/ro-id/82d5b0b6-0398-46cd-9cc2-8af27f478fc0" , "https://w3id.org/ro-id/83fc731e-3aa1-43c7-9cf5-ab6740c3633d" , "https://w3id.org/ro-id/8ac41430-786a-430f-abe4-37d599cdae43" , "https://w3id.org/ro-id/935b0973-3d62-49ac-9c5a-84cc5b147c8b" , "https://w3id.org/ro-id/9e31b1dd-d3ba-4d69-ad80-5549179acdc5" , "https://w3id.org/ro-id/a028a5da-e9aa-4ae3-b2d4-0ffaa42903f5" , "https://w3id.org/ro-id/acf35671-dadd-4cf7-9241-ed095c2c7de3" , "https://w3id.org/ro-id/af381567-5178-4371-a371-df7607665fc5" , "https://w3id.org/ro-id/b0e0a997-66e3-4631-ae36-b0b54cdee3d9" , "https://w3id.org/ro-id/ba2815fb-b1e0-4bd6-8934-8040df63fe9f" , "https://w3id.org/ro-id/c45ec5d3-c9ab-43ac-857d-f1acd9382c63" , "https://w3id.org/ro-id/c88a7dc6-d3d4-43a4-9de6-b0d62237c95f" , "https://w3id.org/ro-id/d548f2ff-c42c-4cba-b8fe-feee2ff0fb11" ;
    <https://w3id.org/ro/terms/earth-science#FieldOfResearch> "https://w3id.org/ro-id/3f7be379-7d08-4ee4-b1b9-aa17032ec971" , "https://w3id.org/ro-id/d4da8bc8-008c-4385-9640-9bed35f86494" ;
    <https://w3id.org/ro/terms/earth-science#IPTC> "https://w3id.org/ro-id/4fae7e6b-9f40-4a7c-b2aa-e85c1a97e8ec" , "https://w3id.org/ro-id/612078aa-2703-4c7b-843e-0b683ce23f9c" , "https://w3id.org/ro-id/73687a3c-6aaa-429a-9d13-7d4b854f803e" ;
    <https://w3id.org/ro/terms/earth-science#Lemma> "https://w3id.org/ro-id/15a63617-f9a8-4753-8830-af3a1bac7b4d" , "https://w3id.org/ro-id/17b944ac-f8bb-403d-bddf-98d90bd13b83" , "https://w3id.org/ro-id/28bfade8-878b-4c72-8135-4078fbd09e42" , "https://w3id.org/ro-id/359d5480-f589-47f7-aaae-ff4c8aeb9a7a" , "https://w3id.org/ro-id/3f7102cd-996b-4e5b-b498-525210a18b3a" , "https://w3id.org/ro-id/4b31f4de-1524-4cef-b8b3-72a240d63dd0" , "https://w3id.org/ro-id/5d9ce283-c305-46ae-99e6-e81b9acf9018" , "https://w3id.org/ro-id/9d5f1beb-ded6-4482-9c66-179de5feff0b" , "https://w3id.org/ro-id/a48c1ce5-f60b-461b-b1a9-249075c973e8" , "https://w3id.org/ro-id/bb5ac70a-e6aa-4213-95d8-a7cf4b58f3f6" , "https://w3id.org/ro-id/c32b7cb6-cdf9-45f8-9acd-64e8609d64f0" , "https://w3id.org/ro-id/d9478a50-7977-42f2-afc6-f9f5016caa29" , "https://w3id.org/ro-id/e4e53726-b83b-4025-ac62-990609bc13db" ;
    <https://w3id.org/ro/terms/earth-science#NASA> "https://w3id.org/ro-id/5b68f8b8-d94d-44ed-b47c-9b30492dd501" , "https://w3id.org/ro-id/cceabc2e-5232-4060-8697-5fcaa87b6f18" ;
    <https://w3id.org/ro/terms/earth-science#Phrase> "https://w3id.org/ro-id/05648ad5-fcc8-4f24-882a-0bf77f727e52" , "https://w3id.org/ro-id/0735a772-38d8-45e7-a3d2-e20f62056146" , "https://w3id.org/ro-id/285fc200-65ce-4175-afc6-da0c325375f0" , "https://w3id.org/ro-id/2c76f1de-2578-4660-8926-16dbd0f88985" , "https://w3id.org/ro-id/3a8d21f9-3481-47d7-925e-2c129cf76f8d" , "https://w3id.org/ro-id/5f76202b-8605-4866-9750-2ae44e74cf74" , "https://w3id.org/ro-id/630cd87c-2fd2-4a70-bbad-e85002d3cc15" , "https://w3id.org/ro-id/7589da61-b04f-425f-83c1-6dbbedd2d27a" , "https://w3id.org/ro-id/76e5ce8b-2407-4d11-adba-17e295f4b1bd" , "https://w3id.org/ro-id/869e48d4-c589-4db1-af1e-0a2a00da300b" , "https://w3id.org/ro-id/8cff22b7-ba04-4331-9c66-9839b47eaa32" , "https://w3id.org/ro-id/a20d1313-4ed3-40e3-b3a2-410652e82081" , "https://w3id.org/ro-id/a5ffdb9a-54c2-4a38-946c-77190ed34431" , "https://w3id.org/ro-id/b0a6cec6-4527-4450-8012-5fa257d2ab77" , "https://w3id.org/ro-id/b1d6ebd1-ab77-4337-b253-7b08f406c27e" , "https://w3id.org/ro-id/b591df3e-7def-4e90-a20b-28fcbbff4fc2" , "https://w3id.org/ro-id/c86349a6-01f9-4c71-a872-ce3527630004" , "https://w3id.org/ro-id/cde37df3-f892-4c6e-89a0-f8b532fde95d" , "https://w3id.org/ro-id/d2685e00-63e1-4365-b375-92539c6893ec" ;
    <https://w3id.org/ro/terms/earth-science#Sentence> "https://w3id.org/ro-id/14d92214-4a88-46fb-b596-4f2e99bfcc37" , "https://w3id.org/ro-id/87d45696-5e29-461e-b6bf-bc8fcc2a033f" , "https://w3id.org/ro-id/8865ad4d-7c4a-46a5-a12a-89213e7e14bd" , "https://w3id.org/ro-id/c6632496-4497-4e92-869f-1072a707de02" ;
    <https://www.w3.org/ns/iana/link-relations/relation#cite-as> "Agata Kilar. \"Research Object Crate for GERONIMO.\" ROHub. Sep 08 ,2023. https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70." .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/30957f04-50f4-4104-bb55-bfa5a2163f9c> <http://schema.org/hasPart> <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/1037d046-1068-4888-a38b-ba2ef9ce42eb> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/2062fe32-a75a-4957-a90b-8dafc5ad0367> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/6c59aa5d-1ba7-4bd1-8be4-ff050e3ead30> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/a2b33867-14fb-4100-b9a5-c8286bcf138e> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/af1cd576-0d29-4f18-be29-84e5ec3ccd82> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/b5c49c93-d1ad-4601-80c4-70277c4fd400> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/bf376d44-9a16-4535-92b1-4b66ad2a519a> ;
    <http://schema.org/name> "env" ;
    a <http://purl.org/wf4ever/wf4ever#Folder> , <http://schema.org/Dataset> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/4eea1967-7e3b-4f7f-8127-1f899c6d9167> <http://schema.org/hasPart> <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/76f62c7c-9e00-43e4-8065-99eee7aae198> ;
    <http://schema.org/name> "models_to_build" ;
    a <http://purl.org/wf4ever/wf4ever#Folder> , <http://schema.org/Dataset> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/53b4f5cd-4fc6-4fc1-8e8e-44be979b31cf> <http://schema.org/hasPart> <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/04fd81d6-ef3e-4e2f-8e50-ae6a40144ea2> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/07ffb5a3-347e-4f5d-883a-78f016ee907a> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/200efa59-3ae7-4970-aea3-e22645d28d96> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/280bae50-1fa1-45c8-ae1b-17fe40b9f300> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/3008db58-7827-4bf7-b378-3257e455e597> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/4b7473cf-f431-4b64-b5c2-199d9fd3f328> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/6664e5f4-08cb-4e0b-9436-e59a6719b6e6> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/9cbd68ad-d410-48e1-8bd2-3067f695e62d> ;
    <http://schema.org/name> "models" ;
    a <http://purl.org/wf4ever/wf4ever#Folder> , <http://schema.org/Dataset> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/b3fa73df-35cb-42e4-8891-f159c9ccc32c> <http://schema.org/hasPart> <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/39ab3c54-396c-4bdc-a0da-fdb563e1a32f> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/d642e7aa-836b-4808-a6a7-65043f69a119> ;
    <http://schema.org/name> "plots" ;
    a <http://purl.org/wf4ever/wf4ever#Folder> , <http://schema.org/Dataset> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/cd20e31e-53ae-4b2a-9333-1ec4bfe1b779> <http://schema.org/hasPart> <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/21369f0b-77af-4840-8160-0aaab2e6b15f> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/215f4518-6bf1-41f1-b1f3-72f45c0f387d> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/73838afc-95e9-4778-9397-b1177d41d5a9> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/81147bb0-01a7-4a8d-b2c6-d4aa632ba662> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/a5812e02-c6e1-4bf0-a13f-9fad72c73f79> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/c119db00-a9bb-469c-8aef-97d2927ad412> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/f985439a-75a1-4c88-9d85-c49b1bf41a9f> ;
    <http://schema.org/name> "scripts" ;
    a <http://purl.org/wf4ever/wf4ever#Folder> , <http://schema.org/Dataset> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/e2e22957-5f34-4552-9620-4f7cd4fd64e3> <http://schema.org/hasPart> <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/folders/b3fa73df-35cb-42e4-8891-f159c9ccc32c> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/229330c4-3395-4872-8fe5-94eae1b2b2ed> , <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/d7c7b947-a603-404d-bbba-42138827e166> ;
    <http://schema.org/name> "results" ;
    a <http://purl.org/wf4ever/wf4ever#Folder> , <http://schema.org/Dataset> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/04fd81d6-ef3e-4e2f-8e50-ae6a40144ea2> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 92192 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/04fd81d6-ef3e-4e2f-8e50-ae6a40144ea2/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.466271+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:10.301548+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "cov_model_U4" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.466271+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/07ffb5a3-347e-4f5d-883a-78f016ee907a> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 111328 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/07ffb5a3-347e-4f5d-883a-78f016ee907a/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.460885+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:09.341000+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "cov_model_U1" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.460885+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/083e698f-333d-4c5d-9610-cc48cdad9e4d> <http://schema.org/about> "https://ror.org/https://doi.org/10.48546/workflowhub.workflow.547.1" ;
    <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 72017 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/083e698f-333d-4c5d-9610-cc48cdad9e4d/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.481914+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:20.681373+00:00" ;
    <http://schema.org/encodingFormat> "text/html" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "ro-crate-preview.html" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.481914+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/CreativeWork> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/1037d046-1068-4888-a38b-ba2ef9ce42eb> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 657 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/1037d046-1068-4888-a38b-ba2ef9ce42eb/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.472015+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:12.815668+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "infernal_env.yaml" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.472015+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/17bc9250-5ec3-4696-a1e2-2d8fbfab6736> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 1071 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/17bc9250-5ec3-4696-a1e2-2d8fbfab6736/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.440532+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:06.435874+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "LICENSE" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.440532+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/1c28c455-7dac-4035-8898-511615255bcb> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> "21805" ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/1c28c455-7dac-4035-8898-511615255bcb/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.472901+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:20.104078+00:00" ;
    <http://schema.org/encodingFormat> "text/markdown" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "README.md" ;
    <http://schema.org/programmingLanguage> "#cwl" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.472901+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/HowTo> , <http://schema.org/MediaObject> , <http://schema.org/SoftwareSourceCode> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/200efa59-3ae7-4970-aea3-e22645d28d96> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 159869 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/200efa59-3ae7-4970-aea3-e22645d28d96/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.462474+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:09.540442+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "cov_model_SRP" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.462474+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/2062fe32-a75a-4957-a90b-8dafc5ad0367> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 6681 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/2062fe32-a75a-4957-a90b-8dafc5ad0367/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.470567+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:12.326066+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "make_summary_R.yaml" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.470567+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/21369f0b-77af-4840-8160-0aaab2e6b15f> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 4883 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/21369f0b-77af-4840-8160-0aaab2e6b15f/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.477109+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:14.349759+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "search_taxonomy.r" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.477109+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/215f4518-6bf1-41f1-b1f3-72f45c0f387d> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 2262 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/215f4518-6bf1-41f1-b1f3-72f45c0f387d/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.477887+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:14.755680+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "transform_cmdBLAST_output.R" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.477887+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/229330c4-3395-4872-8fe5-94eae1b2b2ed> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 213819 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/229330c4-3395-4872-8fe5-94eae1b2b2ed/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.454424+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:07.339998+00:00" ;
    <http://schema.org/encodingFormat> "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "summary_table.xlsx" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.454424+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/280bae50-1fa1-45c8-ae1b-17fe40b9f300> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 68255 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/280bae50-1fa1-45c8-ae1b-17fe40b9f300/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.458093+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:08.198257+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "cov_model_U6" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.458093+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/3008db58-7827-4bf7-b378-3257e455e597> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 146892 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/3008db58-7827-4bf7-b378-3257e455e597/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.459620+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:08.394076+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "cov_model_U3" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.459620+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/34d6227c-e493-4c4d-a841-67685c68be6d> dct:conformsTo "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/" ;
    <http://schema.org/author> <https://workflowhub.eu/people/522> ;
    <http://schema.org/contentSize> "6740" ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/34d6227c-e493-4c4d-a841-67685c68be6d/download/" ;
    <http://schema.org/creator> <https://workflowhub.eu/people/522> ;
    <http://schema.org/dateCreated> "2023-08-01 01:34:42+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:20.296455+00:00" ;
    <http://schema.org/description> """# GERONIMO

## Introduction
GERONIMO is a bioinformatics pipeline designed to conduct high-throughput homology searches of structural genes using covariance models. These models are based on the alignment of sequences and the consensus of secondary structures. The pipeline is built using Snakemake, a workflow management tool that allows for the reproducible execution of analyses on various computational platforms. 

The idea for developing GERONIMO emerged from a comprehensive search for [telomerase RNA in lower plants] and was subsequently refined through an [expanded search of telomerase RNA across Insecta]. GERONIMO can test hundreds of genomes and ensures the stability and reproducibility of the analyses performed.


[telomerase RNA in lower plants]: https://doi.org/10.1093/nar/gkab545
[expanded search of telomerase RNA across Insecta]: https://doi.org/10.1093/nar/gkac1202

## Scope
The GERONIMO tool utilises covariance models (CMs) to conduct homology searches of RNA sequences across a wide range of gene families in a broad evolutionary context. Specifically, it can be utilised to:

* Detect RNA sequences that share a common evolutionary ancestor
* Identify and align orthologous RNA sequences among closely related species, as well as paralogous sequences within a single species
* Identify conserved non-coding RNAs in a genome, and extract upstream genomic regions to characterise potential promoter regions. 
It is important to note that GERONIMO is a computational tool, and as such, it is intended to be run on a computer with a small amount of data. Appropriate computational infrastructure is necessary for analysing hundreds of genomes.

Although GERONIMO was primarily designed for Telomerase RNA identification, its functionality extends to include the detection and alignment of other RNA gene families, including **rRNA**, **tRNA**, **snRNA**, **miRNA**, and **lncRNA**. This can aid in identifying paralogs and orthologs across different species that may carry specific functions, making it useful for phylogenetic analyses. 

It is crucial to remember that some gene families may exhibit similar characteristics but different functions. Therefore, analysing the data and functional annotation after conducting the search is essential to characterise the sequences properly.

## Pipeline overview


By default, the GERONIMO pipeline conducts high-throughput searches of homology sequences in downloaded genomes utilizing covariance models. If a significant similarity is detected between the model and genome sequence, the pipeline extracts the upstream region, making it convenient to identify the promoter of the discovered gene. In brief, the pipeline:
- Compiles a list of genomes using the NCBI's [Entrez] database based on a specified query, *e.g. \"Rhodophyta\"[Organism]*
- Downloads and decompresses the requested genomes using *rsync* and *gunzip*, respectively
- *Optionally*, generates a covariance model based on a provided alignment using [Infernal]
- Conducts searches among the genomes using the covariance model [Infernal]
- Supplements genome information with taxonomy data using [rentrez]
- Expands the significant hits sequence by extracting upstream genomic regions using [*blastcmd*]
- Compiles the results, organizes them into a tabular format, and generates a visual summary of the performed analysis.

[Entrez]: https://www.ncbi.nlm.nih.gov/books/NBK179288/
[Infernal]: http://eddylab.org/infernal/
[rentrez]: https://github.com/ropensci/rentrez
[*blastcmd*]: https://www.ncbi.nlm.nih.gov/books/NBK569853/

## Quick start
The GERONIMO is available as a `snakemake pipeline` running on Linux and Windows operating systems.

### Windows 10
Instal Linux on Windows 10 (WSL) according to [instructions], which bottling down to opening PowerShell or Windows Command Prompt in *administrator mode* and pasting the following:
```shell
wsl --install
wsl.exe --install UBUNTU
```
Then restart the machine and follow the instructions for setting up the Linux environment.

[instructions]: https://learn.microsoft.com/en-us/windows/wsl/install

### Linux:
#### Check whether the conda is installed:
```shell
conda -V
```
&gt; GERONIMO was tested on conda 23.3.1
#### 1) If you do not have installed `conda`, please install `miniconda`
Please follow the instructions for installing [miniconda]

[miniconda]: https://conda.io/projects/conda/en/stable/user-guide/install/linux.html

#### 2) Continue with installing `mamba` (recommended but optional)
```shell
conda install -n base -c conda-forge mamba
```
#### 3) Install `snakemake`
```shell
conda activate base
mamba create -p env_snakemake -c conda-forge -c bioconda snakemake
mamba activate env_snakemake
snakemake --help
```
In case of complications, please check the section `Questions &amp; Answers` below or follow the [official documentation] for troubleshooting.

[official documentation]: https://snakemake.readthedocs.io/en/stable/getting_started/installation.html

### Clone the GERONIMO repository
Go to the path in which you want to run the analysis and clone the repository:
```shell
cd
git clone https://github.com/amkilar/GERONIMO.git
```

### Run sample analysis to ensure GERONIMO installation was successful
All files are prepared for the sample analysis as a default. Please execute the line below:
```shell
snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx
```

This will prompt GERONIMO to quickly scan all modules, verifying the correct setup of the pipeline without executing any analysis.
You should see the message `Building DAG of jobs...`, followed by `Nothing to be done (all requested files are present and up to date).`, when successfully completed.

If you want to run the sample analysis fully, please remove the folder `results` from the GERONIMO directory and execute GERONIMO again with:

`snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx`

&gt; You might consider allowing more cores to speed up the analysis, which might take up to several hours.

#### You might want to clean `GERONIMO/` directory from the files produced by the example analysis. You can safely remove the following:
- `GERONIMO/results`
- `GERONIMO/database`
- `GERONIMO/taxonomy`
- `GERONIMO/temp`
- `.create_genome_list.touch`
- `list_of_genomes.txt`

## Setup the inputs

### 1) Prepare the `covariance models`:

#### Browse the collection of available `covariance models` at [Rfam] (*You can find the covariance model in the tab `Curation`.*) 
Paste the covariance model to the folder `GERONIMO/models` and ensure its name follows the convention: `cov_model_`

[Rfam]: https://rfam.org/

#### **OR**

#### Prepare your own `covariance model` using [LocARNA]
1. Paste or upload your sequences to the web server and download the `.stk` file with the alignment result. 
 
    &gt; *Please note that the `.stk` file format is crucial for the analysis, containing sequence alignment and secondary structure consensus.*
   
    &gt; The LocARNA web service allows you to align 30 sequences at once - if you need to align more sequences, please use the standalone version available [here] 
    &gt; After installation run:
    ```shell
    mlocarna my_fasta_sequences.fasta
    ```
 
2. Paste the `.stk` alignment file to the folder `GERONIMO/model_to_build` and ensure its name follows the convention: `.stk`

   &gt; Please check the example `heterotrichea.stk` format in `GERONIMO/models_to_built` for reference
   

[LocARNA]: http://rna.informatik.uni-freiburg.de/LocARNA/Input.jsp
[here]: http://www.bioinf.uni-freiburg.de/Software/LocARNA/


### 2) Adjust the `config.yaml` file
Please adjust the analysis specifications, as in the following example:

&gt; - database: ' [Organism]' (in case of difficulties with defining the database query, please follow the instructions below)
&gt; - extract_genomic_region-length:   (here you can determine how long the upstream genomic region should be extracted; tested for 200)
&gt; - models: [\"\", \"\"] (here specify the names of models that should be used to perform analysis)
&gt;   
&gt;   *Here you can also insert the name of the covariance model you want to build with GERONIMO - just be sure you placed `.stk` file in `GERONIMO/models_to_build` before starting analysis*
&gt; - CPU_for_model_building:  (specify the number of available CPUs devoted to the process of building model (cannot exceed the CPU number allowed to snakemake with `--cores`)
&gt;
&gt;   *You might ignore this parameter when you do not need to create a new covariance model*


Keep in mind that the covariance models and alignments must be present in the respective GERONIMO folders.

### 3) Remove folder `results`, which contains example analysis output
### 4) **Please ensure you have enough storage capacity to download all the requested genomes (in the `GERONIMO/` directory)**

## Run GERONIMO
```shell
mamba activate env_snakemake
cd ~/GERONIMO
snakemake -s GERONIMO.sm --cores  --use-conda results/summary_table.xlsx
```
 
## Example results

### Outputs characterisation

#### A) Summary table
The Excel table contains the results arranged by taxonomy information and hit significance. The specific columns include:
* family, organism_name, class, order, phylum (taxonomy context)
* GCA_id - corresponds to the genome assembly in the *NCBI database*
* model - describes which covariance model identified the result
* label - follows the *Infernal* convention of categorizing hits
* number - the counter of the result
* e_value - indicates the significance level of the hit
* HIT_sequence - the exact HIT sequence found by *Infernal*, which corresponds to the covariance model
* HIT_ID - describes in which part of the genome assembly the hit was found, which may help publish novel sequences
* extended_genomic_region - upstream sequence, which may contain a possible promoter sequence
* secondary_structure - the secondary structure consensus of the covariance model


#### B) Significant Hits Distribution Across Taxonomy Families
The plot provides an overview of the number of genomes in which at least one significant hit was identified, grouped by family. The bold black line corresponds to the number of genomes present in each family, helping to minimize bias regarding unequal data representation across the taxonomy.


#### C) Hits Distribution in Genomes Across Families
The heatmap provides information about the most significant hits from the genome, identified by a specific covariance model. Genomes are grouped by families (on the right). Hits are classified into three categories based on their e-values. Generally, these categories correspond to hit classifications (\"HIT,\" \"MAYBE,\" \"NO HIT\"). The \"HIT\" category is further divided to distinguish between highly significant hits and moderately significant ones.



### GERONIMO directory structure

The GERONIMO directory structure is designed to produce files in a highly structured manner, ensuring clear insight and facilitating the analysis of results. During a successful run, GERONIMO produces the following folders:
* `/database` - which contains genome assemblies that were downloaded from the *NCBI database* and grouped in subfolders
* `/taxonomy` - where taxonomy information is gathered and stored in the form of tables
* `/results` - the main folder containing all produced results:
  * `/infernal_raw` - contains the raw results produced by *Infernal*
  * `/infernal` - contains restructured results of *Infernal* in table format
  * `/cmdBLAST` - contains results of *cmdblast*, which extracts the extended genomic region
  * `/summary` - contains summary files that join results from *Infernal*, *cmdblast*, and attach taxonomy context
  * `/plots` - contains two types of summary plots
* `/temp` - folder contains the information necessary to download genome assemblies from *NCBI database*

* `/env` - stores instructions for dependency installation
* `/models` - where calibrated covariance models can be pasted, *for example, from the Rfam database*
* `/modes_to_built` - where multiple alignments in *.stk* format can be pasted
* `/scripts` - contains developed scripts that perform results structurization

#### The example GERONIMO directory structure:

```shell
GERONIMO
├── database
│   ├── GCA_000091205.1_ASM9120v1_genomic
│   ├── GCA_000341285.1_ASM34128v1_genomic
│   ├── GCA_000350225.2_ASM35022v2_genomic
│   └── ...
├── env
├── models
├── model_to_build
├── results
│   ├── cmdBLAST
│   │   ├── MRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   └── ...
│   │   ├── SRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   │   ├── extended
│   │   │   │   └── filtered
│   │   │   └── ...
│   │   ├── ...
│   ├── infernal
│   │   ├── MRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   │   ├── SRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   ├── plots
│   ├── raw_infernal
│   │   ├── MRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   │   ├── SRP
│   │   │   ├── GCA_000091205.1_ASM9120v1_genomic
│   │   │   ├── GCA_000341285.1_ASM34128v1_genomic
│   │   │   ├── GCA_000350225.2_ASM35022v2_genomic
│   │   │   ├── ...
│   └── summary
│       ├── GCA_000091205.1_ASM9120v1_genomic
│       ├── GCA_000341285.1_ASM34128v1_genomic
│       ├── GCA_000350225.2_ASM35022v2_genomic
│       ├── ...
├── scripts
├── taxonomy
└── temp
```

## GERONIMO applicability

### Expanding the evolutionary context
To add new genomes or database queries to an existing analysis, please follow the instructions:
1) Rename the `list_of_genomes.txt` file to `previous_list_of_genomes.txt` or any other preferred name.
2) Modify the `config.yaml` file by replacing the previous database query with the new one.
3) Delete:
   - `summary_table.xlsx`, `part_summary_table.csv`, `summary_table_models.xlsx` files located in the `GERONIMO\\results` directory
   - `.create_genome_list.touch` file
5) Run GERONIMO to calculate new results using the command:
     ```shell
     snakemake -s GERONIMO.sm --cores  --use-conda results/summary_table.xlsx
     ```
7) Once the new results are generated, reviewing them before merging them with the original results is recommended.
8) Copy the contents of the `previous_list_of_genomes.txt` file and paste them into the current `list_of_genomes.txt`.
9) Delete:
   - `summary_table.xlsx` located in the `GERONIMO\\results` directory
   - `.create_genome_list.touch` file
10) Run GERONIMO to merge the results from both analyses using the command:
    ```shell
      snakemake -s GERONIMO.sm --cores 1 --use-conda results/summary_table.xlsx
    ```

### Incorporating new covariance models into existing analysis
1) Copy the new covariance model to `GERONIMO/models`
2) Modify the `config.yaml` file by adding the name of the new model to the line `models: [...]`
3) Run GERONIMO to see the updated analysis outcome

### Building a new covariance model
With GERONIMO, building a new covariance model from multiple sequence alignment in the `.stk` format is possible.

To do so, simply paste `.stk` file to `GERONIMO/models_to_build` and paste the name of the new covariance  model to `config.yaml` file to the line `models: [\"\"]`

and run GERONIMO.


## Questions &amp; Answers

### How to specify the database query?
- Visit the [NCBI Assemblies] website. 
- Follow the instruction on the graphic below:

[NCBI Assemblies]: https://www.ncbi.nlm.nih.gov/assembly/?term=

### WSL: problem with creating `snakemake_env`
In the case of an error similar to the one below:
&gt; CondaError: Unable to create prefix directory '/mnt/c/Windows/system32/env_snakemake'.
&gt; Check that you have sufficient permissions. 
 
You might try to delete the cache with: `rm -r ~/.cache/` and try again.

### When `snakemake` does not seem to be installed properly
In the case of the following error:
&gt; Command 'snakemake' not found ...

Check whether the `env_snakemake` is activated.
&gt; It should result in a change from (base) to (env_snakemake) before your login name in the command line window.

If you still see `(base)` before your login name, please try to activate the environment with conda:
`conda activate env_snakemake`


Please note that you might need to specify the full path to the `env_snakemake`, like /home/your user name/env_snakemake

### How to browse GERONIMO results obtained in WSL?
You can easily access the results obtained on WSL from your Windows environment by opening `File Explorer` and pasting the following line into the search bar: `\\\\wsl.localhost\\Ubuntu\\home\\`. This will reveal a folder with your username, as specified during the configuration of your Ubuntu system. To locate the GERONIMO results, simply navigate to the folder with your username and then to the `home` folder. (`\\\\wsl.localhost\\Ubuntu\\home\\\\home\\GERONIMO`)

### GERONIMO occupies a lot of storage space
Through genome downloads, GERONIMO can potentially consume storage space, rapidly leading to a shortage. Currently, downloading genomes is an essential step for optimal GERONIMO performance.

Regrettably, if the analysis is rerun without the `/database` folder, it will result in the need to redownload genomes, which is a highly time-consuming process.

Nevertheless, if you do not intend to repeat the analysis and have no requirement for additional genomes or models, you are welcome to retain your results tables and plots while removing the remaining files.

It is strongly advised against using local machines for extensive analyses. If you lack access to external storage space, it is recommended to divide the analysis into smaller segments, which can be later merged, as explained in the section titled `Expanding the evolutionary context`.

Considering this limitation, I am currently working on implementing a solution that will help circumvent the need for redundant genome downloads without compromising GERONIMO performance in the future.

You might consider deleting the `.snakemake` folder to free up storage space. However, please note that deleting this folder will require the reinstallation of GERONIMO dependencies when the analysis is rerun.

## License
Copyright (c) 2023 Agata M. Kilar

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the \"Software\"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

## Contact
mgr inż. Agata Magdalena Kilar, PhD (agata.kilar@gmail.com)""" ;
    <http://schema.org/image> "README.md" ;
    <http://schema.org/keywords> "Bioinformatics, Snakemake, rna" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "GERONIMO" ;
    <http://schema.org/producer> "https://workflowhub.eu/projects/178" ;
    <http://schema.org/programmingLanguage> "#snakemake" ;
    <http://schema.org/sdDatePublished> "2023-08-01 01:34:42+00:00" ;
    <http://schema.org/sdPublisher> "https://about.workflowhub.eu/" ;
    <http://schema.org/subjectOf> "README.md" ;
    <http://schema.org/url> "https://workflowhub.eu/workflows/547?version=1" ;
    <http://schema.org/version> "1" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> , <http://schema.org/SoftwareSourceCode> , <https://bioschemas.org/ComputationalWorkflow> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/39ab3c54-396c-4bdc-a0da-fdb563e1a32f> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 354983 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/39ab3c54-396c-4bdc-a0da-fdb563e1a32f/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.450498+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:06.925272+00:00" ;
    <http://schema.org/encodingFormat> "image/png" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "Hits_distribution_across_families.png" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.450498+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/4b7473cf-f431-4b64-b5c2-199d9fd3f328> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 129276 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/4b7473cf-f431-4b64-b5c2-199d9fd3f328/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.463907+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:09.908441+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "cov_model_U2" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.463907+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/6664e5f4-08cb-4e0b-9436-e59a6719b6e6> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 77501 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/6664e5f4-08cb-4e0b-9436-e59a6719b6e6/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.465058+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:10.101003+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "cov_model_U5" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.465058+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/6c59aa5d-1ba7-4bd1-8be4-ff050e3ead30> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 8159 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/6c59aa5d-1ba7-4bd1-8be4-ff050e3ead30/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.469780+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:11.784941+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "search_taxonomy_r_env.yaml" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.469780+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/73838afc-95e9-4778-9397-b1177d41d5a9> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 3019 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/73838afc-95e9-4778-9397-b1177d41d5a9/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.478657+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:14.941074+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "create_input_for_cmdBLAST.R" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.478657+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/76f62c7c-9e00-43e4-8065-99eee7aae198> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 1962 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/76f62c7c-9e00-43e4-8065-99eee7aae198/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.480716+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:15.392082+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "heterotrichera.stk" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.480716+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/81147bb0-01a7-4a8d-b2c6-d4aa632ba662> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 16338 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/81147bb0-01a7-4a8d-b2c6-d4aa632ba662/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.476362+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:14.172718+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "read_results_infernal.R" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.476362+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/8cebbd0d-c2fc-4812-a092-5965ce747c14> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 218 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/8cebbd0d-c2fc-4812-a092-5965ce747c14/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.455163+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:07.781299+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "config.yaml" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.455163+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/9cbd68ad-d410-48e1-8bd2-3067f695e62d> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 176087 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/9cbd68ad-d410-48e1-8bd2-3067f695e62d/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.457012+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:08.004058+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "cov_model_MRP" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.457012+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/a2b33867-14fb-4100-b9a5-c8286bcf138e> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 1833 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/a2b33867-14fb-4100-b9a5-c8286bcf138e/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.467422+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:10.485032+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "blast_env.yaml" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.467422+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/a5812e02-c6e1-4bf0-a13f-9fad72c73f79> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 2567 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/a5812e02-c6e1-4bf0-a13f-9fad72c73f79/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.479423+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:15.204459+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "join_infernal_and_BLASTcmd_results.R" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.479423+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/af1cd576-0d29-4f18-be29-84e5ec3ccd82> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 538 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/af1cd576-0d29-4f18-be29-84e5ec3ccd82/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.471287+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:12.619195+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "entrez_env.yaml" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.471287+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/b5c49c93-d1ad-4601-80c4-70277c4fd400> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 6627 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/b5c49c93-d1ad-4601-80c4-70277c4fd400/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.468996+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:11.117606+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "r_tidyverse_env.yaml" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.468996+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/bf376d44-9a16-4535-92b1-4b66ad2a519a> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 6670 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/bf376d44-9a16-4535-92b1-4b66ad2a519a/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.468204+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:10.908257+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "cmdBLAST_to_R_env.yaml" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.468204+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/c119db00-a9bb-469c-8aef-97d2927ad412> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 459 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/c119db00-a9bb-469c-8aef-97d2927ad412/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.475571+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:13.878329+00:00" ;
    <http://schema.org/encodingFormat> "text/x-sh" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "cmdBLAST.sh" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.475571+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/d642e7aa-836b-4808-a6a7-65043f69a119> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 851052 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/d642e7aa-836b-4808-a6a7-65043f69a119/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.447483+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:06.648816+00:00" ;
    <http://schema.org/encodingFormat> "image/png" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "Hits_distribution_heatmap.png" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.447483+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/d7c7b947-a603-404d-bbba-42138827e166> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 227587 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/d7c7b947-a603-404d-bbba-42138827e166/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.452494+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:04:07.138945+00:00" ;
    <http://schema.org/encodingFormat> "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "summary_table_models.xlsx" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.452494+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/resources/f985439a-75a1-4c88-9d85-c49b1bf41a9f> <http://schema.org/author> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/contentSize> 7577 ;
    <http://schema.org/contentUrl> "https://api.rohub.org/api/resources/f985439a-75a1-4c88-9d85-c49b1bf41a9f/download/" ;
    <http://schema.org/creator> <https://orcid.org/0000-0003-2388-0744> ;
    <http://schema.org/dateCreated> "2023-09-08 12:04:05.474815+00:00" ;
    <http://schema.org/dateModified> "2023-09-08 12:05:13.695278+00:00" ;
    <http://schema.org/license> <https://choosealicense.com/no-permission/> ;
    <http://schema.org/name> "make_table_plots.R" ;
    <http://schema.org/sdDatePublished> "2023-09-08 12:04:05.474815+00:00" ;
    a <http://purl.org/wf4ever/wf4ever#Resource> , <http://schema.org/MediaObject> .
  <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/ro-crate-metadata.json> dct:conformsTo <https://w3id.org/ro/crate/1.1> ;
    <http://schema.org/about> <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/> ;
    a <http://schema.org/CreativeWork> .
  <https://w3id.org/ro-id/8ac41430-786a-430f-abe4-37d599cdae43> <http://schema.org/name> "search" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "6.270903010033445" ;
    <https://w3id.org/ro/terms/earth-science#score> "7.5" .
  <https://w3id.org/ro-id/8cff22b7-ba04-4331-9c66-9839b47eaa32> <http://schema.org/name> "database' folder" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.6474164133738602" ;
    <https://w3id.org/ro/terms/earth-science#score> "2.4" .
  <https://w3id.org/ro-id/935b0973-3d62-49ac-9c5a-84cc5b147c8b> <http://schema.org/name> "ribonucleic acid" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "7.5250836120401345" ;
    <https://w3id.org/ro/terms/earth-science#score> "9.0" .
  <https://w3id.org/ro-id/9d5f1beb-ded6-4482-9c66-179de5feff0b> <http://schema.org/name> "username" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "5.5464926590538335" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.4" .
  <https://w3id.org/ro-id/9e31b1dd-d3ba-4d69-ad80-5549179acdc5> <http://schema.org/name> "username" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "5.518394648829432" ;
    <https://w3id.org/ro/terms/earth-science#score> "6.6" .
  <https://w3id.org/ro-id/a028a5da-e9aa-4ae3-b2d4-0ffaa42903f5> <http://schema.org/name> "covariance" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "7.859531772575251" ;
    <https://w3id.org/ro/terms/earth-science#score> "9.4" .
  <https://w3id.org/ro-id/a15f9eff-f256-4663-87fd-6ab855cd5941> <http://schema.org/name> "software" ;
    a <https://w3id.org/contentdesc#Domain> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "27.474747474747474" ;
    <https://w3id.org/ro/terms/earth-science#score> "13.6" .
  <https://w3id.org/ro-id/a20d1313-4ed3-40e3-b3a2-410652e82081> <http://schema.org/name> "The software" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "2.8875379939209727" ;
    <https://w3id.org/ro/terms/earth-science#score> "1.9" .
  <https://w3id.org/ro-id/a48c1ce5-f60b-461b-b1a9-249075c973e8> <http://schema.org/name> "file" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "7.6672104404567705" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.7" .
  <https://w3id.org/ro-id/a5ffdb9a-54c2-4a38-946c-77190ed34431> <http://schema.org/name> "database query" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "7.750759878419452" ;
    <https://w3id.org/ro/terms/earth-science#score> "5.1" .
  <https://w3id.org/ro-id/acf35671-dadd-4cf7-9241-ed095c2c7de3> <http://schema.org/name> "directory" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.3444816053511706" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.0" .
  <https://w3id.org/ro-id/ad9304c3-ccf5-4d0f-8ca0-9f4da0cab28e> <http://schema.org/name> "genetics" ;
    a <https://w3id.org/contentdesc#Domain> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "13.93939393939394" ;
    <https://w3id.org/ro/terms/earth-science#score> "6.9" .
  <https://w3id.org/ro-id/af381567-5178-4371-a371-df7607665fc5> <http://schema.org/name> "throughput" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "2.508361204013378" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.0" .
  <https://w3id.org/ro-id/b0a6cec6-4527-4450-8012-5fa257d2ab77> <http://schema.org/name> "homology search" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.4954407294832825" ;
    <https://w3id.org/ro/terms/earth-science#score> "2.3" .
  <https://w3id.org/ro-id/b0e0a997-66e3-4631-ae36-b0b54cdee3d9> <http://schema.org/name> "folder" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "8.862876254180602" ;
    <https://w3id.org/ro/terms/earth-science#score> "10.6" .
  <https://w3id.org/ro-id/b1d6ebd1-ab77-4337-b253-7b08f406c27e> <http://schema.org/name> "permission notice" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "2.8875379939209727" ;
    <https://w3id.org/ro/terms/earth-science#score> "1.9" .
  <https://w3id.org/ro-id/b591df3e-7def-4e90-a20b-28fcbbff4fc2> <http://schema.org/name> "GERONIMO tool" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.4954407294832825" ;
    <https://w3id.org/ro/terms/earth-science#score> "2.3" .
  <https://w3id.org/ro-id/ba2815fb-b1e0-4bd6-8934-8040df63fe9f> <http://schema.org/name> "pipeline" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "8.19397993311037" ;
    <https://w3id.org/ro/terms/earth-science#score> "9.8" .
  <https://w3id.org/ro-id/bb5ac70a-e6aa-4213-95d8-a7cf4b58f3f6> <http://schema.org/name> "folder" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "8.97226753670473" ;
    <https://w3id.org/ro/terms/earth-science#score> "5.5" .
  <https://w3id.org/ro-id/c32b7cb6-cdf9-45f8-9acd-64e8609d64f0> <http://schema.org/name> "GERONIMO" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "12.39804241435563" ;
    <https://w3id.org/ro/terms/earth-science#score> "7.6" .
  <https://w3id.org/ro-id/c45ec5d3-c9ab-43ac-857d-f1acd9382c63> <http://schema.org/name> "homology" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.5953177257525084" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.3" .
  <https://w3id.org/ro-id/c6632496-4497-4e92-869f-1072a707de02> <http://schema.org/name> "However, please note that deleting this folder will require the reinstallation of GERONIMO dependencies when the analysis is rerun." ;
    a <https://w3id.org/ro/terms/earth-science#Sentence> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "20.88607594936709" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.3" .
  <https://w3id.org/ro-id/c86349a6-01f9-4c71-a872-ce3527630004> <http://schema.org/name> "file explorer" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "5.471124620060791" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.6" .
  <https://w3id.org/ro-id/c88a7dc6-d3d4-43a4-9de6-b0d62237c95f> <http://schema.org/name> "genome" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "9.448160535117058" ;
    <https://w3id.org/ro/terms/earth-science#score> "11.3" .
  <https://w3id.org/ro-id/cceabc2e-5232-4060-8697-5fcaa87b6f18> <http://schema.org/name> "computer programming and software" ;
    a <https://w3id.org/ro/terms/earth-science#NASA> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "100.0" ;
    <https://w3id.org/ro/terms/earth-science#score> "0.41168108582496643" .
  <https://w3id.org/ro-id/cde37df3-f892-4c6e-89a0-f8b532fde95d> <http://schema.org/name> "Rfam database" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "2.8875379939209727" ;
    <https://w3id.org/ro/terms/earth-science#score> "1.9" .
  <https://w3id.org/ro-id/d2685e00-63e1-4365-b375-92539c6893ec> <http://schema.org/name> "Detect RNA sequence" ;
    a <https://w3id.org/ro/terms/earth-science#Phrase> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.6474164133738602" ;
    <https://w3id.org/ro/terms/earth-science#score> "2.4" .
  <https://w3id.org/ro-id/d4da8bc8-008c-4385-9640-9bed35f86494> <http://schema.org/name> "earth sciences" ;
    a <https://w3id.org/ro/terms/earth-science#FieldOfResearch> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "100.0" ;
    <https://w3id.org/ro/terms/earth-science#score> "0.9349703192710876" .
  <https://w3id.org/ro-id/d548f2ff-c42c-4cba-b8fe-feee2ff0fb11> <http://schema.org/name> "taxonomy" ;
    a <https://w3id.org/ro/terms/earth-science#Concept> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "3.3444816053511706" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.0" .
  <https://w3id.org/ro-id/d9478a50-7977-42f2-afc6-f9f5016caa29> <http://schema.org/name> "search" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "6.688417618270798" ;
    <https://w3id.org/ro/terms/earth-science#score> "4.1" .
  <https://w3id.org/ro-id/e4e53726-b83b-4025-ac62-990609bc13db> <http://schema.org/name> "name" ;
    a <https://w3id.org/ro/terms/earth-science#Lemma> ;
    <https://w3id.org/ro/terms/earth-science#normScore> "4.893964110929853" ;
    <https://w3id.org/ro/terms/earth-science#score> "3.0" .
  <https://workflowhub.eu/people/522> <http://schema.org/name> "Agata Kilar" ;
    a <http://schema.org/Person> .
  <https://workflowhub.eu/projects/178> <http://schema.org/name> "Mendel Centre for Plant Genomics and Proteomics" ;
    a <http://schema.org/Organization> , <http://schema.org/Project> .
}
sub:provenance {
  sub:assertion prov:wasDerivedFrom <https://api.rohub.org/api/ros/892d05bf-da84-4be9-9ab7-60eba5f73c70/crate/download/ro-crate-metadata.json> .
}
sub:pubinfo {
  this: dct:created "2025-11-11T16:09:16.035+01:00"^^xsd:dateTime ;
    npx:introduces <https://w3id.org/ro-id/892d05bf-da84-4be9-9ab7-60eba5f73c70/> ;
    a npx:RoCrateNanopub ;
    rdfs:label "Research Object Crate for GERONIMO" .
  sub:sig npx:hasAlgorithm "RSA" ;
    npx:hasPublicKey "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4pPaESKwmC6l37P86K6TNLq6yeQtc7m9CvcqauLs/1FC0viHvQnFBgxj0a+loPDv/Egwe6OqFpa0iW9Ypnyz9YPoh+pxbRXonbuMOb+8Ry9hXZ+TEKfWjhjVDGEaClwfRwglh2HI/xfV4CD9AgvDOEoZQiyta8a90PYwJ3G6e70oCHTn61+OWTkI9KRYHOYgg3btdy2Z7q/30PTFawb2ZT5aIfIJYobUYv2a7yhtcqWCHZeKv0bxGnRjTFNx1rscBMlLJSzvRtpQc1cCRVEPFZHo1adaXCI9tGvn4cxeNQ96y8dxkN1XhpaJairde+23MDzf42Oe97KG2HYzKiyVnQIDAQAB" ;
    npx:hasSignature "2wqnBh1ev32QIZtPLmjLfElb3kGQK7i4YbhM2CyEmEh8Bkuib6BYJ9WRECh92ytxjBhveTbeKOAWTGeU9yTzDfvzundH22XYiTkhYm1hJei9tezNxdIE/GwG4f2BWzzegFtaxAkh52+yZJK2HgWfz9Xwo4ZOnZUArbJtxP/S1Nlbl/p6I+Cdwbm3lKRtpINW2X2GE/VVvSpEa3KfJxit11Ot3iFjgOKGfWs3Mrl46kIGi6nxtrMTwllKkyS2FnnV2Ofa7IUl66jR2OB+gi1ivTNs3ztuWolMi+ssMBs9R/dJQLJxDDx4Fzy4DzcPTMIx+gP+CSAz7B1yOoJkUr5eGA==" ;
    npx:hasSignatureTarget this: ;
    npx:signedBy <https://w3id.org/kpxl/gen/terms/RoCrateBot> .
}