Full list of parameters
Global settings of resources
Global settings for max resources available per process in your system
- max_memory
- max_cpus
- max_time
General settings per run
- project: project name. Max 50 chars. No spaces allowed. Use only alphanumeric, underscores, or dashes
- project_date: usually configured automatically to execution date, but you can override the date manually
- outdir: a specific outdir for the project, if null the project id is used as output dir, otherwise
outdir/project_id/ - chromosomes: chromosomes to include in the analysis. Accepts comma separated lists and intervals like
1-5,8-12,22 - master_log_dir: set this to redirect all logs to a specific folder
Multi-model specific settings
models_table = null //when a table is provided, this activates master table mode
pheno_chunk_size = 50 //maximum number of phenotypes for a single GWAS run
missing_tolerance = 0.1 //max fraction of missing values per phenotype when assembling execution groups
Input files
Genetic data
genotypes_array = null //genotypes data for step 1. Can be bgen, pgen, bed or vcf.gz
genotypes_imputed = null //genotype data for gwas analysis. Can be bgen, pgen, bed or vcf.gz
genotypes_rarevar = null //genotype data for rare variants analysis. Can be bgen, pgen, plink bed or vcf.gz
genotypes_build = null //genome build
genotypes_imputed_format = null //input data format for gwas. Can be bgen, pgen, bed or vcf
genotypes_rarevar_format = null //input data format for rare variants. Can be bgen, pgen, bed or vcf
imputed_sample_file = 'NO_SAMPLE_FILE' //Provide a specific sample file to use with gwas bgen input
rarevar_sample_file = 'NO_SAMPLE_FILE' //Provide a specific sample file to use with rare variants bgen input
ld_panel = 'NO_LD_FILE' //A pattern pointing to a subset of genotypes to be used for LD computation. Optional, but highly reccomended for large datasets
Phenotypes
phenotypes_filename = null //table of phenos - required
phenotypes_columns = null //comma separated list of col names in pheno file to be analyzed
phenotypes_binary_trait = null //true for binary
Covariates
covariates_filename = 'NO_COV_FILE' //files containing covariates, use NO_COV_FILE when absent
covariates_columns = '' //comma-separated list of covariates column names
covariates_cat_columns = '' //comma-separated list of categorical covariate column names
maxCatLevels = 10 //maximum number of allowed levels for categorical covars
Rare variants accessory files
These are mandatory when running rare var analysis
rarevar_set_list_file = null //set list file as defined in regenie docs
rarevar_anno_file = null //variant annotation file as defined in regenie docs
rarevar_mask_file = null //mask definition file as defined in regenie docs
Other optional inputs
genes_bed = false //an optional .bed file specifying genes intervals used for results annotation
genes_ranges = false //an optional .interval file specifying genes intervals used for loci annotation
STEP1 PRE-PROCESSING SETTINGS
SNP pruning
prune_enabled = false
prune_maf = 0.01
prune_window_kbsize = 1000
prune_step_size = 100
prune_r2_threshold = 0.9
Filtering
qc_maf = '0.01'
qc_mac = '100'
qc_geno = '0.05'
qc_hwe = '1e-15'
qc_mind = '0.1'
REGENIE STEP1 SETTINGS
regenie_bsize_step1 = 1000
regenie_premade_predictions = false //or pattern to regenie step1 files
save_step1_predictions = true
regenie_force_step1 = false
regenie_ref_first_step1 = false
step1_use_loocv = false
step1_niter = 30
step1_n_chunks = 100 // N chunks when performing step1 L0 regression
REGENIE STEP2 SETTINGS
General settings
phenotypes_delete_missings = false //remove samples with missing data at any of the phenotypes
regenie_bsize_step2 = 400
regenie_ref_first_step2 = true
regenie_skip_predictions = false //skip reading the step1 predictions (corresponds to simple linear/logistic regression)
regenie_range = '' // when splitting is not active you can use this to specify a genomic range for step2 analysis
regenie_extract_snps = '' // when splitting is not active you can specify a file containing a list of variant IDs to restrict step2 analysis
regenie_extract_genes = '' // when splitting is not active you can use this to specify a file containing a list of genes to restrict step2 analysis
interaction_cov = null // run GxE test in GWAS specifying the interacting covariate from covariate table
interaction_snp = null // run GxG test in GWAS specifying the interacting variant ID
condition_list = null // run conditional analysis in GWAS specifying a files with variant IDs to condition on
additional_geno_file = null // prefix of the genotype dataset containing vars in condition_list or interaction var. This is mandatory for conditional or interaction analysis
additional_geno_format = null // can be bgen, pgen or bed.
save_chunks_file = true
save_step2_logs = true
save_bgen_index = true
save_bgen_sample = true
save_converted_bgen = true
VCF to PGEN conversion settings
gwas_read_dosage_from = 'DS' //DS (usually for VCF from imputateion) or GP (usually VCF from sequencing)
rarevar_read_dosage_from = 'GP' //DS (usually for VCF from imputateion) or GP (usually VCF from sequencing)
import_dosage_certainty = 0.7 //when using GP, the certainty threshold to import dosages. If none of the probabilities is above this, the genotype is set to missing
vcf_fixed_fid = null //when null vcf is converted to pgen using --double-id, otherwise fid is fixed to this value
GWAS analysis settings
step2_gwas_split = true //when true activate split of step2 by variant chunks
step2_gwas_chunk_size = 100000 //n variants per chunk when running gwas in split mode
regenie_gwas_test = 'additive' //additive, dominant, recessive
regenie_min_imputation_score = '0.00'
regenie_gwas_min_mac = '50' // min MAC for variants to be included in step2 for gwas
regenie_firth = true
regenie_firth_approx = true
Rare variant analysis settings
step2_rarevar_split = true //when true activate split of step2 by gene chunks
step2_rarevar_chunk_size = 200 //n genes per chunk when running rare variant test in split mode
regenie_rarevar_min_mac = '1' // min MAC for variants to be included in step2 for rare vars
rarevars_aaf_bins = '0.01,0.05' //comma-separated list of AAF upper bounds to use when building masks for burden test
rarevars_vc_test = 'skat,skato,acatv,acato' //comma-separated list of SKAT/ACAT-type tests to run
rarevars_joint_test = 'minp,acat' //comma-separated list of joint tests to run. Choices: minp,acat,sbat. Note that sbat can't run on binary phenotypes
rarevars_vc_maxAAF = '0.05' //AAF upper bound to use for SKAT/ACAT-type tests
regenie_build_mask = 'max' //build mask for rare variant test. Can be max, sum, or a comphet
rarevars_write_mask_snplist = false //when true write list of variants that went into each mask to file
GWAS RESULTS ANNOTATION AND CLUMPING
genes_group = 'protein_coding' //genes group to use for annotation. Can be all or protein_coding
annotation_min_log10p = 7.3 //results with -log10(p) above this will be reported as top hits with annotated genes
annotation_interval_kb = 25
clumping = true
clump_p1 = 5e-8
clump_p2 = 1e-4
clump_kb = 250
RARE VARIANTS RESULTS ANNOTATION
rarevar_min_log10p = 5 //results with -log10(p) above this will be reported as top hits
rarevar_stat_test = "BONF_bygroup" //Stat value to filter on. Possible values: "FDR_bygroup", "FDR_alltests", "BONF_bygroup", "BONF_alltests"
rarevar_stat_test_threshold = 1.3 //results with -log10(stat_value) above this will be annotated in a dedicated manhattan plot
REPORT SETTINGS
make_report = true //it can be useful to disable when analyzing many phenotypes
manhattan_annotations = 'genes' //how to annotate peaks in manhattan plot. Either 'genes' or 'snpid'
regional_plot_window_kb = 300 //window size in kb for regional plots. This value is added on each side of the locus when plotting
n_top_loci_plot = 5 //number of top loci to plot in regional plots