A few people have asked for examples that show complete work flows from beginning to end. Here are a few such examples. They assume only that ADMIXTOOLS 2 is installed and R is started. The disadvantage of following these recipes is that they always start from scratch, and don’t take advantage of precomputed f-statistics, which would be faster. It’s easy to adapt them so they do take advantage of that, but at that point, you might as well read the regular tutorial.
Each recipe begins by defining the input and output files, as well as other parameters. They usually have names like mypop1
, or /my/files/prefix
. This needs to be replaced with populations in your files, or a path that points to data on your computer. prefix
means that if your genotype files have names like data.geno
, data.snp
, and data.ind
, you should leave out the ending and just type data
(see Data formats for a description of the file formats). As an example, if the genotype files are in C:\Users\Robert\Documents\
, you would have to replace /my/files/prefix
with C:/Users/Robert/Documents/data
. (On Windows you might have to replace backslashes with forward slashes.)
You can also copy and paste the recipes into a file and execute them from the command line using Rscript
. For example, you can copy the first example into a file called compute_fst.R
, replace the file and population names, and run it by typing Rscript compute_fst.R
.
data = '/my/files/prefix'
outfile = '/my/files/out.txt'
pop1 = 'mypop1'
pop2 = 'mypop2'
library(admixtools)
library(tidyverse)
out = fst(data, pop1, pop2, adjust_pseudohaploid = FALSE)
# `adjust_pseudohaploid = FALSE` will make sure that you always
# get results, even for populations with only a single
# pseudohaploid sample. Estimates for populations with only a
# single pseudohaploid can be biased, which is why the default
# behavior adjust_pseudohaploid = TRUE can result in an error.
write_tsv(out, outfile)
This assumes that you want to compute FST for all pairs in a list of populations which are listed one per line in /my/pops.txt
. To get all pairs from all populations in the genotype data, simply set popfile = ''
.
data = '/my/files/prefix'
outfile = '/my/files/out.txt'
popfile = '/my/files/pops.txt'
library(admixtools)
library(tidyverse)
if(file.exists(popfile)) pops = NULL else pops = read_lines(popfile)
out = fst(data, pop1 = pops, adjust_pseudohaploid = FALSE)
# `adjust_pseudohaploid = FALSE` will make sure that you always
# get results, even for populations with only a single
# pseudohaploid sample. Estimates for populations with only a
# single pseudohaploid can be biased, which is why the default
# behavior adjust_pseudohaploid = TRUE can result in an error.
write_tsv(out, outfile)
data = '/my/files/prefix'
outfile = '/my/files/out.txt'
pop1 = 'Chimp'
pop2 = 'Mbuti'
pop3 = 'Russia_Ust_Ishim'
pop4 = 'Kostenki'
library(admixtools)
library(tidyverse)
if(file.exists(popfile)) pops = NULL else pops = read_lines(popfile)
out = f4(data, pop1, pop2, pop3, pop4)
write_tsv(out, outfile)
data = '/my/files/prefix'
outfile = '/my/files/out.txt'
library(admixtools)
library(tidyverse)
out = f4(data, unique_only = TRUE)
# set `unique_only = FALSE` to also return combinations
# where some populations occur more than once
write_tsv(out, outfile)
data = '/my/files/prefix'
outfile = '/my/files/out.txt'
popfile = '/my/files/pops.txt'
library(admixtools)
library(tidyverse)
out = f4(data, pop1 = popfile)
write_tsv(out, outfile)
In the examples above, different SNPs may be used for different f-statistics, if some data is missing. To avoid any biases that can result from selecting different SNPs, you can set allsnps = FALSE
and use only SNPs which are non-missing in all populations for which f4 is computed. This type of SNP selection is also used when computing f4 from pre-computed f2-statistics which were generated with the default maxmiss = 0
setting.
data = '/my/files/prefix'
outfile = '/my/files/out.txt'
library(admixtools)
library(tidyverse)
out = f4(data, allsnps = FALSE)
write_tsv(out, outfile)
D-statistics only differ from by f4-statistics by a scaling factor. To compute D-statistics, you can use the same commands as for computing f4-statistics. The only difference is the argument f4mode = FALSE
. D-statistics cannot be computed from pre-computed f2-statistics.
data = '/my/files/prefix'
outfile = '/my/files/out.txt'
library(admixtools)
library(tidyverse)
out = f4(data, f4mode = FALSE)
write_tsv(out, outfile)
data = '/my/files/prefix'
outprefix = '/my/files/out_prefix'
left = c('leftpop1', 'leftpop2', 'leftpop3')
right = c('rightpop1', 'rightpop2', 'rightpop3', 'rightpop4')
target = c('targetpop')
library(admixtools)
library(tidyverse)
out = qpadm(data, left, right, target)
write_tsv(out$weights, paste0(outprefix, '_weights.tsv'))
write_tsv(out$f4, paste0(outprefix, '_f4.tsv'))
write_tsv(out$rankdrop, paste0(outprefix, '_rankdrop.tsv'))
write_tsv(out$popdrop, paste0(outprefix, '_popdrop.tsv'))
data = '/my/files/prefix'
outprefix = '/my/files/out_prefix'
models = tibble(
left = list(c('pop1', 'pop2'), c('pop3')),
right = list(c('pop5', 'pop6', 'pop7'), c('pop7', 'pop8')),
target = c('pop10', 'pop10'))
library(admixtools)
library(tidyverse)
out = qpadm_multi(data, models, full_results = FALSE)
out %>% rowwise %>%
mutate(across(c(left, right), ~paste0(., collapse = ','))) %>%
write_tsv(paste0(outprefix, '.tsv'))
data = '/my/files/prefix'
outprefix = '/my/files/out_prefix'
models = tibble(
left = list(c('pop1', 'pop2'), c('pop3')),
right = list(c('pop5', 'pop6', 'pop7'), c('pop7', 'pop8')),
target = c('pop10', 'pop10'))
library(admixtools)
library(tidyverse)
out = qpadm_multi(data, models, full_results = TRUE)
out %>% purrr::transpose() %>%
iwalk(~bind_rows(.x, .id = 'model') %>%
write_tsv(paste0(outprefix, .y, '.tsv')))
data = '/my/files/prefix'
outprefix = '/my/files/out_prefix'
target = 'targetpop'
leftright = c('pop1', 'pop2', 'pop3', 'pop4')
rightfix = c('pop5', 'pop6')
library(admixtools)
library(tidyverse)
models = rotate_models(leftright, target, rightfix)
out = qpadm_multi(data, models, full_results = FALSE)
# there is also `qpadm_rotate`, but it only works with pre-computed f2-statistics
out %>% rowwise %>%
mutate(across(c(left, right), ~paste0(., collapse = ','))) %>%
write_tsv(paste0(outprefix, '.tsv'))
data = '/my/files/prefix'
outprefix = '/my/files/out_prefix'
graphfile = '/my/files/graph.txt'
library(admixtools)
library(tidyverse)
# use this if you have a graph in a text file in the old ADMIXTOOLS format
graph = parse_qpgraph_graphfile(graphfile) %>% edges_to_igraph()
# use this if you have a graph in a text file in ADMIXTOOLS 2 format
graph = read_table2(graphfile) %>% edges_to_igraph()
out = qpgraph(data, graph, return_fstats = TRUE)
sc = tibble(a = c('score', 'worst_residual'), b = c(out$score, out$worst_residual))
write_tsv(out$edges, paste0(outprefix, '_edges.tsv'))
write_tsv(out$f3, paste0(outprefix, '_f3.tsv'))
write_tsv(out$f4, paste0(outprefix, '_f4.tsv'))
write_tsv(sc, paste0(outprefix, '_score.tsv'))
data = '/my/files/prefix'
outprefix = '/my/files/out_prefix'
popfile = '/my/files/pops.txt'
numadmix = 3
library(admixtools)
library(tidyverse)
if(file.exists(popfile)) pops = NULL else pops = read_lines(popfile)
f2_blocks = f2_from_geno(data)
results = find_graphs(f2_blocks, numadmix = numadmix)
winner = results %>% slice_min(score)
write_tsv(winner$edges[[1]], paste0(outprefix, '_edges.tsv'))
write_lines(paste0('score\t', winner$score[[1]]), paste0(outprefix, '_score.tsv'))