# sqs version 3.2 by John Alroy # performs shareholder quorum subsampling on an array of specimen counts # can be used to perform classical rarefaction instead of SQS # written 29 July 2010; version 2.0 completed 14 February 2011; versions 3.0, # 3.1, 3.2, and 3.3 written 3 June, 12 July, 19-23 August, and 2 December 2011 # change in version 3.3: fixed bug in computation of maximum estimated coverage # changes in version 3.2: specimens drawn and subsampled U are based on # geometric means of counts; ignore.singletons option added # change in version 3.1: an even better frequency adjustment involving # singletons and doubletons # changes in version 3.0: an even better subsampling algorithm involving a new # adjustment to u combined with a new throwback criterion # changes in version 2.0: improved subsampling algorithm; including the dominant # taxon is now the default; improved reporting of errors and basic statistics # warning: do not use this program with taxonomic occurrence data drawn from # multiple published references because it is not designed to count # single-reference taxa or adjust for long taxonomic lists # warning: version 1.0 yields estimates that are downwards-biased when q < 0.6 # and abundance distributions are highly uneven sqs<-function(ab,q,trials,method,ignore.singletons,dominant) { params <- array(data=NA,dim=0,dimnames=c("raw richness")) if (missing(trials)) { trials <- 100 } if (missing(method)) { method <- "" } else if (method != "" && method != "rarefaction" && method != "CR") { return(print('If the method is rarefaction enter method="rarefaction" or "CR"',quote=F)) } if ((q <= 0 || q >= 1) && method != "rarefaction" && method != "CR") { return(print("If the method is SQS the quota must be greater than zero and less than one",quote=F)) } else if (q < 1 && (method == "rarefaction" || method == "CR")) { return(print("If the method is rarefaction the quota must be an integer",quote=F)) } ignore <- 0 if (! missing(ignore.singletons) && (ignore.singletons == T || ignore.singletons == "yes" || ignore.singletons == "y")) { ignore <- 1 } if (missing(dominant)) { dominant <- 0 } else if (dominant != "" && dominant != "exclude" && dominant != "no") { return(print('To exclude the dominant taxon, enter dominant="exclude" or "no"',quote=F)) } # compute basic statistics specimens <- sum(ab) singletons <- 0 doubletons <- 0 highest <- 0 for (i in 1:length(ab)) { if (ab[i] == 1) { singletons <- singletons + 1 } else if (ab[i] == 2) { doubletons <- doubletons + 1 } if (ab[i] > highest) { highest <- ab[i] mostfrequent <- i } } u <- 1 - singletons / specimens # optionally, exclude the dominant taxon if (dominant == "exclude" || dominant == "no") { u <- 1 - singletons / (specimens - highest) } if (u == 0) { return(print("Coverage is zero because all taxa are singletons",quote=F)) } # compute raw taxon frequencies (temporarily) freq <- ab / specimens # standard recursive equation for Fisher's alpha alpha <- 10 oldalpha <- 0 while (abs(alpha - oldalpha) > 0.0000001) { oldalpha <- alpha alpha <- length(ab) / log(1 + specimens/alpha) } params["raw richness"] <- length(ab) params["Good's u"] <- u params["subsampled richness"] <- NA params["subsampled u"] <- NA params["Chao 1"] <- length(ab) + singletons**2/(2* doubletons) params["subsampled Chao 1"] <- NA # governing parameter of the geometric series distribution params["k"] <- abs(lm(log(sort(freq)) ~ c(1:length(freq)))$coefficients[2]) params["Fisher's alpha"] <- alpha params["Shannon's H"] <- -1 * sum(freq * log(freq)) params["Hurlbert's PIE"] <- (1 - sum(freq**2)) * length(ab) / (length(ab) - 1) params["dominance"] <- highest / specimens params["specimens"] <- specimens params["singletons"] <- singletons params["doubletons"] <- doubletons params["specimens drawn"] <- 0 if (dominant != "exclude" && dominant != "no") { highest <- 0 mostfrequent <- 0 } # return if the rarefaction quota is equal to or higher than the # specimen count if (method == "rarefaction" && q >= specimens - highest) { return(params) } # compute taxon frequencies (tweak added in version 3.1) freq <- ab - (singletons + doubletons / 2) / length(ab) freq <- freq / (specimens - highest) # return if the quorum target is higher than estimated coverage if ((q > sum(freq) && method != "rarefaction" && method != "CR") || (q >= sum(ab))) { return(params) } # create an array in which each cell corresponds to one specimen ids <- array() n <- 0 for (i in 1:length(ab)) { for (j in 1:ab[i]) { n <- n + 1 ids[n] <- i } } # subsampling trial loop # s will be the subsampled taxon count s <- array(rep(0,trials)) drawn <- array(rep(0,trials)) mostfrequentdrawn <- array(rep(0,trials)) subsingle <- array(rep(0,trials)) subdouble <- array(rep(0,trials)) subchao <- array(rep(0,trials)) for (trial in 1:trials) { pool <- ids left <- length(pool) seen <- array(data=rep(0,length(ab))) subfreq <- array(rep(0,length(ab))) if (method != "rarefaction" && method != "CR") { udrawn <- 0 # equation new to version 3.0 # the exponent corrects for downwards bias while (udrawn < q) { # draw a specimen x <- floor(runif(1,min=1,max=left+1)) # add to frequency and taxon sums if species has # not been drawn previously subfreq[pool[x]] <- subfreq[pool[x]] + 1 if (seen[pool[x]] == 0) { if (pool[x] != mostfrequent && (ignore == 0 || ab[pool[x]] > 1)) { udrawn <- udrawn + freq[pool[x]] } seen[pool[x]] <- 1 # randomly throw back some draws that put the sum over q # (an even better algorithm added in version 3.0) if (runif(1) <= freq[pool[x]] || udrawn < q) { s[trial] <- s[trial] + 1 } else { subfreq[pool[x]] <- subfreq[pool[x]] - 1 } } # decrease pool of specimens not yet drawn pool[x] <- pool[left] left <- left - 1 } } else { i <- 0 draws <- 0 while (i < q) { draws <- draws + 1 x <- floor(runif(1,min=1,max=length(ids)-draws+2)) subfreq[pool[x]] <- subfreq[pool[x]] + 1 if (pool[x] != mostfrequent) { i <- i + 1 } if (seen[pool[x]] == 0) { seen[pool[x]] <- 1 s[trial] <- s[trial] + 1 } pool[x] <- pool[length(ids)-draws+1] } } for (i in 1:length(ab)) { if (subfreq[i] == 1 && i != mostfrequent) { subsingle[trial] <- subsingle[trial] + 1 } else if (subfreq[i] == 2 && i != mostfrequent) { subdouble[trial] <- subdouble[trial] + 1 } } if (subsingle[trial] > 0 && subdouble[trial] > 0) { subchao[trial] <- s[trial] + subsingle[trial]**2/(2*subdouble[trial]) } else { subchao[trial] <- s[trial] } drawn[trial] <- sum(subfreq) if (mostfrequent != 0) { mostfrequentdrawn[trial] <- subfreq[mostfrequent] } } # compute vectors of non-zero counts options(warn=-1) s2 <- sort(sqrt(s-1))^2+1 d2 <- sort(sqrt(drawn-1))^2+1 m2 <- sort(sqrt(mostfrequentdrawn-1))^2+1 ss2 <- sort(sqrt(subsingle-1))^2+1 options(warn=0) # compute geometric means params["subsampled richness"] <- exp(mean(log(s2))) * length(s2)/length(s) params["specimens drawn"] <- exp(mean(log(d2))) * length(d2)/length(drawn) meanmost <- 0 if (sum(mostfrequentdrawn) > 0) { meanmost <- exp(mean(log(m2))) * length(m2)/length(mostfrequentdrawn) } meansubsingle <- exp(mean(log(ss2))) * length(ss2)/length(subsingle) params["subsampled u"] <- 1 - meansubsingle / (params["specimens drawn"] - meanmost) params["subsampled Chao 1"] <- exp(mean(log(subchao))) return(params) }