# Simulate presence/absence records with an unequal number of replicates per
# site, then reduce the data to exactly one record per site.
#
# Selection rule for sites with several replicates:
#   * if at least one replicate records a presence (quercus == 1), pick one of
#     the presence replicates at random;
#   * otherwise pick any replicate at random.
# Sites with a single replicate are kept as they are (repID stays NA).
#
# Objects created: possiblesites, probabilities, site, quercus, d, site.freq,
# dups, loners, d.new.

# Simulated data ----
possiblesites <- 1:100                            # possible site values
probabilities <- seq(0.8, 0.1, length.out = 100)  # unequal sampling weights
# Sample sites with replacement so that replicate counts differ between sites.
site <- sort(sample(possiblesites, 500, replace = TRUE, prob = probabilities))
# Binary data simulating species presence (1) / absence (0).
quercus <- rbinom(length(site), 1, prob = 0.5)
d <- data.frame(site, quercus)
head(d)

# Split sites into replicated and unreplicated ----
site.freq <- table(d$site)                     # number of records per site
dups <- names(site.freq[site.freq > 1])        # sites with replicates
loners <- names(site.freq[site.freq == 1])     # sites with a single record

# Start the result with the single-record sites. rep() (rather than a bare NA)
# keeps the assignment valid even when there are no such sites (zero rows).
d.new <- d[d$site %in% loners, ]
d.new$repID <- rep(NA_integer_, nrow(d.new))   # which replicate was selected

# Choose one replicate per replicated site ----
# lapply() over `dups` is a no-op when `dups` is empty, and collecting the
# rows in a list avoids growing the data frame inside a loop.
picked <- lapply(dups, function(code) {
  rows <- which(d$site == code)                # rows of d belonging to site
  present <- which(d$quercus[rows] == 1)       # replicates with a presence
  pool <- if (length(present) > 0) present else seq_along(rows)
  # Index via sample.int(): sample(pool, 1) would draw from 1:pool whenever
  # pool is a single number > 1 and could therefore select an absence row.
  select <- pool[sample.int(length(pool), 1)]
  cbind(d[rows[select], ], repID = select)
})
d.new <- do.call(rbind, c(list(d.new), picked))

# Checks ----
length(unique(d$site))       # number of original sites
cbind(table(table(d$site)))  # how many sites occur once, twice, ...
table(table(d.new$site))     # every site should now occur exactly once