###script R, version 24/05/2016
###authors: Christine Plumejeaud-Perreau and Anne S. Philippe

#####Method for assigning the individuals sampled by boat either to the "top" or to the "bottom" fraction #####
		 ###an example using Scrobicularia plana in site AIC in 2014######

  ##assess the percentage in the top depending on the length for scrobicularia plana individuals sampled by foot
aicscrfoot<-subset(benthos, benthos$abr=="scr"&benthos$car_top_bottom=='VRAI'&benthos$car_mod_id=="1"&benthos$site=="AIG")
attach(aicscrfoot)
for (i in aicscrfoot$class_length){
  a=aicscrfoot[aicscrfoot$class_length == i,]
  aicscrfoot$fractiontop[aicscrfoot$class_length == i] = length(a$number[a$T.B.H=="top"])
  aicscrfoot$fractionbottom[aicscrfoot$class_length==i]=length(a$number[a$T.B.H=="bott"])}
detach(aicscrfoot)
aicscrfoot$fractiontop[is.na(aicscrfoot$fractiontop)]<-0
aicscrfoot$fractionbottom[is.na(aicscrfoot$fractionbottom)]<-0
aicscrfoot$percenttop<-aicscrfoot$fractiontop/(aicscrfoot$fractionbottom+aicscrfoot$fractiontop) #calculates the percentage in the top for each class length

plot(aicscrfoot$percenttop~ aicscrfoot$class_length) #this plot shows that the smaller individuals are more likely to be found in the top layer
keeps<-c("class_length","percenttop")
aicscrfoot<-aicscrfoot[,keeps,drop=FALSE]#reduce the dataframe to necessary columns
aicscrfoot<-aicscrfoot[aicscrfoot$percenttop!=0,] ##do not take into account probabilities of 0
aicscrfoot<-aicscrfoot[aicscrfoot$class_length!=1,] ### do not take into account class_length 1 which is an outlier

  ##build a regression based on the dataset sampled by foot "aicscrfoot" (in Aiguillon Bay, all years together), to assess the probability of beeing in the top depending on the class_length
tmp.y <- log(aicscrfoot$percenttop)
tmp.x <- log(aicscrfoot$class_length)
tmp <- lm(tmp.y~tmp.x)
tmp$coef
par(new=T)
curve(exp(tmp$coef[1] + tmp.coef[2]*log(x)), from=1, to=40, , xlab="x", ylab="y", xaxt="n", yaxt="n",bty="n", col='red') ##apply the curve to the plot to check for fitt
tmp.predict <- exp(tmp$coef[1]+tmp$coef[2]*log(aicscrfoot$class_length))
aicscrfoot$percenttopbis <- tmp.predict
aicscrfoot$percenttopbis[aicscrfoot$percenttopbis>1]<-1
aicscrfoot$percenttopbis<-round(aicscrfoot$percenttopbis,1)
par(new=T)
plot(aicscrfoot$percenttopbis~ aicscrfoot$class_length, pch=19, col="red") # add the extrapolation model to the curve based on "aicscrfoot"
unique(aicscrfoot$percenttopbis[aicscrfoot$class_length==10]) ##example: the probability of individuals in the top for class length 10 is 0.5 (50%)
keeps<- c("class_length","percenttopbis")
aicscrfoot<-aicscrfoot[,keeps, drop=FALSE]
aicscrfoot<-aicscrfoot[!duplicated(aicscrfoot), ]#reduce the dataframe "aicscrfoot" to only get the probability values for each class_length

  ###now apply this model on the individuals of the dataframe "data", (i.e. Scro, in site AIC sampled by boats)
data<-subset(benthos, benthos$abr=="scr" & benthos$st_sit_id=="aic" & benthos$year==2014 & benthos$car_mod_id==2) #select the data we need to extrapolate
unique(data$T.B.H) # gives either "larg" or "small", one of the two cores sampled by boat
data<-merge(aicscrfoot, data, by="class_length", all.y=TRUE) #merge "data" with "aicscrfoot" to assign a probability of beeing in the top fraction to each ind. sampled by boat
unique(data$class_length[is.na(data$percenttopbis)])# 33 34 37 38 39
unique(data$percenttopbis[data$class_length>30]) # for individuals larger than 30 mm, there are only 020% chance to find them in the top

library(Hmisc)
data$test<-NA ##created a new column to be compared with the column given in the benthos dataset "newTBH", this column "test" will contain the results of the top/bottom extrapolation
length(data$test[is.na(data$test)]) ##there are 178 individuals to assign to the top or the bottom fraction in the column named "test"


unique(data$percenttopbis[data$class_length<5]) #gets a unique probability value of 90%
length(data$T.B.H[data$class_length<5]) #one individual smaller than 5mm with a probability of 90% beeing in the top
w<- sample(rep(c("top","bott"),c(9,1))) #creates a random vector of "top" and "bott" values, with 90% chance of beeing "top"
data$test[data$class_length<5]<-w[1]#take the first element of the w vector, with 90% chance of beeing "top"
length(data$test[is.na(data$test)]) ## 177 individuals left

unique(data$percenttopbis[data$class_length==5]) #gets a unique probability value of 90%
length(data$T.B.H[data$class_length==5]) #3 individual of 5mm long with a probability of 80% beeing in the top
w<- sample(rep(c("top","bott"),c(8,2))) #creates a random vector of "top" and "bott" values, with 80% chance of beeing "top"
data$test[data$class_length==5]<-w[1:3]#take the first 3 element of the w vector, with 80% chance of beeing "top"
length(data$test[is.na(data$test)]) ## 174 individuals left

unique(data$percenttopbis[data$class_length>5&data$class_length<7]) #gets a unique probability value of 90%
length(data$T.B.H[data$class_length>5&data$class_length<7]) #1 individual with a probability of 70% beeing in the top
w<- sample(rep(c("top","bott"),c(7,3))) #creates a random vector of "top" and "bott" values, with 70% chance of beeing "top"
data$test[data$class_length>5&data$class_length<7]<-w[1]#take the first element of the w vector, with 70% chance of beeing "top"
length(data$test[is.na(data$test)]) ## 173 individuals left

unique(data$percenttopbis[data$class_length>6&data$class_length<9]) #gets a unique probability value of 90%
length(data$T.B.H[data$class_length>6&data$class_length<9]) 
w<- sample(rep(c("top","bott"),c(6,4))) 
data$test[data$class_length>6&data$class_length<9]<-w[1:2]
length(data$test[is.na(data$test)]) ## 171 individuals left

unique(data$percenttopbis[data$class_length>8&data$class_length<11]) #gets a unique probability value of 90%
length(data$T.B.H[data$class_length>8&data$class_length<11]) 
w<- sample(rep(c("top","bott"),c(5,5))) 
data$test[data$class_length>8&data$class_length<11]<-w[1:5]
length(data$test[is.na(data$test)]) ## 166 individuals left

unique(data$percenttopbis[data$class_length>10&data$class_length<15]) #gets a unique probability value of 90%
length(data$T.B.H[data$class_length>10&data$class_length<15]) 
w<- sample(rep(c("top","bott"),c(40,60))) 
data$test[data$class_length>10&data$class_length<15]<-w[1:22]
length(data$test[is.na(data$test)]) ## 144 individuals left

unique(data$percenttopbis[data$class_length>14&data$class_length<24]) #gets a unique probability value of 90%
length(data$T.B.H[data$class_length>14&data$class_length<24]) 
w<- sample(rep(c("top","bott"),c(15,34))) 
data$test[data$class_length>14&data$class_length<24]<-w[1:49]
length(data$test[is.na(data$test)]) ## 95 individuals left

unique(data$percenttopbis[data$class_length>23&data$class_length<40]) #gets a unique probability value of 90%
length(data$T.B.H[data$class_length>23&data$class_length<40]) 
w<- sample(rep(c("top","bott"),c(19,76))) 
data$test[data$class_length>23&data$class_length<40]<-w[1:95]
length(data$test[is.na(data$test)]) ## 0 individuals left
unique(data$test) # "top" or "bott", accessibility was assessed semi-randomnly based on probabilities for all 178 scrobicularia individuals sampled by boat in site AIC year 2014


rm(aicscrfoot,keeps, tmp, tmp.predict, tmp.x, tmp.y,v,w) #remove all unnecessary objects from the working environment