#################################################
##
## Code zur Erstellung der GermanSL
##
## Florian Jansen / Sept 2014
#################################################
# 1. Merge all sources
# 2. Apply corrections
# 3. Change SIPNR system

# setwd("/home/jansen/.wine/drive_c/Turbowin/Species/GermanSL 1.3")
source('code/functions.r')
# source('~/workspace/vegdata/vegdata/R/tax_add.r')

# rm(list = ls())
options(width=140)
options(warn=1)
options(stringsAsFactors=FALSE)
#Sys.setlocale(category='LC_ALL',locale='de_DE.iso885915@euro')
#Sys.setlocale(category='LC_ALL', locale='German_Germany.1252')
#library(Hmisc)
library(RSQLite)
library(vegdata)
bfn <-'tv_refliste.sqlite'
m <- dbDriver("SQLite")
con <- dbConnect(m, dbname = bfn)
dbListTables(con)
# mdb.get(bfn,tables=TRUE)

#### Wisskirchen = höhere Pflanzen
Wissk <- dbGetQuery(con, "SELECT * FROM FLORKART_V_TAX_PHAN")
names(Wissk)
Wissk <- exCol(Wissk)
Wissk$NACHWEIS <- 'BfN(Wisskirchen u. Haeupler 1998)'
Wissk$SECUNDUM <- 'BfN(Wisskirchen u. Haeupler 1998)'

# höhere Taxa
HTax <- dbGetQuery(con,"SELECT * FROM FLORKART_TAX_SYSTHIGH")
HTax <- exCol(HTax)
names(HTax)[names(HTax) == 'EPI1'] <- 'TAXNAME'
HTax$NACHWEIS <- 'Wisskirchen u. Haeupler 1998'
HTax$SECUNDUM <- 'Wisskirchen u. Haeupler 1998'
HTax$GROUP <- 'S'
HTax$"Hybrid" <- 0

#### Koperski = Moose
Bryo <- dbGetQuery(con,"SELECT * FROM FLORKART_V_TAX_BRYO")
Bryo <- exCol(Bryo)
names(Bryo)[names(Bryo)%in%c('NAME')] <- c('TAXNAME')
Bryo$NACHWEIS <- 'BfN(Koperski et al.2000)'
Bryo$SECUNDUM <- 'BfN(Koperski et al.2000)'
Bryo$GROUP <- 'M'
names(Bryo)[names(Bryo) == 'BY'] <- 'BAY'

# Flechten
Lich <- dbGetQuery(con, "SELECT * FROM FLORKART_V_TAX_LICH")
Lich <- exCol(Lich)
names(Lich)[names(Lich)=='NAME'] <- 'TAXNAME'
Lich$NACHWEIS <- 'BfN(Scholz2000)'
Lich$SECUNDUM <- 'BfN(Scholz2000)'
Lich$GROUP<-'F'

## Merge primary sources
data1 <- merge(Wissk,HTax,all=TRUE)
data1 <- merge(data1,Bryo,all=TRUE)
data1 <- merge(data1,Lich, all=TRUE)
data1$EditStatus <- 'BfN'

## Ergaenzungen
erg <- dbGetQuery(con, "SELECT * FROM neueArten")
erg$NAMNR <- erg$ID + 90000
erg$EditStatus <- 'Ergaenzung'

## Korrekturen laden
korr <- dbGetQuery(con, "SELECT * FROM Korrekturen")
korr$EditStatus_korr <- 'Korrektur'

data2 <- merge(data1, erg, all=TRUE)
data2 <- merge(data2, korr, by='NAMNR', all=TRUE)
# which(is.na(data2$SIPNR))

##### Korrekturen ausführen
data3 <- data2
data3$EditStatus[data3$EditStatus_korr=='Korrektur'] <- 'Korrektur'
# nicht gewünschte löschen
ind <- data3$Aktion == 'streichen'
ind[is.na(ind)] <- FALSE  # data3$Aktion == 'streichen' & !is.na(data3$Aktion) ##Kurzform?
table(ind)
data3 <- data3[!ind,]
data3$LIST[data3$Aktion == 'Kartierungstaxa annehmen'] <- ''
data3 <- data3[data3$LIST %in% c('','BE','G') | is.na(data3$LIST) ,] # nicht angenommene Kartierungstaxa ausschliessen ("BE" sind Nominat-Taxa, G Gattungen und höher [s. auch HTax])

# Inhalte aus nicht-leeren "_korr" Felder übernehmen
for(i in grep('korr', names(data3))) {
  f <- strsplit(names(data3)[i], '_', fixed=TRUE)[[1]][1]
  k <- names(data3)[i]
  ind <- !is.na(data3[,k]) & data3[,k] !='' & data3$EditStatus=='Korrektur' 
  if(sum(ind)>0) data3[ind,f] <- data3[ind, i]
}

data3[,'TAXNAME'] <- taxname.abbr(data3[,'TAXNAME'])
data3[,'VALIDNAME'] <- taxname.abbr(data3[,'VALIDNAME'])
data3[,'AGGNAME'] <- taxname.abbr(data3[,'AGGNAME'])

# Use Turboveg-System for valid species (NAMNR == SIPNR) instead of BfN-Style (SIPNR-Group)
# data3[data3$SIPNR == 922 & !is.na(data3$SIPNR),]

valid <- data3[data3$GUELT == "1",]
for(i in 1:nrow(valid)) {
  data3$SIPNR[data3$SIPNR == valid$SIPNR[i]] <- valid$NAMNR[i]
  #  data2[data2$SIPNR == 11928 & !is.na(data2$SIPNR),]
}   # todo: better make it with tapply
# 
data3[data3$SIPNR == 11928 & !is.na(data3$SIPNR),]
# data2$SIPNR <- data2$SIPNR[match(data2$VALIDNAME, data2$TAXNAME)]
# data2$SIPNR[data2$GUELT == 1] <- data2$NAMNR[data2$GUELT == 1]

## Valid
data3$SIPNR <- ifelse(data3$GUELT==1, data3$NAMNR, data3$SIPNR)
data3$SIPNR[!is.na(data3$VALIDNAME) & is.na(data3$SIPNR)] <- 
data3$NAMNR[match(data3$VALIDNAME[!is.na(data3$VALIDNAME) & is.na(data3$SIPNR)], data3$TAXNAME)]
# which(is.na(data3$SIPNR))

## Hybride
data3$Hybrid[is.na(data3$Hybrid)] <- 0
data3$Hybrid[data3$Hybrid %in% c('FALSCH','FALSE','nein')] <- 0
data3$Hybrid[data3$Hybrid %in% c('ja','TRUE','WAHR','x')] <- 'x'

## Aggregate
valid <- data3[data3$GUELT == "1",]
ind <- data3$AGGNR %in% data3$NAMNR[data3$GUELT!='1']
data3$AGGNR[ind] <- valid$NAMNR[match(data3$AGGNR, valid$SIPNR)][ind]
data3 <- rang.fun(data3, variable = 'RANG')# fehlende Angaben in Rang ergänzen
data3 <- agg.fun(data3) # fehlende Gattungen ergänzen
data3$AGGNR[data3$GUELT == "1" & is.na(data3$AGGNR)] <- data3$NAMNR[match(data3$AGGNAME[data3$GUELT == "1" & is.na(data3$AGGNR)], data3$TAXNAME)]

data3$VALIDNAME[is.na(data3$VALIDNAME)] <- data3$TAXNAME[match(data3$SIPNR[is.na(data3$VALIDNAME)], data3$NAMNR)]

## Agg 2
data7 <- data3
# data7 <- agg.fun(data7)

data7 <- data3
data7$appendedPhrase <- NA
data7$appendedPhrase[grep('s. str.', data7$ABBREVIAT)] <- 's. str.'
data7$appendedPhrase[grep('s. l.', data7$ABBREVIAT)] <- 's. l.'
data7$appendedPhrase[grep('auct.', data7$ABBREVIAT)] <- 'auct.'

## Hybrid parents
data7$Elter1 <- taxname.abbr(data7$Elter1)
data7$Elter2 <- taxname.abbr(data7$Elter2)
data7$Elter3 <- taxname.abbr(data7$Elter3)

## Family
# source('code/functions.r')
fam <- read.table('family.csv', header=TRUE, sep='\t', row.names=1)
# fam <- fam[0,]   # Reset
data7$Family <- fam$Family[match(data7$NAMNR, fam$NAMNR)]
data7$Family[is.na(data7$Family)] <- ''
## neue Arten s.u.

## Integers
data7$NAMNR <- as.integer(data7$NAMNR)
data7$SIPNR <- as.integer(data7$SIPNR)

## Lettercodes
cat('Calculating letterodes ...')
validnames <- unique(data7$VALIDNAME)
source('code/lettercode.r')
letter <- make.lettercode(toupper(validnames))
#   head(letter)
#   which(is.na(letter))
#   letter[names(letter) == 'EUPHRASIA STRICTA']
#   names(letter)[grep('EUPHRASIA', names(letter))]
data7$lettercode <- letter[match(toupper(data7$TAXNAME), names(letter))]
#  data7[data7$TAXNAME == 'Euphrasia stricta', ]
data7$lettercode <- data7$lettercode[match(data7$SIPNR, data7$NAMNR)]
#  data7[data7$TAXNAME == 'Euphrasia stricta agg.', ]

save(data7, file='data7.rda')
