Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

model.cpp

Go to the documentation of this file.
00001 //****************************************************
00002 //  April, 1993, University of Illinois
00003 // Copyright (C) 1993, 1994 Tianlin Wang
00004 /* Copyright (C) 1994-2003 Matvec Development Team. 
00005 
00006   This program is free software; you can redistribute it and/or
00007   modify it under the terms of the GNU Library General Public
00008   License as published by the Free Software Foundation; either
00009   version 2 of the License, or (at your option) any later version.
00010   
00011   This program is distributed in the hope that it will be useful,
00012   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014   Library General Public License for more details.
00015     
00016   You should have received a copy of the GNU Library General Public
00017   License along with this library; if not, write to the Free
00018   Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00019   MA 02111-1307, USA 
00020 */
00021 
00022 #include <iostream>
00023 #include <sstream>
00024 #include <iomanip>
00025 #include <cstdarg>
00026 #include "session.h"
00027 #include  "stat.h"
00028 #include "model.h"
00029 #include "population.h"
00030 #include "parmmap.h"
00031 
00032 namespace matvec {
00033 
00034 
00035 void Model::initialize(void)
00036 {
00037    num_ped        = 0;
00038    max_nz         = 0;
00039    numrec         = 0;
00040    numobs         = 0;
00041    numcol         = 0;
00042    numterm        = 0;
00043    popsize        = 0;
00044    numfact        = 0;
00045    maxorder       = 0;
00046    numtrait       = 0;
00047    npattern       = 0;
00048    hmmesize       = 0;
00049    numgroup       = 0;
00050    non_zero       = 0;
00051    type           = bad_model;    // it becomes good mode after having equations - was model_type
00052    ntermGdist     = 0;
00053    pop_created    = 0;
00054    data_prepared  = 0;
00055    weightinverse  = 0;
00056    modelpcount    =0;
00057    modelstr       = 0;
00058 
00059    pop            = 0;
00060    rve            = 0;
00061    data           = 0;
00062    kvec           = 0;
00063    rawpos         = 0;
00064    idlist         = 0;
00065    pos_term       = 0;
00066    xval_term      = 0;
00067    traitname      = 0;
00068    trait_vec      = 0;
00069    rec_indid      = 0;
00070    xact_htable    = 0;
00071    factor_struct  = 0;
00072    trait_struct   = 0;
00073    unknown_prior  = 0;
00074 
00075    // **********************************************
00076    //  information variables for Model::info(stream)
00077    // **********************************************
00078    nfunk_in_dfreml = 0;
00079    reml_value  = 0.0;
00080    dfreml_called = 0;
00081 }
00082 
00083 void Model::copyfrom(const Model& M)
00084 {
00085    initialize();
00086    num_ped        = M.num_ped;
00087    yry            = M.yry;
00088    data           = M.data;
00089    term           = M.term;
00090    max_nz         = M.max_nz;
00091    numrec         = M.numrec;
00092    numobs         = M.numobs;
00093    numcol         = M.numcol;
00094    max_nz         = M.max_nz;
00095    popsize        = M.popsize;
00096    numterm        = M.numterm;
00097    numfact        = M.numfact;
00098    lng0vec        = M.lng0vec;
00099    lnr0vec        = M.lnr0vec;
00100    blupsol        = M.blupsol;
00101    rellrhs        = M.rellrhs;
00102    popsize        = M.popsize;
00103    hmmesize       = M.hmmesize;
00104    numtrait       = M.numtrait;
00105    npattern       = M.npattern;
00106    hmmesize       = M.hmmesize;
00107    numgroup       = M.numgroup;
00108    maxorder       = M.maxorder;
00109    non_zero       = M.non_zero;
00110    rec_indid      = M.rec_indid;
00111    type     = M.type;
00112    modelfname     = M.modelfname;
00113    weightname     = M.weightname;
00114    ntermGdist     = M.ntermGdist;
00115    modelstring    = M.modelstring;
00116    pattern_tree   = M.pattern_tree;
00117    residual_var   = M.residual_var;
00118    data_prepared  = M.data_prepared;
00119    weightinverse  = M.weightinverse;
00120    nfunk_in_dfreml= M.nfunk_in_dfreml;
00121    reml_value     = M.reml_value;
00122    dfreml_called  = M.dfreml_called;
00123    hmmec.resize(hmmesize,max_nz);
00124 
00125    int i,k;
00126    // pos_term, xval_term, trait_vec are working spaces
00127    if (pos_term) {delete [] pos_term; pos_term = 0;}
00128    if (M.pos_term) pos_term = new unsigned [numterm+1];
00129    if (xval_term) {delete [] xval_term; xval_term = 0;}
00130    if (M.xval_term) xval_term = new double [numterm+1];
00131    if (trait_vec) {delete [] trait_vec;  trait_vec= 0;}
00132    if (M.trait_vec) trait_vec = new double [numterm+1];
00133 
00134    if (unknown_prior) {
00135       delete [] unknown_prior;unknown_prior = 0;
00136    }
00137    if (M.unknown_prior) {
00138      if(numterm>0){
00139        unknown_prior = new UnknownDist[numterm];
00140      }
00141      else {
00142        unknown_prior = 0;
00143      }
00144       for (i=0; i<numterm; i++) {
00145          unknown_prior[i].copyfrom(M.unknown_prior[i]);
00146          if (strcmp(M.term.termlist[i].prior->name(),"UnknownDist") == 0) {
00147             term.termlist[i].prior = &(unknown_prior[i]);
00148          }
00149          else {
00150             term.termlist[i].prior = M.term.termlist[i].prior;
00151          }
00152       }
00153    }
00154    if (traitname) {delete [] traitname; traitname = 0;}
00155    if (M.traitname) {
00156      if(numtrait>0){
00157        traitname = new std::string [numtrait];
00158      }
00159      else {
00160        traitname = 0;
00161      }
00162       for (i=0; i<numtrait; i++) traitname[i] = M.traitname[i];
00163    }
00164    if (kvec) {delete [] kvec; kvec = 0;}
00165    if (M.kvec) {
00166      if(numtrait>0){
00167        kvec = new unsigned [npattern];
00168      }
00169      else {
00170        kvec = 0;
00171      }
00172       for (i=0; i<npattern; i++) kvec[i] = M.kvec[i];
00173    }
00174    if (rawpos) {delete [] rawpos; rawpos = 0;}
00175    if (M.rawpos) {
00176      if(maxorder>0){
00177        rawpos = new unsigned [maxorder];  // this is just a working space
00178      }
00179      else {
00180        rawpos = 0;
00181      }
00182    }
00183    pattern.resize(npattern);
00184    std::set<std::string>::iterator pos;
00185    for (i=0, pos=pattern_tree.begin(); pos != pattern_tree.end(); ++pos,++i) {
00186       pattern[i] = *pos;
00187    }
00188    pop_created = M.pop_created;
00189    if (M.pop_created) {
00190       pop = new Population(*(M.pop));
00191    }
00192    else {
00193       pop = M.pop;
00194    }
00195    if (rve) {
00196      delete [] rve;
00197      rve = 0;
00198    }
00199    if (M.rve) {
00200      if(npattern>0){
00201        rve = new Matrix<double> [npattern];
00202      }
00203      else {
00204        rve = 0;
00205      }
00206       check_ptr(rve);
00207       for (i=0; i<npattern; i++) rve[i].reserve(numtrait,numtrait);
00208    }
00209    if (factor_struct) {
00210       delete [] factor_struct; factor_struct = 0;
00211    }
00212    if (M.factor_struct) {
00213      if(numfact>0){
00214        factor_struct = new FieldStruct [numfact];
00215      }
00216      else {
00217        factor_struct = 0;
00218      }
00219       for (i=0; i<numfact; i++) factor_struct[i] = M.factor_struct[i];
00220    }
00221    if (trait_struct) {
00222       delete [] trait_struct; trait_struct = 0;
00223    }
00224    if (M.trait_struct) {
00225      if(numtrait>0){
00226        trait_struct = new FieldStruct* [numtrait];
00227      }
00228      else {
00229        trait_struct = 0;
00230      }
00231       for (i=0; i<numtrait; i++) {
00232          k = fieldindex(traitname[i],factor_struct,numfact);
00233          if (k >= 0 && k < numfact) {
00234             trait_struct[i] = &(factor_struct[k]);
00235          }
00236          else {
00237             throw exception("Model::copyfrom(): you have probably found a bug!");
00238          }
00239       }
00240    }
00241    if (xact_htable) {delete [] xact_htable; xact_htable = 0;}
00242    if (M.xact_htable) {
00243      if(numterm>0){
00244        xact_htable = new HashTable [numterm];
00245      }
00246      else {
00247        xact_htable = 0;
00248      }
00249       for (i=0; i<numterm; i++) xact_htable[i] = M.xact_htable[i];
00250    }
00251    if (idlist) {delete [] idlist; idlist = 0;}
00252    if (M.idlist) {
00253      if(numcol>0){
00254        idlist = new HashTable* [numcol];
00255      }
00256      else {
00257        idlist = 0;
00258      }
00259       check_ptr(idlist);
00260       if (data) for (i=0; i<numcol; i++) idlist[i] = data->hashtable[i];
00261    }
00262 }
00263 
00264 int Model::equation(const std::string &modelspecs)
00265 {
00266   //******************************************************************
00267   //  take the folowing model as example to show how I process model
00268   //
00269   //     model M("y1 =  A + B + A * B,"
00270   //             "y2 =  B         X(B)"
00271   //
00272   // *****************************************************************
00273    int retval = 1;
00274    num_ped=0;
00275    if (modelspecs == "") throw exception("Model::equation(): equation is empty");
00276    modelstring = modelspecs;
00277    if (build_model_term(modelstring)) {
00278       maxorder = term.maxorder();
00279       if (rawpos) {
00280         delete [] rawpos;
00281         rawpos=0;
00282       }
00283       if(maxorder>0){
00284         rawpos = new unsigned [maxorder];  // this is just a working space
00285       }
00286       else {
00287         rawpos = 0;
00288       }
00289       lng0vec.resize(numterm);
00290    }
00291    else {
00292       modelstring = "";
00293       retval = 0;
00294    }
00295    if (retval == 1) {
00296       type = fixed_model;
00297    }
00298    else {
00299       type = bad_model;
00300    }
00301    return retval;
00302 }
00303 
00304 void Model::fitdata(Data& D)
00305 {
00306    if (D.num_rows() > 0) {
00307       data = &D;
00308    }
00309    else {
00310       data = 0;
00311       type = bad_model;
00312       warning("Model::fitdata(D): Data is empty");
00313    }
00314    max_nz     = 0;
00315    numrec     = 0;
00316    numobs     = 0;
00317    numcol     = 0;
00318    npattern   = 0;
00319    hmmesize   = 0;
00320    data_prepared = 0;
00321    hmmec.resize(0,0);
00322 }
00323 
00324 int Model::display(void) const
00325 {
00326    std::cout << " model:  " << modelstring << "\n";
00327    return 1;
00328 }
00329 
00330 void Model::release(void)
00331 {
00332    if (idlist)        {
00333      delete [] idlist; 
00334      idlist= 0;
00335    }
00336    if (xact_htable)   {
00337      delete [] xact_htable; 
00338      xact_htable= 0;
00339    }
00340    if (kvec)          {
00341      delete [] kvec; 
00342      kvec = 0;
00343    }
00344    if (rawpos)        {
00345      delete [] rawpos; 
00346      rawpos = 0;
00347    }
00348    if (pos_term)      {
00349      delete [] pos_term; 
00350      pos_term = 0;
00351    }
00352    if (xval_term)     {
00353      delete [] xval_term; 
00354      xval_term = 0;
00355    }
00356    if (trait_vec)     {
00357      delete [] trait_vec; 
00358      trait_vec = 0;
00359    }
00360    if (traitname)     {
00361      delete [] traitname; 
00362      traitname = 0;
00363    }
00364    if (pop_created)   {
00365      delete pop; pop = 0; 
00366      pop_created=0;
00367    }
00368    if (modelstr)      {
00369      delete [] modelstr; 
00370      modelstr=0; modelpcount=0;
00371    }
00372    if (trait_struct) {
00373       delete [] trait_struct; 
00374       trait_struct=0;
00375    }
00376    if (factor_struct) {
00377       delete [] factor_struct; 
00378       numfact=0; factor_struct = 0;
00379    }
00380    if (unknown_prior) {
00381       delete [] unknown_prior; 
00382       unknown_prior=0;
00383    }
00384    if(rve){
00385      delete [] rve;
00386      rve = 0;
00387    }
00388    remove(modelfname.c_str());
00389 }
00390 
00391 int Model::prepare_data(const std::string &solver)
00392 {
00393    if (data_prepared) return 1;
00394    if (modelstring == "" || !data) throw exception("Model::prepare_data(): empty model or no data");
00395 
00396    // **************************************************************
00397    // it's necessary to make a hardcopy on disk, because data
00398    // may be changed temporary due to re-hash, etc.
00399    // **************************************************************
00400    if (!data->in_disk()) data->save_datasheet();
00401    if (!data->in_memory()) data->input_datasheet();
00402 //rlf   modelfname = SESSION.mktemp();
00403 
00404    int k,t,ii,ii1;
00405    unsigned i,nrow_in_data;
00406    char CL;
00407    act_numtrait=numtrait;
00408    //if(num_ped) act_numtrait *= num_ped;
00409    // **************************************************************
00410    //  if there is  B*B in a model, then  B must be a covariate
00411    // **************************************************************
00412    ModelTerm *T;
00413    std::string fn;
00414    unsigned ped_cnt=0,first_ped;
00415    for (t=0; t<numterm; t++) {
00416       T = &(term[t]);
00417       for (i=0; i<T->order()-1; i++) {
00418          ii = T->factorindx[i]; ii1 = T->factorindx[i+1];
00419          if (ii == ii1) {
00420             if (factor_struct[ii].classi() != 'C') {
00421                fn =  factor_struct[ii].name();
00422                throw exception("Model::prepare_data(): Is there a covariate?");
00423 //               break;
00424             }
00425          }
00426       }
00427    }
00428    nrow_in_data = numrec  = data->num_rows();
00429    numcol  = data->num_cols();
00430    // *********************************************************************
00431    //  (1) If weight exists, then set associated data-column to be 'W'
00432    //  (2) reset data record_struct[i].classi with factor_struct[j].classi
00433    // ********************************************************************
00434    for (i=0; i<numcol; i++) data->datasheet[i].classi('N');
00435    if (weightname.size() > 0) {
00436       k = data->field_index(weightname.c_str());
00437       if (k >= 0) {
00438          if (data->datasheet[k].type() != 'S') {
00439             data->datasheet[k].classi('W');
00440             data->datasheet[k].value_for_missing(0.0);
00441          }
00442          else {
00443             throw exception("Model::prepare_data(): str column can't be a weight column");
00444          }
00445       }
00446       else {
00447          throw exception("Model::prepare_data(): weight variable not in data");
00448       }
00449    }
00450 
00451    for (i=0; i<numfact; i++)  {
00452 
00453       k = data->field_index(factor_struct[i].name());
00454       if (k >= 0) {
00455          CL = factor_struct[i].classi();
00456          if (CL == 'F') {
00457             if (data->datasheet[k].type() == 'S') {
00458                CL = 'U';
00459             }
00460             else if (data->datasheet[k].type() == 'I') {
00461                CL = 'I';
00462             }
00463          }
00464          else if (CL == 'C') {
00465             if (data->datasheet[k].type() != 'F') {
00466                throw exception("Model::prepare_data(): a covariate is specified as string");
00467             }
00468          }
00469          else {
00470             if (data->datasheet[k].type() == 'I') {
00471                throw exception("Model::prepare_data(): 'intercept' can only be a fixed effect!");
00472             }
00473          }
00474          if (data->datasheet[k].classi() == 'W') {
00475             throw exception("Model::prepare_data(): a weight variable cann't be a part of model euqation");
00476          }
00477          data->datasheet[k].classi(CL);
00478          factor_struct[i].index(data->datasheet[k].index());
00479          factor_struct[i].nmiss(data->datasheet[k].nmiss());
00480       }
00481       else {
00482         std::cout << "\n" << factor_struct[i].name() << " " << k <<"\n";
00483          throw exception("Model::prepare_data(): a variable in model, but not in data");
00484       }
00485    }
00486 
00487    // ******************************************************************
00488    // determine the legality for each record: 0 = perfect, 1 = bad
00489    // ******************************************************************
00490    DataNode *column;
00491    Vector<bool> record_legality(nrow_in_data);
00492 
00493    for (t=1; t<numcol; t++) {
00494       if (data->datasheet[t].nmiss() == 0) continue;
00495       CL = data->datasheet[t].classi();
00496       if (!(CL=='F' || CL=='U' || CL=='P' || CL=='C')) continue;
00497       column = data->rawcol(t);
00498       for (i=0; i<nrow_in_data; i++) {
00499          if (record_legality[i]) continue;
00500          if (column[i].missing) {
00501             record_legality[i] = true;
00502             numrec--;
00503          }
00504       }
00505    }
00506 
00507    re_hash_data(record_legality);
00508    if (maxorder>1) assign_id_xact(record_legality); //assign ids for interation
00509 
00510    // ***********************************************************
00511    //  determine how many levels for each main term in the model
00512    //  it has already done for interaction term.
00513    // ************************************************************
00514    for (t=0; t<numfact; t++) {
00515       i = factor_struct[t].index();
00516       factor_struct[t].nlevel(data->datasheet[i].nlevel());
00517    }
00518    for (t=0; t<numterm; t++) {
00519       if (term[t].order() == 1) {
00520          term[t].numlevel = factor_struct[term[t].factorindx[0]].nlevel();
00521       }
00522    }
00523 
00524    ////////////////// allocate memory for working spaces //////////////////
00525    if (pos_term) {
00526      delete [] pos_term;
00527      pos_term = 0;
00528    }
00529    pos_term = new unsigned [numterm+1];
00530    if (xval_term) {
00531      delete [] xval_term;
00532      xval_term = 0;
00533    }
00534    xval_term = new double [numterm+1];
00535    if (trait_vec) {
00536      delete [] trait_vec;
00537      trait_vec = 0;
00538    }
00539    if(numtrait>0){
00540      trait_vec = new double [numtrait];
00541    }
00542    else {
00543      trait_vec = 0;
00544    }
00545 
00546    // ***********************************************************
00547    // for each term and trait, say, A B X y1 y2, we'll prepare
00548    // it's position and value in henderson mixed model equations
00549    //  1  100  209  1.0 1.0 0.23 y1 y2
00550    // then we dump them on disk
00551    // Finally, we release data sheet from the memory
00552    // ***********************************************************
00553    save_pos_val(record_legality,solver);
00554    data->release_datasheet();
00555 
00556    record_legality.clear();
00557 
00558    lnr0vec.resize(npattern);
00559    if (rve){
00560      delete [] rve;
00561      rve = 0;
00562    }
00563    if(npattern>0){
00564      rve = new Matrix<double> [npattern];
00565    }
00566    else {
00567      rve = 0;
00568    }
00569    check_ptr(rve);
00570    for (i=0; i<npattern; i++) rve[i].reserve(numtrait,numtrait);
00571 
00572    // *********************************************
00573    //   hmmec starting position of each model term
00574    // **********************************************
00575    hmmesize=0;
00576    unsigned end=nt_vec[0]*term[0].numlevel;
00577    /*   if(term[0].classi() == 'P') {
00578      ped_cnt=1;
00579      first_ped=0;
00580      }*/
00581    for (term[0].start = 0, i=1; i<numterm; i++) {
00582      /*if(term[i].classi() == 'P') {
00583        ped_cnt++;
00584        if(ped_cnt == 1)  first_ped=i;
00585        if(first_ped == 0) {
00586          term[i].start=ped_cnt-1;
00587        }
00588        else{
00589        term[i].start = term[first_ped-1].start + numtrait*(term[first_ped-1].numlevel+(ped_cnt-1));
00590        }
00591      if(hmmesize < (term[i].start+ act_numtrait*term[i].numlevel))  hmmesize= term[i].start+ act_numtrait*term[i].numlevel;
00592      }make
00593      else {*/
00594      //if(term[i-1].classi() == 'P' ) term[i].start = term[first_ped].start + act_numtrait*term[first_ped].numlevel;
00595      // else 
00596      if(pos_vec[i]==0){
00597        term[i].start=end;
00598        end+=nt_vec[i]*term[i].numlevel;
00599      }
00600      else{
00601        term[i].start = term[base_effect[i]].start + numtrait*pos_vec[i];
00602      }
00603        //  if(hmmesize < (term[i].start+ numtrait*term[i].numlevel))  hmmesize= term[i].start+ numtrait*term[i].numlevel;
00604        /* }*/
00605    }
00606    // *****************************************************
00607    //  get hmmesize and allocate memory for hmmec and rhs
00608    // *****************************************************
00609    for (hmmesize=0,t=0; t<numterm; t++) {
00610      hmmesize += nt_vec[t]*term[t].nlevel();
00611      if(nt_vec[t]>act_numtrait) act_numtrait=nt_vec[t];
00612    }
00613    if (max_nz == 0)  max_nz = est_nze(hmmesize,act_numtrait,popsize);
00614    data_prepared = 1;
00615    return 1;
00616 }
00617 
00618 void Model::re_hash_data(Vector<bool> &record_legality)
00619 {
00620    // ********************************************************************
00621    // * hash all levels of main factors in data for sequantial integer ids
00622    // ********************************************************************
00623    if (!data) return;
00624    if (!(data->in_memory())) data->input_datasheet();
00625 
00626    unsigned i,id,t,nrow_in_data = data->num_rows();
00627    char CL,TY;
00628    const char *strid;
00629 
00630    if (idlist) {
00631      delete [] idlist;
00632      idlist = 0;
00633    }
00634    if(numcol>0){
00635      idlist = new HashTable* [numcol];
00636    }
00637    else {
00638      idlist = 0;
00639    }
00640    check_ptr(idlist);
00641    HashTable *dhtable,tempidlist;
00642    int *tempval = new int;
00643    DataNode *column;
00644 
00645    for (t=1; t<numcol; t++) {    // first column is reserved for intercept
00646       dhtable = idlist[t] = data->hashtable[t];
00647       CL = data->datasheet[t].classi();
00648       TY = data->datasheet[t].type();
00649       column = data->rawcol(t);
00650       switch (CL) {
00651          case 'F':                  // need hash to get its sequential id
00652             if (data->hashtable[t]->size() == 0) {
00653                tempidlist.resize(nrow_in_data,sizeof(int));
00654                for (i=0; i<nrow_in_data; i++) {
00655                   if (record_legality[i]) continue;
00656                   *tempval = static_cast<int>(column[i].double_val());
00657                   tempidlist.insert(tempval);
00658                }
00659                id = tempidlist.size();
00660                data->datasheet[t].nlevel(id);
00661                dhtable->resize(id,sizeof(int));
00662                for (i=0; i<nrow_in_data; i++) {
00663                   if (record_legality[i]) continue;
00664                   *tempval = static_cast<int>(column[i].double_val());
00665                   dhtable->insert(tempval);
00666                }
00667             }
00668             break;
00669          case 'P':
00670          case 'G':
00671             // ***********************************************************
00672             // the integer ids in data will be changed here according to
00673             // pedigree, but they will not be saved on disk, ie, we'll
00674             // release datasheet at the end of save_pos_val();
00675             // data->datasheet[t].nlevel has changed, it
00676             // must be changed back at the end of save_pos_val();
00677             // ***********************************************************
00678            if(nrow_in_data>0){
00679              rec_indid = new unsigned [nrow_in_data];   // Gibbs
00680            }
00681            else {
00682              rec_indid = 0;
00683            }
00684             data->datasheet[t].nlevel(pop->size());
00685             for (i=0; i<nrow_in_data; i++) {
00686                if (record_legality[i]) continue;
00687                if (TY == 'S') {
00688                   id = column[i].unsigned_val();
00689                   strid = (const char *)dhtable->find(id);
00690                   if (pop->maxnamelen > 0) {
00691                      id = pop->hashtable.get_id(strid);
00692                   }
00693                   else {
00694                      id = 0;
00695                   }
00696                }
00697                else {
00698                   id = (unsigned) column[i].double_val();
00699                   if (pop->maxnamelen > 0) {
00700                     std::ostringstream ostr;
00701                     ostr << id;
00702                     std::string t = ostr.str();
00703                     strid = t.c_str();
00704                     id = pop->hashtable.get_id(strid);
00705                   }
00706                }
00707                rec_indid[i] = id;                        // Gibbs
00708                if (id) {
00709                   column[i].unsigned_val(id);
00710                }
00711                else {
00712                   column[i].missing = 1;
00713                   record_legality[i] = true;
00714                   numrec--;
00715                   warning("Model::re_hash_data(): %s is in the data, "
00716                         "but not in the pedigree, so it's ignored",strid);
00717                }
00718             }
00719             break;
00720          case 'C':
00721          case 'I':
00722             data->datasheet[t].nlevel(1);
00723             break;
00724          // other possible types: U,T,I,N
00725       }
00726    }
00727    if (tempval){
00728      delete tempval;
00729      tempval = 0;
00730    }
00731 }
00732 
00733 void Model::save_pos_val(Vector<bool> &record_legality, const std::string &solver)
00734 {
00735    // ***************************************************************
00736    // prepare model specific binary data, each line contains
00737    //   pos_term[t]   xval_term[t]   trait_vec[i]
00738    // Note:
00739    // (1) pos_term[t] contains position ranging from 1 to its nlevel
00740    // (2) the last column in pos_term[] is the missing patterm index
00741    // (3) the last column in xval_term[] is the weight value
00742    // ***************************************************************
00743    if (!data) return;
00744    if (!(data->in_memory())) data->input_datasheet();
00745    unsigned nrow_in_data = data->num_rows();
00746    double rawxval;
00747    Vector<unsigned> pos_datacol(numcol);  // working space
00748    Vector<double> xval_datacol(numcol);
00749    int *tempval = new int;
00750    unsigned i,j,k,t,nord;
00751    unsigned ntm1 = numterm + 1;
00752    char CL;
00753 
00754    // *****************************************************************
00755    // (1) find missing pattern,
00756    // (2) kick out record with all traits being missing,
00757    // (3) compute the number of full record (ie, no trait is missing
00758    // ****************************************************************
00759    char *patstring = new char [numtrait+1];
00760    patstring[numtrait]='\0';
00761    pattern_tree.clear();
00762    DataNode *dn;
00763    Vector<double> y_mean(numtrait);
00764    Vector<double> y_std(numtrait);
00765    Vector<unsigned> y_nlevel(numtrait);
00766    for (numobs=0,i=0; i<nrow_in_data; i++) {
00767       if (record_legality[i]) continue;
00768       for (k=0, t=0; t<numtrait; t++) {
00769          j = trait_struct[t]->index();
00770          dn = data->cell(i,j);
00771          if (dn->missing) {
00772             patstring[t] = '0';
00773             k++;
00774          }
00775          else {
00776             patstring[t] = '1';
00777             y_mean[t] += dn->double_val();
00778             y_std[t] += dn->double_val()*dn->double_val();
00779             y_nlevel[t] += 1;
00780          }
00781       }
00782       if (k == numtrait) {
00783          record_legality[i] = true;
00784          numrec--;
00785          for (k=1; k<numcol; k++) {
00786             CL = data->datasheet[k].classi();
00787             if (CL != 'C') continue;
00788             data->cell(i,k)->missing = 1;
00789             data->datasheet[k].count_miss(1);
00790          }
00791       }
00792       else {
00793          numobs++;
00794          pattern_tree.insert(patstring);
00795       }
00796    }
00797    double nr;
00798    for (t=0; t<numtrait; t++) {
00799       nr = static_cast<double>(y_nlevel[t]);
00800       if (nr == 0.0) continue;
00801       y_mean[t] /= nr;
00802       if (nr > 1.0) {
00803          y_std[t] = std::sqrt((y_std[t] - y_mean[t]*y_mean[t]*nr)/(nr-1.0));
00804       }
00805       trait_struct[t]->mean(y_mean[t]);
00806       trait_struct[t]->std(y_std[t]);
00807       trait_struct[t]->nlevel(y_nlevel[t]);
00808    }
00809 
00810    // ******************************************************************
00811    // compute means for covariates
00812    // ******************************************************************
00813    for (t=1; t<numcol; t++) {
00814       if (data->datasheet[t].classi() == 'C') data->datasheet[t].mean();
00815    }
00816 
00817    npattern = pattern_tree.size();
00818    pattern.resize(npattern);
00819    std::set<std::string>::iterator pos;
00820    for (i=0,pos = pattern_tree.begin(); pos != pattern_tree.end(); ++pos,++i) {
00821       pattern[i] = *pos;
00822    }
00823    // note pattern is in ascending order
00824    if (kvec) {
00825      delete [] kvec;
00826      kvec = 0;
00827    }
00828    if(npattern>0){
00829      kvec = new unsigned [npattern];
00830    }
00831    else {
00832      kvec = 0;
00833    }
00834    memset(kvec,'\0',sizeof(unsigned)*npattern);
00835    DataNode* recd;
00836    if(numcol>0){
00837      recd = new DataNode [numcol];
00838    }
00839    else {
00840      recd = 0;
00841    }
00842 
00843    if (modelstr) {delete [] modelstr; modelstr=0; modelpcount=0;}
00844    std::stringstream bmodfile;
00845    if (!bmodfile) throw exception(std::string("Model::save_pos_val(): cannot open file:") + modelfname);
00846    if(solver=="iod"){
00847      pos_val_vector.resize(nrow_in_data);
00848    }
00849    for (i=0; i<nrow_in_data; i++) {
00850       if (record_legality[i]) continue;
00851       xval_term[numterm] = 1.0;             // default weight value
00852       data->row(i,recd);
00853       for (j=0; j<numcol; j++) {
00854          CL = data->datasheet[j].classi();
00855          if (CL =='F') {                     // non string column
00856             xval_datacol[j] = 1.0;
00857             *tempval = static_cast<int>(recd[j].double_val());
00858             pos_datacol[j] = idlist[j]->get_id(tempval);
00859          }
00860          else if (CL=='U' || CL=='P') {    // string column
00861             xval_datacol[j] = 1.0;
00862             pos_datacol[j] = recd[j].unsigned_val();
00863          }
00864          else if (CL=='C') {
00865             xval_datacol[j] = recd[j].double_val();
00866             pos_datacol[j] = 1;
00867          }
00868          else if (CL=='W') {
00869             rawxval = recd[j].double_val();
00870             if (rawxval >= 0) {           // negative weight is not allowed
00871                if (weightinverse == 1) {  //  1/weight is demonded by user
00872                  if (rawxval > 0.0) rawxval = 1.0/rawxval;
00873                }
00874                xval_term[numterm] = rawxval;
00875             }
00876          }
00877          else if (CL=='I') {                     // Intercept
00878             xval_datacol[j] = 1.0;
00879             pos_datacol[j] = 1;
00880          }
00881       }
00882 
00883       for (t=0; t<numterm; t++) {
00884          rawxval = 1.0;
00885          nord = term[t].order();
00886          for (j=0; j<nord; j++) {
00887             k = factor_struct[term[t].factorindx[j]].index();
00888             rawpos[j] = pos_datacol[k];
00889             rawxval *= xval_datacol[k];
00890          }
00891          if (nord == 1) {
00892             pos_term[t] = rawpos[0];
00893          }
00894          else {
00895             pos_term[t] = xact_htable[t].get_id(rawpos);
00896          }
00897          xval_term[t] = rawxval;
00898       }
00899       for (t=0; t<numtrait; t++) {
00900          k = trait_struct[t]->index();
00901          if (recd[k].missing) {
00902             patstring[t] = '0';
00903             trait_vec[t] = 0.0;
00904          }
00905          else {
00906             patstring[t] = '1';
00907             trait_vec[t] = recd[k].double_val();
00908          }
00909       }
00910       for (k=0; k<npattern; ++k) {
00911          if (pattern[k] == patstring) {break;}
00912       }
00913       if (k == npattern) throw exception("Model::save_pos_val(): you have probably found a bug!");
00914       kvec[k] += 1;
00915       pos_term[numterm] = k;
00916       if(solver=="iod"){
00917         pos_val_vector[i].pos_term.resize(ntm1);
00918         pos_val_vector[i].xval_term.resize(ntm1);
00919         pos_val_vector[i].trait_vec.resize(numtrait);
00920         for (unsigned j=0;j<ntm1;j++){
00921           pos_val_vector[i].pos_term[j]  = pos_term[j];
00922           pos_val_vector[i].xval_term[j] = xval_term[j];
00923         }
00924         for (unsigned j=0;j<numtrait;j++){
00925           pos_val_vector[i].trait_vec[j] = trait_vec[j];
00926         }
00927       }
00928       else{
00929         bmodfile.write((char *)pos_term, ntm1*sizeof(unsigned));
00930         bmodfile.write((char *)xval_term, ntm1*sizeof(double));
00931         bmodfile.write((char *)trait_vec, numtrait*sizeof(double));
00932       }
00933    }
00934 //   bmodfile.close();
00935 //   bmodfile.freeze(1); What is this?
00936    // for gcc-3.2 replaced 
00937    //    modelstr=bmodfile.str();
00938    //    modelpcount=bmodfile.pcount();
00939    // with 
00940    modelstringstr=bmodfile.str();
00941    if (recd) {
00942      delete [] recd;
00943      recd = 0;
00944    }
00945    if (patstring) {
00946      delete [] patstring;
00947      patstring = 0;
00948    }
00949    if(tempval){
00950      delete tempval;
00951      tempval = 0;
00952    }
00953 
00954    // ***************************************************************
00955    //  now we change back data->datasheet[t].nlevel, if necessary
00956    //  then we release datasheet so that the change we made does not
00957    //  affect the original data
00958    // ***************************************************************
00959    for (t=0; t<numcol; t++) {
00960       if (data->datasheet[t].classi() =='P') {
00961          data->datasheet[t].nlevel(data->hashtable[t]->size());
00962       }
00963    }
00964    data->release_datasheet();    // datasheet has been released
00965 }
00966 
00967 void Model::assign_id_xact(const Vector<bool> &record_legality)
00968 {
00969    // **********************************************************
00970    // get sequantial integer ids for interation term in model
00971    // ***********************************************************
00972    if (!data) return;
00973    unsigned nord, nrow_in_data = data->num_rows();
00974 
00975    if (xact_htable) {
00976      delete [] xact_htable;
00977      xact_htable = 0;
00978    }
00979    if(numterm>0){
00980      xact_htable = new HashTable [numterm];
00981    }
00982    else {
00983      xact_htable = 0;
00984    }
00985    int t;
00986    for (t=0; t<numterm; t++) {
00987       if (term[t].order() > 1) {
00988          xact_htable[t].resize(nrow_in_data,term[t].order()*sizeof(unsigned));
00989       }
00990    }
00991    hashxact(record_legality);
00992    for (t=0;t<numterm;t++) {
00993       nord = term[t].order();
00994       if (nord>1) {
00995          term[t].numlevel = xact_htable[t].size();
00996          xact_htable[t].resize(term[t].nlevel(),nord*sizeof(unsigned));
00997       }
00998    }
00999    hashxact(record_legality);
01000 }
01001 
01002 void Model::hashxact(const Vector<bool> &record_legality)
01003 {
01004    if (!data) return;
01005    char CL;
01006    int i,j,t,k,nord;
01007    int *tempval = new int;
01008    Vector<unsigned> pos_datacol(numcol);
01009    DataNode* recd; 
01010    if(numcol>0){
01011      recd = new DataNode [numcol];
01012    }
01013    else {
01014      recd = 0;
01015    }
01016    unsigned nrow_in_data = data->num_rows();
01017 
01018    for (i=0; i<nrow_in_data; i++) {
01019       if (record_legality[i]) continue;
01020       data->row(i,recd);
01021       pos_datacol.initialize(numcol,0);
01022       for (t=0; t<numterm; t++) {
01023          nord = term[t].order();
01024          if (nord > 1) {
01025             for (j=0; j<nord; j++) {
01026                k = factor_struct[term[t].factorindx[j]].index();
01027                if (pos_datacol[k] == 0) {
01028                   CL = data->datasheet[k].classi();
01029                   if (CL == 'F') {
01030                      *tempval = static_cast<int>(recd[k].double_val());
01031                      pos_datacol[k] = idlist[k]->get_id(tempval);
01032                   }
01033                   else if (CL == 'U' ||CL == 'P') {
01034                      pos_datacol[k] = recd[k].unsigned_val();
01035                   }
01036                   else if (CL =='C') {
01037                      pos_datacol[k] = 1;
01038                   }
01039                }
01040                rawpos[j] = pos_datacol[k];
01041             }
01042             xact_htable[t].insert(rawpos);
01043          }
01044       }
01045    }
01046    if (recd) {
01047      delete [] recd;
01048      recd = 0;
01049    }
01050    if(tempval){
01051      delete tempval;
01052      tempval = 0;
01053    }
01054 }
01055 
01056 void Model::prior_dist(const std::string &termname, GeneticDist *D)
01057   // ***************************************************************
01058   //  prior is the method to specify the prior distributions for
01059   //  each terms in the linear model.
01060   // ***************************************************************
01061 {
01062    int k = term.index(termname,factor_struct);
01063    if (k < 0) throw exception("Model::prior_dist(): term not in the model equation");
01064    term[k].prior = D;
01065    term[k].classi('R');
01066    if (strcmp(D->name(),"GeneticDist") == 0) {
01067       numterm--;
01068       ntermGdist++;
01069       if (k<numterm) term.swap(k,numterm); // move non-Gdist T before Gdist T
01070    }
01071 }
01072 
01073 void Model::prior_dist(const std::string &termname,Pedigree& P, GeneticDist *D)
01074   // ***************************************************************
01075   //  prior is the method to specify the prior distributions for
01076   //  each terms in the linear model.
01077   // ***************************************************************
01078 {
01079    int k = term.index(termname,factor_struct);
01080    if (k<0) throw exception("Model::prior_dist(): term not in the model equation");
01081    if (D->ntrait() != 1) throw exception("Model::prior_dist(): only one trait allowed");
01082    term[k].prior = D;
01083    if (term[k].order() > 1) throw exception("Model::prior_dist(): pedigree cannot be for this term");
01084 
01085    term[k].classi('G');                   // factor classi will override
01086    factor_struct[term[k].factorindx[0]].classi('G'); // data column classi later
01087    if (pop_created) delete pop;
01088    pop = new Population;
01089    check_ptr(pop);
01090    pop_created = 1;
01091    try {
01092      pop->input_ped(P,D);
01093    }
01094    catch (exception &ex) {
01095       type = bad_model;
01096       throw;
01097    }
01098    numgroup = P.ngroup();
01099    popsize = pop->size();
01100    if (strcmp(D->name(),"GeneticDist") == 0) {
01101       term[k].numlevel = popsize;
01102       numterm--;
01103       ntermGdist++;
01104       if (k<numterm) term.swap(k,numterm); // move non-Gdist T before Gdist T
01105    }
01106 }
01107 
01108 void Model::RSamplerPrior_dist(const std::string &termname,Pedigree& P, Data *D,GeneticDist *G){
01109   // Authors: L. Radu Totir and Rohan L. Fernando 
01110   // (June, 2003) 
01111   // Contributors: 
01112   int k = term.index(termname,factor_struct);
01113   if (k<0) throw exception("Model::prior_dist(): term not in the model equation");
01114   if (G->ntrait() != 1) throw exception("Model::prior_dist(): only one trait allowed");
01115   term[k].prior = G;
01116   if (term[k].order() > 1) throw exception("Model::prior_dist(): pedigree cannot be for this term");
01117   
01118   term[k].classi('G');                   // factor classi will override
01119   factor_struct[term[k].factorindx[0]].classi('G'); // data column classi later
01120   if (pop_created) delete pop;
01121   pop = new Population;
01122   check_ptr(pop);
01123   pop_created = 1;
01124   pop->model = this;
01125   try {
01126     pop->setupRSampler(P,D,G);
01127     // newHG
01128     pop->ListAlleleFounders();                         
01129     pop->SetPossibleHaplotypes(); 
01130     // newHG
01131   }
01132   catch (exception &ex) {
01133       type = bad_model;
01134           cerr << ex.what() << endl;  
01135       throw;
01136   }
01137   numgroup = P.ngroup();
01138   popsize = pop->size();
01139   if (strcmp(G->name(),"GeneticDist") == 0) {
01140     term[k].numlevel = popsize;
01141     numterm--;
01142     ntermGdist++;
01143     if (k<numterm) term.swap(k,numterm); // move non-Gdist T before Gdist T
01144   }
01145 }
01146 /*! \fn void Model::RSamplerPrior_dist(const std::string
01147     &termname,Pedigree& P, Data *D,GeneticDist *G)
01148  *  \brief method to specify the prior distributions for
01149  each terms in the model for which the R-sampler is invoked; it also
01150  sets up the structures needed by the peeling process.
01151 */
01152 
01153 void Model::DGSamplerSetup(unsigned numLoci, Data *D){
01154   // Authors: L. Radu Totir 
01155   // (August, 2004) 
01156   // Contributors:
01157   pop->descent_graph_setup(D);
01158   pop->founder_allele_counter = 0;
01159   int popsize = pop->size();
01160   int zero = 0;  
01161   for (int i=0;i<popsize;i++) {  
01162     pop->member(i)->set_founder_alleles(0);
01163   }
01164   unsigned lastLocus = numLoci;
01165   for (int i=0; i<popsize; i++) {
01166     if (pop->member(i)->m_counter_map.empty()){
01167       int zero = 0;    
01168       pop->member(i)->m_counter_map.resize(numLoci,zero);
01169       pop->member(i)->p_counter_map.resize(numLoci,zero);      
01170     }
01171     pop->member(i)->search_heteroz(lastLocus);  
01172     //cout << pop->member(i)-> id() << " " << pop->member(i)-> ord_heter << endl;
01173   }       
01174 }
01175 
01176 void Model::DGSampler(unsigned numOfSamples,unsigned numOfSL,unsigned numOfHaplo,unsigned numOfSLCas){
01177   // Authors: L. Radu Totir 
01178   // (August, 2004) 
01179   // Contributors:
01180   double l_hood = 0.0;
01181   unsigned k = 0, j = 0, option = 0;
01182   unsigned k1 = numOfSL;
01183   unsigned k2 = numOfHaplo;
01184   unsigned k3 = numOfSLCas;
01185   cout<< endl << "Descent graph iterations = " << numOfSamples << endl;
01186   for(int sample=1; sample <= numOfSamples; sample++) {
01187     if(sample%5000 == 0) cout<<sample<<".";  
01188     //  cout<<endl<< "SAMPLE.................................. " << sample <<endl;  
01189     if (k >= k1 + k2 + k3)                {k = 0;}
01190     if (k < k1)                           {option = 1;}  
01191     else if ((k >= k1) && (k < k1 + k2))  {option = 2;}
01192     else if (k <= k1 + k2 + k3)           {option = 3;}
01193     k++;    
01194     j++;    
01195     
01196     l_hood = pop->MH_ibd_sample_map(l_hood,option);    
01197     pop->sum_descentState_map();  // update pdq's for all marker loci 
01198     if (j >= 100000) {     
01199       j = 0;    
01200       cout << "SI_PDQ sample = " << sample << endl;  
01201     }  
01202   }
01203   cout << endl << "Haplotypes origins mm mp pm pp / mother - father " << endl;
01204   for (int i=0; i<pop->size(); i++) {
01205     pop->member(i)->display_freq_haplotype(numOfSamples);
01206   }  
01207   cout << endl; 
01208 }
01209 
01210 void Model::RSamplerInitialDG(const std::string &samplerType,const std::string &whatToCompute){
01211   // Authors: L. Radu Totir 
01212   // (August, 2004) 
01213   // Contributors: 
01214   double maxNumGNodeElements = 4.0;
01215   int    maxDimensionCutSet  = 7;
01216   unsigned maxsize = (unsigned)(std::pow((maxNumGNodeElements),(maxDimensionCutSet))+.5);
01217   
01218   pop->getInitialGNodeListSample(maxsize,0,pop->popsize,pop->popsize,samplerType);
01219   if(whatToCompute=="probDescHaplo"){
01220     pop->SetFreqHaploFounders();
01221     pop->sampleSegregationIndicators();
01222   }
01223 }
01224 /*! \fn void Model::RSamplerInitialDG(const std::string &samplerType,const 
01225 std::string &whatToCompute)
01226 *   \brief method to generate the initial descent graph to be used by pop_graph
01227 */
01228 
01229 void Model::RSampler(std::string inputFileName){
01230         // Authors: Rohan L. Fernando
01231         // (November, 2004) 
01232         // Contributors:
01233         if (type == bad_model){
01234                 return;
01235         }
01236         matvec::ParmMap parmMap;
01237         // These are the default values 
01238         parmMap["pedBlockSize"]  = "0";
01239         parmMap["numLoci"]       = "1";
01240         parmMap["numOfSamples"]  = "1000";
01241         parmMap["maxCutsetSize"] = "16384";
01242         parmMap["resultsFile"]   = "";
01243         parmMap["samplerType"]   = "genotypic";
01244         parmMap["samplerUsed"]   = "MH";
01245         parmMap["whatToCompute"] = "genotypeFreq";
01246         parmMap["howToSample"]   = "single";
01247         parmMap["printFlag"]     = "0";
01248         // Now we read in the specific values for this job
01249         parmMap.inputParms(inputFileName);
01250         
01251         matvec::RSamplerParms rSamplerParms;
01252         rSamplerParms.pedBlockSize  = parmMap.getUnsignedValue("pedBlockSize");
01253         rSamplerParms.numLoci       = parmMap.getUnsignedValue("numLoci");
01254         rSamplerParms.numOfSamples  = parmMap.getUnsignedValue("numOfSamples");
01255         rSamplerParms.maxCutsetSize = parmMap.getUnsignedValue("maxCutsetSize");
01256         rSamplerParms.printFlag     = parmMap.getUnsignedValue("printFlag");
01257         rSamplerParms.resultsFile   = parmMap["resultsFile"];
01258         rSamplerParms.samplerType   = parmMap["samplerType"];
01259         rSamplerParms.samplerUsed   = parmMap["samplerUsed"];
01260         rSamplerParms.whatToCompute = parmMap["whatToCompute"];
01261         rSamplerParms.howToSample   = parmMap["howToSample"];
01262         rSamplerParms.display();
01263         RSampler(rSamplerParms);        
01264 }
01265         
01266 
01267 void Model::RSampler(RSamplerParms rsParms){
01268         // Authors: Rohan L. Fernando
01269         // (October, 2004) 
01270         // Contributors:
01271         
01272         myRSamplerParms = rsParms;
01273         if (myRSamplerParms.samplerType == "genotypic"){
01274                 myRSamplerParms.pedBlockSize = rsParms.pedBlockSize ? rsParms.pedBlockSize : pop->size();
01275         }
01276         else if(myRSamplerParms.samplerType == "allelic"){
01277                 myRSamplerParms.pedBlockSize = rsParms.pedBlockSize ? rsParms.pedBlockSize : 2*pop->size();             
01278         }
01279         RSampler(myRSamplerParms.pedBlockSize, myRSamplerParms.numLoci,     myRSamplerParms.numOfSamples, myRSamplerParms.samplerType,
01280                          myRSamplerParms.howToSample,  myRSamplerParms.samplerUsed, myRSamplerParms.whatToCompute);
01281         
01282 }
01283 
01284 void Model::RSampler(unsigned pedBlockSize, unsigned numLoci,unsigned numOfSamples,const std::string &samplerType,
01285                                          const std::string &howToSample, const std::string &samplerUsed, const std::string &whatToCompute){
01286   // Authors: L. Radu Totir 
01287   // (September, 2004) 
01288   // Contributors:
01289   if(howToSample=="single"){
01290     if(samplerUsed=="gibbs"){
01291       RSamplerGibbs(pedBlockSize,numLoci,numOfSamples,samplerType,whatToCompute);
01292     }
01293     else if(samplerUsed=="MH"){
01294       RSamplerMH(pedBlockSize,numLoci,numOfSamples,samplerType,whatToCompute);
01295     }
01296     else if(samplerUsed=="gibbsMH"){
01297       unsigned stepMH = 3;
01298       RSamplerGibbsMH(pedBlockSize,numLoci,numOfSamples,stepMH,samplerType,whatToCompute);
01299     }
01300     else {
01301       cerr << "samplerUsed must be one of: gibbs, MH or gibbsMH;" << endl;
01302     }  
01303   }
01304   else if(howToSample=="joint"){
01305     if (samplerType!="allelic"){
01306       cerr << "For joint sampling, samplerType must be set to allelic;" << endl;
01307       exit(1);
01308     } 
01309     if(samplerUsed=="gibbs"){
01310       cerr << "For joint sampling, please set the samplerUsed to MH for now;" << endl;
01311       exit(1);
01312     }
01313     else if(samplerUsed=="MH"){
01314       RSamplerMH(numLoci,numOfSamples,samplerType,whatToCompute);
01315     }
01316     else if(samplerUsed=="gibbsMH"){
01317       cerr << "Not implemented yet for joint sampling, please set samplerUsed to MH for now;" << endl;
01318       exit(1);
01319     }
01320     else {
01321       cerr << "samplerUsed must be one of: gibbs, MH or gibbsMH;" << endl;
01322     } 
01323   }
01324 }      
01325 
01326 void Model::RSamplerGibbs(unsigned pedBlockSize,unsigned numLoci,unsigned numOfSamples,const std::string &samplerType,const std::string &whatToCompute){
01327         // Authors: L. Radu Totir 
01328         // (June, 2004) 
01329         // Contributors: 
01330         // if (pedBlockSize!=pop->popsize) throw exception("Model::RSamplerGibbs(...): size of pedigree block must be equal to the population size");
01331         double maxNumGNodeElements = 4.0;
01332         int    maxDimensionCutSet  = 7;
01333         unsigned maxsize = (unsigned)(std::pow((maxNumGNodeElements),(maxDimensionCutSet))+.5);
01334         matvec::Matrix<double> IBDCovMatrix;
01335         IBDCovMatrix.resize(1,1,0.0);
01336         // cout << maxsize << endl; 
01337         // next get ready and do the Gibbs sampling in blocks
01338         unsigned stopBlock=0,startBlock=0, count=1;
01339         for (int i=0;i<numOfSamples;i++){
01340                 if (i==0){
01341                         // get initial sample by sequential inputation
01342                         // cout << "Sample " << 1 << endl;
01343                         pop->getInitialGNodeListSample(maxsize,0,pop->popsize,pedBlockSize,samplerType);
01344                         if (whatToCompute=="haplotypeFreq"){
01345                                 pop->countHaplotypes(samplerType);
01346                         }
01347                         else if(whatToCompute=="probDescHaplo"){
01348                                 pop->SetFreqHaploFounders();
01349                         }
01350                         else if(whatToCompute=="ibdCovMatrix"){
01351                                 IBDCovMatrix.resize(pop->popsize,pop->popsize,0.0);
01352                         }
01353                 }
01354                 else{
01355                         // ask it to use both left and right loci 
01356                         pop->lookOnlyAtLocusToYourLeft = false;
01357                         // cout << "Sample " << setw(5) << i+1 << endl; 
01358                         do{ 
01359                                 if(stopBlock < pop->popsize){
01360                                         stopBlock += pedBlockSize;
01361                                         startBlock = stopBlock - pedBlockSize;
01362                                 }
01363                                 else {
01364                                         stopBlock  = pedBlockSize;
01365                                         startBlock = stopBlock - pedBlockSize;
01366                                 }
01367                                 //cout << "startBlock = " << startBlock << endl;
01368                                 //cout << "stopBlock  = " << stopBlock  << endl;
01369                                 pop->getGNodeListSample(maxsize,startBlock,stopBlock,pedBlockSize,samplerType);
01370                                 if(whatToCompute=="probDescHaplo"){
01371                                         pop->UpdateFreqHaploFounders();
01372                                 }
01373                         } while(stopBlock < pop->popsize);
01374                 }
01375                 if (whatToCompute=="haplotypeFreq"){
01376                         pop->countHaplotypes(samplerType);
01377                 }
01378                 else if(whatToCompute=="probDescHaplo"){
01379                         pop->sampleSegregationIndicators(); 
01380                 } 
01381                 else if(whatToCompute=="ibdCovMatrix"){
01382                         IBDCovMatrix += getIBDMatrix();
01383                 }
01384         }
01385         if (whatToCompute=="haplotypeFreq"){
01386                 pop->displayHaplotypeFrequencies(numOfSamples);
01387         }
01388         else if(whatToCompute=="probDescHaplo"){
01389                 pop->CalcFreqHaploFounders(numOfSamples);
01390                 pop->DisplayFreqHaploFounders();
01391         }
01392         else if(whatToCompute=="ibdCovMatrix"){
01393                 char *s = "./tryRES/IBDCovMatrix"; 
01394                 ofstream outfile;
01395                 outfile.open(s);
01396                 outfile << IBDCovMatrix/numOfSamples;
01397         }
01398 }
01399 /*! \fn void Model::RSamplerGibbs(unsigned pedBlockSize, unsigned numLoci,unsigned numOfSamples,const std::string &samplerType="genotypic",const std::string &whatToCompute="genotypeProb")
01400 *  \brief method to sample ordered genotypes using the Gibbs sampler to sample across loci ( the first sample is obtained by sequential imputation)
01401 */
01402 
01403 void Model::RSamplerMH(unsigned pedBlockSize,unsigned numLoci,unsigned numOfSamples,const std::string &samplerType,const std::string &whatToCompute){
01404         // Authors: L. Radu Totir and Rohan L. Fernando 
01405         // (October, 2003) 
01406         // Contributors: 
01407         unsigned maxsize = myRSamplerParms.maxCutsetSize;
01408 
01409         
01410         int    badCount  = 0;
01411         int    goodCount = 1; // 1 because first sample is always accepted
01412         double alpha      = 1.0;
01413         double ranNumber  = 0.0;
01414         double logOldProposal, logNewProposal, logOldTarget, logNewTarget;
01415         pop->gNodeList.logProposal = 0.0;
01416         pop->gNodeList.logTarget   = 0.0;
01417         matvec::Matrix<double> IBDCovMatrix;
01418         IBDCovMatrix.resize(1,1,0.0);
01419         // next get ready and do the MH sampling in blocks
01420         unsigned stopBlock=0,startBlock=0;
01421         if(samplerType=="genotypic"){
01422                 stopBlock = pop->popsize;
01423         }
01424         else if(samplerType=="allelic"){
01425                 stopBlock = 2*(pop->popsize);
01426         } 
01427         unsigned k=1,k1=1,k2=1;
01428         for (int i=0;i<numOfSamples;i++){
01429                 if(i==0){
01430                         // get initial sample
01431                         //cout << "Sample " << 1 << endl;
01432                         pop->getInitialGNodeListSample(maxsize,startBlock,stopBlock,pedBlockSize,samplerType);
01433                         stopBlock=0;
01434                         logOldTarget = pop->gNodeList.logTarget;
01435                         logOldProposal = pop->gNodeList.logOldProposal;
01436                         pop->copyGNodeStatesToCandidateStates(samplerType);
01437                         pop->storeSampledGametes();
01438                         pop->copyCandidateToAccepted(samplerType);
01439                         if(whatToCompute=="probDescHaplo"){
01440                                 pop->SetFreqHaploFounders();
01441                         }
01442                         else if(whatToCompute=="ibdCovMatrix"){
01443                                 IBDCovMatrix.resize(pop->popsize,pop->popsize,0.0);
01444                         }
01445                         else if(whatToCompute=="genotypeFreq"){
01446                                 pop->initGenotypeFreq();
01447                         }
01448                 }
01449                 else{
01450                         if(k<=k1){
01451                                 TransitionSet::transmissionType = 1;
01452                                 k++;
01453                         }
01454                         else if(k<=k2){
01455                                 TransitionSet::transmissionType = 2;
01456                                 k++;
01457                         }
01458                         if(k==k2){
01459                                 k = 1;
01460                         }
01461                         //cout << "Sample " << setw(5) << i+1 << endl; 
01462                         do{ 
01463                                 if(stopBlock < pop->popsize){
01464                                         stopBlock += pedBlockSize;
01465                                         startBlock = stopBlock - pedBlockSize;
01466                                 }
01467                                 else {
01468                                         stopBlock  = pedBlockSize;
01469                                         startBlock = stopBlock - pedBlockSize;
01470                                 }
01471                                 // cout << "startBlock = " << startBlock << endl;
01472                                 // cout << "stopBlock  = " << stopBlock  << endl;
01473                                 // q(NEW) and pi(NEW)
01474                                 pop->gNodeList.logProposal = 0.0;
01475                                 pop->gNodeList.logTarget   = 0.0;
01476                                 pop->getGNodeListSample(maxsize,startBlock,stopBlock,pedBlockSize,samplerType);
01477                                 pop->copyGNodeStatesToCandidateStates(samplerType);
01478                                 pop->storeSampledGametes();
01479                                 logNewProposal = pop->gNodeList.logProposal;
01480                                 logNewTarget   = pop->gNodeList.logTarget;
01481                                 if(TransitionSet::transmissionType!=1){
01482                                         //cout << "calculate q(OLD) for sample " << i+1 << endl;        
01483                                         pop->gNodeList.logOldProposal = 0.0;
01484                                         pop->getOldGNodeListProbability(maxsize,startBlock,stopBlock,pedBlockSize,samplerType);
01485                                         logOldProposal = pop->gNodeList.logOldProposal;
01486                                 }
01487                                 //cout << "logNewTarget   = " << logNewTarget   << endl;
01488                                 //cout << "logOldTarget   = " << logOldTarget   << endl;
01489                                 //cout << "logNewProposal = " << logNewProposal << endl;
01490                                 //cout << "logOldProposal = " << logOldProposal << endl;
01491                                 // accept or reject the new sample
01492                                 alpha=std::exp(logNewTarget + logOldProposal - logNewProposal - logOldTarget);
01493                                 //cout << " alpha = " << alpha << endl;
01494                                 //cout << (*pop->member(4));
01495                                 //cout << endl;
01496                                 ranNumber=ranf();
01497                                 if (ranNumber <= alpha) {
01498                                         logOldTarget = logNewTarget;
01499                                         if(TransitionSet::transmissionType==1){
01500                                                 logOldProposal = logNewProposal;
01501                                         }
01502                                         pop->copyCandidateToAccepted(samplerType);
01503                                         goodCount++;
01504                                 }
01505                                 else {
01506                                         pop->storeSampledGametes();
01507                                         badCount++;
01508                                         //cout << "badCount = " << badCount << endl;
01509                                 }
01510                                 //cout << "goodCount = " << goodCount << endl;
01511                                 if(whatToCompute=="probDescHaplo"){
01512                                         pop->UpdateFreqHaploFounders();
01513                                 }
01514                                 if (!(i%10000)){
01515                                         cout << "No of samples accepted = " << goodCount << " out of a total of " << goodCount+badCount << endl;
01516                                         double ratio = double(badCount)/(goodCount+badCount);
01517                                         // cout << "Rejection rate = " << ratio << endl;
01518                                 }
01519                         } while(stopBlock < pop->popsize);
01520                 }
01521                 if (whatToCompute=="haplotypeFreq"){
01522                         pop->countHaplotypes(samplerType);
01523                 }
01524                 else if (whatToCompute=="genotypeFreq"){
01525                         pop->countGenotypes(samplerType);
01526                 }               
01527                 else if(whatToCompute=="probDescHaplo"){
01528                         pop->sampleSegregationIndicators();
01529                 } 
01530                 else if(whatToCompute=="ibdCovMatrix"){
01531                         IBDCovMatrix += getIBDMatrix();
01532                 }
01533                 else if(whatToCompute=="initialDG"){
01534                         pop->sampleSegregationIndicators();
01535                 }
01536         }
01537         cout << "No of samples accepted = " << goodCount << endl;
01538         double ratio = double(badCount)/(goodCount+badCount);
01539         cout << "Rejection rate = " << ratio << endl;
01540         if (whatToCompute=="haplotypeFreq"){
01541                 if(myRSamplerParms.resultsFile == ""){
01542                         pop->displayHaplotypeFrequencies(numOfSamples);
01543                 }
01544                 else {
01545                         std::ofstream outfile(myRSamplerParms.resultsFile.c_str());
01546                         pop->displayHaplotypeFrequencies(numOfSamples, outfile);
01547                 }
01548         }
01549         else if (whatToCompute=="genotypeFreq"){
01550                 if(myRSamplerParms.resultsFile == ""){
01551                         pop->displayGenotypeFrequencies(numOfSamples);
01552                 }
01553                 else {
01554                         std::ofstream outfile(myRSamplerParms.resultsFile.c_str());
01555                         pop->displayGenotypeFrequencies(numOfSamples, outfile);
01556                 }
01557         }
01558         else if (whatToCompute=="initialDG"){
01559                 if(myRSamplerParms.resultsFile == ""){
01560                         pop->displaySegregationIndicators();
01561                 }
01562                 else {
01563                         std::ofstream outfile(myRSamplerParms.resultsFile.c_str());
01564                         pop->displaySegregationIndicators(outfile);
01565                 }
01566         }
01567         else if(whatToCompute=="probDescHaplo"){
01568                 pop->CalcFreqHaploFounders(numOfSamples);
01569                 pop->DisplayFreqHaploFounders();
01570         }
01571         else if(whatToCompute=="ibdCovMatrix"){
01572                 char *s = "./tryRES/IBDCovMatrix"; 
01573                 ofstream outfile;
01574                 outfile.open(s);
01575                 outfile << IBDCovMatrix/numOfSamples;
01576         }
01577 }
01578 /*! \fn void Model::RSamplerMH(unsigned pedBlockSize, unsigned numLoci,unsigned numOfSamples,const std::string &samplerType="genotypic",const std::string &whatToCompute="genotypeProb")
01579 *  \brief method to sample ordered genotypes using the Metropolis Hastings algorithm to accept samples proposed by sequential imputation.
01580 */
01581 
01582 void Model::RSamplerGibbsMH(unsigned pedBlockSize, unsigned numLoci,unsigned numOfSamples,unsigned stepGibbsMH, const std::string &samplerType,const std::string &whatToCompute){
01583         // Authors: L. Radu Totir 
01584         // (July, 2004) 
01585         // Contributors: 
01586         // if (pedBlockSize!=pop->popsize) throw exception("Model::RSamplerGibbs(...): size of pedigree block must be equal to the population size");
01587         double maxNumGNodeElements = 4.0;
01588         int    maxDimensionCutSet  = 7;
01589         unsigned maxsize = (unsigned)(std::pow((maxNumGNodeElements),(maxDimensionCutSet))+.5);
01590         matvec::Matrix<double> IBDCovMatrix;
01591         IBDCovMatrix.resize(1,1,0.0);
01592         int    badCount  = 0;
01593         int    goodCount = 1; // 1 because first sample is always accepted
01594         double alpha      = 1.0;
01595         double ranNumber  = 0.0;
01596         double logOldProposal, logNewProposal, logOldTarget, logNewTarget;
01597         pop->gNodeList.logProposal = 0.0;
01598         pop->gNodeList.logTarget   = 0.0;
01599         unsigned stopBlock=0,startBlock=0, count=1;
01600         for (int i=0;i<numOfSamples;i++){
01601                 cout << "Sample " << setw(5) << i+1 << endl; 
01602                 if (i==0){
01603                         // get initial sample
01604                         pop->getInitialGNodeListSample(maxsize,0,pop->popsize,pedBlockSize,samplerType);
01605                         logOldTarget = pop->gNodeList.logTarget;
01606                         logOldProposal = pop->gNodeList.logOldProposal;
01607                         pop->copyGNodeStatesToCandidateStates(samplerType);
01608                         pop->storeSampledGametes();
01609                         pop->copyCandidateToAccepted(samplerType);
01610                         if(whatToCompute=="haplotypeFreq"){
01611                                 pop->countHaplotypes(samplerType);
01612                         }
01613                         else if(whatToCompute=="probDescHaplo"){
01614                                 pop->SetFreqHaploFounders();
01615                         }
01616                         else if(whatToCompute=="ibdCovMatrix"){
01617                                 IBDCovMatrix.resize(pop->popsize,pop->popsize,0.0);
01618                         }
01619                 }
01620                 // repeat initial MH sampling procedure after N samples 
01621                 else if (!(i%stepGibbsMH)){
01622                         pop->copyGNodeStatesToCandidateStates(samplerType);
01623                         pop->storeSampledGametes();
01624                         // for sequential inputation look only at left locus
01625                         pop->lookOnlyAtLocusToYourLeft = true;
01626                         // q(NEW) and pi(NEW)
01627                         pop->gNodeList.logProposal = 0.0;
01628                         pop->gNodeList.logTarget   = 0.0;
01629                         pop->getInitialGNodeListSample(maxsize,0,pop->popsize,pedBlockSize,samplerType);
01630                         pop->copyGNodeStatesToCandidateStates(samplerType);
01631                         pop->storeSampledGametes();
01632                         logNewProposal = pop->gNodeList.logProposal;
01633                         logNewTarget   = pop->gNodeList.logTarget;
01634                         pop->gNodeList.logOldProposal = 0.0;
01635                         pop->getOldGNodeListProbability(maxsize,0,pop->popsize,pedBlockSize,samplerType);
01636                         logOldProposal = pop->gNodeList.logOldProposal;
01637                         cout << "logNewTarget   = " << logNewTarget   << endl;
01638                         cout << "logOldProposal = " << logOldProposal << endl;
01639                         cout << "logOldTarget   = " << logOldTarget   << endl;
01640                         cout << "logNewProposal = " << logNewProposal << endl;
01641                         alpha=std::exp(logNewTarget + logOldProposal - logNewProposal - logOldTarget);
01642                         cout << " alpha = " << alpha << endl;
01643                         ranNumber=ranf();
01644                         if (ranNumber <= alpha) {
01645                                 logOldTarget = logNewTarget;
01646                                 pop->copyCandidateToAccepted(samplerType);
01647                                 goodCount++;
01648                         }
01649                         else {
01650                                 pop->retreiveSampledGametes();
01651                                 badCount++;
01652                         }
01653                         if(whatToCompute=="probDescHaplo"){
01654                                 pop->UpdateFreqHaploFounders();
01655                                 pop->sampleSegregationIndicators();
01656                                 // switch for 1 column in all the declarations
01657                                 // pop->resizeSegregationIndicators(); 
01658                         }
01659                         else if(whatToCompute=="ibdCovMatrix"){
01660                                 IBDCovMatrix += getIBDMatrix();
01661                         }
01662                         cout << "Got " <<  goodCount << " valid MH samples of " << count++ << " tries." << endl;
01663                 }
01664                 // do Gibbs across loci
01665                 else{
01666                         // for Gibbs look to both the left and right locus
01667                         pop->lookOnlyAtLocusToYourLeft = false;
01668                         pop->gNodeList.logProposal = 0.0;
01669                         pop->gNodeList.logTarget   = 0.0;
01670                         do{ 
01671                                 if(stopBlock < pop->popsize){
01672                                         stopBlock += pedBlockSize;
01673                                         startBlock = stopBlock - pedBlockSize;
01674                                 }
01675                                 else {
01676                                         stopBlock  = pedBlockSize;
01677                                         startBlock = stopBlock - pedBlockSize;
01678                                 }
01679                                 // cout << "startBlock = " << startBlock << endl;
01680                                 // cout << "stopBlock  = " << stopBlock  << endl;
01681                                 pop->getGNodeListSample(maxsize,startBlock,stopBlock,pedBlockSize,samplerType);
01682                                 // keep track of the target probability for potential MH step
01683                                 logOldTarget   = pop->gNodeList.logTarget;
01684                                 pop->copyGNodeStatesToCandidateStates(samplerType);
01685                                 pop->storeSampledGametes();
01686                                 pop->copyCandidateToAccepted(samplerType);
01687                                 if(whatToCompute=="probDescHaplo"){
01688                                         pop->UpdateFreqHaploFounders();
01689                                 }
01690                         } while(stopBlock < pop->popsize);
01691                         if(whatToCompute=="probDescHaplo"){
01692                                 pop->sampleSegregationIndicators();
01693                                 // switch for 1 column in all the declarations
01694                                 // pop->resizeSegregationIndicators(); 
01695                         } 
01696                         else if(whatToCompute=="ibdCovMatrix"){
01697                                 IBDCovMatrix += getIBDMatrix();
01698                         }
01699                 }
01700         }
01701         if(whatToCompute=="probDescHaplo"){
01702                 pop->CalcFreqHaploFounders(numOfSamples);
01703                 pop->DisplayFreqHaploFounders();
01704         }
01705         else if(whatToCompute=="ibdCovMatrix"){
01706                 char *s = "./tryRES/IBDCovMatrix"; 
01707                 ofstream outfile;
01708                 outfile.open(s);
01709                 outfile << IBDCovMatrix/numOfSamples;
01710         }
01711 }
01712 
01713 matvec::Matrix<double> Model::getIBDMatrix(void){
01714   // Author: L. Radu Totir
01715   // (July, 2004) 
01716   // Contributors: 
01717   matvec::Matrix<double> geneticValVec;
01718   matvec::Matrix<double> CovMatrix;
01719   CovMatrix.resize(pop->popsize,pop->popsize,0.0);
01720   matvec::Vector<double> qtlMu;
01721   qtlMu.resize(4,0.0);
01722   qtlMu[0]=-1.41421,qtlMu[3]=1.41421;
01723   geneticValVec.resize(pop->popsize,1,0.0);
01724   for (unsigned t=0;t<pop->popsize;t++){
01725     // cout << (pop->popmember[t])->id() << "   " 
01726     //  << (pop->popmember[t])->genotNodeVector[1].acceptedGenotypeState 
01727     //      << endl;
01728     int geno = (pop->popmember[t])->genotNodeVector[1].acceptedGenotypeState;
01729     geneticValVec[t][0] = qtlMu[geno];
01730   }
01731   CovMatrix =geneticValVec*geneticValVec.transpose();
01732   return CovMatrix;
01733 }
01734 
01735 void Model::variance(const std::string &termname,Pedigree& P,const double v00,...)
01736   // *************************************************************************
01737   // * variance is the method to get variance components for each random
01738   // * effect.
01739   // * the example usage is below:
01740   // *
01741   // *    M.variance("animal",P,5.0)
01742   // *********************************************************************
01743 {
01744 
01745    if (numtrait<1) throw exception("Model::variance(args): no trait(dependent-variable) in the model");
01746    int t1,t2;
01747    doubleMatrix var(numtrait,numtrait);
01748    va_list param_pt;                      // param_pt, an object of va_list
01749    va_start(param_pt,v00);                // call the setup macro
01750    var[0][0] = v00;
01751    for (t2=1; t2<numtrait; t2++) var[0][t2] = va_arg(param_pt,double);
01752    for (t1=1; t1<numtrait; t1++) for (t2=0; t2<numtrait; t2++) {
01753        var[t1][t2] = va_arg(param_pt,double);
01754    }
01755    va_end(param_pt);
01756    variance(termname,P,var);
01757 }
01758 
01759 void Model::variance(const std::string &termname,Pedigree& P, const doubleMatrix& v)
01760   // **********************************************************************
01761   // * variance is the method to get variance components for each random
01762   // * effect.
01763   // *********************************************************************
01764 {
01765    num_ped++;
01766    if (!factor_struct) {
01767       std::cerr << "Model::variance(args): no equation(s) in the model\n";
01768       return;
01769    }
01770 
01771    int k = term.index(termname,factor_struct);
01772 
01773 
01774 
01775    if ( k < 0) {
01776       std::cerr << "Model::variance(): term not in the model\n";
01777       return;
01778    }
01779    //   if (term[k].order() > 1) {
01780    //      std::cerr << "Model::variance(): Pedigree can only link to single effect\n";
01781    //      return;
01782    //   }
01783    if (term[k].classi() != 'F')  {     // term[k] classi is being changing
01784      data = 0;
01785      //   for (int i=0;i<term[k].order();i++) {
01786      //     factor_struct[term[k].factorindx[i]].classi('F');
01787      //   }
01788    }
01789    term[k].classi('P');                    // factor classi will override
01790    type = mixed_model;    factor_struct[term[k].factorindx[0]].classi('P'); // data column classi later
01791    if (pop_created) delete pop;
01792    pop = new Population;
01793    check_ptr(pop);
01794    pop_created = 1;
01795    term[k].prior->resize(numtrait);     // prior must be UnknownDist
01796    if (pop->input_ped(P,term[k].prior)) {
01797       type = bad_model;
01798       return;
01799    }
01800    numgroup = P.ngroup();
01801    popsize = pop->size();
01802    if (v.num_rows() == v.num_cols()/* && v.num_rows() == numtrait*/) {
01803       if (v.psd()) {
01804          term[k].prior->var_matrix()->copy(v);
01805       }
01806       else {
01807          std::cerr << "Model::variance(): not positive(semi) def: " << termname << "\n";
01808       }
01809    }
01810    else {
01811       std::cerr << "Model::variance(): invalid variance matrix size\n";
01812    }
01813 }
01814 
01815 void Model::variance(const std::string &termname,const doubleMatrix& v)
01816   // *********************************************************************
01817   // * variance is the method to get variance components for each random
01818   // * term in a model.
01819   // *********************************************************************
01820 {
01821    if (!factor_struct) {
01822       std::cerr << "Model::variance(args): no equation(s) in the model\n";
01823       return;
01824    }
01825    if (v.num_rows() != v.num_cols() || v.num_rows() != numtrait) {
01826       std::cerr << "Model::variance(): invalid variance matrix size\n";
01827       return;
01828    }
01829    if (!v.psd()) {
01830       std::cerr << "Model::variance:: not positive(semi) def: " << termname << "\n";
01831       return;
01832    }
01833    if (termname == "residual") {
01834       residual_var = v;
01835       return;
01836    }
01837    int i,k;
01838    k = term.index(termname,factor_struct);
01839    if ( k >= 0) {
01840       if (term[k].classi() != 'F') {     // term[k] classi is being changing
01841          data = 0;
01842          //         for (i=0;i<term[k].order();i++) {
01843          // factor_struct[term[k].factorindx[i]].classi('F');
01844          //         }
01845       }
01846       term[k].classi('R');                    // factor classi will override
01847       type = mixed_model;
01848       term[k].prior->resize(numtrait);
01849       term[k].prior->var_matrix()->copy(v);
01850    }
01851    else {
01852       std::cerr << "Model::variance(..): not in model: " << termname << "\n";
01853    }
01854 }
01855 
01856 void Model::variance(const std::string &termname,const double v00,...)
01857   // *************************************************************************
01858   // * variance is the method to get variance components for each random
01859   // * term in a model.
01860   // * the example usage is below:
01861   // *
01862   // *    M.variance("animal",1.0,...)
01863   // *********************************************************************
01864 {
01865    if (numtrait<1) {
01866       std::cerr << "Model::variance(args): no trait(dependent-variable) in the model\n";
01867       return;
01868    }
01869    doubleMatrix var(numtrait,numtrait);
01870    int t1,t2;
01871    va_list param_pt;                       // an object param_pt
01872    va_start(param_pt,v00);                // call the setup macro
01873    var[0][0] = v00;
01874    for (t2=1; t2<numtrait; t2++) var[0][t2] = va_arg(param_pt,double);
01875 
01876    for (t1=1; t1<numtrait; t1++) for (t2=0; t2<numtrait; t2++) {
01877       var[t1][t2] = va_arg(param_pt,double);
01878    }
01879    va_end(param_pt);
01880    variance(termname,var);
01881 }
01882 
01883 
01884 
01885 
01886 ////SDK
01887 
01888 
01889 void Model::VarLink(const std::string &termname,const std::string &termname2)
01890 
01891   // *********************************************************************
01892   // * VarLink is the method to get covariancelink a set of variance
01893   // * components  in a model.
01894   // *********************************************************************
01895 {
01896    if (!factor_struct) {
01897       std::cerr << "Model::VarLink(args): no equation(s) in the model\n";
01898       return;
01899    }
01900 
01901    int i,k,k2;
01902    k = term.index(termname,factor_struct);
01903 
01904    k2 = term.index(termname2,factor_struct);
01905 
01906    if( k < 0 ){
01907       std::cerr << "Model::VarLink(..): not in model: " << termname << "\n";
01908       return;
01909    }
01910    if( k2 < 0 ){
01911       std::cerr << "Model::VarLink(..): not in model: " << termname2 << "\n";
01912       return;
01913    }
01914    int bek=base_effect[k];
01915    if(term[bek].classi() != 'R' && term[bek].classi() != 'P'){
01916       std::cerr << "Model::VarLink(..): not a random effect: " << termname << "\n";
01917       return;
01918    }
01919    int bek2=base_effect[k2];
01920    if(term[bek2].classi() != 'R' && term[bek2].classi() != 'P'){
01921       std::cerr << "Model::VarLink(..): not a random effect: " << termname2 << "\n";
01922       return;
01923    }
01924    if(term[bek].classi() != term[bek2].classi() ){
01925      std::cerr << "Model::VarLink(..): different types of random effects: \n";
01926       return;
01927    }
01928 
01929 
01930 
01931    int be=base_effect[k];
01932    int be2=base_effect[k2];
01933    if(var_link[be]!=var_link[be2]){
01934      int vl=var_link[be];
01935      int vl2=var_link[be2];
01936      if(vl2 <vl) {
01937        int tmp;
01938        tmp=vl;
01939        vl=vl2;
01940        vl2=tmp;
01941      }
01942      var_link[vl2]=vl;  
01943    }
01944 }
01945 
01946 
01947 void Model::covariance(const std::string &termname,const std::string &termname2,const doubleMatrix& v)
01948   // *********************************************************************
01949   // * covariance is the method to get covariance components for a set
01950   // * of correlated  random terms in a model.
01951   // *********************************************************************
01952 {
01953    if (!factor_struct) {
01954       std::cerr << "Model::covariance(args): no equation(s) in the model\n";
01955       return;
01956    }
01957    if (v.num_rows() != v.num_cols() || v.num_rows() != numtrait) {
01958       std::cerr << "Model::covariance(): invalid variance matrix size\n";
01959       return;
01960    }
01961    int i,k,k2;
01962    k = term.index(termname,factor_struct);
01963 
01964    k2 = term.index(termname2,factor_struct);
01965 
01966    if( k < 0 ){
01967       std::cerr << "Model::covariance(..): not in model: " << termname << "\n";
01968       return;
01969    }
01970    if( k2 < 0 ){
01971       std::cerr << "Model::covariance(..): not in model: " << termname2 << "\n";
01972       return;
01973    }
01974    int bek=base_effect[k];
01975    if(term[bek].classi() != 'R' && term[bek].classi() != 'P'){
01976       std::cerr << "Model::covariance(..): not a random effect: " << termname << "\n";
01977       return;
01978    }
01979    int bek2=base_effect[k2];
01980    if(term[bek2].classi() != 'R' && term[bek2].classi() != 'P'){
01981       std::cerr << "Model::covariance(..): not a random effect: " << termname2 << "\n";
01982       return;
01983    }
01984    if(term[bek].classi() != term[bek2].classi() ){
01985      std::cerr << "Model::covariance(..): different types of random effects: \n";
01986       return;
01987    }
01988 
01989 
01990    int be;
01991    if(base_effect[k]==base_effect[k2]){  
01992      be=base_effect[k];
01993    }
01994    else {
01995      be=base_effect[k];
01996      int be2=base_effect[k2];
01997      if(be2 <be) {
01998        int tmp;
01999        tmp=k;
02000        k=k2;
02001        k2=tmp;
02002        tmp=be;
02003        be=be2;
02004        be2=tmp;
02005      }
02006      base_effect[k2]=be;
02007      doubleMatrix orig1,orig2;
02008      orig1.copy(*term[be].prior->var_matrix());
02009      orig2.copy(*term[be2].prior->var_matrix());
02010      int newsize=nt_vec[be]+nt_vec[be2];
02011      Vector<int> orig=trait_map[be];
02012      trait_map[be].reserve(newsize);
02013      int ii=0;
02014      for(int i=0;i<nt_vec[be];i++,ii++) trait_map[be][ii]=orig[i];
02015      for(int i=0;i<nt_vec[be2];i++,ii++) trait_map[be][ii]=trait_map[be2][i];
02016      term[be].prior->var_matrix()->resize(newsize,newsize);
02017 
02018      for(int i=0;i<nt_vec[be];i++){
02019        for(int j=0;j<nt_vec[be];j++){
02020          term[be].prior->var_matrix()->me[i][j]=orig1[i][j];
02021        }
02022      }
02023 
02024      for(int i=0;i<nt_vec[be2];i++){
02025        for(int j=0;j<nt_vec[be2];j++){
02026          term[be].prior->var_matrix()->me[nt_vec[be]+i][nt_vec[be]+j]=orig2[i][j];
02027        }
02028      }
02029      nt_vec[be]=newsize;
02030      term[be2].prior->var_matrix()->clear();
02031      nt_vec[be2]=0;
02032      term[be2].classi('F');
02033      int last_eff=be;
02034      for(;corr_link[last_eff] != -1;)last_eff=corr_link[last_eff];
02035      corr_link[last_eff]=be2;
02036      int last_pos=pos_vec[last_eff];
02037      for(;corr_link[last_eff] != -1;){
02038        last_eff=corr_link[last_eff];
02039        pos_vec[last_eff]=pos_vec[last_eff]+last_pos+1;
02040      }
02041    }
02042 
02043    int srow,scol;
02044    srow=pos_vec[k]*numtrait;
02045    scol=pos_vec[k2]*numtrait;
02046    for(int irow=0;irow<numtrait;irow++){
02047      for(int jcol=0;jcol<numtrait;jcol++){
02048        term[be].prior->var_matrix()->me[srow+irow][scol+jcol]=v[irow][jcol];
02049        term[be].prior->var_matrix()->me[scol+jcol][srow+irow]=v[irow][jcol];
02050      }
02051    }      
02052 
02053 
02054    //   std::cout << "Cov " << *term[be].prior->var_matrix();
02055    
02056 
02057 }
02058 
02059 void Model::covariance(const std::string &termname,const std::string &termname2,const double v00,...)
02060   // *************************************************************************
02061   // * variance is the method to get variance components for each random
02062   // * term in a model.
02063   // * the example usage is below:
02064   // *
02065   // *    M.variance("animal",1.0,...)
02066   // *********************************************************************
02067 {
02068    if (numtrait<1) {
02069       std::cerr << "Model::variance(args): no trait(dependent-variable) in the model\n";
02070       return;
02071    }
02072    doubleMatrix var(numtrait,numtrait);
02073    int t1,t2;
02074    va_list param_pt;                       // an object param_pt
02075    va_start(param_pt,v00);                // call the setup macro
02076    var[0][0] = v00;
02077    for (t2=1; t2<numtrait; t2++) var[0][t2] = va_arg(param_pt,double);
02078 
02079    for (t1=1; t1<numtrait; t1++) for (t2=0; t2<numtrait; t2++) {
02080       var[t1][t2] = va_arg(param_pt,double);
02081    }
02082    va_end(param_pt);
02083    covariance(termname,termname2,var);
02084 }
02085 
02086 
02087 
02088 
02089 
02090 
02091 
02092 
02093 
02094 ////SDK
02095 
02096 
02097 void Model::covariate(const std::string &covariate_names)
02098 {
02099   // *********************************************************************
02100   //  it specifies a list of effect (factor) as covariates.
02101   //  note that an effect is not a model-term, which consists of effects,
02102   //  instead an effect is associated with a data column. It is required
02103   //  that the name of an effect must be the same as the associated
02104   //  data-column.
02105   // **********************************************************************
02106    if (!factor_struct) {
02107       std::cerr << "Model::covariate(): no equation(s) in the model\n";
02108       return;
02109    }
02110 
02111    int j,k,t;
02112    unsigned i,nc;
02113    std::vector<std::string> efflist;
02114    nc = split(covariate_names," ,",&efflist);  // efflist = covariate_names.split(nc,", ");
02115 
02116    for (i=0; i<nc; i++) {
02117       k = fieldindex(efflist[i],factor_struct,numfact);
02118       if (k < 0) throw exception("Model::covariate(): not in the model, thus it's ignored");
02119        // need to check if classi is changed
02120       if (factor_struct[k].classi() != 'F') data= 0;
02121       factor_struct[k].classi('C');
02122 
02123       ////////////////////////////////////////////////////////////////////
02124       // if a term contains at least one covariate, then its classi = 'C'
02125       // else leave it alone
02126       //////////////////////////////////////////////////////////////////
02127       for (t=0; t<numterm; t++) {
02128          j = term[t].partial_match(efflist[i].c_str(),factor_struct);
02129          if (j >=0 ) {
02130             term[t].classi('C');
02131             break;
02132          }
02133       }
02134    }
02135 }
02136 
02137 static void nest_xaction(std::string &s)
02138 {
02139    int i,j;
02140    int n = s.size();
02141    for (i=0; i<n; i++) {
02142       if ( s[i] == '(' ) s[i] = '*';
02143       if ( s[i] == ')' ) s[i] = ' ';
02144    }
02145 
02146       // replace space around * with *
02147    i = 0;
02148    while (i<n) {
02149       if (s[i] == '*' ) {
02150          j = i;
02151          while (s[--j] == ' ') s[j]='*';
02152          while (s[++i] == ' ') s[i]='*';
02153       }
02154       i++;
02155    }
02156 }
02157 
02158 int Model::build_model_term(const std::string &modelspec)
02159 {
02160    int retval = 1;
02161    unsigned i,j,k;
02162 
02163    std::string mspec = modelspec;  // modelspec:  y1 = A  B + A (B), y2 = B  + X*B
02164    nest_xaction(mspec);           // now mspec:  y1 = A  B + A**B , y2 = B  + X*B
02165 
02166    std::string sep("=+,* ");
02167    std::string TMP = mspec;
02168    ////////   TMP.unique("=+,* ");                // TMP ->y1 y2 A B X
02169    std::vector<std::string> tmpvec;
02170    std::vector<std::string>::iterator it;
02171    std::string::size_type begidx;
02172    split(TMP,sep,&tmpvec);
02173 
02174    sort(tmpvec.begin(),tmpvec.end());
02175    it = unique(tmpvec.begin(),tmpvec.end());
02176    tmpvec.erase(it,tmpvec.end());   
02177 //    for(it = tmpvec.begin(); it != tmpvec.end(); it++){
02178 //      std::cout << *it << endl;
02179 //    }
02180 
02181    categorical = 0;
02182    numfact =  tmpvec.size();
02183    if(factor_struct){
02184      delete [] factor_struct;
02185      factor_struct = 0;
02186    }
02187    if(numfact>0){
02188      factor_struct = new FieldStruct [numfact];
02189    }
02190    else {
02191      factor_struct = 0;
02192    }
02193    check_ptr(factor_struct);
02194    for (i=0; i<numfact; i++) factor_struct[i].name(tmpvec[i]);
02195 
02196    TMP = mspec;
02197    numtrait = 0;
02198    sep = "=";
02199    begidx = TMP.find(sep,0);
02200    while(begidx != std::string::npos) {
02201       numtrait++;
02202       begidx = TMP.find(sep,begidx+1);
02203    }
02204    if (numtrait == 0) {
02205       std::cerr << "Model::build_model_term(): there is no trait in the model\n";
02206       return 0;
02207    }
02208    if(trait_struct){
02209      delete [] trait_struct;
02210      trait_struct = 0;
02211    }
02212    if(numtrait>0){
02213      trait_struct = new FieldStruct* [numtrait];
02214    }
02215    else {
02216      trait_struct = 0;
02217    }
02218    check_ptr(trait_struct);
02219    if(traitname){
02220      delete [] traitname;
02221      traitname = 0;
02222    }
02223    if(numtrait>0){
02224      traitname = new std::string [numtrait];
02225    }
02226    else {
02227      traitname = 0;
02228    }
02229    check_ptr(traitname);
02230    std::vector<std::string> model_trait;
02231    std::vector<std::string> trtnm;
02232    sep = ",";
02233    split(TMP,sep,&model_trait);
02234    if (numtrait !=  model_trait.size()) throw exception("Model::build_model_term(): model for each trait is not separated properly");
02235 
02236    sep = " +";
02237    unsigned n = 0;
02238    for (i=0; i<numtrait; i++) {
02239      // trait name should be before "="
02240      begidx = model_trait[i].find("=");
02241      // bbbtrtnmbb is the trait name with possible surrounding blanks
02242      std::string bbbtrtnmbbb = model_trait[i].substr(0,begidx);
02243      // getting rid of the surrounding blanks
02244      split(bbbtrtnmbbb,sep,&trtnm);
02245      traitname[i]  = trtnm[i];
02246      model_trait[i]  = model_trait[i].substr(begidx+1);
02247      n += split(model_trait[i],sep);  /// n += model_trait[i].ntoken("+ ");
02248      k = fieldindex(traitname[i],factor_struct,numfact);
02249      if (k >= 0 && k < numfact) {
02250        factor_struct[k].classi('T');
02251        trait_struct[i] = &(factor_struct[k]);
02252      }
02253      else {
02254        throw exception("Model::build_model_term(): you have probably found a bug!");
02255      }
02256    }
02257    TermList T(n,numtrait);
02258    tmpvec.clear();
02259    sep = " +";
02260    for (numterm=0,i=0; i<numtrait; i++) {
02261       tmpvec.clear();
02262       n = split(model_trait[i],sep,&tmpvec);  //   tmpvec = model_trait[i].split(n,"+ ");
02263       for (k=0; k<n; k++) {
02264          for (j=0; j<numterm; j++) {
02265             if (T[j].match(tmpvec[k],"* ",factor_struct)) break;
02266          }
02267          if (j==numterm) {
02268             T[numterm++].input(tmpvec[k].c_str(),factor_struct,numfact);
02269          }
02270          T[j].trait[i]= 1;
02271       }
02272    }
02273 
02274    term.resize(numterm,numtrait);
02275    if (unknown_prior) {
02276      delete [] unknown_prior;                    // note that unknown_prior[i]
02277      unknown_prior = 0;
02278    }
02279    if(numterm>0){
02280      unknown_prior = new UnknownDist[numterm];     // needs to resize(numtrait)
02281    }
02282    else {
02283      unknown_prior = 0;
02284    }
02285    check_ptr(unknown_prior);
02286    nt_vec.resize(numterm,numtrait);
02287    base_effect.reserve(numterm);
02288    corr_link.resize(numterm,-1);
02289    pos_vec.resize(numterm);
02290    trait_map.reserve(numterm);
02291    var_link.reserve(numterm);
02292    
02293 
02294    for (i=0; i<numterm; i++) {
02295      base_effect[i]=i;
02296      var_link[i]=i;
02297       term[i] = T[i];
02298       term[i].prior = &(unknown_prior[i]);
02299       trait_map[i].resize(numtrait,i);
02300    }
02301    return retval;
02302 }
02303 
02304 void Model::trait_effect_level(const unsigned mmeaddr, std::string& retval,
02305                                int info_flag)
02306 {
02307    // ******************************************************************
02308    // mmeaddr = [0, hmmesize-1]
02309    // info_flag = 1 information: (traitname):termname:levelname(default)
02310    //             2                          termname:levelname
02311    //             3                                   levelname
02312    // ******************************************************************
02313    if (mmeaddr >= hmmesize) throw exception("Model::trait_effect_level(): arg out of range");
02314    retval = "";
02315    char CL,strid[11];
02316    std::string tempstr;
02317    int nord,*tempval;
02318    unsigned addr,i,j,k,t,kk,ii,jj,nlevel;
02319    unsigned *tempuns;
02320    Vector<unsigned> unsvec(maxorder);
02321 
02322    for (addr=0, i=0; i<numterm; i++) {
02323       nlevel = term[i].nlevel();
02324       addr += nlevel*numtrait;
02325       if (addr > mmeaddr) break;
02326    }
02327    if (i >= numterm) throw exception("Model::trait_effect_level(): arg out of range");
02328    addr = term[i].startaddr();
02329    nord = term[i].order();
02330    for (k=addr,j=0; j<nlevel; j++) {
02331       k += numtrait;
02332       if (k > mmeaddr) break;
02333    }
02334    for (addr= k-numtrait, t=0; t<numtrait; t++) {
02335       if (mmeaddr == addr++) break;
02336    }
02337    if (nord == 1) {
02338       unsvec[0] = j+1;
02339    }
02340    else {
02341       tempuns = (unsigned *)(xact_htable[i].find(j+1));
02342       for (ii=0; ii<nord; ii++) unsvec[ii] = tempuns[ii];
02343    }
02344 
02345 
02346    if (term[i].addr[t] == 0) throw exception("Model::trait_effect_level(): not defined address");
02347    if (numtrait>1 && info_flag == 1) {
02348       retval = trait_struct[t]->name();
02349      retval.append(":");
02350    }
02351 
02352    if (info_flag == 1 || info_flag == 2) {
02353       for (ii=0; ii<nord; ii++) {
02354          retval.append(factor_struct[term[i].factorindx[ii]].name());
02355          if (ii+1 < nord) retval.append("*");
02356       }
02357       retval.append(":");
02358    }
02359    for (ii=0; ii<nord; ii++) {
02360       kk = factor_struct[term[i].factorindx[ii]].index();
02361       CL = data->datasheet[kk].classi();
02362       jj = unsvec[ii];
02363 
02364       if (CL == 'F') {
02365          tempval = (int *)(idlist[kk]->find(jj));
02366          sprintf(strid,"%d",*tempval);
02367          tempstr = strid;
02368       }
02369       else if (CL =='U') {
02370          tempstr = (const char *)idlist[kk]->find(jj);
02371       }
02372       else if ( CL =='C') {
02373          tempstr = data->datasheet[kk].name();
02374       }
02375       else if (CL =='P') {
02376          tempstr = (const char *)pop->hashtable.find(jj);
02377       }
02378       else if (CL =='I') {
02379          tempstr = "1";
02380       }
02381       else {
02382          warning("Model::save(): unknown column type: %c",CL);
02383          break;
02384       }
02385       retval.append(tempstr);
02386       if (ii+1 < nord) retval.append("*");
02387    }
02388 }
02389 
02390 Vector<double> Model::get_solutions(const std::string &termname)
02391 {
02392   // Authors: Liviu R. Totir and Rohan L. Fernando (November, 2002) 
02393   // Contributors: 
02394   Vector<double> retval;
02395   if (!get_blupsol()) throw exception("Model::get_solutions(): get_blupsol() failed");
02396   
02397   int k = term.index(termname,factor_struct);
02398   if (k < 0) throw exception("Model::get_solutions(): no such term in the model");
02399   int nlevels = term[k].nlevel()*numtrait;
02400   unsigned i,startaddr;
02401   startaddr = term[k].startaddr();
02402   retval.resize(nlevels);
02403   for (i=0; i<nlevels; i++) {
02404     retval[i] = blupsol[startaddr+i];
02405   }
02406   return retval;
02407 }
02408 
02409 void Model::save(const std::string &fname, const int io_mode)
02410 {
02411    if (type == bad_model) {
02412       warning("Model::save(): model is too bad, nothing is saved");
02413       return;
02414    }
02415    if((numterm+ntermGdist) == 0) return;
02416    if (blupsol.size() != hmmesize + ntermGdist*popsize) return;
02417    if (!data) {
02418       warning("Model::save(): no data has been fitted");
02419       return;
02420    }
02421    std::ofstream ofs;
02422    ofs.open(fname.c_str(),(OpenModeType)io_mode);
02423    if (!ofs) {
02424       warning("Model::save: %s cann't open or already exit",fname.c_str());
02425       return;
02426    }
02427    ofs.setf(std::ios::unitbuf | std::ios::showpoint);
02428    ofs.precision(SESSION.output_precision);
02429    int W = SESSION.output_precision + 6;        // 6 = +.e+00
02430    char S = ' ';                                // one blank space 
02431    std::string SS;
02432    SS = "          ";
02433 
02434    if (!data->in_memory()) data->input_datasheet();
02435    this->info(ofs);
02436 
02437    double *ve = blupsol.begin();
02438 
02439    char* tempstr;
02440    int nord, *tempval;
02441    unsigned *tempuns;
02442    unsigned i,j,t,k,ii,kk,jj,nlevel,total_numterm;
02443    char CL;
02444                    
02445    ofs << "\n            BLUP (BLUE, Posterior_Mean) \n";
02446    ofs << "    ----------------------------------------------- \n\n";
02447    total_numterm = numterm+ntermGdist;
02448    for (i=0; i<total_numterm; i++) {   
02449       ofs << S << std::setw(W)<< "Term";
02450       for (t=0; t<numtrait; t++) {
02451          ofs << S << std::setw(W-2) << "Trait" << S << t+1;
02452       }
02453       ofs <<"\n";
02454       
02455       ofs << S << std::setw(W) << factor_struct[term[i].factorindx[0]].name().c_str();
02456       for (t=1; t<term[i].order(); t++) {
02457          ofs << "*" << factor_struct[term[i].factorindx[t]].name().c_str();
02458       }
02459       for (t=0; t<numtrait; t++) ofs <<S <<std::setw(W)<<trait_struct[t]->name().c_str();
02460       ofs << "\n";
02461       nlevel = term[i].nlevel();
02462       k = term[i].startaddr();
02463       nord = term[i].order();
02464       if (nord == 1) {      // single_factor_term
02465          kk = factor_struct[term[i].factorindx[0]].index();
02466          CL = data->datasheet[kk].classi();
02467          switch(CL) {
02468             case 'F':
02469                for (j=0; j<nlevel; j++) {
02470                   tempval = (int *)(idlist[kk]->find(j+1));
02471                   ofs << S << std::setw(W) << *tempval;
02472                   for (t=0; t<numtrait; t++,k++) {
02473                      ofs << S << std::setw(W);
02474                      if (term[i].trait[t]) {
02475                         ofs << ve[k];
02476                      }
02477                      else {
02478                         ofs << SS;
02479                      }
02480                   }
02481                   k+=(nt_vec[base_effect[i]]-numtrait);
02482                   ofs << "\n";
02483                }
02484                break;
02485             case 'U':
02486                for (j=0; j<nlevel; j++) {
02487                   tempstr = (char *)(idlist[kk]->find(j+1));
02488                   ofs << S << std::setw(W) << tempstr;
02489                   for (t=0; t<numtrait; t++,k++) {
02490                      ofs << S << std::setw(W);
02491                      if (term[i].trait[t]) {
02492                         ofs << ve[k];
02493                      }
02494                      else {
02495                         ofs << SS;
02496                      }
02497                   }
02498                   ofs << "\n";
02499                }
02500                break;
02501             case 'P':
02502                for (j=0; j<nlevel; j++) {
02503                   ofs << S;
02504                   if (pop->maxnamelen > 0) {
02505                      ofs << std::setw(W) << (char *)pop->hashtable.find(j+1);
02506                   }
02507                   else {
02508                      ofs << std::setw(W) << j+1;
02509                   }
02510                   for (t=0; t<numtrait; t++,k++) {
02511                      ofs << S;
02512                      if (term[i].trait[t]) {
02513                         ofs << std::setw(W) << ve[k];
02514                      }
02515                      else {
02516                         ofs << std::setw(W) << SS;
02517                      }
02518                   }
02519                   k+=(nt_vec[base_effect[i]]-numtrait);
02520                   ofs << "\n";
02521                }
02522                break;
02523             case 'C':
02524                ofs << S << std::setw(W) << data->datasheet[kk].name();
02525                for (t=0; t<numtrait; t++,k++) {
02526                   ofs << S << std::setw(W);
02527                   if (term[i].trait[t]) {
02528                      ofs << ve[k];
02529                   }
02530                   else {
02531                      ofs << SS;
02532                   }
02533                }
02534                ofs << "\n";
02535                break;
02536             case 'I':
02537                ofs << S << std::setw(W) << "1";
02538                for (t=0; t<numtrait; t++,k++) {
02539                   ofs << S << std::setw(W);
02540                   if (term[i].trait[t]) {
02541                      ofs << ve[k];
02542                   }
02543                   else {
02544                      ofs << SS;
02545                   }
02546                }
02547                ofs << "\n";
02548                break;
02549             default:
02550                warning("Model.save(): unknown column type: %c",CL);
02551                break;
02552          }
02553          if (i+1 < total_numterm) ofs << "\n";
02554       }
02555       else {     // there are interactions
02556          for (j=0; j<nlevel; j++) {
02557             tempuns = (unsigned *)(xact_htable[i].find(j+1));
02558             kk = factor_struct[term[i].factorindx[0]].index();
02559             CL = data->datasheet[kk].classi();
02560             jj = tempuns[0];
02561 
02562             if (CL == 'F') {
02563                tempval = (int *)(idlist[kk]->find(jj));
02564                ofs << S << std::setw(W) << *tempval;
02565             }
02566             else if (CL =='U') {
02567                tempstr = (char *)(idlist[kk]->find(jj));
02568                ofs << S << std::setw(W) << tempstr;
02569             }
02570             else if ( CL =='C') {
02571                ofs << S << std::setw(W) << data->datasheet[kk].name();
02572             }
02573             else if (CL =='P') {
02574                ofs << S << std::setw(W) << (char *)pop->hashtable.find(jj);
02575             }
02576             else if (CL =='I') {
02577                ofs << S << std::setw(W) << "1";
02578             }
02579             else {
02580                warning("Model::save(): unknown column type: %c",CL);
02581                break;
02582             }
02583 
02584             for (ii=1; ii<nord; ii++) {
02585                kk = factor_struct[term[i].factorindx[ii]].index();
02586                CL = data->datasheet[kk].classi();
02587                jj = tempuns[ii];
02588 
02589                if (CL == 'F') {
02590                   tempval = (int *)(idlist[kk]->find(jj));
02591                   ofs << "*" << *tempval;
02592                }
02593                else if (CL =='U') {
02594                   tempstr = (char *)(idlist[kk]->find(jj));
02595                   ofs << "*" << tempstr;
02596                }
02597                else if (CL =='C' || CL =='I') {
02598                   ofs << "*" << data->datasheet[kk].name();
02599                }
02600                else if (CL =='P') {
02601                   ofs << "*" << (char *)pop->hashtable.find(jj);
02602                }
02603                else {
02604                  warning("Model::save(): unknown column type: %c",CL);
02605                }
02606             }
02607             for (t=0; t<numtrait; t++,k++) {
02608                ofs << S << std::setw(W);
02609                if (term[i].trait[t]) ofs << ve[k];
02610                else ofs << SS;
02611             }
02612             k+=(nt_vec[base_effect[i]]-numtrait);
02613             ofs << "\n";
02614          }
02615          if (i+1 < total_numterm) ofs << "\n";
02616       }
02617    }
02618    ofs.close();
02619    data->release_datasheet();
02620 }
02621 
02622 double Model::minfun(const Vector<double> &x, const int n)
02623 {
02624    double llhood = 0.0;
02625    switch (minfun_indx) {
02626       case 1:                //  dfreml vce,   Model_vce.cc
02627          llhood = minfun_vce(x,n);
02628          break;
02629       case 2:                // MLE peeling,  Model_peeling
02630          llhood = minfun_peeling(x,n);
02631          break;
02632 // BRS
02633       case 3:   // Multipoint 
02634          llhood = minfun_multipoint(x,n);
02635          break;
02636 // BRS
02637       default:
02638          warning("Model::Model::minfun(): unknown minfun_indx: %d",minfun_indx);
02639    }
02640    return llhood;
02641 }
02642 
02643 void Model::RSamplerMH(unsigned numLoci,unsigned numOfSamples,const std::string &samplerType,const std::string &whatToCompute){
02644         // Authors: L. Radu Totir and Rohan L. Fernando 
02645         // (October, 2003) 
02646         // Contributors: 
02647         
02648         unsigned maxsize = myRSamplerParms.maxCutsetSize;
02649         // cout << maxsize << endl; 
02650         int    badCount  = 0;
02651         int    goodCount = 1; // 1 because first sample is always accepted
02652         double alpha      = 1.0;
02653         double ranNumber  = 0.0;
02654         double logOldProposal, logNewProposal, logOldTarget, logNewTarget;
02655         pop->gNodeList.logProposal = 0.0;
02656         pop->gNodeList.logTarget   = 0.0;
02657         matvec::Matrix<double> IBDCovMatrix;
02658         IBDCovMatrix.resize(1,1,0.0);
02659         for (int i=0;i<numOfSamples;i++){
02660                 if(i==0){
02661                         // get initial sample
02662                         //cout << "Sample " << 1 << endl;
02663                         pop->getInitialGNodeListSample(maxsize,numLoci,samplerType);
02664                         logOldTarget = pop->gNodeList.logTarget;
02665                         logOldProposal = pop->gNodeList.logOldProposal;
02666                         pop->copyGNodeStatesToCandidateStates(samplerType);
02667                         pop->storeSampledGametes();
02668                         pop->copyCandidateToAccepted(samplerType);
02669                         if(whatToCompute=="probDescHaplo"){
02670                                 pop->SetFreqHaploFounders();
02671                         }
02672                         else if(whatToCompute=="ibdCovMatrix"){
02673                                 IBDCovMatrix.resize(pop->popsize,pop->popsize,0.0);
02674                         }
02675                         else if(whatToCompute=="genotypeFreq"){
02676                                 pop->initGenotypeFreq();
02677                         }
02678                 }
02679                 else{
02680                         // q(NEW) and pi(NEW)
02681                         pop->gNodeList.logProposal = 0.0;
02682                         pop->gNodeList.logTarget   = 0.0;
02683                         pop->getGNodeListSample(maxsize,numLoci,samplerType);
02684                         pop->copyGNodeStatesToCandidateStates(samplerType);
02685                         pop->storeSampledGametes();
02686                         logNewProposal = pop->gNodeList.logProposal;
02687                         logNewTarget   = pop->gNodeList.logTarget;
02688                         if (!(i%100)){
02689                                 cout << "logNewTarget   = " << logNewTarget   << endl;
02690                                 cout << "logOldTarget   = " << logOldTarget   << endl;
02691                                 cout << "logNewProposal = " << logNewProposal << endl;
02692                                 cout << "logOldProposal = " << logOldProposal << endl;
02693                         }
02694                         // accept or reject the new sample
02695                         alpha=std::exp(logNewTarget + logOldProposal - logNewProposal - logOldTarget);
02696                         if (!(i%100)){
02697                                 cout << " alpha = " << alpha << endl;
02698                         }
02699                         ranNumber=ranf();
02700                         if (ranNumber <= alpha) {
02701                                 logOldTarget = logNewTarget;
02702                                 logOldProposal = logNewProposal;
02703                                 pop->copyCandidateToAccepted(samplerType);
02704                                 goodCount++;
02705                         }
02706                         else {
02707                                 pop->storeSampledGametes();
02708                                 badCount++;
02709                                 //cout << "badCount = " << badCount << endl;
02710                         }
02711                         //cout << "goodCount = " << goodCount << endl;
02712                         if(whatToCompute=="probDescHaplo"){
02713                                 pop->UpdateFreqHaploFounders();
02714                         }
02715                         if (!(i%100)){
02716                                 cout << "No of samples accepted = " << goodCount << " out of a total of " << goodCount+badCount << endl;
02717                                 double ratio = double(badCount)/(goodCount+badCount);
02718                                 // cout << "Rejection rate = " << ratio << endl;
02719                         }
02720                 }
02721                 if (whatToCompute=="haplotypeFreq"){
02722                         pop->countHaplotypes(samplerType);
02723                 }
02724                 else if (whatToCompute=="genotypeFreq"){
02725                         pop->countGenotypes(samplerType);
02726                 }
02727                 else if(whatToCompute=="probDescHaplo"){
02728                         pop->sampleSegregationIndicators();
02729                 } 
02730                 else if(whatToCompute=="ibdCovMatrix"){
02731                         IBDCovMatrix += getIBDMatrix();
02732                 }
02733         }
02734         cout << "No of samples accepted = " << goodCount << endl;
02735         double ratio = double(badCount)/(goodCount+badCount);
02736         cout << "Rejection rate = " << ratio << endl;
02737         if (whatToCompute=="haplotypeFreq"){
02738                 pop->displayHaplotypeFrequencies(numOfSamples);
02739         }
02740         else if(whatToCompute=="probDescHaplo"){
02741                 pop->CalcFreqHaploFounders(numOfSamples);
02742                 pop->DisplayFreqHaploFounders();
02743         }
02744         else if(whatToCompute=="ibdCovMatrix"){
02745                 char *s = "./tryRES/IBDCovMatrix"; 
02746                 ofstream outfile;
02747                 outfile.open(s);
02748                 outfile << IBDCovMatrix/numOfSamples;
02749         }
02750 }
02751 /*! \fn void Model::RSamplerMH(unsigned pedBlockSize, unsigned numLoci,unsigned numOfSamples,const std::string &samplerType="genotypic",const std::string &whatToCompute="genotypeProb")
02752 *  \brief method to sample ordered genotypes using the Metropolis Hastings algorithm to accept samples proposed by sequential imputation.
02753 */
02754 } ////// end of namespace matvec
02755 
02756 

Generated on Thu Jun 16 17:13:49 2005 for Matvec by doxygen1.2.16