00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <iostream>
00023 #include <sstream>
00024 #include <iomanip>
00025 #include <cstdarg>
00026 #include "session.h"
00027 #include "stat.h"
00028 #include "model.h"
00029 #include "population.h"
00030 #include "parmmap.h"
00031
00032 namespace matvec {
00033
00034
00035 void Model::initialize(void)
00036 {
00037 num_ped = 0;
00038 max_nz = 0;
00039 numrec = 0;
00040 numobs = 0;
00041 numcol = 0;
00042 numterm = 0;
00043 popsize = 0;
00044 numfact = 0;
00045 maxorder = 0;
00046 numtrait = 0;
00047 npattern = 0;
00048 hmmesize = 0;
00049 numgroup = 0;
00050 non_zero = 0;
00051 type = bad_model;
00052 ntermGdist = 0;
00053 pop_created = 0;
00054 data_prepared = 0;
00055 weightinverse = 0;
00056 modelpcount =0;
00057 modelstr = 0;
00058
00059 pop = 0;
00060 rve = 0;
00061 data = 0;
00062 kvec = 0;
00063 rawpos = 0;
00064 idlist = 0;
00065 pos_term = 0;
00066 xval_term = 0;
00067 traitname = 0;
00068 trait_vec = 0;
00069 rec_indid = 0;
00070 xact_htable = 0;
00071 factor_struct = 0;
00072 trait_struct = 0;
00073 unknown_prior = 0;
00074
00075
00076
00077
00078 nfunk_in_dfreml = 0;
00079 reml_value = 0.0;
00080 dfreml_called = 0;
00081 }
00082
00083 void Model::copyfrom(const Model& M)
00084 {
00085 initialize();
00086 num_ped = M.num_ped;
00087 yry = M.yry;
00088 data = M.data;
00089 term = M.term;
00090 max_nz = M.max_nz;
00091 numrec = M.numrec;
00092 numobs = M.numobs;
00093 numcol = M.numcol;
00094 max_nz = M.max_nz;
00095 popsize = M.popsize;
00096 numterm = M.numterm;
00097 numfact = M.numfact;
00098 lng0vec = M.lng0vec;
00099 lnr0vec = M.lnr0vec;
00100 blupsol = M.blupsol;
00101 rellrhs = M.rellrhs;
00102 popsize = M.popsize;
00103 hmmesize = M.hmmesize;
00104 numtrait = M.numtrait;
00105 npattern = M.npattern;
00106 hmmesize = M.hmmesize;
00107 numgroup = M.numgroup;
00108 maxorder = M.maxorder;
00109 non_zero = M.non_zero;
00110 rec_indid = M.rec_indid;
00111 type = M.type;
00112 modelfname = M.modelfname;
00113 weightname = M.weightname;
00114 ntermGdist = M.ntermGdist;
00115 modelstring = M.modelstring;
00116 pattern_tree = M.pattern_tree;
00117 residual_var = M.residual_var;
00118 data_prepared = M.data_prepared;
00119 weightinverse = M.weightinverse;
00120 nfunk_in_dfreml= M.nfunk_in_dfreml;
00121 reml_value = M.reml_value;
00122 dfreml_called = M.dfreml_called;
00123 hmmec.resize(hmmesize,max_nz);
00124
00125 int i,k;
00126
00127 if (pos_term) {delete [] pos_term; pos_term = 0;}
00128 if (M.pos_term) pos_term = new unsigned [numterm+1];
00129 if (xval_term) {delete [] xval_term; xval_term = 0;}
00130 if (M.xval_term) xval_term = new double [numterm+1];
00131 if (trait_vec) {delete [] trait_vec; trait_vec= 0;}
00132 if (M.trait_vec) trait_vec = new double [numterm+1];
00133
00134 if (unknown_prior) {
00135 delete [] unknown_prior;unknown_prior = 0;
00136 }
00137 if (M.unknown_prior) {
00138 if(numterm>0){
00139 unknown_prior = new UnknownDist[numterm];
00140 }
00141 else {
00142 unknown_prior = 0;
00143 }
00144 for (i=0; i<numterm; i++) {
00145 unknown_prior[i].copyfrom(M.unknown_prior[i]);
00146 if (strcmp(M.term.termlist[i].prior->name(),"UnknownDist") == 0) {
00147 term.termlist[i].prior = &(unknown_prior[i]);
00148 }
00149 else {
00150 term.termlist[i].prior = M.term.termlist[i].prior;
00151 }
00152 }
00153 }
00154 if (traitname) {delete [] traitname; traitname = 0;}
00155 if (M.traitname) {
00156 if(numtrait>0){
00157 traitname = new std::string [numtrait];
00158 }
00159 else {
00160 traitname = 0;
00161 }
00162 for (i=0; i<numtrait; i++) traitname[i] = M.traitname[i];
00163 }
00164 if (kvec) {delete [] kvec; kvec = 0;}
00165 if (M.kvec) {
00166 if(numtrait>0){
00167 kvec = new unsigned [npattern];
00168 }
00169 else {
00170 kvec = 0;
00171 }
00172 for (i=0; i<npattern; i++) kvec[i] = M.kvec[i];
00173 }
00174 if (rawpos) {delete [] rawpos; rawpos = 0;}
00175 if (M.rawpos) {
00176 if(maxorder>0){
00177 rawpos = new unsigned [maxorder];
00178 }
00179 else {
00180 rawpos = 0;
00181 }
00182 }
00183 pattern.resize(npattern);
00184 std::set<std::string>::iterator pos;
00185 for (i=0, pos=pattern_tree.begin(); pos != pattern_tree.end(); ++pos,++i) {
00186 pattern[i] = *pos;
00187 }
00188 pop_created = M.pop_created;
00189 if (M.pop_created) {
00190 pop = new Population(*(M.pop));
00191 }
00192 else {
00193 pop = M.pop;
00194 }
00195 if (rve) {
00196 delete [] rve;
00197 rve = 0;
00198 }
00199 if (M.rve) {
00200 if(npattern>0){
00201 rve = new Matrix<double> [npattern];
00202 }
00203 else {
00204 rve = 0;
00205 }
00206 check_ptr(rve);
00207 for (i=0; i<npattern; i++) rve[i].reserve(numtrait,numtrait);
00208 }
00209 if (factor_struct) {
00210 delete [] factor_struct; factor_struct = 0;
00211 }
00212 if (M.factor_struct) {
00213 if(numfact>0){
00214 factor_struct = new FieldStruct [numfact];
00215 }
00216 else {
00217 factor_struct = 0;
00218 }
00219 for (i=0; i<numfact; i++) factor_struct[i] = M.factor_struct[i];
00220 }
00221 if (trait_struct) {
00222 delete [] trait_struct; trait_struct = 0;
00223 }
00224 if (M.trait_struct) {
00225 if(numtrait>0){
00226 trait_struct = new FieldStruct* [numtrait];
00227 }
00228 else {
00229 trait_struct = 0;
00230 }
00231 for (i=0; i<numtrait; i++) {
00232 k = fieldindex(traitname[i],factor_struct,numfact);
00233 if (k >= 0 && k < numfact) {
00234 trait_struct[i] = &(factor_struct[k]);
00235 }
00236 else {
00237 throw exception("Model::copyfrom(): you have probably found a bug!");
00238 }
00239 }
00240 }
00241 if (xact_htable) {delete [] xact_htable; xact_htable = 0;}
00242 if (M.xact_htable) {
00243 if(numterm>0){
00244 xact_htable = new HashTable [numterm];
00245 }
00246 else {
00247 xact_htable = 0;
00248 }
00249 for (i=0; i<numterm; i++) xact_htable[i] = M.xact_htable[i];
00250 }
00251 if (idlist) {delete [] idlist; idlist = 0;}
00252 if (M.idlist) {
00253 if(numcol>0){
00254 idlist = new HashTable* [numcol];
00255 }
00256 else {
00257 idlist = 0;
00258 }
00259 check_ptr(idlist);
00260 if (data) for (i=0; i<numcol; i++) idlist[i] = data->hashtable[i];
00261 }
00262 }
00263
00264 int Model::equation(const std::string &modelspecs)
00265 {
00266
00267
00268
00269
00270
00271
00272
00273 int retval = 1;
00274 num_ped=0;
00275 if (modelspecs == "") throw exception("Model::equation(): equation is empty");
00276 modelstring = modelspecs;
00277 if (build_model_term(modelstring)) {
00278 maxorder = term.maxorder();
00279 if (rawpos) {
00280 delete [] rawpos;
00281 rawpos=0;
00282 }
00283 if(maxorder>0){
00284 rawpos = new unsigned [maxorder];
00285 }
00286 else {
00287 rawpos = 0;
00288 }
00289 lng0vec.resize(numterm);
00290 }
00291 else {
00292 modelstring = "";
00293 retval = 0;
00294 }
00295 if (retval == 1) {
00296 type = fixed_model;
00297 }
00298 else {
00299 type = bad_model;
00300 }
00301 return retval;
00302 }
00303
00304 void Model::fitdata(Data& D)
00305 {
00306 if (D.num_rows() > 0) {
00307 data = &D;
00308 }
00309 else {
00310 data = 0;
00311 type = bad_model;
00312 warning("Model::fitdata(D): Data is empty");
00313 }
00314 max_nz = 0;
00315 numrec = 0;
00316 numobs = 0;
00317 numcol = 0;
00318 npattern = 0;
00319 hmmesize = 0;
00320 data_prepared = 0;
00321 hmmec.resize(0,0);
00322 }
00323
00324 int Model::display(void) const
00325 {
00326 std::cout << " model: " << modelstring << "\n";
00327 return 1;
00328 }
00329
00330 void Model::release(void)
00331 {
00332 if (idlist) {
00333 delete [] idlist;
00334 idlist= 0;
00335 }
00336 if (xact_htable) {
00337 delete [] xact_htable;
00338 xact_htable= 0;
00339 }
00340 if (kvec) {
00341 delete [] kvec;
00342 kvec = 0;
00343 }
00344 if (rawpos) {
00345 delete [] rawpos;
00346 rawpos = 0;
00347 }
00348 if (pos_term) {
00349 delete [] pos_term;
00350 pos_term = 0;
00351 }
00352 if (xval_term) {
00353 delete [] xval_term;
00354 xval_term = 0;
00355 }
00356 if (trait_vec) {
00357 delete [] trait_vec;
00358 trait_vec = 0;
00359 }
00360 if (traitname) {
00361 delete [] traitname;
00362 traitname = 0;
00363 }
00364 if (pop_created) {
00365 delete pop; pop = 0;
00366 pop_created=0;
00367 }
00368 if (modelstr) {
00369 delete [] modelstr;
00370 modelstr=0; modelpcount=0;
00371 }
00372 if (trait_struct) {
00373 delete [] trait_struct;
00374 trait_struct=0;
00375 }
00376 if (factor_struct) {
00377 delete [] factor_struct;
00378 numfact=0; factor_struct = 0;
00379 }
00380 if (unknown_prior) {
00381 delete [] unknown_prior;
00382 unknown_prior=0;
00383 }
00384 if(rve){
00385 delete [] rve;
00386 rve = 0;
00387 }
00388 remove(modelfname.c_str());
00389 }
00390
00391 int Model::prepare_data(const std::string &solver)
00392 {
00393 if (data_prepared) return 1;
00394 if (modelstring == "" || !data) throw exception("Model::prepare_data(): empty model or no data");
00395
00396
00397
00398
00399
00400 if (!data->in_disk()) data->save_datasheet();
00401 if (!data->in_memory()) data->input_datasheet();
00402
00403
00404 int k,t,ii,ii1;
00405 unsigned i,nrow_in_data;
00406 char CL;
00407 act_numtrait=numtrait;
00408
00409
00410
00411
00412 ModelTerm *T;
00413 std::string fn;
00414 unsigned ped_cnt=0,first_ped;
00415 for (t=0; t<numterm; t++) {
00416 T = &(term[t]);
00417 for (i=0; i<T->order()-1; i++) {
00418 ii = T->factorindx[i]; ii1 = T->factorindx[i+1];
00419 if (ii == ii1) {
00420 if (factor_struct[ii].classi() != 'C') {
00421 fn = factor_struct[ii].name();
00422 throw exception("Model::prepare_data(): Is there a covariate?");
00423
00424 }
00425 }
00426 }
00427 }
00428 nrow_in_data = numrec = data->num_rows();
00429 numcol = data->num_cols();
00430
00431
00432
00433
00434 for (i=0; i<numcol; i++) data->datasheet[i].classi('N');
00435 if (weightname.size() > 0) {
00436 k = data->field_index(weightname.c_str());
00437 if (k >= 0) {
00438 if (data->datasheet[k].type() != 'S') {
00439 data->datasheet[k].classi('W');
00440 data->datasheet[k].value_for_missing(0.0);
00441 }
00442 else {
00443 throw exception("Model::prepare_data(): str column can't be a weight column");
00444 }
00445 }
00446 else {
00447 throw exception("Model::prepare_data(): weight variable not in data");
00448 }
00449 }
00450
00451 for (i=0; i<numfact; i++) {
00452
00453 k = data->field_index(factor_struct[i].name());
00454 if (k >= 0) {
00455 CL = factor_struct[i].classi();
00456 if (CL == 'F') {
00457 if (data->datasheet[k].type() == 'S') {
00458 CL = 'U';
00459 }
00460 else if (data->datasheet[k].type() == 'I') {
00461 CL = 'I';
00462 }
00463 }
00464 else if (CL == 'C') {
00465 if (data->datasheet[k].type() != 'F') {
00466 throw exception("Model::prepare_data(): a covariate is specified as string");
00467 }
00468 }
00469 else {
00470 if (data->datasheet[k].type() == 'I') {
00471 throw exception("Model::prepare_data(): 'intercept' can only be a fixed effect!");
00472 }
00473 }
00474 if (data->datasheet[k].classi() == 'W') {
00475 throw exception("Model::prepare_data(): a weight variable cann't be a part of model euqation");
00476 }
00477 data->datasheet[k].classi(CL);
00478 factor_struct[i].index(data->datasheet[k].index());
00479 factor_struct[i].nmiss(data->datasheet[k].nmiss());
00480 }
00481 else {
00482 std::cout << "\n" << factor_struct[i].name() << " " << k <<"\n";
00483 throw exception("Model::prepare_data(): a variable in model, but not in data");
00484 }
00485 }
00486
00487
00488
00489
00490 DataNode *column;
00491 Vector<bool> record_legality(nrow_in_data);
00492
00493 for (t=1; t<numcol; t++) {
00494 if (data->datasheet[t].nmiss() == 0) continue;
00495 CL = data->datasheet[t].classi();
00496 if (!(CL=='F' || CL=='U' || CL=='P' || CL=='C')) continue;
00497 column = data->rawcol(t);
00498 for (i=0; i<nrow_in_data; i++) {
00499 if (record_legality[i]) continue;
00500 if (column[i].missing) {
00501 record_legality[i] = true;
00502 numrec--;
00503 }
00504 }
00505 }
00506
00507 re_hash_data(record_legality);
00508 if (maxorder>1) assign_id_xact(record_legality);
00509
00510
00511
00512
00513
00514 for (t=0; t<numfact; t++) {
00515 i = factor_struct[t].index();
00516 factor_struct[t].nlevel(data->datasheet[i].nlevel());
00517 }
00518 for (t=0; t<numterm; t++) {
00519 if (term[t].order() == 1) {
00520 term[t].numlevel = factor_struct[term[t].factorindx[0]].nlevel();
00521 }
00522 }
00523
00524
00525 if (pos_term) {
00526 delete [] pos_term;
00527 pos_term = 0;
00528 }
00529 pos_term = new unsigned [numterm+1];
00530 if (xval_term) {
00531 delete [] xval_term;
00532 xval_term = 0;
00533 }
00534 xval_term = new double [numterm+1];
00535 if (trait_vec) {
00536 delete [] trait_vec;
00537 trait_vec = 0;
00538 }
00539 if(numtrait>0){
00540 trait_vec = new double [numtrait];
00541 }
00542 else {
00543 trait_vec = 0;
00544 }
00545
00546
00547
00548
00549
00550
00551
00552
00553 save_pos_val(record_legality,solver);
00554 data->release_datasheet();
00555
00556 record_legality.clear();
00557
00558 lnr0vec.resize(npattern);
00559 if (rve){
00560 delete [] rve;
00561 rve = 0;
00562 }
00563 if(npattern>0){
00564 rve = new Matrix<double> [npattern];
00565 }
00566 else {
00567 rve = 0;
00568 }
00569 check_ptr(rve);
00570 for (i=0; i<npattern; i++) rve[i].reserve(numtrait,numtrait);
00571
00572
00573
00574
00575 hmmesize=0;
00576 unsigned end=nt_vec[0]*term[0].numlevel;
00577
00578
00579
00580
00581 for (term[0].start = 0, i=1; i<numterm; i++) {
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596 if(pos_vec[i]==0){
00597 term[i].start=end;
00598 end+=nt_vec[i]*term[i].numlevel;
00599 }
00600 else{
00601 term[i].start = term[base_effect[i]].start + numtrait*pos_vec[i];
00602 }
00603
00604
00605 }
00606
00607
00608
00609 for (hmmesize=0,t=0; t<numterm; t++) {
00610 hmmesize += nt_vec[t]*term[t].nlevel();
00611 if(nt_vec[t]>act_numtrait) act_numtrait=nt_vec[t];
00612 }
00613 if (max_nz == 0) max_nz = est_nze(hmmesize,act_numtrait,popsize);
00614 data_prepared = 1;
00615 return 1;
00616 }
00617
00618 void Model::re_hash_data(Vector<bool> &record_legality)
00619 {
00620
00621
00622
00623 if (!data) return;
00624 if (!(data->in_memory())) data->input_datasheet();
00625
00626 unsigned i,id,t,nrow_in_data = data->num_rows();
00627 char CL,TY;
00628 const char *strid;
00629
00630 if (idlist) {
00631 delete [] idlist;
00632 idlist = 0;
00633 }
00634 if(numcol>0){
00635 idlist = new HashTable* [numcol];
00636 }
00637 else {
00638 idlist = 0;
00639 }
00640 check_ptr(idlist);
00641 HashTable *dhtable,tempidlist;
00642 int *tempval = new int;
00643 DataNode *column;
00644
00645 for (t=1; t<numcol; t++) {
00646 dhtable = idlist[t] = data->hashtable[t];
00647 CL = data->datasheet[t].classi();
00648 TY = data->datasheet[t].type();
00649 column = data->rawcol(t);
00650 switch (CL) {
00651 case 'F':
00652 if (data->hashtable[t]->size() == 0) {
00653 tempidlist.resize(nrow_in_data,sizeof(int));
00654 for (i=0; i<nrow_in_data; i++) {
00655 if (record_legality[i]) continue;
00656 *tempval = static_cast<int>(column[i].double_val());
00657 tempidlist.insert(tempval);
00658 }
00659 id = tempidlist.size();
00660 data->datasheet[t].nlevel(id);
00661 dhtable->resize(id,sizeof(int));
00662 for (i=0; i<nrow_in_data; i++) {
00663 if (record_legality[i]) continue;
00664 *tempval = static_cast<int>(column[i].double_val());
00665 dhtable->insert(tempval);
00666 }
00667 }
00668 break;
00669 case 'P':
00670 case 'G':
00671
00672
00673
00674
00675
00676
00677
00678 if(nrow_in_data>0){
00679 rec_indid = new unsigned [nrow_in_data];
00680 }
00681 else {
00682 rec_indid = 0;
00683 }
00684 data->datasheet[t].nlevel(pop->size());
00685 for (i=0; i<nrow_in_data; i++) {
00686 if (record_legality[i]) continue;
00687 if (TY == 'S') {
00688 id = column[i].unsigned_val();
00689 strid = (const char *)dhtable->find(id);
00690 if (pop->maxnamelen > 0) {
00691 id = pop->hashtable.get_id(strid);
00692 }
00693 else {
00694 id = 0;
00695 }
00696 }
00697 else {
00698 id = (unsigned) column[i].double_val();
00699 if (pop->maxnamelen > 0) {
00700 std::ostringstream ostr;
00701 ostr << id;
00702 std::string t = ostr.str();
00703 strid = t.c_str();
00704 id = pop->hashtable.get_id(strid);
00705 }
00706 }
00707 rec_indid[i] = id;
00708 if (id) {
00709 column[i].unsigned_val(id);
00710 }
00711 else {
00712 column[i].missing = 1;
00713 record_legality[i] = true;
00714 numrec--;
00715 warning("Model::re_hash_data(): %s is in the data, "
00716 "but not in the pedigree, so it's ignored",strid);
00717 }
00718 }
00719 break;
00720 case 'C':
00721 case 'I':
00722 data->datasheet[t].nlevel(1);
00723 break;
00724
00725 }
00726 }
00727 if (tempval){
00728 delete tempval;
00729 tempval = 0;
00730 }
00731 }
00732
00733 void Model::save_pos_val(Vector<bool> &record_legality, const std::string &solver)
00734 {
00735
00736
00737
00738
00739
00740
00741
00742
00743 if (!data) return;
00744 if (!(data->in_memory())) data->input_datasheet();
00745 unsigned nrow_in_data = data->num_rows();
00746 double rawxval;
00747 Vector<unsigned> pos_datacol(numcol);
00748 Vector<double> xval_datacol(numcol);
00749 int *tempval = new int;
00750 unsigned i,j,k,t,nord;
00751 unsigned ntm1 = numterm + 1;
00752 char CL;
00753
00754
00755
00756
00757
00758
00759 char *patstring = new char [numtrait+1];
00760 patstring[numtrait]='\0';
00761 pattern_tree.clear();
00762 DataNode *dn;
00763 Vector<double> y_mean(numtrait);
00764 Vector<double> y_std(numtrait);
00765 Vector<unsigned> y_nlevel(numtrait);
00766 for (numobs=0,i=0; i<nrow_in_data; i++) {
00767 if (record_legality[i]) continue;
00768 for (k=0, t=0; t<numtrait; t++) {
00769 j = trait_struct[t]->index();
00770 dn = data->cell(i,j);
00771 if (dn->missing) {
00772 patstring[t] = '0';
00773 k++;
00774 }
00775 else {
00776 patstring[t] = '1';
00777 y_mean[t] += dn->double_val();
00778 y_std[t] += dn->double_val()*dn->double_val();
00779 y_nlevel[t] += 1;
00780 }
00781 }
00782 if (k == numtrait) {
00783 record_legality[i] = true;
00784 numrec--;
00785 for (k=1; k<numcol; k++) {
00786 CL = data->datasheet[k].classi();
00787 if (CL != 'C') continue;
00788 data->cell(i,k)->missing = 1;
00789 data->datasheet[k].count_miss(1);
00790 }
00791 }
00792 else {
00793 numobs++;
00794 pattern_tree.insert(patstring);
00795 }
00796 }
00797 double nr;
00798 for (t=0; t<numtrait; t++) {
00799 nr = static_cast<double>(y_nlevel[t]);
00800 if (nr == 0.0) continue;
00801 y_mean[t] /= nr;
00802 if (nr > 1.0) {
00803 y_std[t] = std::sqrt((y_std[t] - y_mean[t]*y_mean[t]*nr)/(nr-1.0));
00804 }
00805 trait_struct[t]->mean(y_mean[t]);
00806 trait_struct[t]->std(y_std[t]);
00807 trait_struct[t]->nlevel(y_nlevel[t]);
00808 }
00809
00810
00811
00812
00813 for (t=1; t<numcol; t++) {
00814 if (data->datasheet[t].classi() == 'C') data->datasheet[t].mean();
00815 }
00816
00817 npattern = pattern_tree.size();
00818 pattern.resize(npattern);
00819 std::set<std::string>::iterator pos;
00820 for (i=0,pos = pattern_tree.begin(); pos != pattern_tree.end(); ++pos,++i) {
00821 pattern[i] = *pos;
00822 }
00823
00824 if (kvec) {
00825 delete [] kvec;
00826 kvec = 0;
00827 }
00828 if(npattern>0){
00829 kvec = new unsigned [npattern];
00830 }
00831 else {
00832 kvec = 0;
00833 }
00834 memset(kvec,'\0',sizeof(unsigned)*npattern);
00835 DataNode* recd;
00836 if(numcol>0){
00837 recd = new DataNode [numcol];
00838 }
00839 else {
00840 recd = 0;
00841 }
00842
00843 if (modelstr) {delete [] modelstr; modelstr=0; modelpcount=0;}
00844 std::stringstream bmodfile;
00845 if (!bmodfile) throw exception(std::string("Model::save_pos_val(): cannot open file:") + modelfname);
00846 if(solver=="iod"){
00847 pos_val_vector.resize(nrow_in_data);
00848 }
00849 for (i=0; i<nrow_in_data; i++) {
00850 if (record_legality[i]) continue;
00851 xval_term[numterm] = 1.0;
00852 data->row(i,recd);
00853 for (j=0; j<numcol; j++) {
00854 CL = data->datasheet[j].classi();
00855 if (CL =='F') {
00856 xval_datacol[j] = 1.0;
00857 *tempval = static_cast<int>(recd[j].double_val());
00858 pos_datacol[j] = idlist[j]->get_id(tempval);
00859 }
00860 else if (CL=='U' || CL=='P') {
00861 xval_datacol[j] = 1.0;
00862 pos_datacol[j] = recd[j].unsigned_val();
00863 }
00864 else if (CL=='C') {
00865 xval_datacol[j] = recd[j].double_val();
00866 pos_datacol[j] = 1;
00867 }
00868 else if (CL=='W') {
00869 rawxval = recd[j].double_val();
00870 if (rawxval >= 0) {
00871 if (weightinverse == 1) {
00872 if (rawxval > 0.0) rawxval = 1.0/rawxval;
00873 }
00874 xval_term[numterm] = rawxval;
00875 }
00876 }
00877 else if (CL=='I') {
00878 xval_datacol[j] = 1.0;
00879 pos_datacol[j] = 1;
00880 }
00881 }
00882
00883 for (t=0; t<numterm; t++) {
00884 rawxval = 1.0;
00885 nord = term[t].order();
00886 for (j=0; j<nord; j++) {
00887 k = factor_struct[term[t].factorindx[j]].index();
00888 rawpos[j] = pos_datacol[k];
00889 rawxval *= xval_datacol[k];
00890 }
00891 if (nord == 1) {
00892 pos_term[t] = rawpos[0];
00893 }
00894 else {
00895 pos_term[t] = xact_htable[t].get_id(rawpos);
00896 }
00897 xval_term[t] = rawxval;
00898 }
00899 for (t=0; t<numtrait; t++) {
00900 k = trait_struct[t]->index();
00901 if (recd[k].missing) {
00902 patstring[t] = '0';
00903 trait_vec[t] = 0.0;
00904 }
00905 else {
00906 patstring[t] = '1';
00907 trait_vec[t] = recd[k].double_val();
00908 }
00909 }
00910 for (k=0; k<npattern; ++k) {
00911 if (pattern[k] == patstring) {break;}
00912 }
00913 if (k == npattern) throw exception("Model::save_pos_val(): you have probably found a bug!");
00914 kvec[k] += 1;
00915 pos_term[numterm] = k;
00916 if(solver=="iod"){
00917 pos_val_vector[i].pos_term.resize(ntm1);
00918 pos_val_vector[i].xval_term.resize(ntm1);
00919 pos_val_vector[i].trait_vec.resize(numtrait);
00920 for (unsigned j=0;j<ntm1;j++){
00921 pos_val_vector[i].pos_term[j] = pos_term[j];
00922 pos_val_vector[i].xval_term[j] = xval_term[j];
00923 }
00924 for (unsigned j=0;j<numtrait;j++){
00925 pos_val_vector[i].trait_vec[j] = trait_vec[j];
00926 }
00927 }
00928 else{
00929 bmodfile.write((char *)pos_term, ntm1*sizeof(unsigned));
00930 bmodfile.write((char *)xval_term, ntm1*sizeof(double));
00931 bmodfile.write((char *)trait_vec, numtrait*sizeof(double));
00932 }
00933 }
00934
00935
00936
00937
00938
00939
00940 modelstringstr=bmodfile.str();
00941 if (recd) {
00942 delete [] recd;
00943 recd = 0;
00944 }
00945 if (patstring) {
00946 delete [] patstring;
00947 patstring = 0;
00948 }
00949 if(tempval){
00950 delete tempval;
00951 tempval = 0;
00952 }
00953
00954
00955
00956
00957
00958
00959 for (t=0; t<numcol; t++) {
00960 if (data->datasheet[t].classi() =='P') {
00961 data->datasheet[t].nlevel(data->hashtable[t]->size());
00962 }
00963 }
00964 data->release_datasheet();
00965 }
00966
00967 void Model::assign_id_xact(const Vector<bool> &record_legality)
00968 {
00969
00970
00971
00972 if (!data) return;
00973 unsigned nord, nrow_in_data = data->num_rows();
00974
00975 if (xact_htable) {
00976 delete [] xact_htable;
00977 xact_htable = 0;
00978 }
00979 if(numterm>0){
00980 xact_htable = new HashTable [numterm];
00981 }
00982 else {
00983 xact_htable = 0;
00984 }
00985 int t;
00986 for (t=0; t<numterm; t++) {
00987 if (term[t].order() > 1) {
00988 xact_htable[t].resize(nrow_in_data,term[t].order()*sizeof(unsigned));
00989 }
00990 }
00991 hashxact(record_legality);
00992 for (t=0;t<numterm;t++) {
00993 nord = term[t].order();
00994 if (nord>1) {
00995 term[t].numlevel = xact_htable[t].size();
00996 xact_htable[t].resize(term[t].nlevel(),nord*sizeof(unsigned));
00997 }
00998 }
00999 hashxact(record_legality);
01000 }
01001
01002 void Model::hashxact(const Vector<bool> &record_legality)
01003 {
01004 if (!data) return;
01005 char CL;
01006 int i,j,t,k,nord;
01007 int *tempval = new int;
01008 Vector<unsigned> pos_datacol(numcol);
01009 DataNode* recd;
01010 if(numcol>0){
01011 recd = new DataNode [numcol];
01012 }
01013 else {
01014 recd = 0;
01015 }
01016 unsigned nrow_in_data = data->num_rows();
01017
01018 for (i=0; i<nrow_in_data; i++) {
01019 if (record_legality[i]) continue;
01020 data->row(i,recd);
01021 pos_datacol.initialize(numcol,0);
01022 for (t=0; t<numterm; t++) {
01023 nord = term[t].order();
01024 if (nord > 1) {
01025 for (j=0; j<nord; j++) {
01026 k = factor_struct[term[t].factorindx[j]].index();
01027 if (pos_datacol[k] == 0) {
01028 CL = data->datasheet[k].classi();
01029 if (CL == 'F') {
01030 *tempval = static_cast<int>(recd[k].double_val());
01031 pos_datacol[k] = idlist[k]->get_id(tempval);
01032 }
01033 else if (CL == 'U' ||CL == 'P') {
01034 pos_datacol[k] = recd[k].unsigned_val();
01035 }
01036 else if (CL =='C') {
01037 pos_datacol[k] = 1;
01038 }
01039 }
01040 rawpos[j] = pos_datacol[k];
01041 }
01042 xact_htable[t].insert(rawpos);
01043 }
01044 }
01045 }
01046 if (recd) {
01047 delete [] recd;
01048 recd = 0;
01049 }
01050 if(tempval){
01051 delete tempval;
01052 tempval = 0;
01053 }
01054 }
01055
01056 void Model::prior_dist(const std::string &termname, GeneticDist *D)
01057
01058
01059
01060
01061 {
01062 int k = term.index(termname,factor_struct);
01063 if (k < 0) throw exception("Model::prior_dist(): term not in the model equation");
01064 term[k].prior = D;
01065 term[k].classi('R');
01066 if (strcmp(D->name(),"GeneticDist") == 0) {
01067 numterm--;
01068 ntermGdist++;
01069 if (k<numterm) term.swap(k,numterm);
01070 }
01071 }
01072
01073 void Model::prior_dist(const std::string &termname,Pedigree& P, GeneticDist *D)
01074
01075
01076
01077
01078 {
01079 int k = term.index(termname,factor_struct);
01080 if (k<0) throw exception("Model::prior_dist(): term not in the model equation");
01081 if (D->ntrait() != 1) throw exception("Model::prior_dist(): only one trait allowed");
01082 term[k].prior = D;
01083 if (term[k].order() > 1) throw exception("Model::prior_dist(): pedigree cannot be for this term");
01084
01085 term[k].classi('G');
01086 factor_struct[term[k].factorindx[0]].classi('G');
01087 if (pop_created) delete pop;
01088 pop = new Population;
01089 check_ptr(pop);
01090 pop_created = 1;
01091 try {
01092 pop->input_ped(P,D);
01093 }
01094 catch (exception &ex) {
01095 type = bad_model;
01096 throw;
01097 }
01098 numgroup = P.ngroup();
01099 popsize = pop->size();
01100 if (strcmp(D->name(),"GeneticDist") == 0) {
01101 term[k].numlevel = popsize;
01102 numterm--;
01103 ntermGdist++;
01104 if (k<numterm) term.swap(k,numterm);
01105 }
01106 }
01107
01108 void Model::RSamplerPrior_dist(const std::string &termname,Pedigree& P, Data *D,GeneticDist *G){
01109
01110
01111
01112 int k = term.index(termname,factor_struct);
01113 if (k<0) throw exception("Model::prior_dist(): term not in the model equation");
01114 if (G->ntrait() != 1) throw exception("Model::prior_dist(): only one trait allowed");
01115 term[k].prior = G;
01116 if (term[k].order() > 1) throw exception("Model::prior_dist(): pedigree cannot be for this term");
01117
01118 term[k].classi('G');
01119 factor_struct[term[k].factorindx[0]].classi('G');
01120 if (pop_created) delete pop;
01121 pop = new Population;
01122 check_ptr(pop);
01123 pop_created = 1;
01124 pop->model = this;
01125 try {
01126 pop->setupRSampler(P,D,G);
01127
01128 pop->ListAlleleFounders();
01129 pop->SetPossibleHaplotypes();
01130
01131 }
01132 catch (exception &ex) {
01133 type = bad_model;
01134 cerr << ex.what() << endl;
01135 throw;
01136 }
01137 numgroup = P.ngroup();
01138 popsize = pop->size();
01139 if (strcmp(G->name(),"GeneticDist") == 0) {
01140 term[k].numlevel = popsize;
01141 numterm--;
01142 ntermGdist++;
01143 if (k<numterm) term.swap(k,numterm);
01144 }
01145 }
01146
01147
01148
01149
01150
01151
01152
01153 void Model::DGSamplerSetup(unsigned numLoci, Data *D){
01154
01155
01156
01157 pop->descent_graph_setup(D);
01158 pop->founder_allele_counter = 0;
01159 int popsize = pop->size();
01160 int zero = 0;
01161 for (int i=0;i<popsize;i++) {
01162 pop->member(i)->set_founder_alleles(0);
01163 }
01164 unsigned lastLocus = numLoci;
01165 for (int i=0; i<popsize; i++) {
01166 if (pop->member(i)->m_counter_map.empty()){
01167 int zero = 0;
01168 pop->member(i)->m_counter_map.resize(numLoci,zero);
01169 pop->member(i)->p_counter_map.resize(numLoci,zero);
01170 }
01171 pop->member(i)->search_heteroz(lastLocus);
01172
01173 }
01174 }
01175
01176 void Model::DGSampler(unsigned numOfSamples,unsigned numOfSL,unsigned numOfHaplo,unsigned numOfSLCas){
01177
01178
01179
01180 double l_hood = 0.0;
01181 unsigned k = 0, j = 0, option = 0;
01182 unsigned k1 = numOfSL;
01183 unsigned k2 = numOfHaplo;
01184 unsigned k3 = numOfSLCas;
01185 cout<< endl << "Descent graph iterations = " << numOfSamples << endl;
01186 for(int sample=1; sample <= numOfSamples; sample++) {
01187 if(sample%5000 == 0) cout<<sample<<".";
01188
01189 if (k >= k1 + k2 + k3) {k = 0;}
01190 if (k < k1) {option = 1;}
01191 else if ((k >= k1) && (k < k1 + k2)) {option = 2;}
01192 else if (k <= k1 + k2 + k3) {option = 3;}
01193 k++;
01194 j++;
01195
01196 l_hood = pop->MH_ibd_sample_map(l_hood,option);
01197 pop->sum_descentState_map();
01198 if (j >= 100000) {
01199 j = 0;
01200 cout << "SI_PDQ sample = " << sample << endl;
01201 }
01202 }
01203 cout << endl << "Haplotypes origins mm mp pm pp / mother - father " << endl;
01204 for (int i=0; i<pop->size(); i++) {
01205 pop->member(i)->display_freq_haplotype(numOfSamples);
01206 }
01207 cout << endl;
01208 }
01209
01210 void Model::RSamplerInitialDG(const std::string &samplerType,const std::string &whatToCompute){
01211
01212
01213
01214 double maxNumGNodeElements = 4.0;
01215 int maxDimensionCutSet = 7;
01216 unsigned maxsize = (unsigned)(std::pow((maxNumGNodeElements),(maxDimensionCutSet))+.5);
01217
01218 pop->getInitialGNodeListSample(maxsize,0,pop->popsize,pop->popsize,samplerType);
01219 if(whatToCompute=="probDescHaplo"){
01220 pop->SetFreqHaploFounders();
01221 pop->sampleSegregationIndicators();
01222 }
01223 }
01224
01225
01226
01227
01228
01229 void Model::RSampler(std::string inputFileName){
01230
01231
01232
01233 if (type == bad_model){
01234 return;
01235 }
01236 matvec::ParmMap parmMap;
01237
01238 parmMap["pedBlockSize"] = "0";
01239 parmMap["numLoci"] = "1";
01240 parmMap["numOfSamples"] = "1000";
01241 parmMap["maxCutsetSize"] = "16384";
01242 parmMap["resultsFile"] = "";
01243 parmMap["samplerType"] = "genotypic";
01244 parmMap["samplerUsed"] = "MH";
01245 parmMap["whatToCompute"] = "genotypeFreq";
01246 parmMap["howToSample"] = "single";
01247 parmMap["printFlag"] = "0";
01248
01249 parmMap.inputParms(inputFileName);
01250
01251 matvec::RSamplerParms rSamplerParms;
01252 rSamplerParms.pedBlockSize = parmMap.getUnsignedValue("pedBlockSize");
01253 rSamplerParms.numLoci = parmMap.getUnsignedValue("numLoci");
01254 rSamplerParms.numOfSamples = parmMap.getUnsignedValue("numOfSamples");
01255 rSamplerParms.maxCutsetSize = parmMap.getUnsignedValue("maxCutsetSize");
01256 rSamplerParms.printFlag = parmMap.getUnsignedValue("printFlag");
01257 rSamplerParms.resultsFile = parmMap["resultsFile"];
01258 rSamplerParms.samplerType = parmMap["samplerType"];
01259 rSamplerParms.samplerUsed = parmMap["samplerUsed"];
01260 rSamplerParms.whatToCompute = parmMap["whatToCompute"];
01261 rSamplerParms.howToSample = parmMap["howToSample"];
01262 rSamplerParms.display();
01263 RSampler(rSamplerParms);
01264 }
01265
01266
01267 void Model::RSampler(RSamplerParms rsParms){
01268
01269
01270
01271
01272 myRSamplerParms = rsParms;
01273 if (myRSamplerParms.samplerType == "genotypic"){
01274 myRSamplerParms.pedBlockSize = rsParms.pedBlockSize ? rsParms.pedBlockSize : pop->size();
01275 }
01276 else if(myRSamplerParms.samplerType == "allelic"){
01277 myRSamplerParms.pedBlockSize = rsParms.pedBlockSize ? rsParms.pedBlockSize : 2*pop->size();
01278 }
01279 RSampler(myRSamplerParms.pedBlockSize, myRSamplerParms.numLoci, myRSamplerParms.numOfSamples, myRSamplerParms.samplerType,
01280 myRSamplerParms.howToSample, myRSamplerParms.samplerUsed, myRSamplerParms.whatToCompute);
01281
01282 }
01283
01284 void Model::RSampler(unsigned pedBlockSize, unsigned numLoci,unsigned numOfSamples,const std::string &samplerType,
01285 const std::string &howToSample, const std::string &samplerUsed, const std::string &whatToCompute){
01286
01287
01288
01289 if(howToSample=="single"){
01290 if(samplerUsed=="gibbs"){
01291 RSamplerGibbs(pedBlockSize,numLoci,numOfSamples,samplerType,whatToCompute);
01292 }
01293 else if(samplerUsed=="MH"){
01294 RSamplerMH(pedBlockSize,numLoci,numOfSamples,samplerType,whatToCompute);
01295 }
01296 else if(samplerUsed=="gibbsMH"){
01297 unsigned stepMH = 3;
01298 RSamplerGibbsMH(pedBlockSize,numLoci,numOfSamples,stepMH,samplerType,whatToCompute);
01299 }
01300 else {
01301 cerr << "samplerUsed must be one of: gibbs, MH or gibbsMH;" << endl;
01302 }
01303 }
01304 else if(howToSample=="joint"){
01305 if (samplerType!="allelic"){
01306 cerr << "For joint sampling, samplerType must be set to allelic;" << endl;
01307 exit(1);
01308 }
01309 if(samplerUsed=="gibbs"){
01310 cerr << "For joint sampling, please set the samplerUsed to MH for now;" << endl;
01311 exit(1);
01312 }
01313 else if(samplerUsed=="MH"){
01314 RSamplerMH(numLoci,numOfSamples,samplerType,whatToCompute);
01315 }
01316 else if(samplerUsed=="gibbsMH"){
01317 cerr << "Not implemented yet for joint sampling, please set samplerUsed to MH for now;" << endl;
01318 exit(1);
01319 }
01320 else {
01321 cerr << "samplerUsed must be one of: gibbs, MH or gibbsMH;" << endl;
01322 }
01323 }
01324 }
01325
01326 void Model::RSamplerGibbs(unsigned pedBlockSize,unsigned numLoci,unsigned numOfSamples,const std::string &samplerType,const std::string &whatToCompute){
01327
01328
01329
01330
01331 double maxNumGNodeElements = 4.0;
01332 int maxDimensionCutSet = 7;
01333 unsigned maxsize = (unsigned)(std::pow((maxNumGNodeElements),(maxDimensionCutSet))+.5);
01334 matvec::Matrix<double> IBDCovMatrix;
01335 IBDCovMatrix.resize(1,1,0.0);
01336
01337
01338 unsigned stopBlock=0,startBlock=0, count=1;
01339 for (int i=0;i<numOfSamples;i++){
01340 if (i==0){
01341
01342
01343 pop->getInitialGNodeListSample(maxsize,0,pop->popsize,pedBlockSize,samplerType);
01344 if (whatToCompute=="haplotypeFreq"){
01345 pop->countHaplotypes(samplerType);
01346 }
01347 else if(whatToCompute=="probDescHaplo"){
01348 pop->SetFreqHaploFounders();
01349 }
01350 else if(whatToCompute=="ibdCovMatrix"){
01351 IBDCovMatrix.resize(pop->popsize,pop->popsize,0.0);
01352 }
01353 }
01354 else{
01355
01356 pop->lookOnlyAtLocusToYourLeft = false;
01357
01358 do{
01359 if(stopBlock < pop->popsize){
01360 stopBlock += pedBlockSize;
01361 startBlock = stopBlock - pedBlockSize;
01362 }
01363 else {
01364 stopBlock = pedBlockSize;
01365 startBlock = stopBlock - pedBlockSize;
01366 }
01367
01368
01369 pop->getGNodeListSample(maxsize,startBlock,stopBlock,pedBlockSize,samplerType);
01370 if(whatToCompute=="probDescHaplo"){
01371 pop->UpdateFreqHaploFounders();
01372 }
01373 } while(stopBlock < pop->popsize);
01374 }
01375 if (whatToCompute=="haplotypeFreq"){
01376 pop->countHaplotypes(samplerType);
01377 }
01378 else if(whatToCompute=="probDescHaplo"){
01379 pop->sampleSegregationIndicators();
01380 }
01381 else if(whatToCompute=="ibdCovMatrix"){
01382 IBDCovMatrix += getIBDMatrix();
01383 }
01384 }
01385 if (whatToCompute=="haplotypeFreq"){
01386 pop->displayHaplotypeFrequencies(numOfSamples);
01387 }
01388 else if(whatToCompute=="probDescHaplo"){
01389 pop->CalcFreqHaploFounders(numOfSamples);
01390 pop->DisplayFreqHaploFounders();
01391 }
01392 else if(whatToCompute=="ibdCovMatrix"){
01393 char *s = "./tryRES/IBDCovMatrix";
01394 ofstream outfile;
01395 outfile.open(s);
01396 outfile << IBDCovMatrix/numOfSamples;
01397 }
01398 }
01399
01400
01401
01402
01403 void Model::RSamplerMH(unsigned pedBlockSize,unsigned numLoci,unsigned numOfSamples,const std::string &samplerType,const std::string &whatToCompute){
01404
01405
01406
01407 unsigned maxsize = myRSamplerParms.maxCutsetSize;
01408
01409
01410 int badCount = 0;
01411 int goodCount = 1;
01412 double alpha = 1.0;
01413 double ranNumber = 0.0;
01414 double logOldProposal, logNewProposal, logOldTarget, logNewTarget;
01415 pop->gNodeList.logProposal = 0.0;
01416 pop->gNodeList.logTarget = 0.0;
01417 matvec::Matrix<double> IBDCovMatrix;
01418 IBDCovMatrix.resize(1,1,0.0);
01419
01420 unsigned stopBlock=0,startBlock=0;
01421 if(samplerType=="genotypic"){
01422 stopBlock = pop->popsize;
01423 }
01424 else if(samplerType=="allelic"){
01425 stopBlock = 2*(pop->popsize);
01426 }
01427 unsigned k=1,k1=1,k2=1;
01428 for (int i=0;i<numOfSamples;i++){
01429 if(i==0){
01430
01431
01432 pop->getInitialGNodeListSample(maxsize,startBlock,stopBlock,pedBlockSize,samplerType);
01433 stopBlock=0;
01434 logOldTarget = pop->gNodeList.logTarget;
01435 logOldProposal = pop->gNodeList.logOldProposal;
01436 pop->copyGNodeStatesToCandidateStates(samplerType);
01437 pop->storeSampledGametes();
01438 pop->copyCandidateToAccepted(samplerType);
01439 if(whatToCompute=="probDescHaplo"){
01440 pop->SetFreqHaploFounders();
01441 }
01442 else if(whatToCompute=="ibdCovMatrix"){
01443 IBDCovMatrix.resize(pop->popsize,pop->popsize,0.0);
01444 }
01445 else if(whatToCompute=="genotypeFreq"){
01446 pop->initGenotypeFreq();
01447 }
01448 }
01449 else{
01450 if(k<=k1){
01451 TransitionSet::transmissionType = 1;
01452 k++;
01453 }
01454 else if(k<=k2){
01455 TransitionSet::transmissionType = 2;
01456 k++;
01457 }
01458 if(k==k2){
01459 k = 1;
01460 }
01461
01462 do{
01463 if(stopBlock < pop->popsize){
01464 stopBlock += pedBlockSize;
01465 startBlock = stopBlock - pedBlockSize;
01466 }
01467 else {
01468 stopBlock = pedBlockSize;
01469 startBlock = stopBlock - pedBlockSize;
01470 }
01471
01472
01473
01474 pop->gNodeList.logProposal = 0.0;
01475 pop->gNodeList.logTarget = 0.0;
01476 pop->getGNodeListSample(maxsize,startBlock,stopBlock,pedBlockSize,samplerType);
01477 pop->copyGNodeStatesToCandidateStates(samplerType);
01478 pop->storeSampledGametes();
01479 logNewProposal = pop->gNodeList.logProposal;
01480 logNewTarget = pop->gNodeList.logTarget;
01481 if(TransitionSet::transmissionType!=1){
01482
01483 pop->gNodeList.logOldProposal = 0.0;
01484 pop->getOldGNodeListProbability(maxsize,startBlock,stopBlock,pedBlockSize,samplerType);
01485 logOldProposal = pop->gNodeList.logOldProposal;
01486 }
01487
01488
01489
01490
01491
01492 alpha=std::exp(logNewTarget + logOldProposal - logNewProposal - logOldTarget);
01493
01494
01495
01496 ranNumber=ranf();
01497 if (ranNumber <= alpha) {
01498 logOldTarget = logNewTarget;
01499 if(TransitionSet::transmissionType==1){
01500 logOldProposal = logNewProposal;
01501 }
01502 pop->copyCandidateToAccepted(samplerType);
01503 goodCount++;
01504 }
01505 else {
01506 pop->storeSampledGametes();
01507 badCount++;
01508
01509 }
01510
01511 if(whatToCompute=="probDescHaplo"){
01512 pop->UpdateFreqHaploFounders();
01513 }
01514 if (!(i%10000)){
01515 cout << "No of samples accepted = " << goodCount << " out of a total of " << goodCount+badCount << endl;
01516 double ratio = double(badCount)/(goodCount+badCount);
01517
01518 }
01519 } while(stopBlock < pop->popsize);
01520 }
01521 if (whatToCompute=="haplotypeFreq"){
01522 pop->countHaplotypes(samplerType);
01523 }
01524 else if (whatToCompute=="genotypeFreq"){
01525 pop->countGenotypes(samplerType);
01526 }
01527 else if(whatToCompute=="probDescHaplo"){
01528 pop->sampleSegregationIndicators();
01529 }
01530 else if(whatToCompute=="ibdCovMatrix"){
01531 IBDCovMatrix += getIBDMatrix();
01532 }
01533 else if(whatToCompute=="initialDG"){
01534 pop->sampleSegregationIndicators();
01535 }
01536 }
01537 cout << "No of samples accepted = " << goodCount << endl;
01538 double ratio = double(badCount)/(goodCount+badCount);
01539 cout << "Rejection rate = " << ratio << endl;
01540 if (whatToCompute=="haplotypeFreq"){
01541 if(myRSamplerParms.resultsFile == ""){
01542 pop->displayHaplotypeFrequencies(numOfSamples);
01543 }
01544 else {
01545 std::ofstream outfile(myRSamplerParms.resultsFile.c_str());
01546 pop->displayHaplotypeFrequencies(numOfSamples, outfile);
01547 }
01548 }
01549 else if (whatToCompute=="genotypeFreq"){
01550 if(myRSamplerParms.resultsFile == ""){
01551 pop->displayGenotypeFrequencies(numOfSamples);
01552 }
01553 else {
01554 std::ofstream outfile(myRSamplerParms.resultsFile.c_str());
01555 pop->displayGenotypeFrequencies(numOfSamples, outfile);
01556 }
01557 }
01558 else if (whatToCompute=="initialDG"){
01559 if(myRSamplerParms.resultsFile == ""){
01560 pop->displaySegregationIndicators();
01561 }
01562 else {
01563 std::ofstream outfile(myRSamplerParms.resultsFile.c_str());
01564 pop->displaySegregationIndicators(outfile);
01565 }
01566 }
01567 else if(whatToCompute=="probDescHaplo"){
01568 pop->CalcFreqHaploFounders(numOfSamples);
01569 pop->DisplayFreqHaploFounders();
01570 }
01571 else if(whatToCompute=="ibdCovMatrix"){
01572 char *s = "./tryRES/IBDCovMatrix";
01573 ofstream outfile;
01574 outfile.open(s);
01575 outfile << IBDCovMatrix/numOfSamples;
01576 }
01577 }
01578
01579
01580
01581
01582 void Model::RSamplerGibbsMH(unsigned pedBlockSize, unsigned numLoci,unsigned numOfSamples,unsigned stepGibbsMH, const std::string &samplerType,const std::string &whatToCompute){
01583
01584
01585
01586
01587 double maxNumGNodeElements = 4.0;
01588 int maxDimensionCutSet = 7;
01589 unsigned maxsize = (unsigned)(std::pow((maxNumGNodeElements),(maxDimensionCutSet))+.5);
01590 matvec::Matrix<double> IBDCovMatrix;
01591 IBDCovMatrix.resize(1,1,0.0);
01592 int badCount = 0;
01593 int goodCount = 1;
01594 double alpha = 1.0;
01595 double ranNumber = 0.0;
01596 double logOldProposal, logNewProposal, logOldTarget, logNewTarget;
01597 pop->gNodeList.logProposal = 0.0;
01598 pop->gNodeList.logTarget = 0.0;
01599 unsigned stopBlock=0,startBlock=0, count=1;
01600 for (int i=0;i<numOfSamples;i++){
01601 cout << "Sample " << setw(5) << i+1 << endl;
01602 if (i==0){
01603
01604 pop->getInitialGNodeListSample(maxsize,0,pop->popsize,pedBlockSize,samplerType);
01605 logOldTarget = pop->gNodeList.logTarget;
01606 logOldProposal = pop->gNodeList.logOldProposal;
01607 pop->copyGNodeStatesToCandidateStates(samplerType);
01608 pop->storeSampledGametes();
01609 pop->copyCandidateToAccepted(samplerType);
01610 if(whatToCompute=="haplotypeFreq"){
01611 pop->countHaplotypes(samplerType);
01612 }
01613 else if(whatToCompute=="probDescHaplo"){
01614 pop->SetFreqHaploFounders();
01615 }
01616 else if(whatToCompute=="ibdCovMatrix"){
01617 IBDCovMatrix.resize(pop->popsize,pop->popsize,0.0);
01618 }
01619 }
01620
01621 else if (!(i%stepGibbsMH)){
01622 pop->copyGNodeStatesToCandidateStates(samplerType);
01623 pop->storeSampledGametes();
01624
01625 pop->lookOnlyAtLocusToYourLeft = true;
01626
01627 pop->gNodeList.logProposal = 0.0;
01628 pop->gNodeList.logTarget = 0.0;
01629 pop->getInitialGNodeListSample(maxsize,0,pop->popsize,pedBlockSize,samplerType);
01630 pop->copyGNodeStatesToCandidateStates(samplerType);
01631 pop->storeSampledGametes();
01632 logNewProposal = pop->gNodeList.logProposal;
01633 logNewTarget = pop->gNodeList.logTarget;
01634 pop->gNodeList.logOldProposal = 0.0;
01635 pop->getOldGNodeListProbability(maxsize,0,pop->popsize,pedBlockSize,samplerType);
01636 logOldProposal = pop->gNodeList.logOldProposal;
01637 cout << "logNewTarget = " << logNewTarget << endl;
01638 cout << "logOldProposal = " << logOldProposal << endl;
01639 cout << "logOldTarget = " << logOldTarget << endl;
01640 cout << "logNewProposal = " << logNewProposal << endl;
01641 alpha=std::exp(logNewTarget + logOldProposal - logNewProposal - logOldTarget);
01642 cout << " alpha = " << alpha << endl;
01643 ranNumber=ranf();
01644 if (ranNumber <= alpha) {
01645 logOldTarget = logNewTarget;
01646 pop->copyCandidateToAccepted(samplerType);
01647 goodCount++;
01648 }
01649 else {
01650 pop->retreiveSampledGametes();
01651 badCount++;
01652 }
01653 if(whatToCompute=="probDescHaplo"){
01654 pop->UpdateFreqHaploFounders();
01655 pop->sampleSegregationIndicators();
01656
01657
01658 }
01659 else if(whatToCompute=="ibdCovMatrix"){
01660 IBDCovMatrix += getIBDMatrix();
01661 }
01662 cout << "Got " << goodCount << " valid MH samples of " << count++ << " tries." << endl;
01663 }
01664
01665 else{
01666
01667 pop->lookOnlyAtLocusToYourLeft = false;
01668 pop->gNodeList.logProposal = 0.0;
01669 pop->gNodeList.logTarget = 0.0;
01670 do{
01671 if(stopBlock < pop->popsize){
01672 stopBlock += pedBlockSize;
01673 startBlock = stopBlock - pedBlockSize;
01674 }
01675 else {
01676 stopBlock = pedBlockSize;
01677 startBlock = stopBlock - pedBlockSize;
01678 }
01679
01680
01681 pop->getGNodeListSample(maxsize,startBlock,stopBlock,pedBlockSize,samplerType);
01682
01683 logOldTarget = pop->gNodeList.logTarget;
01684 pop->copyGNodeStatesToCandidateStates(samplerType);
01685 pop->storeSampledGametes();
01686 pop->copyCandidateToAccepted(samplerType);
01687 if(whatToCompute=="probDescHaplo"){
01688 pop->UpdateFreqHaploFounders();
01689 }
01690 } while(stopBlock < pop->popsize);
01691 if(whatToCompute=="probDescHaplo"){
01692 pop->sampleSegregationIndicators();
01693
01694
01695 }
01696 else if(whatToCompute=="ibdCovMatrix"){
01697 IBDCovMatrix += getIBDMatrix();
01698 }
01699 }
01700 }
01701 if(whatToCompute=="probDescHaplo"){
01702 pop->CalcFreqHaploFounders(numOfSamples);
01703 pop->DisplayFreqHaploFounders();
01704 }
01705 else if(whatToCompute=="ibdCovMatrix"){
01706 char *s = "./tryRES/IBDCovMatrix";
01707 ofstream outfile;
01708 outfile.open(s);
01709 outfile << IBDCovMatrix/numOfSamples;
01710 }
01711 }
01712
01713 matvec::Matrix<double> Model::getIBDMatrix(void){
01714
01715
01716
01717 matvec::Matrix<double> geneticValVec;
01718 matvec::Matrix<double> CovMatrix;
01719 CovMatrix.resize(pop->popsize,pop->popsize,0.0);
01720 matvec::Vector<double> qtlMu;
01721 qtlMu.resize(4,0.0);
01722 qtlMu[0]=-1.41421,qtlMu[3]=1.41421;
01723 geneticValVec.resize(pop->popsize,1,0.0);
01724 for (unsigned t=0;t<pop->popsize;t++){
01725
01726
01727
01728 int geno = (pop->popmember[t])->genotNodeVector[1].acceptedGenotypeState;
01729 geneticValVec[t][0] = qtlMu[geno];
01730 }
01731 CovMatrix =geneticValVec*geneticValVec.transpose();
01732 return CovMatrix;
01733 }
01734
01735 void Model::variance(const std::string &termname,Pedigree& P,const double v00,...)
01736
01737
01738
01739
01740
01741
01742
01743 {
01744
01745 if (numtrait<1) throw exception("Model::variance(args): no trait(dependent-variable) in the model");
01746 int t1,t2;
01747 doubleMatrix var(numtrait,numtrait);
01748 va_list param_pt;
01749 va_start(param_pt,v00);
01750 var[0][0] = v00;
01751 for (t2=1; t2<numtrait; t2++) var[0][t2] = va_arg(param_pt,double);
01752 for (t1=1; t1<numtrait; t1++) for (t2=0; t2<numtrait; t2++) {
01753 var[t1][t2] = va_arg(param_pt,double);
01754 }
01755 va_end(param_pt);
01756 variance(termname,P,var);
01757 }
01758
01759 void Model::variance(const std::string &termname,Pedigree& P, const doubleMatrix& v)
01760
01761
01762
01763
01764 {
01765 num_ped++;
01766 if (!factor_struct) {
01767 std::cerr << "Model::variance(args): no equation(s) in the model\n";
01768 return;
01769 }
01770
01771 int k = term.index(termname,factor_struct);
01772
01773
01774
01775 if ( k < 0) {
01776 std::cerr << "Model::variance(): term not in the model\n";
01777 return;
01778 }
01779
01780
01781
01782
01783 if (term[k].classi() != 'F') {
01784 data = 0;
01785
01786
01787
01788 }
01789 term[k].classi('P');
01790 type = mixed_model; factor_struct[term[k].factorindx[0]].classi('P');
01791 if (pop_created) delete pop;
01792 pop = new Population;
01793 check_ptr(pop);
01794 pop_created = 1;
01795 term[k].prior->resize(numtrait);
01796 if (pop->input_ped(P,term[k].prior)) {
01797 type = bad_model;
01798 return;
01799 }
01800 numgroup = P.ngroup();
01801 popsize = pop->size();
01802 if (v.num_rows() == v.num_cols()) {
01803 if (v.psd()) {
01804 term[k].prior->var_matrix()->copy(v);
01805 }
01806 else {
01807 std::cerr << "Model::variance(): not positive(semi) def: " << termname << "\n";
01808 }
01809 }
01810 else {
01811 std::cerr << "Model::variance(): invalid variance matrix size\n";
01812 }
01813 }
01814
01815 void Model::variance(const std::string &termname,const doubleMatrix& v)
01816
01817
01818
01819
01820 {
01821 if (!factor_struct) {
01822 std::cerr << "Model::variance(args): no equation(s) in the model\n";
01823 return;
01824 }
01825 if (v.num_rows() != v.num_cols() || v.num_rows() != numtrait) {
01826 std::cerr << "Model::variance(): invalid variance matrix size\n";
01827 return;
01828 }
01829 if (!v.psd()) {
01830 std::cerr << "Model::variance:: not positive(semi) def: " << termname << "\n";
01831 return;
01832 }
01833 if (termname == "residual") {
01834 residual_var = v;
01835 return;
01836 }
01837 int i,k;
01838 k = term.index(termname,factor_struct);
01839 if ( k >= 0) {
01840 if (term[k].classi() != 'F') {
01841 data = 0;
01842
01843
01844
01845 }
01846 term[k].classi('R');
01847 type = mixed_model;
01848 term[k].prior->resize(numtrait);
01849 term[k].prior->var_matrix()->copy(v);
01850 }
01851 else {
01852 std::cerr << "Model::variance(..): not in model: " << termname << "\n";
01853 }
01854 }
01855
01856 void Model::variance(const std::string &termname,const double v00,...)
01857
01858
01859
01860
01861
01862
01863
01864 {
01865 if (numtrait<1) {
01866 std::cerr << "Model::variance(args): no trait(dependent-variable) in the model\n";
01867 return;
01868 }
01869 doubleMatrix var(numtrait,numtrait);
01870 int t1,t2;
01871 va_list param_pt;
01872 va_start(param_pt,v00);
01873 var[0][0] = v00;
01874 for (t2=1; t2<numtrait; t2++) var[0][t2] = va_arg(param_pt,double);
01875
01876 for (t1=1; t1<numtrait; t1++) for (t2=0; t2<numtrait; t2++) {
01877 var[t1][t2] = va_arg(param_pt,double);
01878 }
01879 va_end(param_pt);
01880 variance(termname,var);
01881 }
01882
01883
01884
01885
01886
01887
01888
01889 void Model::VarLink(const std::string &termname,const std::string &termname2)
01890
01891
01892
01893
01894
01895 {
01896 if (!factor_struct) {
01897 std::cerr << "Model::VarLink(args): no equation(s) in the model\n";
01898 return;
01899 }
01900
01901 int i,k,k2;
01902 k = term.index(termname,factor_struct);
01903
01904 k2 = term.index(termname2,factor_struct);
01905
01906 if( k < 0 ){
01907 std::cerr << "Model::VarLink(..): not in model: " << termname << "\n";
01908 return;
01909 }
01910 if( k2 < 0 ){
01911 std::cerr << "Model::VarLink(..): not in model: " << termname2 << "\n";
01912 return;
01913 }
01914 int bek=base_effect[k];
01915 if(term[bek].classi() != 'R' && term[bek].classi() != 'P'){
01916 std::cerr << "Model::VarLink(..): not a random effect: " << termname << "\n";
01917 return;
01918 }
01919 int bek2=base_effect[k2];
01920 if(term[bek2].classi() != 'R' && term[bek2].classi() != 'P'){
01921 std::cerr << "Model::VarLink(..): not a random effect: " << termname2 << "\n";
01922 return;
01923 }
01924 if(term[bek].classi() != term[bek2].classi() ){
01925 std::cerr << "Model::VarLink(..): different types of random effects: \n";
01926 return;
01927 }
01928
01929
01930
01931 int be=base_effect[k];
01932 int be2=base_effect[k2];
01933 if(var_link[be]!=var_link[be2]){
01934 int vl=var_link[be];
01935 int vl2=var_link[be2];
01936 if(vl2 <vl) {
01937 int tmp;
01938 tmp=vl;
01939 vl=vl2;
01940 vl2=tmp;
01941 }
01942 var_link[vl2]=vl;
01943 }
01944 }
01945
01946
01947 void Model::covariance(const std::string &termname,const std::string &termname2,const doubleMatrix& v)
01948
01949
01950
01951
01952 {
01953 if (!factor_struct) {
01954 std::cerr << "Model::covariance(args): no equation(s) in the model\n";
01955 return;
01956 }
01957 if (v.num_rows() != v.num_cols() || v.num_rows() != numtrait) {
01958 std::cerr << "Model::covariance(): invalid variance matrix size\n";
01959 return;
01960 }
01961 int i,k,k2;
01962 k = term.index(termname,factor_struct);
01963
01964 k2 = term.index(termname2,factor_struct);
01965
01966 if( k < 0 ){
01967 std::cerr << "Model::covariance(..): not in model: " << termname << "\n";
01968 return;
01969 }
01970 if( k2 < 0 ){
01971 std::cerr << "Model::covariance(..): not in model: " << termname2 << "\n";
01972 return;
01973 }
01974 int bek=base_effect[k];
01975 if(term[bek].classi() != 'R' && term[bek].classi() != 'P'){
01976 std::cerr << "Model::covariance(..): not a random effect: " << termname << "\n";
01977 return;
01978 }
01979 int bek2=base_effect[k2];
01980 if(term[bek2].classi() != 'R' && term[bek2].classi() != 'P'){
01981 std::cerr << "Model::covariance(..): not a random effect: " << termname2 << "\n";
01982 return;
01983 }
01984 if(term[bek].classi() != term[bek2].classi() ){
01985 std::cerr << "Model::covariance(..): different types of random effects: \n";
01986 return;
01987 }
01988
01989
01990 int be;
01991 if(base_effect[k]==base_effect[k2]){
01992 be=base_effect[k];
01993 }
01994 else {
01995 be=base_effect[k];
01996 int be2=base_effect[k2];
01997 if(be2 <be) {
01998 int tmp;
01999 tmp=k;
02000 k=k2;
02001 k2=tmp;
02002 tmp=be;
02003 be=be2;
02004 be2=tmp;
02005 }
02006 base_effect[k2]=be;
02007 doubleMatrix orig1,orig2;
02008 orig1.copy(*term[be].prior->var_matrix());
02009 orig2.copy(*term[be2].prior->var_matrix());
02010 int newsize=nt_vec[be]+nt_vec[be2];
02011 Vector<int> orig=trait_map[be];
02012 trait_map[be].reserve(newsize);
02013 int ii=0;
02014 for(int i=0;i<nt_vec[be];i++,ii++) trait_map[be][ii]=orig[i];
02015 for(int i=0;i<nt_vec[be2];i++,ii++) trait_map[be][ii]=trait_map[be2][i];
02016 term[be].prior->var_matrix()->resize(newsize,newsize);
02017
02018 for(int i=0;i<nt_vec[be];i++){
02019 for(int j=0;j<nt_vec[be];j++){
02020 term[be].prior->var_matrix()->me[i][j]=orig1[i][j];
02021 }
02022 }
02023
02024 for(int i=0;i<nt_vec[be2];i++){
02025 for(int j=0;j<nt_vec[be2];j++){
02026 term[be].prior->var_matrix()->me[nt_vec[be]+i][nt_vec[be]+j]=orig2[i][j];
02027 }
02028 }
02029 nt_vec[be]=newsize;
02030 term[be2].prior->var_matrix()->clear();
02031 nt_vec[be2]=0;
02032 term[be2].classi('F');
02033 int last_eff=be;
02034 for(;corr_link[last_eff] != -1;)last_eff=corr_link[last_eff];
02035 corr_link[last_eff]=be2;
02036 int last_pos=pos_vec[last_eff];
02037 for(;corr_link[last_eff] != -1;){
02038 last_eff=corr_link[last_eff];
02039 pos_vec[last_eff]=pos_vec[last_eff]+last_pos+1;
02040 }
02041 }
02042
02043 int srow,scol;
02044 srow=pos_vec[k]*numtrait;
02045 scol=pos_vec[k2]*numtrait;
02046 for(int irow=0;irow<numtrait;irow++){
02047 for(int jcol=0;jcol<numtrait;jcol++){
02048 term[be].prior->var_matrix()->me[srow+irow][scol+jcol]=v[irow][jcol];
02049 term[be].prior->var_matrix()->me[scol+jcol][srow+irow]=v[irow][jcol];
02050 }
02051 }
02052
02053
02054
02055
02056
02057 }
02058
02059 void Model::covariance(const std::string &termname,const std::string &termname2,const double v00,...)
02060
02061
02062
02063
02064
02065
02066
02067 {
02068 if (numtrait<1) {
02069 std::cerr << "Model::variance(args): no trait(dependent-variable) in the model\n";
02070 return;
02071 }
02072 doubleMatrix var(numtrait,numtrait);
02073 int t1,t2;
02074 va_list param_pt;
02075 va_start(param_pt,v00);
02076 var[0][0] = v00;
02077 for (t2=1; t2<numtrait; t2++) var[0][t2] = va_arg(param_pt,double);
02078
02079 for (t1=1; t1<numtrait; t1++) for (t2=0; t2<numtrait; t2++) {
02080 var[t1][t2] = va_arg(param_pt,double);
02081 }
02082 va_end(param_pt);
02083 covariance(termname,termname2,var);
02084 }
02085
02086
02087
02088
02089
02090
02091
02092
02093
02094
02095
02096
02097 void Model::covariate(const std::string &covariate_names)
02098 {
02099
02100
02101
02102
02103
02104
02105
02106 if (!factor_struct) {
02107 std::cerr << "Model::covariate(): no equation(s) in the model\n";
02108 return;
02109 }
02110
02111 int j,k,t;
02112 unsigned i,nc;
02113 std::vector<std::string> efflist;
02114 nc = split(covariate_names," ,",&efflist);
02115
02116 for (i=0; i<nc; i++) {
02117 k = fieldindex(efflist[i],factor_struct,numfact);
02118 if (k < 0) throw exception("Model::covariate(): not in the model, thus it's ignored");
02119
02120 if (factor_struct[k].classi() != 'F') data= 0;
02121 factor_struct[k].classi('C');
02122
02123
02124
02125
02126
02127 for (t=0; t<numterm; t++) {
02128 j = term[t].partial_match(efflist[i].c_str(),factor_struct);
02129 if (j >=0 ) {
02130 term[t].classi('C');
02131 break;
02132 }
02133 }
02134 }
02135 }
02136
02137 static void nest_xaction(std::string &s)
02138 {
02139 int i,j;
02140 int n = s.size();
02141 for (i=0; i<n; i++) {
02142 if ( s[i] == '(' ) s[i] = '*';
02143 if ( s[i] == ')' ) s[i] = ' ';
02144 }
02145
02146
02147 i = 0;
02148 while (i<n) {
02149 if (s[i] == '*' ) {
02150 j = i;
02151 while (s[--j] == ' ') s[j]='*';
02152 while (s[++i] == ' ') s[i]='*';
02153 }
02154 i++;
02155 }
02156 }
02157
02158 int Model::build_model_term(const std::string &modelspec)
02159 {
02160 int retval = 1;
02161 unsigned i,j,k;
02162
02163 std::string mspec = modelspec;
02164 nest_xaction(mspec);
02165
02166 std::string sep("=+,* ");
02167 std::string TMP = mspec;
02168
02169 std::vector<std::string> tmpvec;
02170 std::vector<std::string>::iterator it;
02171 std::string::size_type begidx;
02172 split(TMP,sep,&tmpvec);
02173
02174 sort(tmpvec.begin(),tmpvec.end());
02175 it = unique(tmpvec.begin(),tmpvec.end());
02176 tmpvec.erase(it,tmpvec.end());
02177
02178
02179
02180
02181 categorical = 0;
02182 numfact = tmpvec.size();
02183 if(factor_struct){
02184 delete [] factor_struct;
02185 factor_struct = 0;
02186 }
02187 if(numfact>0){
02188 factor_struct = new FieldStruct [numfact];
02189 }
02190 else {
02191 factor_struct = 0;
02192 }
02193 check_ptr(factor_struct);
02194 for (i=0; i<numfact; i++) factor_struct[i].name(tmpvec[i]);
02195
02196 TMP = mspec;
02197 numtrait = 0;
02198 sep = "=";
02199 begidx = TMP.find(sep,0);
02200 while(begidx != std::string::npos) {
02201 numtrait++;
02202 begidx = TMP.find(sep,begidx+1);
02203 }
02204 if (numtrait == 0) {
02205 std::cerr << "Model::build_model_term(): there is no trait in the model\n";
02206 return 0;
02207 }
02208 if(trait_struct){
02209 delete [] trait_struct;
02210 trait_struct = 0;
02211 }
02212 if(numtrait>0){
02213 trait_struct = new FieldStruct* [numtrait];
02214 }
02215 else {
02216 trait_struct = 0;
02217 }
02218 check_ptr(trait_struct);
02219 if(traitname){
02220 delete [] traitname;
02221 traitname = 0;
02222 }
02223 if(numtrait>0){
02224 traitname = new std::string [numtrait];
02225 }
02226 else {
02227 traitname = 0;
02228 }
02229 check_ptr(traitname);
02230 std::vector<std::string> model_trait;
02231 std::vector<std::string> trtnm;
02232 sep = ",";
02233 split(TMP,sep,&model_trait);
02234 if (numtrait != model_trait.size()) throw exception("Model::build_model_term(): model for each trait is not separated properly");
02235
02236 sep = " +";
02237 unsigned n = 0;
02238 for (i=0; i<numtrait; i++) {
02239
02240 begidx = model_trait[i].find("=");
02241
02242 std::string bbbtrtnmbbb = model_trait[i].substr(0,begidx);
02243
02244 split(bbbtrtnmbbb,sep,&trtnm);
02245 traitname[i] = trtnm[i];
02246 model_trait[i] = model_trait[i].substr(begidx+1);
02247 n += split(model_trait[i],sep);
02248 k = fieldindex(traitname[i],factor_struct,numfact);
02249 if (k >= 0 && k < numfact) {
02250 factor_struct[k].classi('T');
02251 trait_struct[i] = &(factor_struct[k]);
02252 }
02253 else {
02254 throw exception("Model::build_model_term(): you have probably found a bug!");
02255 }
02256 }
02257 TermList T(n,numtrait);
02258 tmpvec.clear();
02259 sep = " +";
02260 for (numterm=0,i=0; i<numtrait; i++) {
02261 tmpvec.clear();
02262 n = split(model_trait[i],sep,&tmpvec);
02263 for (k=0; k<n; k++) {
02264 for (j=0; j<numterm; j++) {
02265 if (T[j].match(tmpvec[k],"* ",factor_struct)) break;
02266 }
02267 if (j==numterm) {
02268 T[numterm++].input(tmpvec[k].c_str(),factor_struct,numfact);
02269 }
02270 T[j].trait[i]= 1;
02271 }
02272 }
02273
02274 term.resize(numterm,numtrait);
02275 if (unknown_prior) {
02276 delete [] unknown_prior;
02277 unknown_prior = 0;
02278 }
02279 if(numterm>0){
02280 unknown_prior = new UnknownDist[numterm];
02281 }
02282 else {
02283 unknown_prior = 0;
02284 }
02285 check_ptr(unknown_prior);
02286 nt_vec.resize(numterm,numtrait);
02287 base_effect.reserve(numterm);
02288 corr_link.resize(numterm,-1);
02289 pos_vec.resize(numterm);
02290 trait_map.reserve(numterm);
02291 var_link.reserve(numterm);
02292
02293
02294 for (i=0; i<numterm; i++) {
02295 base_effect[i]=i;
02296 var_link[i]=i;
02297 term[i] = T[i];
02298 term[i].prior = &(unknown_prior[i]);
02299 trait_map[i].resize(numtrait,i);
02300 }
02301 return retval;
02302 }
02303
02304 void Model::trait_effect_level(const unsigned mmeaddr, std::string& retval,
02305 int info_flag)
02306 {
02307
02308
02309
02310
02311
02312
02313 if (mmeaddr >= hmmesize) throw exception("Model::trait_effect_level(): arg out of range");
02314 retval = "";
02315 char CL,strid[11];
02316 std::string tempstr;
02317 int nord,*tempval;
02318 unsigned addr,i,j,k,t,kk,ii,jj,nlevel;
02319 unsigned *tempuns;
02320 Vector<unsigned> unsvec(maxorder);
02321
02322 for (addr=0, i=0; i<numterm; i++) {
02323 nlevel = term[i].nlevel();
02324 addr += nlevel*numtrait;
02325 if (addr > mmeaddr) break;
02326 }
02327 if (i >= numterm) throw exception("Model::trait_effect_level(): arg out of range");
02328 addr = term[i].startaddr();
02329 nord = term[i].order();
02330 for (k=addr,j=0; j<nlevel; j++) {
02331 k += numtrait;
02332 if (k > mmeaddr) break;
02333 }
02334 for (addr= k-numtrait, t=0; t<numtrait; t++) {
02335 if (mmeaddr == addr++) break;
02336 }
02337 if (nord == 1) {
02338 unsvec[0] = j+1;
02339 }
02340 else {
02341 tempuns = (unsigned *)(xact_htable[i].find(j+1));
02342 for (ii=0; ii<nord; ii++) unsvec[ii] = tempuns[ii];
02343 }
02344
02345
02346 if (term[i].addr[t] == 0) throw exception("Model::trait_effect_level(): not defined address");
02347 if (numtrait>1 && info_flag == 1) {
02348 retval = trait_struct[t]->name();
02349 retval.append(":");
02350 }
02351
02352 if (info_flag == 1 || info_flag == 2) {
02353 for (ii=0; ii<nord; ii++) {
02354 retval.append(factor_struct[term[i].factorindx[ii]].name());
02355 if (ii+1 < nord) retval.append("*");
02356 }
02357 retval.append(":");
02358 }
02359 for (ii=0; ii<nord; ii++) {
02360 kk = factor_struct[term[i].factorindx[ii]].index();
02361 CL = data->datasheet[kk].classi();
02362 jj = unsvec[ii];
02363
02364 if (CL == 'F') {
02365 tempval = (int *)(idlist[kk]->find(jj));
02366 sprintf(strid,"%d",*tempval);
02367 tempstr = strid;
02368 }
02369 else if (CL =='U') {
02370 tempstr = (const char *)idlist[kk]->find(jj);
02371 }
02372 else if ( CL =='C') {
02373 tempstr = data->datasheet[kk].name();
02374 }
02375 else if (CL =='P') {
02376 tempstr = (const char *)pop->hashtable.find(jj);
02377 }
02378 else if (CL =='I') {
02379 tempstr = "1";
02380 }
02381 else {
02382 warning("Model::save(): unknown column type: %c",CL);
02383 break;
02384 }
02385 retval.append(tempstr);
02386 if (ii+1 < nord) retval.append("*");
02387 }
02388 }
02389
02390 Vector<double> Model::get_solutions(const std::string &termname)
02391 {
02392
02393
02394 Vector<double> retval;
02395 if (!get_blupsol()) throw exception("Model::get_solutions(): get_blupsol() failed");
02396
02397 int k = term.index(termname,factor_struct);
02398 if (k < 0) throw exception("Model::get_solutions(): no such term in the model");
02399 int nlevels = term[k].nlevel()*numtrait;
02400 unsigned i,startaddr;
02401 startaddr = term[k].startaddr();
02402 retval.resize(nlevels);
02403 for (i=0; i<nlevels; i++) {
02404 retval[i] = blupsol[startaddr+i];
02405 }
02406 return retval;
02407 }
02408
02409 void Model::save(const std::string &fname, const int io_mode)
02410 {
02411 if (type == bad_model) {
02412 warning("Model::save(): model is too bad, nothing is saved");
02413 return;
02414 }
02415 if((numterm+ntermGdist) == 0) return;
02416 if (blupsol.size() != hmmesize + ntermGdist*popsize) return;
02417 if (!data) {
02418 warning("Model::save(): no data has been fitted");
02419 return;
02420 }
02421 std::ofstream ofs;
02422 ofs.open(fname.c_str(),(OpenModeType)io_mode);
02423 if (!ofs) {
02424 warning("Model::save: %s cann't open or already exit",fname.c_str());
02425 return;
02426 }
02427 ofs.setf(std::ios::unitbuf | std::ios::showpoint);
02428 ofs.precision(SESSION.output_precision);
02429 int W = SESSION.output_precision + 6;
02430 char S = ' ';
02431 std::string SS;
02432 SS = " ";
02433
02434 if (!data->in_memory()) data->input_datasheet();
02435 this->info(ofs);
02436
02437 double *ve = blupsol.begin();
02438
02439 char* tempstr;
02440 int nord, *tempval;
02441 unsigned *tempuns;
02442 unsigned i,j,t,k,ii,kk,jj,nlevel,total_numterm;
02443 char CL;
02444
02445 ofs << "\n BLUP (BLUE, Posterior_Mean) \n";
02446 ofs << " ----------------------------------------------- \n\n";
02447 total_numterm = numterm+ntermGdist;
02448 for (i=0; i<total_numterm; i++) {
02449 ofs << S << std::setw(W)<< "Term";
02450 for (t=0; t<numtrait; t++) {
02451 ofs << S << std::setw(W-2) << "Trait" << S << t+1;
02452 }
02453 ofs <<"\n";
02454
02455 ofs << S << std::setw(W) << factor_struct[term[i].factorindx[0]].name().c_str();
02456 for (t=1; t<term[i].order(); t++) {
02457 ofs << "*" << factor_struct[term[i].factorindx[t]].name().c_str();
02458 }
02459 for (t=0; t<numtrait; t++) ofs <<S <<std::setw(W)<<trait_struct[t]->name().c_str();
02460 ofs << "\n";
02461 nlevel = term[i].nlevel();
02462 k = term[i].startaddr();
02463 nord = term[i].order();
02464 if (nord == 1) {
02465 kk = factor_struct[term[i].factorindx[0]].index();
02466 CL = data->datasheet[kk].classi();
02467 switch(CL) {
02468 case 'F':
02469 for (j=0; j<nlevel; j++) {
02470 tempval = (int *)(idlist[kk]->find(j+1));
02471 ofs << S << std::setw(W) << *tempval;
02472 for (t=0; t<numtrait; t++,k++) {
02473 ofs << S << std::setw(W);
02474 if (term[i].trait[t]) {
02475 ofs << ve[k];
02476 }
02477 else {
02478 ofs << SS;
02479 }
02480 }
02481 k+=(nt_vec[base_effect[i]]-numtrait);
02482 ofs << "\n";
02483 }
02484 break;
02485 case 'U':
02486 for (j=0; j<nlevel; j++) {
02487 tempstr = (char *)(idlist[kk]->find(j+1));
02488 ofs << S << std::setw(W) << tempstr;
02489 for (t=0; t<numtrait; t++,k++) {
02490 ofs << S << std::setw(W);
02491 if (term[i].trait[t]) {
02492 ofs << ve[k];
02493 }
02494 else {
02495 ofs << SS;
02496 }
02497 }
02498 ofs << "\n";
02499 }
02500 break;
02501 case 'P':
02502 for (j=0; j<nlevel; j++) {
02503 ofs << S;
02504 if (pop->maxnamelen > 0) {
02505 ofs << std::setw(W) << (char *)pop->hashtable.find(j+1);
02506 }
02507 else {
02508 ofs << std::setw(W) << j+1;
02509 }
02510 for (t=0; t<numtrait; t++,k++) {
02511 ofs << S;
02512 if (term[i].trait[t]) {
02513 ofs << std::setw(W) << ve[k];
02514 }
02515 else {
02516 ofs << std::setw(W) << SS;
02517 }
02518 }
02519 k+=(nt_vec[base_effect[i]]-numtrait);
02520 ofs << "\n";
02521 }
02522 break;
02523 case 'C':
02524 ofs << S << std::setw(W) << data->datasheet[kk].name();
02525 for (t=0; t<numtrait; t++,k++) {
02526 ofs << S << std::setw(W);
02527 if (term[i].trait[t]) {
02528 ofs << ve[k];
02529 }
02530 else {
02531 ofs << SS;
02532 }
02533 }
02534 ofs << "\n";
02535 break;
02536 case 'I':
02537 ofs << S << std::setw(W) << "1";
02538 for (t=0; t<numtrait; t++,k++) {
02539 ofs << S << std::setw(W);
02540 if (term[i].trait[t]) {
02541 ofs << ve[k];
02542 }
02543 else {
02544 ofs << SS;
02545 }
02546 }
02547 ofs << "\n";
02548 break;
02549 default:
02550 warning("Model.save(): unknown column type: %c",CL);
02551 break;
02552 }
02553 if (i+1 < total_numterm) ofs << "\n";
02554 }
02555 else {
02556 for (j=0; j<nlevel; j++) {
02557 tempuns = (unsigned *)(xact_htable[i].find(j+1));
02558 kk = factor_struct[term[i].factorindx[0]].index();
02559 CL = data->datasheet[kk].classi();
02560 jj = tempuns[0];
02561
02562 if (CL == 'F') {
02563 tempval = (int *)(idlist[kk]->find(jj));
02564 ofs << S << std::setw(W) << *tempval;
02565 }
02566 else if (CL =='U') {
02567 tempstr = (char *)(idlist[kk]->find(jj));
02568 ofs << S << std::setw(W) << tempstr;
02569 }
02570 else if ( CL =='C') {
02571 ofs << S << std::setw(W) << data->datasheet[kk].name();
02572 }
02573 else if (CL =='P') {
02574 ofs << S << std::setw(W) << (char *)pop->hashtable.find(jj);
02575 }
02576 else if (CL =='I') {
02577 ofs << S << std::setw(W) << "1";
02578 }
02579 else {
02580 warning("Model::save(): unknown column type: %c",CL);
02581 break;
02582 }
02583
02584 for (ii=1; ii<nord; ii++) {
02585 kk = factor_struct[term[i].factorindx[ii]].index();
02586 CL = data->datasheet[kk].classi();
02587 jj = tempuns[ii];
02588
02589 if (CL == 'F') {
02590 tempval = (int *)(idlist[kk]->find(jj));
02591 ofs << "*" << *tempval;
02592 }
02593 else if (CL =='U') {
02594 tempstr = (char *)(idlist[kk]->find(jj));
02595 ofs << "*" << tempstr;
02596 }
02597 else if (CL =='C' || CL =='I') {
02598 ofs << "*" << data->datasheet[kk].name();
02599 }
02600 else if (CL =='P') {
02601 ofs << "*" << (char *)pop->hashtable.find(jj);
02602 }
02603 else {
02604 warning("Model::save(): unknown column type: %c",CL);
02605 }
02606 }
02607 for (t=0; t<numtrait; t++,k++) {
02608 ofs << S << std::setw(W);
02609 if (term[i].trait[t]) ofs << ve[k];
02610 else ofs << SS;
02611 }
02612 k+=(nt_vec[base_effect[i]]-numtrait);
02613 ofs << "\n";
02614 }
02615 if (i+1 < total_numterm) ofs << "\n";
02616 }
02617 }
02618 ofs.close();
02619 data->release_datasheet();
02620 }
02621
02622 double Model::minfun(const Vector<double> &x, const int n)
02623 {
02624 double llhood = 0.0;
02625 switch (minfun_indx) {
02626 case 1:
02627 llhood = minfun_vce(x,n);
02628 break;
02629 case 2:
02630 llhood = minfun_peeling(x,n);
02631 break;
02632
02633 case 3:
02634 llhood = minfun_multipoint(x,n);
02635 break;
02636
02637 default:
02638 warning("Model::Model::minfun(): unknown minfun_indx: %d",minfun_indx);
02639 }
02640 return llhood;
02641 }
02642
02643 void Model::RSamplerMH(unsigned numLoci,unsigned numOfSamples,const std::string &samplerType,const std::string &whatToCompute){
02644
02645
02646
02647
02648 unsigned maxsize = myRSamplerParms.maxCutsetSize;
02649
02650 int badCount = 0;
02651 int goodCount = 1;
02652 double alpha = 1.0;
02653 double ranNumber = 0.0;
02654 double logOldProposal, logNewProposal, logOldTarget, logNewTarget;
02655 pop->gNodeList.logProposal = 0.0;
02656 pop->gNodeList.logTarget = 0.0;
02657 matvec::Matrix<double> IBDCovMatrix;
02658 IBDCovMatrix.resize(1,1,0.0);
02659 for (int i=0;i<numOfSamples;i++){
02660 if(i==0){
02661
02662
02663 pop->getInitialGNodeListSample(maxsize,numLoci,samplerType);
02664 logOldTarget = pop->gNodeList.logTarget;
02665 logOldProposal = pop->gNodeList.logOldProposal;
02666 pop->copyGNodeStatesToCandidateStates(samplerType);
02667 pop->storeSampledGametes();
02668 pop->copyCandidateToAccepted(samplerType);
02669 if(whatToCompute=="probDescHaplo"){
02670 pop->SetFreqHaploFounders();
02671 }
02672 else if(whatToCompute=="ibdCovMatrix"){
02673 IBDCovMatrix.resize(pop->popsize,pop->popsize,0.0);
02674 }
02675 else if(whatToCompute=="genotypeFreq"){
02676 pop->initGenotypeFreq();
02677 }
02678 }
02679 else{
02680
02681 pop->gNodeList.logProposal = 0.0;
02682 pop->gNodeList.logTarget = 0.0;
02683 pop->getGNodeListSample(maxsize,numLoci,samplerType);
02684 pop->copyGNodeStatesToCandidateStates(samplerType);
02685 pop->storeSampledGametes();
02686 logNewProposal = pop->gNodeList.logProposal;
02687 logNewTarget = pop->gNodeList.logTarget;
02688 if (!(i%100)){
02689 cout << "logNewTarget = " << logNewTarget << endl;
02690 cout << "logOldTarget = " << logOldTarget << endl;
02691 cout << "logNewProposal = " << logNewProposal << endl;
02692 cout << "logOldProposal = " << logOldProposal << endl;
02693 }
02694
02695 alpha=std::exp(logNewTarget + logOldProposal - logNewProposal - logOldTarget);
02696 if (!(i%100)){
02697 cout << " alpha = " << alpha << endl;
02698 }
02699 ranNumber=ranf();
02700 if (ranNumber <= alpha) {
02701 logOldTarget = logNewTarget;
02702 logOldProposal = logNewProposal;
02703 pop->copyCandidateToAccepted(samplerType);
02704 goodCount++;
02705 }
02706 else {
02707 pop->storeSampledGametes();
02708 badCount++;
02709
02710 }
02711
02712 if(whatToCompute=="probDescHaplo"){
02713 pop->UpdateFreqHaploFounders();
02714 }
02715 if (!(i%100)){
02716 cout << "No of samples accepted = " << goodCount << " out of a total of " << goodCount+badCount << endl;
02717 double ratio = double(badCount)/(goodCount+badCount);
02718
02719 }
02720 }
02721 if (whatToCompute=="haplotypeFreq"){
02722 pop->countHaplotypes(samplerType);
02723 }
02724 else if (whatToCompute=="genotypeFreq"){
02725 pop->countGenotypes(samplerType);
02726 }
02727 else if(whatToCompute=="probDescHaplo"){
02728 pop->sampleSegregationIndicators();
02729 }
02730 else if(whatToCompute=="ibdCovMatrix"){
02731 IBDCovMatrix += getIBDMatrix();
02732 }
02733 }
02734 cout << "No of samples accepted = " << goodCount << endl;
02735 double ratio = double(badCount)/(goodCount+badCount);
02736 cout << "Rejection rate = " << ratio << endl;
02737 if (whatToCompute=="haplotypeFreq"){
02738 pop->displayHaplotypeFrequencies(numOfSamples);
02739 }
02740 else if(whatToCompute=="probDescHaplo"){
02741 pop->CalcFreqHaploFounders(numOfSamples);
02742 pop->DisplayFreqHaploFounders();
02743 }
02744 else if(whatToCompute=="ibdCovMatrix"){
02745 char *s = "./tryRES/IBDCovMatrix";
02746 ofstream outfile;
02747 outfile.open(s);
02748 outfile << IBDCovMatrix/numOfSamples;
02749 }
02750 }
02751
02752
02753
02754 }
02755
02756