00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <iostream>
00023 #include <iomanip>
00024 #include <cstdio>
00025 #include <vector>
00026 #include <string>
00027
00028 #include "session.h"
00029 #include "util.h"
00030 #include "doublematrix.h"
00031 #include "data.h"
00032
00033 namespace matvec {
00034
00035 Data::Data(void)
00036 {
00037 numcol = 0;
00038 maxnumcol = 0;
00039 numrec = 0;
00040 new_col = 1;
00041 datasheet = 0;
00042 hashtable = 0;
00043 resize(0,0,20);
00044 tdfname = SESSION.mktemp();
00045 }
00046
00047 Data::Data(Data& D)
00048 {
00049 numcol = 0;
00050 maxnumcol = 0;
00051 numrec = 0;
00052 datasheet = 0;
00053 hashtable = 0;
00054 copyfrom(D);
00055 }
00056
00057 const Data& Data::operator=(Data& A)
00058 {
00059 copyfrom(A);
00060 return *this;
00061 }
00062
00063 const Data& Data::operator=(const Field& V)
00064 {
00065 if (new_col == 1) {
00066 resize(V.len(),numcol,20);
00067 }
00068 else {
00069 if (V.len() != numrec) {
00070 warning("Data = Col: size incompatible");
00071 return *this;
00072 }
00073 }
00074 if (!data_in_memory) input_datasheet();
00075 std::string cname = datasheet[new_col].name();
00076 int indx = datasheet[new_col].index();
00077 datasheet[new_col] = V;
00078 datasheet[new_col].name(cname);
00079 datasheet[new_col].index(indx);
00080 if (V.hashtable) hashtable[new_col]->copyfrom( *(V.hashtable));
00081 save_datasheet(0);
00082 return *this;
00083 }
00084
00085 void Data::copyfrom(Data& A)
00086 {
00087 if (this == &A) return;
00088 if (A.data_on_disk == 0) A.save_datasheet();
00089 resize(A.numrec,A.numcol,A.maxnumcol);
00090 new_col = A.new_col;
00091 for (unsigned i=0; i<numcol; i++) hashtable[i]->copyfrom(*(A.hashtable[i]));
00092 tdfname = A.tdfname;
00093 data_on_disk = 1;
00094 data_in_memory = 0;
00095 input_datasheet();
00096 save_datasheet(0);
00097 }
00098
00099 Data& Data::resize(const unsigned nr,const unsigned nc,const unsigned mc)
00100 {
00101 if (numrec == nr && numcol == nc && maxnumcol== mc) return *this;
00102 release();
00103 numrec = nr;
00104 numcol = nc;
00105 if (mc < nc) {
00106 maxnumcol = nc + 10;
00107 }
00108 else {
00109 maxnumcol = mc + 1;
00110 }
00111 if (numrec==0) numcol = 0;
00112 if (numcol==0) numrec = 0;
00113
00114 data_in_memory = 1;
00115 data_on_disk = 0;
00116 hashtable = new HashTable *[maxnumcol];
00117 check_ptr(hashtable);
00118 unsigned i;
00119 for (i=0; i<maxnumcol; i++) {
00120 hashtable[i] = new HashTable;
00121 check_ptr(hashtable[i]);
00122 }
00123 datasheet = new Field [maxnumcol];
00124 check_ptr(datasheet);
00125 datasheet[0].resize(0);
00126 for (i=1; i<numcol; i++) datasheet[i].resize(numrec);
00127 return *this;
00128 }
00129
00130 int Data::field_index(const std::string &colname) const
00131 {
00132 for (unsigned i=0; i<numcol; i++) {
00133 if (datasheet[i].name() == colname) {
00134 return datasheet[i].index();
00135 }
00136 }
00137 return -1;
00138 }
00139
00140 void Data::field_index_vec(Vector<int> &ivec,const std::string &fdname)
00141 {
00142 if (numcol<1) {
00143 return;
00144 }
00145 unsigned i,nc;
00146 if (fdname == "") {
00147 nc = numcol-1;
00148 ivec.reserve(nc);
00149 for (i=0; i<nc; i++) ivec[i] = i+1;
00150 }
00151 else {
00152 int k,nskip,j;
00153 std::string sep(" ,");
00154 std::string fmt(fdname);
00155 std::vector<std::string> tmpvec;
00156 nc = split(fmt,sep,&tmpvec);
00157 Vector<int> tmpivec(nc);
00158 for (nskip=0,i=0; i<nc; i++) {
00159 k = field_index(tmpvec[i]);
00160 if (k<0) {
00161 warning("Data::field_index_vec(): %s: unknown, it's skipped",tmpvec[i].c_str());
00162 nskip++;
00163 }
00164 tmpivec[i] = k;
00165 }
00166
00167 ivec.reserve(nc - nskip);
00168 for (j=0,i=0; i<nc; i++) {
00169 if (tmpivec[i] >= 0) ivec[j++] = tmpivec[i];
00170 }
00171 }
00172 return;
00173 }
00174
00175 void Data::input(const std::string &fname,const std::string &recfmt)
00176 {
00177 size_t linewidth = 1024;
00178 char *line = new char [linewidth];
00179 int k;
00180 unsigned i,j,nc,nr,id;
00181 if (recfmt == "") {
00182 warning("Data::input(): no column-name specified");
00183 return;
00184 }
00185 std::string tmpstr;
00186 tmpstr = recfmt;
00187 i = 0;
00188 while (tmpstr[i] == ' ') {i++;}
00189 if (tmpstr[i] == '$') throw exception("Data::input(): $ is misplaced");
00190 i = 0;
00191 while (tmpstr[i]) {
00192 if (tmpstr[i] == '$' ) {
00193 tmpstr[i] = ' ';
00194 j = i;
00195 while (tmpstr[--j] == ' ');
00196 tmpstr[++j] = '$';
00197 }
00198 i++;
00199 }
00200 std::string fmt = "intercept ";
00201 fmt.append(tmpstr);
00202
00203 std::string sep(" ,");
00204 std::vector<std::string> tmpvec;
00205 unsigned tncol = split(fmt,sep,&tmpvec);
00206 nc = tncol;
00207 for (i=0; i<tncol; ++i) if (tmpvec[i] == "_skip") nc--;
00208 std::ifstream in(fname.c_str(),std::ios::in);
00209 if (!in) {
00210 if(line){
00211 delete [] line;
00212 line=0;
00213 }
00214 throw exception("Data::input(): cannot open file");
00215 }
00216 if (!in.getline(line,linewidth)) {
00217 warning("Data::input(): empty datafile: %s",fname.c_str());
00218 if(line){
00219 delete [] line;
00220 line=0;
00221 }
00222 return;
00223 }
00224 while (!validline(line)) {
00225 if (!in.getline(line,linewidth)) {
00226 warning("Data::input(): no real data in datafile: %s",fname.c_str());
00227 if(line){
00228 delete [] line;
00229 line=0;}
00230 return;
00231 }
00232 }
00233 std::string T(line);
00234 i = split(T," ");
00235 if (i < tncol-1) {
00236 if(line){
00237 delete [] line;
00238 line=0;
00239 }
00240 throw exception("Data::input(): the # of columns in data < the expected");
00241 return;
00242 }
00243 in.clear();
00244 in.seekg(0,std::ios::beg);
00245 nr = 0;
00246 while (in.getline(line,linewidth)) if (validline(line)) nr++;
00247 resize(nr,nc);
00248 int ThereareStrcol = 0;
00249 Vector<int> intvec(tncol);
00250 std::string tstr;
00251 for (i=0; i<tncol; i++) {
00252 tstr = tmpvec[i];
00253 if (tstr.find("_skip") >= 0) {
00254 for (k=i+1; k<tncol; k++) {
00255 if (tstr == tmpvec[k]) {
00256 if(line){
00257 delete [] line;
00258 line=0;
00259 }
00260 throw exception("Data::input(): duplicated column names");
00261 }
00262 }
00263 }
00264 }
00265 std::string::size_type begidx;
00266 for (k=0,i=0; i<tncol; i++) {
00267 if (tmpvec[i] == "_skip") {
00268 intvec[i] = -1;
00269 }
00270 else {
00271 intvec[i] = k;
00272 begidx = tmpvec[i].find("$");
00273 if (begidx != std::string::npos) {
00274 tmpvec[i].replace(begidx,1,"");
00275 datasheet[k].type('S');
00276 ThereareStrcol = 1;
00277 hashtable[k]->resize(numrec);
00278 }
00279 datasheet[k].name(tmpvec[i]);
00280 datasheet[k].index(k);
00281 k++;
00282 }
00283 }
00284 char *token;
00285 std::fstream tdatfile(tdfname.c_str(),std::ios::out);
00286
00287 if (!tdatfile) {
00288 if(line){
00289 delete [] line;
00290 line=0;
00291 }
00292 throw exception("Data::input(): cannot open file");
00293 }
00294 DataNode* dat_cell;
00295 double x;
00296 char *endpt;
00297 j = 0;
00298
00299 in.clear();
00300 in.seekg(0L,std::ios::beg);
00301 while (in.getline(line,linewidth)) {
00302 if (validline(line)) {
00303 token = strtok(line,", ");
00304 i = 1;
00305 while (token) {
00306 if (i >= tncol) break;
00307 k = intvec[i++];
00308 if (k > 0) {
00309 dat_cell = &datasheet[k][j];
00310 if (strcmp(token,".")) {
00311 dat_cell->missing = 0;
00312 if (datasheet[k].type() == 'S') {
00313 hashtable[k]->insert(token);
00314 id = strlen(token)+1;
00315 tdatfile.write((char *)&id,sizeof(unsigned));
00316 tdatfile.write(token,id);
00317 }
00318 else {
00319 x = strtod(token,&endpt);
00320 if (*endpt == '\0') {
00321 dat_cell->double_val(x);
00322 }
00323 else {
00324 warning("Data::input(): numeric column has non-numerics "
00325 "at the corner of row %d and column %d.\n"
00326 " SUGGESTION: claim it as string column in"
00327 " D.input() with $ sign",
00328 j+1,i-1);
00329 resize(0,0);
00330 in.close();
00331 tdatfile.close();
00332 if(line){
00333 delete [] line;
00334 line=0;
00335 }
00336 return;
00337 }
00338 }
00339 }
00340 else {
00341 dat_cell->missing = 1;
00342 datasheet[k].count_miss(1);
00343 }
00344 }
00345 token = strtok('\0',", ");
00346 }
00347 j++;
00348 }
00349 }
00350 in.close();
00351 tdatfile.close();
00352 datasheet[0].type('I');
00353 datasheet[0].nlevel(1);
00354 datasheet[0].nmiss(0);
00355
00356
00357
00358
00359 if (ThereareStrcol) {
00360 for (i=1; i<numcol; i++) {
00361 if (datasheet[i].type() != 'S') continue;
00362 id = hashtable[i]->size();
00363 hashtable[i]->resize(id);
00364 datasheet[i].nlevel(id);
00365 }
00366 tdatfile.open(tdfname.c_str(),std::ios::in);
00367 for (i=0; i<numrec; i++) {
00368 for (j=1; j<numcol; j++) {
00369 dat_cell = &datasheet[j][i];
00370 if (datasheet[j].type() == 'S' && !(dat_cell->missing)) {
00371 tdatfile.read((char *)&id,sizeof(unsigned));
00372 tdatfile.read(line,id);
00373 id = hashtable[j]->insert(line);
00374 dat_cell->unsigned_val(id);
00375 }
00376 }
00377 }
00378 tdatfile.close();
00379 }
00380 if(line){
00381 delete [] line;
00382 line=0;
00383 }
00384
00385
00386
00387
00388
00389 save_datasheet(0); // save a copy to hard-disk
00390 }
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415 void Data::value_for_missing(const double vm)
00416 {
00417 if (!data_in_memory) input_datasheet();
00418 for (unsigned i=0; i<numcol; i++) datasheet[i].value_for_missing(vm);
00419 }
00420
00421 void Data::save_datasheet(const int relse)
00422 {
00423 if (!datasheet) {
00424 warning("Data::save_datasheet(): no data to save");
00425 return;
00426 }
00427 std::ofstream df(tdfname.c_str(),std::ios::out);
00428 if (!df) throw exception("Data::save_datasheet(): cannot open file");
00429 for (unsigned i=1; i<numcol; i++) {
00430 df.write((char *)datasheet[i].dat_vec,numrec*sizeof(DataNode));
00431 }
00432 df.close();
00433 data_on_disk = 1;
00434 if (relse) release_datasheet();
00435 }
00436
00437 void Data::input_datasheet(void)
00438 {
00439 if (data_in_memory) return;
00440 if (data_on_disk) {
00441 std::ifstream df(tdfname.c_str());
00442 if (!df) throw exception("Data::input_datasheet(): cannot open file");
00443 for (unsigned i=1; i<numcol; i++) {
00444 datasheet[i].resize(numrec);
00445 df.read((char *)datasheet[i].dat_vec,numrec*sizeof(DataNode));
00446 }
00447 df.close();
00448 data_in_memory = 1;
00449 }
00450 else {
00451 warning("Data::input_datasheet(): data is not on disk");
00452 }
00453 }
00454
00455 void Data::release_datasheet(void)
00456 {
00457 if (datasheet) {
00458
00459 if (!data_on_disk) save_datasheet();
00460 for (unsigned i=1; i<numcol; i++) datasheet[i].resize(0);
00461 data_in_memory = 0;
00462 }
00463 }
00464
00465 void Data::row(const unsigned i,DataNode* recd)
00466 {
00467 if (!data_in_memory) input_datasheet();
00468 if (!recd) {
00469 if(numcol>0) {
00470 recd = new DataNode [numcol];
00471 }
00472 else {
00473 recd = 0;
00474 }
00475 }
00476 for (unsigned j=1; j<numcol; j++) recd[j] = datasheet[j][i];
00477 }
00478
00479 Field Data::col(const std::string &cname)
00480 {
00481 int k = field_index(cname);
00482 if (k<=0) {
00483 warning("Data::col(%s): no such column",cname.c_str());
00484 return Field();
00485 }
00486 if (!data_in_memory) input_datasheet();
00487
00488 HashTable *tmp_hashtable = 0;
00489 DataNode *retval;
00490 if (numrec>0){
00491 retval = new DataNode [numrec];
00492 }
00493 else {
00494 retval = 0;
00495 }
00496 unsigned i;
00497 DataNode *colk = datasheet[k].dat_vec;
00498 if (datasheet[k].type()=='S') {
00499 tmp_hashtable = new HashTable;
00500 *tmp_hashtable = *(hashtable[k]);
00501 for (i=0; i<numrec; i++) {
00502 if (colk[i].missing) {retval[i].missing = 1;}
00503 else { retval[i].unsigned_val(colk[i].unsigned_val()); }
00504 }
00505 }
00506 else {
00507 for (i=0; i<numrec; i++) {
00508 if (colk[i].missing) { retval[i].missing = 1; }
00509 else { retval[i].double_val(colk[i].double_val()); }
00510 }
00511 }
00512 return Field(numrec,retval,datasheet[k].col_struct,tmp_hashtable);
00513 }
00514
00515 DataNode* Data::rawcol(const std::string &cname)
00516 {
00517 int k = field_index(cname);
00518 if (k > 0) {
00519 if (!data_in_memory) input_datasheet();
00520 return datasheet[k].dat_vec;
00521 }
00522 else {
00523 warning("Data::rawcol(%s): no such column",cname.c_str());
00524 return 0;
00525 }
00526 }
00527
00528 DataNode* Data::rawcol(unsigned c)
00529 {
00530 if (c <= 0 || c >= numcol) throw exception("Data::rawcol(): out of range");
00531 if (!data_in_memory) input_datasheet();
00532 return datasheet[c].dat_vec;
00533 }
00534
00535 Data& Data::newcol(const std::string &cname)
00536 {
00537 if (cname == "") {
00538 warning("Data::newcol(cname), cname is empty");
00539 return *this;
00540 }
00541 unsigned i;
00542 int k = field_index(cname);
00543 if (k > 0) {
00544 if (datasheet[k].type() == 'S') {
00545 warning("Data::newcol(): %s exits, can't overwrite string column",cname.c_str());
00546 return *this;
00547 }
00548 warning("Data.newcol(): %s exits, it's been overwritten",cname.c_str());
00549 new_col = k;
00550 }
00551 else {
00552 if (!data_in_memory) input_datasheet();
00553 if (numcol == maxnumcol) {
00554 Field *tmp_datasheet = new Field [maxnumcol];
00555 check_ptr(tmp_datasheet);
00556 HashTable **tmp_hashtable = new HashTable *[maxnumcol];
00557 check_ptr(tmp_hashtable);
00558 for (i=0; i<maxnumcol; i++) {
00559 tmp_datasheet[i] = datasheet[i];
00560 tmp_hashtable[i] = hashtable[i];
00561 }
00562 if(datasheet){
00563 delete [] datasheet;
00564 datasheet=0;
00565 }
00566 if(hashtable){
00567 delete [] hashtable;
00568 hashtable=0;
00569 }
00570 maxnumcol += 10;
00571 datasheet = new Field [maxnumcol];
00572 check_ptr(datasheet);
00573 hashtable = new HashTable *[maxnumcol];
00574 check_ptr(hashtable);
00575 for (i=0; i<numcol; i++) {
00576 datasheet[i] = tmp_datasheet[i];
00577 hashtable[i] = tmp_hashtable[i];
00578 }
00579 for (i=numcol; i<maxnumcol; i++) {
00580 hashtable[i] = new HashTable;
00581 check_ptr(hashtable[i]);
00582 datasheet[i] = 0;
00583 }
00584 if(tmp_datasheet){
00585 delete [] tmp_datasheet;
00586 tmp_datasheet=0;
00587 }
00588 if(tmp_hashtable){
00589 delete [] tmp_hashtable;
00590 tmp_hashtable=0;
00591 }
00592 }
00593 new_col = numcol++;
00594 datasheet[new_col].name(cname);
00595 datasheet[new_col].type('F');
00596 datasheet[new_col].index(new_col);
00597 datasheet[new_col].resize(numrec);
00598 }
00599 return *this;
00600 }
00601
00602
00603 void Data::newcol(const std::string &cname,const Field& col)
00604 {
00605 unsigned i,n = col.size();
00606 if (n != numrec)
00607 warning("Data::newcol():%d,%d: size not conformable",numrec,n);
00608 this->newcol(cname);
00609 datasheet[new_col].col_struct = col.col_struct;
00610 datasheet[new_col].name(cname);
00611 datasheet[new_col].index(new_col);
00612 if (numrec < n) n = numrec;
00613 DataNode *tc = datasheet[new_col].dat_vec;
00614 for (i=0; i<n; i++) tc[i] = col.elem(i);
00615 for (i=n; i<numrec; i++) tc[i].missing = 1;
00616 datasheet[new_col].count_miss(numrec-n);
00617 }
00618
00619 Data& Data::adjoin(Data& b)
00620 {
00621 unsigned n = b.num_rows();
00622 if (n != numrec) {
00623 warning("Data::adjoin(b):%d,%d: size unconformable: truncated",numrec,n);
00624 }
00625 if (!b.in_memory()) b.input_datasheet();
00626 unsigned i,nc=b.num_cols();
00627 for (i=0; i<nc; i++) {
00628 this->newcol("junk");
00629 datasheet[new_col] = b.datasheet[i];
00630 datasheet[new_col].index(new_col);
00631 }
00632 save_datasheet(0);
00633 return *this;
00634 }
00635
00636 Data& Data::stack(Data& b)
00637 {
00638 warning("Data::stack(b): not yet available");
00639 return *this;
00640 }
00641
00642 DataNode* Data::cell(const unsigned r,const unsigned c)
00643 {
00644 if (!data_in_memory) input_datasheet();
00645 if (r>=numrec || (c>=numcol&& c==0)) {
00646 warning("Data::cell(%d,%d): out of range",c,r);
00647 return 0;
00648 }
00649 else {
00650 return &datasheet[c][r];
00651 }
00652 }
00653
00654 void Data::release(void)
00655 {
00656 if (datasheet) {
00657 delete [] datasheet;
00658 datasheet = 0;
00659 }
00660 if (hashtable) {
00661 for (int i=maxnumcol-1; i>=0; i--){
00662 if(hashtable[i]){
00663 delete hashtable[i];
00664 hashtable[i]=0;
00665 }
00666 }
00667 if(hashtable){
00668 delete [] hashtable;
00669 hashtable=0;
00670 }
00671 }
00672 }
00673
00674
00675 Field Data::max(const std::string &cname)
00676 {
00677 if (!data_in_memory) input_datasheet();
00678 int i,nc=0;
00679 Vector<int> ivec;
00680 field_index_vec(ivec,cname);
00681 nc = ivec.size();
00682 Field xcol(nc);
00683 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].max();
00684 return xcol;
00685 }
00686
00687 Field Data::min(const std::string &cname)
00688 {
00689 if (!data_in_memory) input_datasheet();
00690 int i,nc=0;
00691 Vector<int> ivec;
00692 field_index_vec(ivec,cname);
00693 nc = ivec.size();
00694 Field xcol(nc);
00695 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].min();
00696 return xcol;
00697 }
00698
00699 Field Data::mean(const std::string &cname)
00700 {
00701 if (!data_in_memory) input_datasheet();
00702 int i,nc=0;
00703 Vector<int> ivec;
00704 field_index_vec(ivec,cname);
00705 nc = ivec.size();
00706 Field xcol(nc);
00707 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].mean();
00708 return xcol;
00709 }
00710
00711 Field Data::variance(const std::string &cname)
00712 {
00713 if (!data_in_memory) input_datasheet();
00714 int i,nc=0;
00715 Vector<int> ivec;
00716 field_index_vec(ivec,cname);
00717 nc = ivec.size();
00718 Field xcol(nc);
00719 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].covariance();
00720 return xcol;
00721 }
00722
00723 Field Data::sum(const std::string &cname)
00724 {
00725 if (!data_in_memory) input_datasheet();
00726 int i,nc=0;
00727 Vector<int> ivec;
00728 field_index_vec(ivec,cname);
00729 nc = ivec.size();
00730 Field xcol(nc);
00731 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].sum();
00732 return xcol;
00733 }
00734
00735 Field Data::sumsq(const std::string &cname)
00736 {
00737 if (!data_in_memory) input_datasheet();
00738 int i,nc=0;
00739 Vector<int> ivec;
00740 field_index_vec(ivec,cname);
00741 nc = ivec.size();
00742 Field xcol(nc);
00743 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].sumsq();
00744 return xcol;
00745 }
00746
00747 Field Data::product(const std::string &cname)
00748 {
00749 if (!data_in_memory) input_datasheet();
00750 int i,nc=0;
00751 Vector<int> ivec;
00752 field_index_vec(ivec,cname);
00753 nc = ivec.size();
00754 Field xcol(nc);
00755 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].product();
00756 return xcol;
00757 }
00758
00759 void Data::stat(void)
00760 {
00761 unsigned W = SESSION.output_precision+6;
00762 if (!data_in_memory) input_datasheet();
00763 unsigned i;
00764
00765 std::cout << "\n Name";
00766 for (i=1; i<numcol; i++) {
00767 if (datasheet[i].type()=='S') continue;
00768 std::cout << " " << std::setw(W) << datasheet[i].name();
00769 }
00770 std::cout << "\n";
00771
00772 std::cout << " Nobs";
00773 for (i=1; i<numcol; i++) {
00774 if (datasheet[i].type()=='S') continue;
00775 std::cout << " ";
00776 if (datasheet[i].type() == 'F') {
00777 std::cout << std::setw(W) << numrec-datasheet[i].nmiss();
00778 }
00779 else {
00780 std::cout << std::setw(W) << ".";
00781 }
00782 }
00783 std::cout << "\n";
00784
00785 std::cout << " Min ";
00786 for (i=1; i<numcol; i++) {
00787 if (datasheet[i].type()=='S') continue;
00788 std::cout << datasheet[i].min();
00789 }
00790 std::cout << "\n";
00791
00792 std::cout << " Max ";
00793 for (i=1; i<numcol; i++) {
00794 if (datasheet[i].type()=='S') continue;
00795 std::cout << datasheet[i].max();
00796 }
00797 std::cout << "\n";
00798
00799 std::cout << " Mean";
00800 for (i=1; i<numcol; i++) {
00801 if (datasheet[i].type()=='S') continue;
00802 std::cout << datasheet[i].mean();
00803 }
00804 std::cout << "\n";
00805
00806 std::cout << " S.D.";
00807 DataNode var;
00808 for (i=1; i<numcol; i++) {
00809 if (datasheet[i].type()=='S') continue;
00810 var = datasheet[i].covariance();
00811 if (!var.missing) var.double_val(std::sqrt(var.double_val()));
00812 std::cout << var;
00813 }
00814 std::cout << "\n\n";
00815
00816 return;
00817 }
00818
00819 doubleMatrix Data::mat(void)
00820 {
00821 doubleMatrix retval(numrec,numcol);
00822 if (numrec==0) throw exception("Data::mat(): empty data object");
00823 if (!data_in_memory) input_datasheet();
00824 DataNode *dat = 0;
00825 unsigned i,j;
00826 double *dpt;
00827 for (i=0; i<numrec; i++) {
00828 dpt = retval[i];
00829 for (j=1; j<numcol; j++) {
00830 if (datasheet[j].type() != 'S') {
00831 dat = &(datasheet[j].dat_vec[i]);
00832 if (dat->missing == 0) {
00833 dpt[j] = dat->double_val();
00834 }
00835 else {
00836 dpt[j] = 0.0;
00837 }
00838 }
00839 }
00840 }
00841 release_datasheet();
00842 return retval;
00843 }
00844
00845 void Data::print(std::ostream& stream,const Vector<int> intvec,const int ic)
00846 {
00847 if (numrec==0) {
00848 std::cout << "\t empty data object\n" << std::flush;
00849 return;
00850 }
00851 if (!data_in_memory) input_datasheet();
00852 int nc = intvec.size();
00853 int kk;
00854 unsigned i,j,k,id;
00855 unsigned W = SESSION.output_precision+6;
00856 const char *str;
00857 char ch;
00858 stream.precision(SESSION.output_precision);
00859 DataNode *dat = 0;
00860 for (k=23,i=0; i<numrec; i++) {
00861 if (ic && i>=k) {
00862 k += 23;
00863 stream << " more ... [q for quit] ";
00864 std::cin.get(ch);
00865 std::cin.seekg(0L,std::ios::beg);
00866 if (ch == 'q') break;
00867 }
00868 for (j=0; j<nc; j++) {
00869 kk = intvec[j];
00870 if (kk < 1) continue;
00871 dat = &(datasheet[kk].dat_vec[i]);
00872 if (dat->missing) {
00873 stream << " " << std::setw(W) << ".";
00874 }
00875 else {
00876 if (datasheet[kk].type()=='S') {
00877 id = dat->unsigned_val();
00878 str = (const char*)(hashtable[kk]->find(id));
00879 stream << " " << std::setw(W) << str;
00880 }
00881 else {
00882 stream << " " << std::setw(W) << dat->double_val();
00883 }
00884 }
00885 }
00886 stream << "\n";
00887 }
00888 stream << std::flush;
00889 release_datasheet();
00890 }
00891
00892 void Data::display(const std::string &fdname,const int ic)
00893 {
00894 Vector<int> intvec;
00895 field_index_vec(intvec,fdname);
00896 print(std::cout,intvec,ic);
00897 }
00898
00899 void Data::save(const std::string &fname,
00900 const int io_mode )
00901 {
00902 std::ofstream ofs;
00903 ofs.open(fname.c_str(),(OpenModeType)io_mode);
00904 if (!ofs) throw exception("Data::save(): cannot open file");
00905 Vector<int> intvec;
00906 field_index_vec(intvec);
00907 print(ofs,intvec,0);
00908 ofs.close();
00909 }
00910
00911 std::ostream& operator<<(std::ostream& stream, Data& A)
00912 {
00913 unsigned nc = A.num_cols() - 1;
00914 if (nc == 0) return stream;
00915 Vector<int> intvec(nc);
00916 for (int i=0; i<nc; i++) intvec[i] = i+1;
00917 A.print(stream,intvec,1);
00918 return stream;
00919 }
00920
00921 }
00922