#include <data.h>
Definition at line 36 of file data.h.
Public Methods | |
| Data (void) | |
| Data (Data &D) | |
| ~Data (void) | |
| const Data & | operator= (Data &A) |
| const Data & | operator= (const Field &V) |
| Data & | resize (const unsigned nr, const unsigned nc, const unsigned mc=0) |
| int | in_memory (void) const |
| int | in_disk (void) const |
| int | field_index (const std::string &colname) const |
| void | field_index_vec (Vector< int > &intvec, const std::string &fdname="") |
| void | value_for_missing (const double vm) |
| void | input (const std::string &fname, const std::string &recfmt) |
| void | save_datasheet (const int relse=1) |
| void | input_datasheet (void) |
| void | release_datasheet (void) |
| void | release (void) |
| void | print (std::ostream &stream, const Vector< int > intvec, const int ic=0) |
| void | save (const std::string &fname, const int io_mode=std::ios::out) |
| void | display (const std::string &fieldnames="", const int ic=0) |
| void | newcol (const std::string &cname, const Field &col) |
| void | row (const unsigned i, DataNode *recd) |
| Field | col (const std::string &cname) |
| Data & | newcol (const std::string &cname) |
| Data & | stack (Data &b) |
| Data & | adjoin (Data &b) |
| DataNode * | cell (const unsigned r, const unsigned c) |
| DataNode * | rawcol (unsigned c) |
| DataNode * | rawcol (const std::string &cname) |
| unsigned | size (void) const |
| unsigned | num_cols (void) const |
| unsigned | num_rows (void) const |
| Field | mean (const std::string &cname="") |
| Field | variance (const std::string &cname="") |
| Field | sum (const std::string &cname="") |
| Field | sumsq (const std::string &cname="") |
| Field | product (const std::string &cname="") |
| Field | max (const std::string &cname="") |
| Field | min (const std::string &cname="") |
| void | stat (void) |
| doubleMatrix | mat (void) |
Public Attributes | |
| HashTable ** | hashtable |
| Field * | datasheet |
Protected Methods | |
| void | copyfrom (Data &A) |
Protected Attributes | |
| int | data_on_disk |
| int | data_in_memory |
| unsigned | numcol |
| unsigned | new_col |
| unsigned | maxnumcol |
| unsigned | numrec |
| std::string | tdfname |
Friends | |
| std::ostream & | operator<< (std::ostream &stream, Data &A) |
Data::Data (void)
Definition at line 35 of file data.cpp. References datasheet, hashtable, maxnumcol, matvec::Session::mktemp(), new_col, numcol, numrec, resize(), matvec::SESSION, and tdfname.
Data::Data (Data &D)
Definition at line 47 of file data.cpp. References copyfrom(), datasheet, hashtable, maxnumcol, numcol, and numrec.
Data::~Data (void)
Definition at line 50 of file data.h. References release().
00050 {release();}
Data & Data::adjoin (Data &b)
Definition at line 619 of file data.cpp. References datasheet, in_memory(), input_datasheet(), new_col, newcol(), num_cols(), num_rows(), numrec, save_datasheet(), and matvec::warning().
00620 {
00621 unsigned n = b.num_rows();
00622 if (n != numrec) {
00623 warning("Data::adjoin(b):%d,%d: size unconformable: truncated",numrec,n);
00624 }
00625 if (!b.in_memory()) b.input_datasheet();
00626 unsigned i,nc=b.num_cols();
00627 for (i=0; i<nc; i++) {
00628 this->newcol("junk");
00629 datasheet[new_col] = b.datasheet[i];
00630 datasheet[new_col].index(new_col);
00631 }
00632 save_datasheet(0);
00633 return *this;
00634 }
DataNode * Data::cell (const unsigned r, const unsigned c)
Definition at line 642 of file data.cpp. References data_in_memory, datasheet, input_datasheet(), numcol, numrec, and matvec::warning(). Referenced by matvec::Model::save_pos_val().
00643 {
00644 if (!data_in_memory) input_datasheet(); // data must be in memory
00645 if (r>=numrec || (c>=numcol&& c==0)) {
00646 warning("Data::cell(%d,%d): out of range",c,r);
00647 return 0;
00648 }
00649 else {
00650 return &datasheet[c][r];
00651 }
00652 }
Field Data::col (const std::string &cname)
Definition at line 479 of file data.cpp. References matvec::Field::dat_vec, data_in_memory, datasheet, matvec::DataNode::double_val(), field_index(), hashtable, input_datasheet(), matvec::DataNode::missing, numrec, matvec::DataNode::unsigned_val(), and matvec::warning().
00480 {
00481 int k = field_index(cname);
00482 if (k<=0) { // first column intercept is not accessible
00483 warning("Data::col(%s): no such column",cname.c_str());
00484 return Field();
00485 }
00486 if (!data_in_memory) input_datasheet();
00487
00488 HashTable *tmp_hashtable = 0;
00489 DataNode *retval;
00490 if (numrec>0){
00491 retval = new DataNode [numrec];
00492 }
00493 else {
00494 retval = 0;
00495 }
00496 unsigned i;
00497 DataNode *colk = datasheet[k].dat_vec;
00498 if (datasheet[k].type()=='S') {
00499 tmp_hashtable = new HashTable;
00500 *tmp_hashtable = *(hashtable[k]);
00501 for (i=0; i<numrec; i++) {
00502 if (colk[i].missing) {retval[i].missing = 1;}
00503 else { retval[i].unsigned_val(colk[i].unsigned_val()); }
00504 }
00505 }
00506 else {
00507 for (i=0; i<numrec; i++) {
00508 if (colk[i].missing) { retval[i].missing = 1; }
00509 else { retval[i].double_val(colk[i].double_val()); }
00510 }
00511 }
00512 return Field(numrec,retval,datasheet[k].col_struct,tmp_hashtable);
00513 }
void Data::copyfrom (Data &A) [protected]
Definition at line 85 of file data.cpp. References matvec::HashTable::copyfrom(), data_in_memory, data_on_disk, hashtable, input_datasheet(), maxnumcol, new_col, numcol, numrec, resize(), save_datasheet(), and tdfname. Referenced by Data(), and operator=().
00086 {
00087 if (this == &A) return;
00088 if (A.data_on_disk == 0) A.save_datasheet();
00089 resize(A.numrec,A.numcol,A.maxnumcol);
00090 new_col = A.new_col;
00091 for (unsigned i=0; i<numcol; i++) hashtable[i]->copyfrom(*(A.hashtable[i]));
00092 tdfname = A.tdfname;
00093 data_on_disk = 1;
00094 data_in_memory = 0;
00095 input_datasheet();
00096 save_datasheet(0); // save changes
00097 }
void Data::display (const std::string &fieldnames="", const int ic=0)
Definition at line 892 of file data.cpp. References field_index_vec(), and print().
00893 {
00894 Vector<int> intvec;
00895 field_index_vec(intvec,fdname);
00896 print(std::cout,intvec,ic);
00897 }
int Data::field_index (const std::string &colname) const
Definition at line 130 of file data.cpp. References datasheet, matvec::Field::index(), matvec::Field::name(), and numcol. Referenced by col(), field_index_vec(), matvec::KP(), newcol(), matvec::Model::prepare_data(), and rawcol().
void Data::field_index_vec (Vector< int > &intvec, const std::string &fdname="")
Definition at line 140 of file data.cpp. References field_index(), numcol, matvec::Vector< T >::reserve(), matvec::split(), and matvec::warning(). Referenced by display(), max(), mean(), min(), product(), save(), sum(), sumsq(), and variance().
00141 {
00142 if (numcol<1) { // first column is a reserved: intercept
00143 return;
00144 }
00145 unsigned i,nc;
00146 if (fdname == "") {
00147 nc = numcol-1;
00148 ivec.reserve(nc);
00149 for (i=0; i<nc; i++) ivec[i] = i+1; // don't print intercept
00150 }
00151 else {
00152 int k,nskip,j;
00153 std::string sep(" ,");
00154 std::string fmt(fdname);
00155 std::vector<std::string> tmpvec;
00156 nc = split(fmt,sep,&tmpvec); //////// fmt.split(n,sep); field name can be any length
00157 Vector<int> tmpivec(nc);
00158 for (nskip=0,i=0; i<nc; i++) {
00159 k = field_index(tmpvec[i]);
00160 if (k<0) {
00161 warning("Data::field_index_vec(): %s: unknown, it's skipped",tmpvec[i].c_str());
00162 nskip++;
00163 }
00164 tmpivec[i] = k;
00165 }
00166
00167 ivec.reserve(nc - nskip);
00168 for (j=0,i=0; i<nc; i++) {
00169 if (tmpivec[i] >= 0) ivec[j++] = tmpivec[i];
00170 }
00171 }
00172 return;
00173 }
int Data::in_disk (void) const
Definition at line 59 of file data.h. References data_on_disk. Referenced by matvec::Model::prepare_data().
00059 {return data_on_disk;}
int Data::in_memory (void) const
Definition at line 58 of file data.h. References data_in_memory. Referenced by adjoin(), matvec::Population::input_data(), matvec::Population::input_markerData(), matvec::Model::prepare_data(), matvec::Model::re_hash_data(), matvec::Model::save(), matvec::GLMM::save(), and matvec::Model::save_pos_val().
00058 {return data_in_memory;}
void Data::input (const std::string &fname, const std::string &recfmt)
Definition at line 175 of file data.cpp. References matvec::Field::count_miss(), datasheet, matvec::DataNode::double_val(), matvec::Vector< T >::find(), hashtable, matvec::Field::index(), matvec::HashTable::insert(), matvec::DataNode::missing, matvec::Field::name(), matvec::Field::nlevel(), matvec::Field::nmiss(), numcol, numrec, matvec::HashTable::resize(), resize(), matvec::HashTable::size(), matvec::split(), tdfname, matvec::Field::type(), matvec::DataNode::unsigned_val(), matvec::validline(), and matvec::warning().
00176 {
00177 size_t linewidth = 1024;
00178 char *line = new char [linewidth];
00179 int k;
00180 unsigned i,j,nc,nr,id;
00181 if (recfmt == "") {
00182 warning("Data::input(): no column-name specified");
00183 return;
00184 }
00185 std::string tmpstr;
00186 tmpstr = recfmt;
00187 i = 0;
00188 while (tmpstr[i] == ' ') {i++;} // find first nonspace char
00189 if (tmpstr[i] == '$') throw exception("Data::input(): $ is misplaced");
00190 i = 0;
00191 while (tmpstr[i]) { // move $ to the end of each token
00192 if (tmpstr[i] == '$' ) {
00193 tmpstr[i] = ' ';
00194 j = i;
00195 while (tmpstr[--j] == ' ');
00196 tmpstr[++j] = '$';
00197 }
00198 i++;
00199 }
00200 std::string fmt = "intercept "; // first column is reserved for intercept
00201 fmt.append(tmpstr);
00202
00203 std::string sep(" ,");
00204 std::vector<std::string> tmpvec;
00205 unsigned tncol = split(fmt,sep,&tmpvec); ///// split(tncol,sep); tncol >= 1 is required
00206 nc = tncol;
00207 for (i=0; i<tncol; ++i) if (tmpvec[i] == "_skip") nc--;
00208 std::ifstream in(fname.c_str(),std::ios::in);
00209 if (!in) {
00210 if(line){
00211 delete [] line;
00212 line=0;
00213 }
00214 throw exception("Data::input(): cannot open file");
00215 }
00216 if (!in.getline(line,linewidth)) {
00217 warning("Data::input(): empty datafile: %s",fname.c_str());
00218 if(line){
00219 delete [] line;
00220 line=0;
00221 }
00222 return;
00223 }
00224 while (!validline(line)) {
00225 if (!in.getline(line,linewidth)) {
00226 warning("Data::input(): no real data in datafile: %s",fname.c_str());
00227 if(line){
00228 delete [] line;
00229 line=0;}
00230 return;
00231 }
00232 }
00233 std::string T(line);
00234 i = split(T," ");
00235 if (i < tncol-1) {
00236 if(line){
00237 delete [] line;
00238 line=0;
00239 }
00240 throw exception("Data::input(): the # of columns in data < the expected");
00241 return;
00242 }
00243 in.clear();
00244 in.seekg(0,std::ios::beg);
00245 nr = 0;
00246 while (in.getline(line,linewidth)) if (validline(line)) nr++;
00247 resize(nr,nc);
00248 int ThereareStrcol = 0;
00249 Vector<int> intvec(tncol);
00250 std::string tstr;
00251 for (i=0; i<tncol; i++) {
00252 tstr = tmpvec[i];
00253 if (tstr.find("_skip") >= 0) {
00254 for (k=i+1; k<tncol; k++) {
00255 if (tstr == tmpvec[k]) {
00256 if(line){
00257 delete [] line;
00258 line=0;
00259 }
00260 throw exception("Data::input(): duplicated column names");
00261 }
00262 }
00263 }
00264 }
00265 std::string::size_type begidx;
00266 for (k=0,i=0; i<tncol; i++) {
00267 if (tmpvec[i] == "_skip") {
00268 intvec[i] = -1;
00269 }
00270 else {
00271 intvec[i] = k;
00272 begidx = tmpvec[i].find("$");
00273 if (begidx != std::string::npos) {
00274 tmpvec[i].replace(begidx,1,"");
00275 datasheet[k].type('S'); // string column
00276 ThereareStrcol = 1;
00277 hashtable[k]->resize(numrec);
00278 }
00279 datasheet[k].name(tmpvec[i]);
00280 datasheet[k].index(k);
00281 k++;
00282 }
00283 } // k == numcol-1
00284 char *token;
00285 std::fstream tdatfile(tdfname.c_str(),std::ios::out);
00286
00287 if (!tdatfile) {
00288 if(line){
00289 delete [] line;
00290 line=0;
00291 }
00292 throw exception("Data::input(): cannot open file");
00293 }
00294 DataNode* dat_cell;
00295 double x;
00296 char *endpt;
00297 j = 0;
00298
00299 in.clear();
00300 in.seekg(0L,std::ios::beg); // rewind data file
00301 while (in.getline(line,linewidth)) {
00302 if (validline(line)) {
00303 token = strtok(line,", ");
00304 i = 1;
00305 while (token) {
00306 if (i >= tncol) break;
00307 k = intvec[i++];
00308 if (k > 0) {
00309 dat_cell = &datasheet[k][j];
00310 if (strcmp(token,".")) {
00311 dat_cell->missing = 0;
00312 if (datasheet[k].type() == 'S') {
00313 hashtable[k]->insert(token);
00314 id = strlen(token)+1;
00315 tdatfile.write((char *)&id,sizeof(unsigned));
00316 tdatfile.write(token,id);
00317 }
00318 else {
00319 x = strtod(token,&endpt); // sscanf(token,"%lf",&x);
00320 if (*endpt == '\0') {
00321 dat_cell->double_val(x);
00322 }
00323 else {
00324 warning("Data::input(): numeric column has non-numerics "
00325 "at the corner of row %d and column %d.\n"
00326 " SUGGESTION: claim it as string column in"
00327 " D.input() with $ sign",
00328 j+1,i-1);
00329 resize(0,0);
00330 in.close();
00331 tdatfile.close();
00332 if(line){
00333 delete [] line;
00334 line=0;
00335 }
00336 return;
00337 }
00338 }
00339 }
00340 else {
00341 dat_cell->missing = 1;
00342 datasheet[k].count_miss(1);
00343 }
00344 }
00345 token = strtok('\0',", ");
00346 }
00347 j++;
00348 } // end of validline(line)
00349 }
00350 in.close();
00351 tdatfile.close();
00352 datasheet[0].type('I'); // I = type for intercept
00353 datasheet[0].nlevel(1); // I = type for intercept
00354 datasheet[0].nmiss(0);
00355
00356 /////////////////////////////////////////////////////
00357 // now re-hash for each string field, if necessary
00358 ////////////////////////////////////////////////////
00359 if (ThereareStrcol) {
00360 for (i=1; i<numcol; i++) {
00361 if (datasheet[i].type() != 'S') continue;
00362 id = hashtable[i]->size();
00363 hashtable[i]->resize(id);
00364 datasheet[i].nlevel(id);
00365 }
00366 tdatfile.open(tdfname.c_str(),std::ios::in);
00367 for (i=0; i<numrec; i++) {
00368 for (j=1; j<numcol; j++) {
00369 dat_cell = &datasheet[j][i];
00370 if (datasheet[j].type() == 'S' && !(dat_cell->missing)) {
00371 tdatfile.read((char *)&id,sizeof(unsigned));
00372 tdatfile.read(line,id);
00373 id = hashtable[j]->insert(line);
00374 dat_cell->unsigned_val(id);
00375 }
00376 }
00377 }
00378 tdatfile.close();
00379 }
00380 if(line){
00381 delete [] line;
00382 line=0;
00383 }
00384 ////////////////////////////////////////////////////////////////////
00385 // save a copy of data is a must. Because data could be changed
00386 // temporarily for some special purposes, the change can be droped
00387 // by release datasheet
00388 ////////////////////////////////////////////////////////////////////
00389 save_datasheet(0); // save a copy to hard-disk
00390 }
void Data::input_datasheet (void)
Definition at line 437 of file data.cpp. References data_in_memory, datasheet, numcol, numrec, matvec::Field::resize(), tdfname, and matvec::warning(). Referenced by adjoin(), cell(), col(), copyfrom(), matvec::Population::input_data(), matvec::Population::input_markerData(), mat(), max(), mean(), min(), newcol(), operator=(), matvec::Model::prepare_data(), print(), product(), rawcol(), matvec::Model::re_hash_data(), row(), matvec::Model::save(), matvec::GLMM::save(), matvec::Model::save_pos_val(), stat(), sum(), sumsq(), value_for_missing(), and variance().
00438 {
00439 if (data_in_memory) return;
00440 if (data_on_disk) {
00441 std::ifstream df(tdfname.c_str());
00442 if (!df) throw exception("Data::input_datasheet(): cannot open file");
00443 for (unsigned i=1; i<numcol; i++) { // first column is an intercept
00444 datasheet[i].resize(numrec);
00445 df.read((char *)datasheet[i].dat_vec,numrec*sizeof(DataNode));
00446 }
00447 df.close();
00448 data_in_memory = 1; // data now is in memorry
00449 }
00450 else {
00451 warning("Data::input_datasheet(): data is not on disk");
00452 }
00453 }
doubleMatrix Data::mat (void)
Definition at line 819 of file data.cpp. References matvec::Field::dat_vec, data_in_memory, datasheet, matvec::DataNode::double_val(), input_datasheet(), matvec::DataNode::missing, numcol, numrec, release_datasheet(), and matvec::Field::type().
00820 {
00821 doubleMatrix retval(numrec,numcol);
00822 if (numrec==0) throw exception("Data::mat(): empty data object");
00823 if (!data_in_memory) input_datasheet();
00824 DataNode *dat = 0;
00825 unsigned i,j;
00826 double *dpt;
00827 for (i=0; i<numrec; i++) {
00828 dpt = retval[i];
00829 for (j=1; j<numcol; j++) { // first column intercept should be ignored
00830 if (datasheet[j].type() != 'S') {
00831 dat = &(datasheet[j].dat_vec[i]);
00832 if (dat->missing == 0) {
00833 dpt[j] = dat->double_val();
00834 }
00835 else {
00836 dpt[j] = 0.0;
00837 }
00838 }
00839 }
00840 }
00841 release_datasheet();
00842 return retval;
00843 }
Field Data::max (const std::string &cname="")
Definition at line 675 of file data.cpp. References data_in_memory, datasheet, field_index_vec(), input_datasheet(), matvec::Vector< T >::max(), and matvec::Vector< T >::size().
00676 {
00677 if (!data_in_memory) input_datasheet();
00678 int i,nc=0;
00679 Vector<int> ivec;
00680 field_index_vec(ivec,cname);
00681 nc = ivec.size();
00682 Field xcol(nc);
00683 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].max();
00684 return xcol;
00685 }
Field Data::mean (const std::string &cname="")
Definition at line 699 of file data.cpp. References data_in_memory, datasheet, field_index_vec(), input_datasheet(), and matvec::Vector< T >::size().
00700 {
00701 if (!data_in_memory) input_datasheet();
00702 int i,nc=0;
00703 Vector<int> ivec;
00704 field_index_vec(ivec,cname);
00705 nc = ivec.size();
00706 Field xcol(nc);
00707 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].mean();
00708 return xcol;
00709 }
Field Data::min (const std::string &cname="")
Definition at line 687 of file data.cpp. References data_in_memory, datasheet, field_index_vec(), input_datasheet(), matvec::Vector< T >::min(), and matvec::Vector< T >::size().
00688 {
00689 if (!data_in_memory) input_datasheet();
00690 int i,nc=0;
00691 Vector<int> ivec;
00692 field_index_vec(ivec,cname);
00693 nc = ivec.size();
00694 Field xcol(nc);
00695 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].min();
00696 return xcol;
00697 }
Data & Data::newcol (const std::string &cname)
Definition at line 535 of file data.cpp. References matvec::check_ptr(), data_in_memory, datasheet, field_index(), hashtable, input_datasheet(), maxnumcol, new_col, numcol, numrec, matvec::Field::type(), and matvec::warning().
00536 {
00537 if (cname == "") {
00538 warning("Data::newcol(cname), cname is empty");
00539 return *this;
00540 }
00541 unsigned i;
00542 int k = field_index(cname);
00543 if (k > 0) { // first column intercept cannot be overwritten
00544 if (datasheet[k].type() == 'S') {
00545 warning("Data::newcol(): %s exits, can't overwrite string column",cname.c_str());
00546 return *this;
00547 }
00548 warning("Data.newcol(): %s exits, it's been overwritten",cname.c_str());
00549 new_col = k;
00550 }
00551 else {
00552 if (!data_in_memory) input_datasheet(); // data must be in memory
00553 if (numcol == maxnumcol) { // data sheet is full
00554 Field *tmp_datasheet = new Field [maxnumcol];
00555 check_ptr(tmp_datasheet);
00556 HashTable **tmp_hashtable = new HashTable *[maxnumcol];
00557 check_ptr(tmp_hashtable);
00558 for (i=0; i<maxnumcol; i++) {
00559 tmp_datasheet[i] = datasheet[i];
00560 tmp_hashtable[i] = hashtable[i];
00561 }
00562 if(datasheet){
00563 delete [] datasheet; // note I do not delete datasheet[i]
00564 datasheet=0;
00565 }
00566 if(hashtable){
00567 delete [] hashtable; // note I do not delete hashtable[i]
00568 hashtable=0;
00569 }
00570 maxnumcol += 10;
00571 datasheet = new Field [maxnumcol];
00572 check_ptr(datasheet);
00573 hashtable = new HashTable *[maxnumcol];
00574 check_ptr(hashtable);
00575 for (i=0; i<numcol; i++) {
00576 datasheet[i] = tmp_datasheet[i];
00577 hashtable[i] = tmp_hashtable[i];
00578 }
00579 for (i=numcol; i<maxnumcol; i++) {
00580 hashtable[i] = new HashTable;
00581 check_ptr(hashtable[i]);
00582 datasheet[i] = 0;
00583 }
00584 if(tmp_datasheet){
00585 delete [] tmp_datasheet;
00586 tmp_datasheet=0;
00587 }
00588 if(tmp_hashtable){
00589 delete [] tmp_hashtable;
00590 tmp_hashtable=0;
00591 }
00592 }
00593 new_col = numcol++;
00594 datasheet[new_col].name(cname);
00595 datasheet[new_col].type('F'); // floating point number for the colum
00596 datasheet[new_col].index(new_col);
00597 datasheet[new_col].resize(numrec);
00598 }
00599 return *this;
00600 }
void Data::newcol (const std::string &cname, const Field &col)
Definition at line 603 of file data.cpp. References matvec::Field::col_struct, datasheet, matvec::Field::elem(), matvec::DataNode::missing, new_col, numrec, matvec::Field::size(), and matvec::warning(). Referenced by adjoin().
00604 {
00605 unsigned i,n = col.size();
00606 if (n != numrec)
00607 warning("Data::newcol():%d,%d: size not conformable",numrec,n);
00608 this->newcol(cname);
00609 datasheet[new_col].col_struct = col.col_struct;
00610 datasheet[new_col].name(cname); // cname override Field.name()
00611 datasheet[new_col].index(new_col);
00612 if (numrec < n) n = numrec;
00613 DataNode *tc = datasheet[new_col].dat_vec;
00614 for (i=0; i<n; i++) tc[i] = col.elem(i);
00615 for (i=n; i<numrec; i++) tc[i].missing = 1;
00616 datasheet[new_col].count_miss(numrec-n);
00617 }
unsigned Data::num_cols (void) const
Definition at line 84 of file data.h. References numcol. Referenced by adjoin(), matvec::Population::input_data(), matvec::Population::input_markerData(), matvec::operator<<(), and matvec::Model::prepare_data().
00084 {return numcol;}
unsigned Data::num_rows (void) const
Definition at line 85 of file data.h. References numrec. Referenced by adjoin(), matvec::Model::assign_id_xact(), matvec::Model::fitdata(), matvec::Model::hashxact(), matvec::Population::input_data(), matvec::Population::input_markerData(), matvec::KP(), matvec::Model::prepare_data(), matvec::Model::re_hash_data(), and matvec::Model::save_pos_val().
00085 {return numrec;}
const Data & Data::operator= (const Field &V)
Definition at line 63 of file data.cpp. References data_in_memory, datasheet, hashtable, matvec::Field::hashtable, input_datasheet(), matvec::Field::len(), new_col, numcol, numrec, resize(), save_datasheet(), and matvec::warning().
00064 {
00065 if (new_col == 1) { // first column intercept can't be overwritten
00066 resize(V.len(),numcol,20);
00067 }
00068 else {
00069 if (V.len() != numrec) {
00070 warning("Data = Col: size incompatible");
00071 return *this;
00072 }
00073 }
00074 if (!data_in_memory) input_datasheet();
00075 std::string cname = datasheet[new_col].name();
00076 int indx = datasheet[new_col].index();
00077 datasheet[new_col] = V;
00078 datasheet[new_col].name(cname);
00079 datasheet[new_col].index(indx);
00080 if (V.hashtable) hashtable[new_col]->copyfrom( *(V.hashtable));
00081 save_datasheet(0); // save the changes on disk, but keep them in memory
00082 return *this;
00083 }
const Data & Data::operator= (Data &A)
Definition at line 57 of file data.cpp. References copyfrom().
00058 {
00059 copyfrom(A);
00060 return *this;
00061 }
void Data::print (std::ostream &stream, const Vector< int > intvec, const int ic=0)
Definition at line 845 of file data.cpp. References matvec::Field::dat_vec, data_in_memory, datasheet, matvec::DataNode::double_val(), matvec::HashTable::find(), hashtable, input_datasheet(), matvec::DataNode::missing, numrec, matvec::Session::output_precision, release_datasheet(), matvec::SESSION, matvec::Vector< T >::size(), matvec::Field::type(), and matvec::DataNode::unsigned_val(). Referenced by display(), matvec::operator<<(), and save().
00846 {
00847 if (numrec==0) {
00848 std::cout << "\t empty data object\n" << std::flush;
00849 return;
00850 }
00851 if (!data_in_memory) input_datasheet();
00852 int nc = intvec.size();
00853 int kk;
00854 unsigned i,j,k,id;
00855 unsigned W = SESSION.output_precision+6;
00856 const char *str;
00857 char ch;
00858 stream.precision(SESSION.output_precision);
00859 DataNode *dat = 0;
00860 for (k=23,i=0; i<numrec; i++) {
00861 if (ic && i>=k) {
00862 k += 23;
00863 stream << " more ... [q for quit] ";
00864 std::cin.get(ch);
00865 std::cin.seekg(0L,std::ios::beg);
00866 if (ch == 'q') break;
00867 }
00868 for (j=0; j<nc; j++) {
00869 kk = intvec[j];
00870 if (kk < 1) continue; // first coloumn is reserved for intercept
00871 dat = &(datasheet[kk].dat_vec[i]);
00872 if (dat->missing) {
00873 stream << " " << std::setw(W) << ".";
00874 }
00875 else {
00876 if (datasheet[kk].type()=='S') {
00877 id = dat->unsigned_val();
00878 str = (const char*)(hashtable[kk]->find(id));
00879 stream << " " << std::setw(W) << str;
00880 }
00881 else {
00882 stream << " " << std::setw(W) << dat->double_val();
00883 }
00884 }
00885 }
00886 stream << "\n";
00887 }
00888 stream << std::flush;
00889 release_datasheet();
00890 }
Field Data::product (const std::string &cname="")
Definition at line 747 of file data.cpp. References data_in_memory, datasheet, field_index_vec(), input_datasheet(), and matvec::Vector< T >::size().
00748 {
00749 if (!data_in_memory) input_datasheet();
00750 int i,nc=0;
00751 Vector<int> ivec;
00752 field_index_vec(ivec,cname);
00753 nc = ivec.size();
00754 Field xcol(nc);
00755 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].product();
00756 return xcol;
00757 }
DataNode * Data::rawcol (const std::string &cname)
Definition at line 515 of file data.cpp. References matvec::Field::dat_vec, data_in_memory, datasheet, field_index(), input_datasheet(), and matvec::warning().
00516 {
00517 int k = field_index(cname);
00518 if (k > 0) { // first column intercept is not accessible
00519 if (!data_in_memory) input_datasheet();
00520 return datasheet[k].dat_vec;
00521 }
00522 else {
00523 warning("Data::rawcol(%s): no such column",cname.c_str());
00524 return 0;
00525 }
00526 }
DataNode * Data::rawcol (unsigned c)
Definition at line 528 of file data.cpp. References matvec::Field::dat_vec, data_in_memory, datasheet, input_datasheet(), and numcol. Referenced by matvec::Population::input_data(), matvec::Population::input_markerData(), matvec::Model::prepare_data(), and matvec::Model::re_hash_data().
00529 {
00530 if (c <= 0 || c >= numcol) throw exception("Data::rawcol(): out of range");
00531 if (!data_in_memory) input_datasheet();
00532 return datasheet[c].dat_vec;
00533 }
void Data::release (void)
Definition at line 654 of file data.cpp. References datasheet, hashtable, and maxnumcol. Referenced by resize(), and ~Data().
00655 {
00656 if (datasheet) {
00657 delete [] datasheet;
00658 datasheet = 0;
00659 }
00660 if (hashtable) {
00661 for (int i=maxnumcol-1; i>=0; i--){
00662 if(hashtable[i]){
00663 delete hashtable[i];
00664 hashtable[i]=0;
00665 }
00666 }
00667 if(hashtable){
00668 delete [] hashtable;
00669 hashtable=0;
00670 }
00671 }
00672 }
void Data::release_datasheet (void)
Definition at line 455 of file data.cpp. References data_in_memory, data_on_disk, datasheet, numcol, matvec::Field::resize(), and save_datasheet(). Referenced by matvec::Population::input_data(), matvec::Population::input_markerData(), mat(), matvec::Model::prepare_data(), print(), matvec::Model::save(), matvec::GLMM::save(), save_datasheet(), and matvec::Model::save_pos_val().
00456 {
00457 if (datasheet) {
00458 // any data must have a hard copy in disk
00459 if (!data_on_disk) save_datasheet();
00460 for (unsigned i=1; i<numcol; i++) datasheet[i].resize(0);
00461 data_in_memory = 0; // data is not in memory, but should on disk;
00462 }
00463 }
Data & Data::resize (const unsigned nr, const unsigned nc, const unsigned mc=0)
Definition at line 99 of file data.cpp. References matvec::check_ptr(), data_in_memory, data_on_disk, datasheet, hashtable, maxnumcol, numcol, numrec, release(), and matvec::Field::resize(). Referenced by copyfrom(), Data(), input(), and operator=().
00100 {
00101 if (numrec == nr && numcol == nc && maxnumcol== mc) return *this;
00102 release();
00103 numrec = nr;
00104 numcol = nc;
00105 if (mc < nc) {
00106 maxnumcol = nc + 10; // 10 is the buffer columns
00107 }
00108 else {
00109 maxnumcol = mc + 1; // first column is reserved for intercept
00110 }
00111 if (numrec==0) numcol = 0;
00112 if (numcol==0) numrec = 0;
00113
00114 data_in_memory = 1;
00115 data_on_disk = 0;
00116 hashtable = new HashTable *[maxnumcol];
00117 check_ptr(hashtable);
00118 unsigned i;
00119 for (i=0; i<maxnumcol; i++) {
00120 hashtable[i] = new HashTable;
00121 check_ptr(hashtable[i]);
00122 }
00123 datasheet = new Field [maxnumcol];
00124 check_ptr(datasheet);
00125 datasheet[0].resize(0); // first column is reserved for intercept
00126 for (i=1; i<numcol; i++) datasheet[i].resize(numrec);
00127 return *this;
00128 }
void Data::row (const unsigned i, DataNode *recd)
Definition at line 465 of file data.cpp. References data_in_memory, datasheet, input_datasheet(), and numcol. Referenced by matvec::Model::hashxact(), and matvec::Model::save_pos_val().
00466 {
00467 if (!data_in_memory) input_datasheet();
00468 if (!recd) {
00469 if(numcol>0) {
00470 recd = new DataNode [numcol];
00471 }
00472 else {
00473 recd = 0;
00474 }
00475 }
00476 for (unsigned j=1; j<numcol; j++) recd[j] = datasheet[j][i];
00477 }
void Data::save (const std::string &fname, const int io_mode=std::ios::out)
Definition at line 899 of file data.cpp. References field_index_vec(), and print().
00901 {
00902 std::ofstream ofs;
00903 ofs.open(fname.c_str(),(OpenModeType)io_mode);
00904 if (!ofs) throw exception("Data::save(): cannot open file");
00905 Vector<int> intvec;
00906 field_index_vec(intvec);
00907 print(ofs,intvec,0);
00908 ofs.close();
00909 }
void Data::save_datasheet (const int relse=1)
Definition at line 421 of file data.cpp. References data_on_disk, datasheet, numcol, numrec, release_datasheet(), tdfname, and matvec::warning(). Referenced by adjoin(), copyfrom(), operator=(), matvec::Model::prepare_data(), and release_datasheet().
00422 {
00423 if (!datasheet) {
00424 warning("Data::save_datasheet(): no data to save");
00425 return;
00426 }
00427 std::ofstream df(tdfname.c_str(),std::ios::out);
00428 if (!df) throw exception("Data::save_datasheet(): cannot open file");
00429 for (unsigned i=1; i<numcol; i++) { // first column is an intercept
00430 df.write((char *)datasheet[i].dat_vec,numrec*sizeof(DataNode));
00431 }
00432 df.close();
00433 data_on_disk = 1;
00434 if (relse) release_datasheet();
00435 }
unsigned Data::size (void) const
Definition at line 83 of file data.h. References numrec.
00083 {return numrec;}
Data & Data::stack (Data &b)
Definition at line 636 of file data.cpp. References matvec::warning().
00637 {
00638 warning("Data::stack(b): not yet available");
00639 return *this;
00640 }
void Data::stat (void)
Definition at line 759 of file data.cpp. References matvec::Field::covariance(), data_in_memory, datasheet, matvec::DataNode::double_val(), input_datasheet(), matvec::Field::max(), matvec::Field::mean(), matvec::Field::min(), matvec::DataNode::missing, matvec::Field::name(), matvec::Field::nmiss(), numcol, numrec, matvec::Session::output_precision, matvec::SESSION, and matvec::Field::type().
00760 {
00761 unsigned W = SESSION.output_precision+6;
00762 if (!data_in_memory) input_datasheet();
00763 unsigned i;
00764
00765 std::cout << "\n Name";
00766 for (i=1; i<numcol; i++) { // first column intercept should be ignored
00767 if (datasheet[i].type()=='S') continue;
00768 std::cout << " " << std::setw(W) << datasheet[i].name();
00769 }
00770 std::cout << "\n";
00771
00772 std::cout << " Nobs";
00773 for (i=1; i<numcol; i++) {
00774 if (datasheet[i].type()=='S') continue;
00775 std::cout << " ";
00776 if (datasheet[i].type() == 'F') {
00777 std::cout << std::setw(W) << numrec-datasheet[i].nmiss();
00778 }
00779 else {
00780 std::cout << std::setw(W) << ".";
00781 }
00782 }
00783 std::cout << "\n";
00784
00785 std::cout << " Min ";
00786 for (i=1; i<numcol; i++) {
00787 if (datasheet[i].type()=='S') continue;
00788 std::cout << datasheet[i].min();
00789 }
00790 std::cout << "\n";
00791
00792 std::cout << " Max ";
00793 for (i=1; i<numcol; i++) {
00794 if (datasheet[i].type()=='S') continue;
00795 std::cout << datasheet[i].max();
00796 }
00797 std::cout << "\n";
00798
00799 std::cout << " Mean";
00800 for (i=1; i<numcol; i++) {
00801 if (datasheet[i].type()=='S') continue;
00802 std::cout << datasheet[i].mean();
00803 }
00804 std::cout << "\n";
00805
00806 std::cout << " S.D.";
00807 DataNode var;
00808 for (i=1; i<numcol; i++) {
00809 if (datasheet[i].type()=='S') continue;
00810 var = datasheet[i].covariance();
00811 if (!var.missing) var.double_val(std::sqrt(var.double_val()));
00812 std::cout << var;
00813 }
00814 std::cout << "\n\n";
00815
00816 return;
00817 }
Field Data::sum (const std::string &cname="")
Definition at line 723 of file data.cpp. References data_in_memory, datasheet, field_index_vec(), input_datasheet(), matvec::Vector< T >::size(), and matvec::Vector< T >::sum().
00724 {
00725 if (!data_in_memory) input_datasheet();
00726 int i,nc=0;
00727 Vector<int> ivec;
00728 field_index_vec(ivec,cname);
00729 nc = ivec.size();
00730 Field xcol(nc);
00731 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].sum();
00732 return xcol;
00733 }
Field Data::sumsq (const std::string &cname="")
Definition at line 735 of file data.cpp. References data_in_memory, datasheet, field_index_vec(), input_datasheet(), matvec::Vector< T >::size(), and matvec::Vector< T >::sumsq().
00736 {
00737 if (!data_in_memory) input_datasheet();
00738 int i,nc=0;
00739 Vector<int> ivec;
00740 field_index_vec(ivec,cname);
00741 nc = ivec.size();
00742 Field xcol(nc);
00743 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].sumsq();
00744 return xcol;
00745 }
void Data::value_for_missing (const double vm)
Definition at line 415 of file data.cpp. References data_in_memory, datasheet, input_datasheet(), numcol, and matvec::Field::value_for_missing().
00416 {
00417 if (!data_in_memory) input_datasheet();
00418 for (unsigned i=0; i<numcol; i++) datasheet[i].value_for_missing(vm);
00419 }
Field Data::variance (const std::string &cname="")
Definition at line 711 of file data.cpp. References data_in_memory, datasheet, field_index_vec(), input_datasheet(), and matvec::Vector< T >::size().
00712 {
00713 if (!data_in_memory) input_datasheet();
00714 int i,nc=0;
00715 Vector<int> ivec;
00716 field_index_vec(ivec,cname);
00717 nc = ivec.size();
00718 Field xcol(nc);
00719 for (i=0; i<nc; i++) xcol[i] = datasheet[ivec[i]].covariance();
00720 return xcol;
00721 }
std::ostream & operator<< (std::ostream &stream, Data &A) [friend]
Definition at line 911 of file data.cpp.
00912 {
00913 unsigned nc = A.num_cols() - 1; // first column is reserved for intercept
00914 if (nc == 0) return stream;
00915 Vector<int> intvec(nc);
00916 for (int i=0; i<nc; i++) intvec[i] = i+1;
00917 A.print(stream,intvec,1);
00918 return stream;
00919 }
int Data::data_in_memory [protected]
Definition at line 38 of file data.h. Referenced by cell(), col(), copyfrom(), in_memory(), input_datasheet(), mat(), max(), mean(), min(), newcol(), operator=(), print(), product(), rawcol(), release_datasheet(), resize(), row(), stat(), sum(), sumsq(), value_for_missing(), and variance(). |
int Data::data_on_disk [protected]
Definition at line 38 of file data.h. Referenced by copyfrom(), in_disk(), release_datasheet(), resize(), and save_datasheet(). |
Field * Data::datasheet
Definition at line 46 of file data.h. Referenced by adjoin(), cell(), col(), Data(), field_index(), matvec::Model::hashxact(), input(), matvec::Population::input_data(), input_datasheet(), matvec::Population::input_markerData(), matvec::KP(), mat(), max(), mean(), min(), newcol(), operator=(), matvec::Model::prepare_data(), print(), product(), rawcol(), matvec::Model::re_hash_data(), release(), release_datasheet(), resize(), row(), matvec::Model::save(), matvec::GLMM::save(), save_datasheet(), matvec::Model::save_pos_val(), stat(), sum(), sumsq(), matvec::Model::trait_effect_level(), value_for_missing(), and variance(). |
HashTable ** Data::hashtable
Definition at line 45 of file data.h. Referenced by col(), matvec::Model::copyfrom(), copyfrom(), Data(), input(), matvec::Population::input_data(), matvec::Population::input_markerData(), newcol(), operator=(), print(), matvec::Model::re_hash_data(), release(), resize(), and matvec::Model::save_pos_val(). |
unsigned Data::maxnumcol [protected]
Definition at line 39 of file data.h. Referenced by copyfrom(), Data(), newcol(), release(), and resize(). |
unsigned Data::new_col [protected]
Definition at line 39 of file data.h. Referenced by adjoin(), copyfrom(), Data(), newcol(), and operator=(). |
unsigned Data::numcol [protected]
Definition at line 39 of file data.h. Referenced by cell(), copyfrom(), Data(), field_index(), field_index_vec(), input(), input_datasheet(), mat(), newcol(), num_cols(), operator=(), rawcol(), release_datasheet(), resize(), row(), save_datasheet(), stat(), and value_for_missing(). |
unsigned Data::numrec [protected]
Definition at line 39 of file data.h. Referenced by adjoin(), cell(), col(), copyfrom(), Data(), input(), input_datasheet(), mat(), newcol(), num_rows(), operator=(), print(), resize(), save_datasheet(), size(), and stat(). |
std::string Data::tdfname [protected]
Definition at line 40 of file data.h. Referenced by copyfrom(), Data(), input(), input_datasheet(), and save_datasheet(). |
1.2.16