NAMESPACE_UPP #define CHECK_CACHE() // CheckCache() #define CHECK_BLOCK(block, index) // CheckBlock(block, index) #ifdef _DEBUG //#define CHECK_INDEX // comment this line to turn index checking off #endif class DataClient { public: enum { DEFAULT_CACHE_SIZE = 100000 }; DataClient(); virtual ~DataClient(); virtual void Serialize(Stream& stream); void StreamMaster(Stream& stream, DataFile& df, dfid_t id); void Open(DataFile& datafile, int index, dfid_t id); int Create(DataFile& datafile, dfid_t id); void Close(); virtual void Unlink(); bool IsOpen() const { return datafile; } DataFile& GetDataFile() const { return *datafile; } int GetMasterIndex() const { return master_index; } bool IsReadOnly() const { return datafile -> IsReadOnly(); } dfid_t GetID() const { return id; } virtual void SetCacheSize(int size); int GetCacheSize() const { return cache_size; } virtual int GetUsedCacheSize() const; virtual void ShrinkCache(); virtual void Commit() = 0; virtual void Rollback(); static void GetIndex(int *dest, int count, Stream& src); static void SetIndex(Stream& dest, const int *src, int count); static void GetIndex(Vector& dest, Stream& src) { GetIndex(dest.Begin(), dest.GetCount(), src); } static void SetIndex(Stream& dest, const Vector& src) { SetIndex(dest, src.Begin(), src.GetCount()); } protected: enum { LOG2_INDEX_BYTES = 2, INDEX_BYTES = 1 << LOG2_INDEX_BYTES, }; void FlushMaster(); virtual void DoOpen(); virtual void DoCreate(); virtual void DoClose(); // NO-OP by default protected: DataFile *datafile; dfid_t id; int master_index; int cache_size; int commit_index; mutable bool master_dirty; }; class DataColumn : public DataClient { public: DataColumn(); virtual ~DataColumn(); int GetCount() const { return row_count; } virtual void SetCount(int count) = 0; protected: int row_count; }; class DataColumnFix : public DataColumn { public: DataColumnFix(int item_width); virtual ~DataColumnFix(); virtual void Unlink(); virtual void Serialize(Stream& stream); virtual void Commit(); virtual void SetCount(int count); virtual int GetUsedCacheSize() const; virtual void ShrinkCache(); protected: const byte *GetRaw(int index); void SetRaw(int index, const void *value); private: virtual void DoClose(); void FlushCache(); void ClearCache(); void FetchCache(int block); private: enum { ADD_STEP = 10, }; struct Block { Block() : dirty(false), index(-1) {} int index; bool dirty; byte buffer[DataFile::BLOCK_BYTES]; }; int item_width; unsigned per_block; Vector block_index; ArrayMap cache; int last_index; int last_index1; int last_index2; int last_index3; Block *last_block; Block *last_block1; Block *last_block2; Block *last_block3; }; class DataColumnVar : public DataColumn { public: DataColumnVar(); virtual ~DataColumnVar(); virtual void Unlink(); virtual void Serialize(Stream& stream); virtual void Commit(); virtual void SetCount(int count); virtual int GetUsedCacheSize() const; virtual void ShrinkCache(); public: enum { SHORT_BYTES = 2, MAX_BYTES = DataFile::BLOCK_BYTES, INIT_BYTES = MAX_BYTES / 2, JOIN_BYTES = MAX_BYTES / 4, SINGLE_BYTES = MAX_BYTES / 4, ADD_STEP = 100, }; protected: String GetRaw(int index); void SetRaw(int index, String value); private: struct Block { Block() : raw_dirty(false), total(SHORT_BYTES), offsets(SHORT_BYTES) {} int offsets; int total; bool raw_dirty; VectorMap dirty; String buffer; }; virtual void DoClose(); int FindBlock(int index) const; void FlushCache(); void FlushCache(int index, int set_span = -1); void ClearCache(); int FetchCache(int block); int GetDataLength(int index) const; bool CanJoin(int index, int length) const; void JoinBlock(int index, char *buffer, int offset, int count); int AddBlock(int index, char *buffer, int offset, int count); void AdjustLastIndex(int i) { if(last_index < 0 || i < index_index[last_index] || i >= index_index[last_index + 1]) last_index = FindBlock(i); } void CheckCache() const; void CheckBlock(const Block& b, int index) const; private: Vector index_index; Vector block_index; Index cache_index; Array cache; int last_index; int current_size; // size of all cached objects }; class DataRowIndex : public DataClient { public: DataRowIndex(); virtual void Unlink(); virtual void Serialize(Stream& stream); virtual void Commit(); virtual int GetUsedCacheSize() const; virtual void ShrinkCache(); int GetCount() const { return exact_count; } int GetFirstEmpty() const { return first_empty; } int GetEndIndex() const { return end_index; } int Add(); Vector Add(int count); void AddAt(int index); void Remove(int index); void Remove(const Vector& index); Vector GetIndex(); bool IsEmpty(int index); private: struct Block { Block() : dirty(false) {} bool dirty; Vector data; int FindIndex(int offset) const; }; virtual void DoClose(); void FlushCache(); void ClearCache(); int FetchCache(int block); void CalcFirstEmpty(); void CalcEndIndex(); void CalcExactCount(); void CheckBlock(const Block& block); void UnCollapse(); private: enum { LOG2_BLOCK_ITEMS = DataFile::LOG2_BLOCK_BYTES - LOG2_INDEX_BYTES - 1, BLOCK_ITEMS = 1 << LOG2_BLOCK_ITEMS, }; Vector block_index; ArrayMap cache; int exact_count; int end_index; int first_empty; int current_size; bool collapsed; }; class DataSource { public: DataSource() {} virtual ~DataSource() {} virtual void SetRowIndex(DataRowIndex *index) {} virtual Value Get(int index) = 0; Value operator [] (int index) { return Get(index); } virtual void Add(int index, Value value) { Set(index, value); } virtual void Set(int index, Value value) = 0; virtual void Remove(int index) { Set(index, Value()); } virtual void CheckAdd(int index, Value value) { CheckSet(index, value); } virtual void CheckSet(int index, Value value) {} virtual void Commit() {} virtual void Rollback() {} virtual int GetUsedCacheSize() const { return 0; } virtual void ShrinkCache() {} virtual String Describe() const = 0; }; template C& DataFetch(C& container, DataSource& source, const Vector& index) { typedef typename C::ValueType CVT; for(const int *p = index.Begin(), *e = index.End(); p != e; ++p) container.Add(CalcType::ValueTo(source.Get(*p))); return container; } template C DataFetch(DataSource& source, const Vector& index, C * = 0) { C container; return DataFetch(container, source, index); } template C& DataFetchSorted(C& container, DataSource& source, const Vector& index, const L& sort, C * = 0) { DataFetch(container, source, index); Sort(container, sort); return container; } template C DataFetchSorted(DataSource& source, const Vector& index, const L& sort, C * = 0) { C container; return DataFetchSorted(container, source, index); } class DataFilter : public DataSource { public: DataFilter() : source(0) {} void SetSource(DataSource *_source) { source = _source; } DataSource *GetSource() const { return source; } virtual void SetRowIndex(DataRowIndex *index) { source -> SetRowIndex(index); } virtual Value Get(int index) { ASSERT(source); return source -> Get(index); } virtual void Add(int index, Value value) { ASSERT(source); source -> Add(index, value); } virtual void Set(int index, Value value) { ASSERT(source); source -> Set(index, value); } virtual void Remove(int index) { ASSERT(source); source -> Remove(index); } virtual void CheckAdd(int index, Value value) { ASSERT(source); source -> CheckAdd(index, value); } virtual void CheckSet(int index, Value value) { ASSERT(source); source -> CheckSet(index, value); } virtual void Commit() { ASSERT(source); source -> Commit(); } virtual void Rollback() { ASSERT(source); source -> Rollback(); } virtual int GetUsedCacheSize() const { ASSERT(source); return source->GetUsedCacheSize(); } virtual void ShrinkCache() { ASSERT(source); source->ShrinkCache(); } protected: DataSource *source; }; class DataConvertFilter : public DataFilter { public: DataConvertFilter(const Convert& convert = NoConvert()) : convert(&convert) {} void SetConvert(const Convert& _convert) { convert = &_convert; } const Convert& operator <<= (const Convert& _convert) { convert = &_convert; return _convert; } virtual Value Get(int index); virtual void Set(int index, Value value); virtual void CheckSet(int index, Value value); protected: const Convert *convert; }; class DataMapFilter : public DataFilter { public: DataMapFilter() {} void SetMap(const Index& _map) { map <<= _map; } const Index& operator <<= (const Index& _map) { return map <<= _map; } void PickMap(Index& _map) { map = _map; } void operator |= (Index& _map) { map = _map; } virtual void CheckSet(int index, Value value); protected: Index map; }; class DataMinMaxFilter : public DataFilter { public: DataMinMaxFilter(double min = Null, double max = Null, bool not_null = false) : min(min), max(max), not_null(not_null) {} void MinMax(double _min, double _max) { min = _min; max = _max; } double GetMin() const { return min; } double GetMax() const { return max; } void NotNull(bool _nn = true) { not_null = true; } void NoNotNull() { not_null = false; } bool IsNotNull() const { return not_null; } virtual void CheckSet(int index, Value value); protected: double min, max; bool not_null; }; class DataOrderedIndexBase : public DataFilter, public DataClient { public: DataOrderedIndexBase(bool lazy); virtual void Serialize(Stream& stream); virtual void Add(int index, Value value); virtual void Set(int index, Value value); virtual void Remove(int index); virtual int GetUsedCacheSize() const; virtual void ShrinkCache(); #ifdef CHECK_INDEX virtual void CheckIndex() = 0; #endif virtual void SetRowIndex(DataRowIndex *ri) { dirty = true; row_index = ri; DataFilter::SetRowIndex(ri); } virtual void Commit(); virtual void Rollback(); Vector GetRange(Value min, Value max); Vector GetEqual(const Index& values); void GetMinMax(Ref min, Ref max); int GetFirstEqual(Value value, int exclude_row = -1); bool IsEnabled() const { return !disabled; } void Disable(); void Enable(); virtual String Describe() const; protected: virtual void DoClose(); virtual void RefreshIndex() = 0; virtual int FindIndex(int index, Value value) = 0; protected: DataRowIndex *row_index; bool disabled; bool lazy; bool dirty; Vector ordered_index; }; template class DataOrderedIndex : public DataOrderedIndexBase { public: DataOrderedIndex(bool lazy) : DataOrderedIndexBase(lazy) {} #ifdef CHECK_INDEX virtual void CheckIndex(); #endif private: virtual void RefreshIndex(); virtual int FindIndex(int index, Value value); private: struct LessSort { LessSort(const Vector& values) : values(values) {} bool operator () (int i, int j) const { return values[i] < values[j] || values[i] == values[j] && i < j; } const Vector& values; }; struct LessFind { LessFind(DataSource *source, int rindex) : source(source), rindex(rindex) {} bool operator () (int index, const T& value) const { T t = CalcType::ValueTo(source -> Get(index)); return t < value || t == value && index < rindex; } DataSource *source; int rindex; }; }; template void DataOrderedIndex::RefreshIndex() { if(!disabled && dirty) { CDB_TIMING("DataOrderedIndex::RefreshIndex"); int start = msecs(); ASSERT(row_index); if(!lazy) master_dirty = !datafile -> IsReadOnly(); Vector all = row_index->GetIndex(); Vector values; ordered_index.SetCount(all.GetCount()); values.SetCount(all.GetCount()); for(int i = 0; i < ordered_index.GetCount(); i++) { ordered_index[i] = i; values[i] = CalcType::ValueTo(source->Get(all[i])); } Sort(ordered_index, LessSort(values)); for(int i = 0; i < ordered_index.GetCount(); i++) ordered_index[i] = all[ordered_index[i]]; dirty = false; RLOG(msecs(start) << " msec / DataOrderedIndex<" << typeid(T).name() << ">::RefreshIndex(" << source->Describe() << ")"); } } #ifdef CHECK_INDEX template void DataOrderedIndex::CheckIndex() { if(disabled) return; for(int i = 1; i < ordered_index.GetCount(); i++) if(LessFind(source, ordered_index[i - 1])(ordered_index[i], CalcType::ValueTo(source->Get(ordered_index[i - 1])))) { String s; s << "CheckIndex error at " << i << "\n" "[" << (i - 1) << "] = " << StdFormat(Get(ordered_index[i - 1])) << " (" << ordered_index[i - 1] << "), [" << i << "] = " << StdFormat(Get(ordered_index[i])) << " (" << ordered_index[i] << ")\n"; RLOG(s); NEVER(); } } #endif template int DataOrderedIndex::FindIndex(int index, Value value) { return BinFindIndex(ordered_index, CalcType::ValueTo(value), LessFind(source, index)); } template class DataColumnFixSimple : public DataColumnFix, public DataSource { public: typedef T ValueType; DataColumnFixSimple() : DataColumnFix(item_size) {} virtual void Commit() { DataColumnFix::Commit(); } virtual void Rollback() { DataColumnFix::Rollback(); } virtual int GetUsedCacheSize() const { return DataColumnFix::GetUsedCacheSize(); } virtual void ShrinkCache() { DataColumnFix::ShrinkCache(); } virtual void CheckSet(int index, Value v); virtual String Describe() const { return GetTypeName(typeid(T).name()); } }; template void DataColumnFixSimple::CheckSet(int index, Value v) { if(!IsNull(v) && !CalcType::IsType(v)) throw ObjectExc('<' + StdFormat(v) + ">: očekávána hodnota typu '" + CalcType::Describe() + '\''); } template class DataColumnVarSimple : public DataColumnVar, public DataSource { public: typedef T ValueType; virtual void Commit() { DataColumnVar::Commit(); } virtual void Rollback() { DataColumnVar::Rollback(); } virtual int GetUsedCacheSize() const { return DataColumnVar::GetUsedCacheSize(); } virtual void ShrinkCache() { DataColumnVar::ShrinkCache(); } virtual void CheckSet(int index, Value v); virtual String Describe() const { return GetTypeName(typeid(T).name()); } }; template void DataColumnVarSimple::CheckSet(int index, Value v) { if(!IsNull(v) && !CalcType::IsType(v)) throw ObjectExc('<' + StdFormat(v) + ">: očekávána hodnota typu '" + CalcType::Describe() + '\''); } template class DataColumnFixOf : public DataColumnFixSimple { public: virtual Value Get(int index) { return (C)*(const T *)this->GetRaw(index); } virtual void Set(int index, Value value) { T t = (C)value; this->SetRaw(index, &t); } }; template class DataColumnOf; enum { LOG2_BOOL_BLOCK_BITS = 5, BOOL_BLOCK_BITS = 1 << LOG2_BOOL_BLOCK_BITS, BOOL_BLOCK_BYTES = sizeof(int) }; template <> class DataColumnOf : public DataColumnFixSimple { public: virtual Value Get(int index); virtual void Set(int index, Value value); virtual void SetCount(int count); }; template <> class DataColumnOf : public DataColumnFixOf {}; template <> class DataColumnOf : public DataColumnFixOf {}; template <> class DataColumnOf : public DataColumnFixOf {}; template <> class DataColumnOf : public DataColumnFixOf {}; template <> class DataColumnOf : public DataColumnVarSimple { public: virtual Value Get(int index) { return GetRaw(index); } virtual void Set(int index, Value value) { SetRaw(index, value); } }; template <> class DataColumnOf : public DataColumnFixSimple { public: virtual Value Get(int index); virtual void Set(int index, Value value); }; template <> class DataColumnOf