From 2d5b5e778543c463188ca40c0001883e68150d0f Mon Sep 17 00:00:00 2001
From: Mirek Fidler
Date: Thu, 19 Jun 2025 14:29:59 +0200
Subject: [PATCH] Core: StringsStream

---
 autotest/StringsStream/StringsStream.cpp    |  56 +++++++
 autotest/StringsStream/StringsStream.upp    |   9 ++
 uppsrc/Core/Core.upp                        |   1 +
 uppsrc/Core/StringsStream.cpp               | 161 ++++++++++++++++++++
 uppsrc/Core/Util.h                          |  63 ++++++++
 uppsrc/Core/src.tpp/StringsStream_en-us.tpp |  48 ++++++
 6 files changed, 338 insertions(+)
 create mode 100644 autotest/StringsStream/StringsStream.cpp
 create mode 100644 autotest/StringsStream/StringsStream.upp
 create mode 100644 uppsrc/Core/StringsStream.cpp
 create mode 100644 uppsrc/Core/src.tpp/StringsStream_en-us.tpp

diff --git a/autotest/StringsStream/StringsStream.cpp b/autotest/StringsStream/StringsStream.cpp
new file mode 100644
index 000000000..e9a2f13b4
--- /dev/null
+++ b/autotest/StringsStream/StringsStream.cpp
@@ -0,0 +1,56 @@
+#include <Core/Core.h>
+
+using namespace Upp;
+
+// Round-trip tests for StringsStreamOut / StringsStreamIn and for the
+// StoreAsStrings / LoadFromStrings helpers.
+CONSOLE_APP_MAIN
+{
+	StdLogSetup(LOG_COUT|LOG_FILE);
+
+	{ // no chunks at all must read as an empty stream
+		Vector<String> none;
+		StringsStreamIn ssi(none);
+		ASSERT(ssi.GetSize() == 0);
+		ASSERT(ssi.Get() < 0);
+	}
+
+	// chunk sizes 7..20, then 53, 86, ... 185
+	for(int pass = 0; pass < 10; pass++) {
+		for(int part = 7; part < 200; part += (part < 20 ? 1 : 33)) {
+			String eta;
+			StringsStreamOut sso(part);
+			for(int i = 0; i < 25773 + pass; i++) {
+				String s = AsString(pass ? Random() : i);
+				sso << s;
+				eta << s;
+			}
+
+			Vector<String> s = sso.PickResult();
+			ASSERT(Join(s, "") == eta);
+
+			StringsStreamIn ssi(s);
+
+			ASSERT(LoadStream(ssi) == eta);
+			ASSERT(ssi.Get() < 0); // reading past the last chunk must report end of data
+			DDUMP(eta.GetCount());
+		}
+	}
+
+	// serialization round trip of a large container
+	Vector<dword> data;
+	for(int i = 0; i < 256*1024*1024; i++)
+		data << Random();
+
+	Vector<String> s = StoreAsStrings(data);
+
+	DDUMP(s.GetCount());
+
+	Vector<dword> data2;
+
+	LoadFromStrings(data2, s);
+
+	ASSERT(data == data2);
+
+	LOG("=============== OK");
+}
diff --git a/autotest/StringsStream/StringsStream.upp b/autotest/StringsStream/StringsStream.upp
new file mode 100644
index 000000000..f3384ab4e
--- /dev/null
+++ b/autotest/StringsStream/StringsStream.upp
@@ -0,0 +1,9 @@
+uses
+	Core;
+
+file
+	StringsStream.cpp;
+
+mainconfig
+	"" = "";
+
diff --git a/uppsrc/Core/Core.upp b/uppsrc/Core/Core.upp
index 81f805f4c..04a4df2bf 100644
--- a/uppsrc/Core/Core.upp
+++ b/uppsrc/Core/Core.upp
@@ -84,6 +84,7 @@ file
 	Debug.cpp,
 	Util.h,
 	Ini.cpp,
+	StringsStream.cpp,
 	Util.cpp,
 	mathutil.cpp,
 	Random.cpp,
diff --git a/uppsrc/Core/StringsStream.cpp b/uppsrc/Core/StringsStream.cpp
new file mode 100644
index 000000000..352a129c9
--- /dev/null
+++ b/uppsrc/Core/StringsStream.cpp
@@ -0,0 +1,161 @@
+#include "Core.h"
+
+namespace Upp {
+
+// Creates an output stream that collects written bytes into String chunks of
+// part_size_ bytes each; only the last chunk can be shorter.
+StringsStreamOut::StringsStreamOut(int part_size_)
+{
+	// With a non-positive chunk size _Put could never copy anything and would
+	// keep appending empty chunks forever, so use the smallest workable size.
+	part_size = part_size_ > 0 ? part_size_ : 1;
+	ResetBuffer();
+	style = STRM_WRITE;
+}
+
+// Sizes wdata to part_size bytes and points the write window at it.
+void StringsStreamOut::ResetBuffer()
+{
+	wdata.SetCount(part_size);
+	buffer = (byte *)wdata.begin();
+	wrlim = (byte *)wdata.end();
+	ptr = buffer;
+}
+
+// Bytes written so far: the finished chunks (each exactly part_size long) plus
+// the bytes pending in the current buffer.
+int64 StringsStreamOut::GetSize() const
+{
+	return ptr - buffer + (int64)part.GetCount() * part_size;
+}
+
+bool StringsStreamOut::IsOpen() const
+{
+	return true;
+}
+
+// Writes the low byte of w by forwarding it to the block variant.
+void StringsStreamOut::_Put(int w)
+{
+	byte h = w;
+	_Put(&h, 1);
+}
+
+// Appends sz bytes from data; whenever the buffer fills up while input remains,
+// it is stored as a finished chunk and a new buffer is started.
+void StringsStreamOut::_Put(const void *data, dword sz)
+{
+	if(!sz)
+		return;
+	const char *p = (const char *)data;
+	for(;;) {
+		dword psz = min(dword(wrlim - ptr), sz); // what still fits into this chunk
+		if(psz) {
+			memcpy8(ptr, p, psz);
+			ptr += psz;
+			sz -= psz;
+		}
+		if(sz == 0)
+			break;
+		p += psz;
+		part.Add(wdata); // input remains, so the buffer is full here
+		ResetBuffer();
+	}
+}
+
+// Stores the partially filled buffer (if any) as the last chunk and hands all
+// chunks over to the caller. Meant to be called once per instance.
+Vector<String> StringsStreamOut::PickResult()
+{
+	if(ptr != buffer) {
+		wdata.SetLength(ptr - buffer);
+		part.Add(wdata);
+	}
+	return pick(part);
+}
+
+// Creates an input stream over the concatenation of part chunks. Only the
+// reference is kept, so part has to outlive the stream.
+StringsStreamIn::StringsStreamIn(const Vector<String>& part)
+:	part(part)
+{
+	i = 0;
+	ResetBuffer();
+	size = 0;
+	for(const String& s : part)
+		size += s.GetCount();
+	style = STRM_READ;
+}
+
+// Points the read window at chunk i. Callers advance i one past the last chunk
+// at the end of data (and i == 0 is already past the end when there are no
+// chunks), so that case must yield an empty window instead of indexing part.
+void StringsStreamIn::ResetBuffer()
+{
+	if(i < part.GetCount()) {
+		ptr = buffer = (byte *)part[i].begin();
+		rdlim = (byte *)part[i].end();
+	}
+	else
+		ptr = buffer = rdlim = NULL;
+}
+
+// Returns the next byte without consuming it, moving on to the following chunk
+// when the current one is exhausted; -1 at the end of data.
+int StringsStreamIn::_Term()
+{
+	while(i < part.GetCount()) {
+		if(ptr < rdlim)
+			return *ptr;
+		i++;
+		ResetBuffer();
+	}
+	return -1;
+}
+
+// Returns and consumes the next byte; -1 at the end of data.
+int StringsStreamIn::_Get()
+{
+	int c = _Term();
+	if(c >= 0)
+		ptr++;
+	return c;
+}
+
+// Copies up to size bytes into data, continuing across chunk boundaries.
+// Returns the number of bytes actually copied.
+dword StringsStreamIn::_Get(void *data, dword size)
+{
+	dword sz = size;
+	char *p = (char *)data;
+	for(;;) {
+		dword psz = min(dword(rdlim - ptr), sz); // what this chunk can still supply
+		if(psz) {
+			memcpy8(p, ptr, psz);
+			ptr += psz;
+			sz -= psz;
+		}
+		if(sz == 0)
+			return size;
+		p += psz;
+		if(i < part.GetCount()) {
+			i++;
+			ResetBuffer();
+		}
+		else
+			return size - sz; // out of chunks: short read
+	}
+}
+
+// Total number of bytes in all chunks, computed once by the constructor.
+int64 StringsStreamIn::GetSize() const
+{
+	return size;
+}
+
+bool StringsStreamIn::IsOpen() const
+{
+	return true;
+}
+
+}
diff --git a/uppsrc/Core/Util.h b/uppsrc/Core/Util.h
index 5ded3b198..3a33f01df 100644
--- a/uppsrc/Core/Util.h
+++ b/uppsrc/Core/Util.h
@@ -376,6 +376,54 @@ public:
 	String GetKey(int groupIndex, int keyIndex) { return settings[groupIndex].GetKey(keyIndex); }
 };
 
+// ------------------- Multipart memory streams --------------
+
+// Output stream collecting written data into String chunks of part_size bytes
+// each; PickResult returns the chunks.
+class StringsStreamOut : public Stream {
+protected:
+	virtual void _Put(int w);
+	virtual void _Put(const void *data, dword size);
+
+private:
+	int part_size; // size of a full chunk in bytes
+	StringBuffer wdata; // chunk currently being filled
+	Vector<String> part; // finished chunks
+
+	void ResetBuffer();
+
+public:
+	virtual int64 GetSize() const;
+	virtual bool IsOpen() const;
+
+	Vector<String> PickResult();
+
+	StringsStreamOut(int part_size = 4096*1024);
+};
+
+// Input stream reading the concatenation of String chunks. It keeps only a
+// reference to the chunks, so they have to outlive the stream.
+class StringsStreamIn : public Stream {
+protected:
+	virtual int _Term();
+	virtual int _Get();
+	virtual dword _Get(void *data, dword size);
+
+public:
+	virtual int64 GetSize() const;
+	virtual bool IsOpen() const;
+
+private:
+	const Vector<String>& part; // chunks being read, not owned
+	int i; // index of the chunk being read
+	int64 size; // total number of bytes in all chunks
+
+	void ResetBuffer();
+
+public:
+	StringsStreamIn(const Vector<String>& part);
+};
+
 // ------------------- Advanced streaming --------------------
 
 void CheckedSerialize(const Event<Stream&> serialize, Stream& stream, int version = Null);
@@ -418,6 +466,21 @@ bool LoadFromString(T& x, const String& s) {
 	return Load(x, ss);
 }
 
+// Stores x with Store into a StringsStreamOut and returns the resulting chunks.
+template <class T>
+Vector<String> StoreAsStrings(T& x) {
+	StringsStreamOut ss;
+	Store(x, ss);
+	return ss.PickResult();
+}
+
+// Loads x with Load from the chunks in s; returns the result of Load.
+template <class T>
+bool LoadFromStrings(T& x, const Vector<String>& s) {
+	StringsStreamIn ss(s);
+	return Load(x, ss);
+}
+
 void RegisterGlobalConfig(const char *name);
 void RegisterGlobalSerialize(const char *name, Event<Stream&> WhenSerialize);
 void RegisterGlobalConfig(const char *name, Event<> WhenFlush);
diff --git a/uppsrc/Core/src.tpp/StringsStream_en-us.tpp b/uppsrc/Core/src.tpp/StringsStream_en-us.tpp
new file mode 100644
index 000000000..c353035e6
--- /dev/null
+++ b/uppsrc/Core/src.tpp/StringsStream_en-us.tpp
@@ -0,0 +1,48 @@
+topic "StringsStream[In|Out]";
+[i448;a25;kKO9;2 $$1,0#37138531426314131252341829483380:class]
+[l288;2 $$2,2#27521748481378242620020725143825:desc]
+[0 $$3,0#96390100711032703541132217272105:end]
+[H6;0 $$4,0#05600065144404261032431302351956:begin]
+[i448;a25;kKO9;2 $$5,0#37138531426314131252341829483370:item]
+[l288;a4;*@5;1 $$6,6#70004532496200323422659154056402:requirement]
+[l288;i1121;b17;O9;~~~.1408;2 $$7,0#10431211400427159095818037425705:param]
+[i448;b42;O9;2 $$8,8#61672508125594000341940100500538:tparam]
+[b42;2 $$9,9#13035079074754324216151401829390:normal]
+[2 $$0,0#00000000000000000000000000000000:Default]
+[{_} 
+[ {{10000@(113.42.0) [s0;%% [*@7;4 StringsStreamOut]]}}&]
+[s1;@(0.0.255)3 &]
+[s1;:Upp`:`:StringsStreamOut: [@(0.0.255)3 class][3 ][*3 StringsStreamOut][3 
+][@(0.0.255)3 :][3 ][@(0.0.255)3 public][3 Stream]&]
+[s2; [3 Output stream that stores data into Vector<String> output chunks. 
+This is useful when handling very large serializations `- it 
+allows output to be > 2GB (which is String hard size limit because 
+of optimisation concerns) but also splits allocation into smaller 
+chunks which is potentially faster for repeated operation.]&]
+[s3; &]
+[s0;i448;a25;kKO9;:noref:@(0.0.255) &]
+[ {{10000F(128)G(128)@1 [s0;%% [* Public Method List]]}}&]
+[s3; &]
+[s5;:Upp`:`:StringsStreamOut`:`:StringsStreamOut`(int`): [* StringsStreamOut]([@(0.0.255) i
+nt] [*@3 part`_size] [@(0.0.255) `=] [@3 4096] [@(0.0.255) `*][@3 1024])&]
+[s2;%% Constructor `- [*@3 pa][%-*@3 rt`_size] is the size of data chunk.&]
+[s3; &]
+[s4; &]
+[s5;:Upp`:`:StringsStreamOut`:`:PickResult`(`): Vector<String> [* PickResult]()&]
+[s2;%% Returns the output data. Can be called just once for any instance.&]
+[s3; &]
+[s0;%% &]
+[ {{10000@(113.42.0) [s0;%% [*@7;4 StringsStreamIn]]}}&]
+[s3; &]
+[s1;:Upp`:`:StringsStreamIn: [@(0.0.255)3 class][3 ][*3 StringsStreamIn][3 
+][@(0.0.255)3 :][3 ][@(0.0.255)3 public][3 Stream]&]
+[s2;%% Input stream corresponding to StringsStreamOut `- reads data 
+from multiple chunks.&]
+[s0;i448;a25;kKO9;:noref:@(0.0.255) &]
+[ {{10000F(128)G(128)@1 [s0;%% [* Public Method List]]}}&]
+[s3; &]
+[s5;:Upp`:`:StringsStreamIn`:`:StringsStreamIn`(const Vector<String>`&`): [* StringsStreamIn]([@(0.0.255) c
+onst] Vector<String>[@(0.0.255) `&] [*@3 part])&]
+[s2;%% Creates input stream for [%-*@3 part] data chunks.&]
+[s3; &]
+[s0;%% ]]
\ No newline at end of file