mirror of
https://github.com/levinsv/pgadmin3.git
synced 2026-05-15 06:05:49 -06:00
337 lines
6.8 KiB
C++
337 lines
6.8 KiB
C++
//////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// pgAdmin III - PostgreSQL Tools
|
|
//
|
|
// Copyright (C) 2002 - 2016, The pgAdmin Development Team
|
|
// This software is released under the PostgreSQL Licence
|
|
//
|
|
// utffile.cpp - file io with BOM interpretation
|
|
//
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "pgAdmin3.h"
|
|
#include "utils/utffile.h"
|
|
#include "utils/sysLogger.h"
|
|
|
|
// Converter instance selected by DetermineConversion() for wxFONTENCODING_UTF16BE files.
wxMBConvUTF16BE wxConvUTF16BE;
|
|
// Converter instance selected by DetermineConversion() for wxFONTENCODING_UTF16LE files.
wxMBConvUTF16LE wxConvUTF16LE;
|
|
// Converter instance selected by DetermineConversion() for wxFONTENCODING_UTF32BE files.
wxMBConvUTF32BE wxConvUTF32BE_unik;
|
|
// Converter instance selected by DetermineConversion() for wxFONTENCODING_UTF32LE files.
wxMBConvUTF32LE wxConvUTF32LE_unik;
|
|
|
|
// These are the magic byte sequences (byte order marks) identifying a Unicode file
|
|
// UTF-8 byte order mark: bytes EF BB BF (octal escapes), 3 bytes long.
#define BOM_UTF8 "\357\273\277"
|
|
// UTF-16 little-endian byte order mark: bytes FF FE, 2 bytes long.
#define BOM_UTF16LE "\377\376"
|
|
// UTF-16 big-endian byte order mark: bytes FE FF, 2 bytes long.
#define BOM_UTF16BE "\376\377"
|
|
// UTF-32 little-endian byte order mark: bytes FF FE 00 00, 4 bytes long.
// Contains embedded NUL bytes, so it must always be used with an explicit length.
#define BOM_UTF32LE "\377\376\000\000"
|
|
// UTF-32 big-endian byte order mark: bytes 00 00 FE FF, 4 bytes long.
// Starts with NUL bytes, so it must always be used with an explicit length.
#define BOM_UTF32BE "\000\000\376\377"
|
|
|
|
// Default constructor: creates an unopened file object. No encoding is
// evaluated here; that happens in Open(), Create() or Attach().
wxUtfFile::wxUtfFile()
	: wxFile()
{
}
|
|
|
|
|
|
// Constructs the object and immediately opens szFileName in the given mode
// with default access rights (wxS_DEFAULT). The result of Open() is not
// reported here.
wxUtfFile::wxUtfFile(const wxChar *szFileName, OpenMode mode, wxFontEncoding encoding)
	: wxFile()
{
	// Remember the name; Read() uses it in its warning message.
	m_strFileName = szFileName;

	Open(szFileName, mode, wxS_DEFAULT, encoding);
}
|
|
|
|
|
|
// Constructs the object around an already opened file descriptor and
// inspects the start of the file for a BOM. The "needs BOM" result of
// EvalBOM() is not used here.
wxUtfFile::wxUtfFile(int fd, wxFontEncoding encoding)
	: wxFile(fd)
{
	EvalBOM(encoding);
}
|
|
|
|
|
|
// Reads up to nCount bytes from the current file position and stores the
// decoded text in str.
//
// str     receives the text; left untouched if nothing could be read.
// nCount  number of bytes to read; (off_t)-1 means "everything from the
//         current position up to the end of the file".
//
// Returns the number of bytes consumed from the file. When a conversion is
// active and the data ends in the middle of a multi-byte character, up to 3
// trailing bytes are dropped, the file position is moved back by the same
// amount and the reduced count is returned. Returns 0 if nCount is 0, the
// (negative) value from wxFile::Read if that call reports one, and (off_t)-1
// if the data cannot be converted (a warning is logged when the file name is
// known).
off_t wxUtfFile::Read(wxString &str, off_t nCount)
{
	if (nCount == (off_t) - 1)
		nCount = Length() - Tell();
	if (!nCount)
		return 0;

	// 4 extra bytes so that the data is always followed by a terminator that
	// is wide enough for every supported encoding (up to UTF-32).
	char *buffer = new char[nCount + 4];
	// on some systems, len returned from wxFile::read might not reflect the number of bytes written
	// to the buffer, but the bytes read from file. In case of CR/LF translation, this is not the same.
	memset(buffer, 0, nCount + 4);
	off_t len = wxFile::Read(buffer, nCount);

	if (len >= 0)
	{
		memset(buffer + len, 0, 4);

		if (m_conversion)
		{
			int decr;
			size_t nLen = 0;

			// We are trying 4 times to convert, in case the last utf char
			// was truncated. Each failed attempt drops one trailing byte.
			for (decr = 0 ; len > 0 && decr < 4 ; decr++)
			{
				// First pass with a NULL target only computes the required length.
				nLen = m_conversion->MB2WC(NULL, buffer, 0);
				if ( nLen != (size_t) - 1 )
					break;
				len--;
				buffer[len] = 0;
			}

			if (nLen == (size_t) - 1)
			{
				if (!m_strFileName.IsEmpty())
				{
					wxLogWarning(_("The file \"%s\" could not be opened because it contains characters that could not be interpreted."), m_strFileName.c_str());
				}
				// NOTE(review): nLen is (size_t)-1 here, so this offset evaluates
				// to decr + 1 in unsigned arithmetic. Kept as in the original;
				// confirm the intended repositioning.
				Seek(decr - nLen, wxFromCurrent);

				// Release the read buffer on the error path as well; it was
				// leaked here before.
				delete[] buffer;
				return (size_t) - 1;
			}

			// Give back the bytes of the truncated trailing character so that
			// the next Read() starts with a complete character.
			if (decr)
				Seek(-decr, wxFromCurrent);

			m_conversion->MB2WC((wchar_t *)(wxChar *)wxStringBuffer(str, nLen + 1), (const char *)buffer, (size_t)(nLen + 1));
		}
		else
			// No conversion: the bytes are taken as wxChar data as they are.
			str = (wxChar *)buffer;
	}

	delete[] buffer;
	return len;
}
|
|
|
|
|
|
bool wxUtfFile::Write(const wxString &str)
|
|
{
|
|
size_t len = str.Length();
|
|
if (!len)
|
|
return true;
|
|
|
|
if (m_conversion)
|
|
{
|
|
wxWX2MBbuf buf = str.mb_str(*m_conversion);
|
|
if (!buf)
|
|
return false;
|
|
|
|
return wxFile::Write(str, *m_conversion);
|
|
}
|
|
else
|
|
return wxFile::Write(str.c_str(), len * sizeof(wxChar)) == len * sizeof(wxChar);
|
|
}
|
|
|
|
|
|
|
|
bool wxUtfFile::Create(const wxChar *szFileName, bool bOverwrite, int access, wxFontEncoding encoding)
|
|
{
|
|
if (!wxFile::Create(szFileName, bOverwrite, access))
|
|
return false;
|
|
|
|
DetermineConversion(encoding);
|
|
WriteBOM();
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool wxUtfFile::Open(const wxChar *szFileName, OpenMode mode, int access, wxFontEncoding encoding)
|
|
{
|
|
if (!wxFile::Open(szFileName, mode, access))
|
|
return false;
|
|
|
|
|
|
m_bomOffset = 0;
|
|
DetermineConversion(wxFONTENCODING_SYSTEM);
|
|
|
|
if (mode != write && EvalBOM(encoding))
|
|
{
|
|
// File freshly created, need BOM
|
|
if (mode != read)
|
|
WriteBOM();
|
|
}
|
|
else if (mode != read && encoding != wxFONTENCODING_DEFAULT)
|
|
{
|
|
// force BOM to a specific value
|
|
switch (encoding)
|
|
{
|
|
case wxFONTENCODING_UTF8:
|
|
case wxFONTENCODING_UTF16BE:
|
|
case wxFONTENCODING_UTF16LE:
|
|
case wxFONTENCODING_UTF32BE:
|
|
case wxFONTENCODING_UTF32LE:
|
|
break;
|
|
default:
|
|
encoding = wxFONTENCODING_SYSTEM;
|
|
break;
|
|
}
|
|
if (encoding != m_encoding)
|
|
{
|
|
DetermineConversion(encoding);
|
|
WriteBOM();
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
void wxUtfFile::Attach(int fd, wxFontEncoding encoding)
|
|
{
|
|
wxFile::Attach(fd);
|
|
EvalBOM(encoding);
|
|
}
|
|
|
|
|
|
// Moves the file pointer, hiding the BOM from the caller.
//
// ofs   offset to seek to; for wxFromStart it is relative to the first byte
//       after the BOM, for all other modes it is passed on unchanged.
// mode  seek origin.
//
// Returns the new position relative to the first byte after the BOM, or
// wxInvalidOffset if the underlying wxFile::Seek fails.
off_t wxUtfFile::Seek(off_t ofs, wxSeekMode mode)
{
	// Only absolute offsets need to be shifted past the BOM.
	off_t pos = wxFile::Seek(mode == wxFromStart ? ofs + m_bomOffset : ofs, mode);

	// Translate the raw file position back into a BOM-relative one exactly
	// once, and only for a valid position so that wxInvalidOffset is passed
	// through unmodified.
	if (pos != wxInvalidOffset)
		pos -= m_bomOffset;

	return pos;
}
|
|
|
|
|
|
|
|
// Returns the encoding in use for the file, or wxFONTENCODING_DEFAULT when
// no file is currently open.
wxFontEncoding wxUtfFile::GetEncoding()
{
	return IsOpened() ? m_encoding : wxFONTENCODING_DEFAULT;
}
|
|
|
|
|
|
void wxUtfFile::WriteBOM()
|
|
{
|
|
if (!settings->GetWriteBOM())
|
|
return;
|
|
|
|
wxFile::Seek(0);
|
|
switch (m_encoding)
|
|
{
|
|
case wxFONTENCODING_UTF8:
|
|
wxFile::Write(BOM_UTF8, 3);
|
|
m_bomOffset = 3;
|
|
break;
|
|
case wxFONTENCODING_UTF16BE:
|
|
wxFile::Write(BOM_UTF16BE, 2);
|
|
m_bomOffset = 2;
|
|
break;
|
|
case wxFONTENCODING_UTF16LE:
|
|
wxFile::Write(BOM_UTF16LE, 2);
|
|
m_bomOffset = 2;
|
|
break;
|
|
case wxFONTENCODING_UTF32BE:
|
|
wxFile::Write(BOM_UTF32LE, 4);
|
|
m_bomOffset = 4;
|
|
break;
|
|
case wxFONTENCODING_UTF32LE:
|
|
wxFile::Write(BOM_UTF32LE, 4);
|
|
m_bomOffset = 4;
|
|
break;
|
|
default:
|
|
m_bomOffset = 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
// Sets m_encoding and the matching m_conversion object for the requested
// encoding. Only UTF-8, UTF-16 (BE/LE) and UTF-32 (BE/LE) are accepted as
// they are; every other value is mapped to wxFONTENCODING_SYSTEM. If the
// resulting encoding equals wxFONTENCODING_UNICODE, no conversion object is
// used (m_conversion is cleared).
void wxUtfFile::DetermineConversion(wxFontEncoding encoding)
{
	// we know these
	const bool supported =
	    encoding == wxFONTENCODING_UTF8 ||
	    encoding == wxFONTENCODING_UTF16BE ||
	    encoding == wxFONTENCODING_UTF16LE ||
	    encoding == wxFONTENCODING_UTF32BE ||
	    encoding == wxFONTENCODING_UTF32LE;
	m_encoding = supported ? encoding : wxFONTENCODING_SYSTEM;

	if (m_encoding == wxFONTENCODING_UNICODE)
	{
		m_conversion = 0;
		return;
	}

	if (m_encoding == wxFONTENCODING_SYSTEM)
		m_conversion = &wxConvLibc;
	else if (m_encoding == wxFONTENCODING_UTF8)
		m_conversion = &wxConvUTF8;
	else if (m_encoding == wxFONTENCODING_UTF16BE)
		m_conversion = &wxConvUTF16BE;
	else if (m_encoding == wxFONTENCODING_UTF16LE)
		m_conversion = &wxConvUTF16LE;
	else if (m_encoding == wxFONTENCODING_UTF32BE)
		m_conversion = &wxConvUTF32BE_unik;
	else if (m_encoding == wxFONTENCODING_UTF32LE)
		m_conversion = &wxConvUTF32LE_unik;
}
|
|
|
|
|
|
// Reads the first bytes of the file, detects a byte order mark and sets up
// m_bomOffset, m_encoding and m_conversion accordingly.
//
// encoding  encoding to use when the file carries no BOM; a detected BOM
//           always takes precedence.
//
// Returns true if the file is empty, i.e. a BOM still needs to be written,
// false otherwise.
bool wxUtfFile::EvalBOM(wxFontEncoding encoding)
{
	// returns true, if BOM needs to be written.

	char bombuf[4] = "###";
	long len = wxFile::Read(bombuf, 4);

	// Each comparison is limited to the bytes that were really read. Without
	// the length checks the filler bytes of bombuf (including its trailing
	// NUL) take part in the comparison, so e.g. the 3-byte file FF FE 00
	// would be taken for UTF-32LE with a BOM longer than the file itself.
	// The 4-byte marks are tested first because the UTF-32LE mark starts
	// with the UTF-16LE one.
	if (len >= 4 && !memcmp(bombuf, BOM_UTF32BE, 4))
	{
		encoding = wxFONTENCODING_UTF32BE;
		m_bomOffset = 4;
	}
	else if (len >= 4 && !memcmp(bombuf, BOM_UTF32LE, 4))
	{
		encoding = wxFONTENCODING_UTF32LE;
		m_bomOffset = 4;
	}
	else if (len >= 3 && !memcmp(bombuf, BOM_UTF8, 3))
	{
		encoding = wxFONTENCODING_UTF8;
		m_bomOffset = 3;
	}
	else if (len >= 2 && !memcmp(bombuf, BOM_UTF16BE, 2))
	{
		encoding = wxFONTENCODING_UTF16BE;
		m_bomOffset = 2;
	}
	else if (len >= 2 && !memcmp(bombuf, BOM_UTF16LE, 2))
	{
		encoding = wxFONTENCODING_UTF16LE;
		m_bomOffset = 2;
	}
	else
	{
		// no encoding was found.
		m_bomOffset = 0;
	}

	DetermineConversion(encoding);

	// if this file has length 0, BOM needs to be written because it's freshly created
	if (len == 0)
		return true;
	else if (len != m_bomOffset)
		// More than the BOM was consumed: go back to the first byte after it
		// (assumes Seek()'s default mode is wxFromStart; the declaration is
		// not visible in this file).
		Seek(0);

	return false;
}
|