VideoTools
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
gconvert_unix.cpp
1 //
2 // Copyright (C) 2017 Graeme Walker
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 // ===
17 //
18 // gconvert_unix.cpp
19 //
20 
21 #include "gdef.h"
22 #include "gconvert.h"
23 #include "gstr.h"
24 #include "glog.h"
25 #include "gassert.h"
26 #include <vector>
27 #include <iconv.h>
28 
29 /// \class G::ConvertImp
30 /// A pimple-pattern implementation class for G::Convert.
31 ///
33 {
34 public:
35  ConvertImp( const std::string & to_code , const std::string & from_code ) ;
36  ~ConvertImp() ;
37  std::string narrow( const std::wstring & s , const std::string & context ) ;
38  std::wstring widen( const std::string & s , const std::string & context ) ;
39  static ConvertImp & utf8_to_utf16() ;
40  static ConvertImp & ansi_to_utf16() ;
41  static ConvertImp & utf16_to_utf8() ;
42  static ConvertImp & utf16_to_ansi() ;
43 
44 private:
45  ConvertImp( const ConvertImp & ) ;
46  void operator=( const ConvertImp & ) ;
47  void reset() ;
48  static std::wstring to_wstring( const std::vector<char> & ) ;
49  static void from_wstring( std::vector<char> & , const std::wstring & ) ;
50  static size_t call( size_t (*fn)(iconv_t,const char**,size_t*,char**,size_t*) ,
51  iconv_t m , const char ** inbuf , size_t * inbytesleft ,
52  char ** outbuf , size_t * outbytesleft ) ;
53  static size_t call( size_t (*fn)(iconv_t,char**,size_t*,char**,size_t*) ,
54  iconv_t m , const char ** inbuf , size_t * inbytesleft ,
55  char ** outbuf , size_t * outbytesleft ) ;
56 
57 private:
58  iconv_t m ;
59 } ;
60 
61 G::ConvertImp::ConvertImp( const std::string & to_code , const std::string & from_code )
62 {
63  m = ::iconv_open( to_code.c_str() , from_code.c_str() ) ;
64  if( m == reinterpret_cast<iconv_t>(-1) )
65  throw G::Convert::Error( "iconv_open failed for " + from_code + " -> " + to_code ) ;
66 }
67 
68 G::ConvertImp::~ConvertImp()
69 {
70  ::iconv_close( m ) ;
71 }
72 
73 void G::ConvertImp::reset()
74 {
75  ::iconv( m , nullptr , nullptr , nullptr , nullptr ) ;
76 }
77 
78 size_t G::ConvertImp::call( size_t (*fn)(iconv_t,const char**,size_t*,char**,size_t*) ,
79  iconv_t m , const char ** inbuf , size_t * inbytesleft ,
80  char ** outbuf , size_t * outbytesleft )
81 {
82  return (*fn)( m , inbuf , inbytesleft , outbuf , outbytesleft ) ;
83 }
84 
85 size_t G::ConvertImp::call( size_t (*fn)(iconv_t,char**,size_t*,char**,size_t*) ,
86  iconv_t m , const char ** inbuf , size_t * inbytesleft ,
87  char ** outbuf , size_t * outbytesleft )
88 {
89  char * p = const_cast<char*>(*inbuf) ;
90  size_t rc = (*fn)( m , &p , inbytesleft , outbuf , outbytesleft ) ;
91  *inbuf = p ;
92  return rc ;
93 }
94 
95 std::wstring G::ConvertImp::widen( const std::string & s , const std::string & context )
96 {
97  reset() ;
98 
99  // in-buffer
100  const char * in_p_end = s.data() + s.length() ;
101  const char * in_p_start = s.data() ;
102  size_t in_n_start = s.size() ;
103  const char * in_p = in_p_start ;
104  size_t in_n = in_n_start ;
105 
106  // out-buffer
107  std::vector<char> out_buffer( 10U + s.size()*4U ) ; //kiss
108  char * out_p_start = &out_buffer[0] ;
109  size_t out_n_start = out_buffer.size() ;
110  char * out_p = out_p_start ;
111  size_t out_n = out_n_start ;
112 
113  // iconv()
114  //G_DEBUG( "G::ConvertImp::widen: in...\n" << G::hexdump<16>(in_p_start,in_p_end) ) ;
115  size_t rc = call( ::iconv , m , &in_p , &in_n , &out_p , &out_n ) ;
116  const size_t e = static_cast<size_t>(ssize_t(-1)) ;
117  if( rc == e || in_p != in_p_end || out_n > out_n_start )
118  throw G::Convert::Error( "iconv failed" + std::string(context.empty()?"":": ") + context ) ;
119  //G_DEBUG( "G::ConvertImp::widen: out...\n" << G::hexdump<16>(out_p_start,out_p) ) ;
120 
121  out_buffer.resize( out_n_start - out_n ) ;
122  return to_wstring( out_buffer ) ;
123 }
124 
125 std::string G::ConvertImp::narrow( const std::wstring & s , const std::string & context )
126 {
127  reset() ;
128 
129  // in-buffer
130  std::vector<char> in_buffer ;
131  from_wstring( in_buffer , s ) ;
132  const char * in_p_end = &in_buffer[0] + in_buffer.size() ;
133  const char * in_p_start = &in_buffer[0] ;
134  size_t in_n_start = in_buffer.size() ;
135  const char * in_p = in_p_start ;
136  size_t in_n = in_n_start ;
137 
138  // out-buffer
139  std::vector<char> out_buffer( 10U + in_buffer.size()*4U ) ; //kiss
140  char * out_p_start = &out_buffer[0] ;
141  size_t out_n_start = out_buffer.size() ;
142  char * out_p = out_p_start ;
143  size_t out_n = out_n_start ;
144 
145  // iconv()
146  //G_DEBUG( "G::ConvertImp::narrow: in...\n" << G::hexdump<16>(in_p_start,in_p_end) ) ;
147  size_t rc = call( ::iconv , m , &in_p , &in_n , &out_p , &out_n ) ;
148  const size_t e = static_cast<size_t>(ssize_t(-1)) ;
149  if( rc == e || in_p != in_p_end || out_n > out_n_start )
150  throw G::Convert::Error( "iconv failed" + std::string(context.empty()?"":": ") + context ) ;
151  //G_DEBUG( "G::ConvertImp::narrow: out...\n" << G::hexdump<16>(out_p_start,out_p) ) ;
152 
153  return std::string( &out_buffer[0] , &out_buffer[0] + (out_n_start-out_n) ) ;
154 }
155 
156 void G::ConvertImp::from_wstring( std::vector<char> & buffer , const std::wstring & s )
157 {
158  // wchar_t is not necessarily two bytes, so do it long-hand
159  buffer.reserve( s.size() + 2U ) ;
160  buffer.push_back( 0xff ) ;
161  buffer.push_back( 0xfe ) ;
162  for( size_t i = 0U ; i < s.size() ; i++ )
163  {
164  unsigned int n = s.at(i) ;
165  buffer.push_back( n & 0xff ) ;
166  buffer.push_back( n >> 8U ) ;
167  }
168 }
169 
170 std::wstring G::ConvertImp::to_wstring( const std::vector<char> & buffer )
171 {
172  // wchar_t is not necessarily two bytes, so do it long-hand
173  std::wstring result ;
174  result.reserve( buffer.size() ) ;
175  const unsigned char * p = reinterpret_cast<const unsigned char*>(&buffer[0]) ;
176  const unsigned char * end = p + buffer.size() ;
177  bool bom_ff_fe = buffer.size() >= 2U && p[0] == 0xff && p[1] == 0xfe ;
178  bool bom_fe_ff = buffer.size() >= 2U && p[0] == 0xfe && p[1] == 0xff ;
179  if( bom_ff_fe || bom_fe_ff ) p += 2 ;
180  int hi = bom_fe_ff ? 0 : 1 ;
181  int lo = bom_fe_ff ? 1 : 0 ;
182  for( ; (p+1) < end ; p += 2 )
183  {
184  wchar_t w = static_cast<wchar_t>( ( static_cast<unsigned int>(p[hi]) << 8 ) | p[lo] ) ;
185  result.append( 1U , w ) ;
186  }
187  return result ;
188 }
189 
190 G::ConvertImp & G::ConvertImp::utf16_to_utf8()
191 {
192  static ConvertImp c( "UTF-8" , "UTF-16" ) ;
193  return c ;
194 }
195 
196 G::ConvertImp & G::ConvertImp::utf16_to_ansi()
197 {
198  static ConvertImp c( "ISO-8859-15" , "UTF-16" ) ;
199  return c ;
200 }
201 
202 G::ConvertImp & G::ConvertImp::utf8_to_utf16()
203 {
204  static ConvertImp c( "UTF-16" , "UTF-8" ) ;
205  return c ;
206 }
207 
208 G::ConvertImp & G::ConvertImp::ansi_to_utf16()
209 {
210  static ConvertImp c( "UTF-16" , "ISO-8859-15" ) ;
211  return c ;
212 }
213 
214 // ==
215 
216 std::wstring G::Convert::widen( const std::string & s , bool utf8 , const std::string & context )
217 {
218  if( s.empty() )
219  {
220  return std::wstring() ;
221  }
222  else if( utf8 )
223  {
224  return ConvertImp::utf8_to_utf16().widen( s , context ) ;
225  }
226  else
227  {
228  return ConvertImp::ansi_to_utf16().widen( s , context ) ;
229  }
230 }
231 
232 std::string G::Convert::narrow( const std::wstring & s , bool utf8 , const std::string & context )
233 {
234  if( s.empty() )
235  {
236  return std::string() ;
237  }
238  else if( utf8 )
239  {
240  return ConvertImp::utf16_to_utf8().narrow( s , context ) ;
241  }
242  else
243  {
244  return ConvertImp::utf16_to_ansi().narrow( s , context ) ;
245  }
246 }
247 
A pimple-pattern implementation class for G::Convert.