VideoTools
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
ghttpclientparser.cpp
Go to the documentation of this file.
1 //
2 // Copyright (C) 2017 Graeme Walker
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 // ===
17 //
18 // ghttpclientparser.cpp
19 //
20 
21 #include "gdef.h"
22 #include "gnet.h"
23 #include "ghttpclientprotocol.h"
24 #include "gassert.h"
25 #include "gstr.h"
26 #include "gurl.h"
27 #include "gbase64.h"
28 #include "gmd5.h"
29 #include "glimits.h"
30 #include "ghexdump.h"
31 #include "glog.h"
32 #include <algorithm> // std::search()
33 #include <stdexcept>
34 
35 GNet::HttpClientParser::HttpClientParser( const std::string & protocol ) :
36  m_sep(std::string::npos)
37 {
38  G_ASSERT( protocol.empty() || protocol.size() == 4U ) ;
39  m_http = protocol.empty() ? "HTTP/1." : (protocol+"/1.") ;
40  m_data.reserve( 100000U ) ;
41 }
42 
43 void GNet::HttpClientParser::apply( const char * p , size_t n )
44 {
45  m_data.append( p , n ) ;
46  if( m_data.size() > G::limits::net_file_limit )
47  throw std::runtime_error( "receive buffer size limit exceeded" ) ;
48 
49  // look for a blank line separator and parse the header when found
50  if( m_sep == std::string::npos )
51  {
52  m_sep = m_data.find( "\r\n\r\n" ) ;
53  if( m_sep != std::string::npos )
54  {
55  m_hlist = headerInfo( m_data ) ;
56  m_response_status = responseStatusInfo( m_http , m_data ) ;
57  }
58  }
59 }
60 
61 GNet::HttpClientParser::List GNet::HttpClientParser::headerInfo( const std::string & data )
62 {
63  List result ;
64  typedef std::string::size_type pos_t ;
65  const pos_t npos = std::string::npos ;
66  for( pos_t pos = 0U ;; pos += 2U )
67  {
68  pos_t next_pos = data.find( "\r\n" , pos ) ;
69  if( next_pos == npos || pos == next_pos ) break ;
70  result.push_back( HeaderInfo(data.substr(pos,next_pos-pos)) ) ;
71  pos = next_pos ;
72  }
73  return result ;
74 }
75 
77 {
78  m_data.clear() ;
79  m_sep = std::string::npos ;
80  m_hlist.clear() ;
81  m_response_status = ResponseStatusInfo() ;
82 }
83 
84 GNet::HttpClientParser::ResponseStatusInfo GNet::HttpClientParser::responseStatusInfo( const std::string & http ,
85  const std::string & data )
86 {
87  // "HTTP/1.x NNN reason...\r\n"
88 
89  ResponseStatusInfo result ;
90 
91  const size_t npos = std::string::npos ;
92  size_t pos1 = data.find(' ') ;
93  size_t pos2 = (pos1==npos||(pos1+1U)==data.size()) ? npos : data.find(" ",pos1+1U) ;
94  size_t pos3 = (pos2==npos||(pos2+1U)==data.size()) ? npos : data.find("\n",pos2+1U) ;
95  if( data.find(http) == 0U && pos1 != npos && pos2 != npos && pos3 != npos )
96  {
97  std::string s1 = G::Str::trimmed( data.substr(pos1,pos2-pos1) , " " ) ;
98  std::string s2 = G::Str::trimmed( data.substr(pos2,pos3-pos2) , " \r\n" ) ;
99  result.value = ( s1.empty() || !G::Str::isUInt(s1) ) ? -1 : G::Str::toInt(s1) ;
100  result.ok = result.value >= 200 && result.value < 300 ;
101  result.unauthorised = result.value == 401 && s2 == "Unauthorized" ;
102  result.retry = result.value == 503 ? 1U : 0U ; // TODO parse out Retry-After value
103  }
104  return result ;
105 }
106 
108 {
109  return m_response_status.value ;
110 }
111 
113 {
114  return m_response_status.unauthorised ;
115 }
116 
118 {
119  return m_response_status.ok ;
120 }
121 
123 {
124  return m_response_status.retry > 0U ;
125 }
126 
128 {
129  return m_sep != std::string::npos ;
130 }
131 
133 {
134  return m_hlist.size() ;
135 }
136 
138 {
139  const size_t npos = std::string::npos ;
140  size_t start = m_data.find(" ") ;
141  size_t end = m_data.find_first_of("\n\r") ;
142  std::string s = ( start != npos && end != npos && end > start ) ? m_data.substr(start+1U,end-start) : std::string() ;
143  return G::Str::printable( G::Str::trimmed( s , G::Str::ws() ) ) ;
144 }
145 
146 GNet::HttpClientParser::HeaderInfo::HeaderInfo( const std::string & line ) :
147  m_line(line)
148 {
149  size_t pos = line.find( ":" ) ;
150  m_lhs = G::Str::trimmed( G::Str::head(m_line,pos,m_line) , " " ) ;
151  m_rhs = G::Str::trimmed( G::Str::tail(m_line,pos,std::string()) , " " ) ;
152  m_value = G::Str::trimmed( G::Str::head(m_rhs,m_rhs.find(";"),m_rhs) , " " ) ;
153  G::Str::splitIntoTokens( m_rhs , m_tokens , ";, " ) ;
154 }
155 
156 bool GNet::HttpClientParser::HeaderInfo::match( const std::string & key ) const
157 {
158  return G::Str::imatch( key , m_lhs ) ;
159 }
160 
161 int GNet::HttpClientParser::HeaderInfo::value( int default_ ) const
162 {
163  return
164  !m_value.empty() && G::Str::isUInt(m_value) ?
165  G::Str::toInt(m_value) :
166  default_ ;
167 }
168 
169 GNet::HttpClientParser::List::const_iterator GNet::HttpClientParser::hfind( const List & hlist ,
170  const std::string & key )
171 {
172  for( List::const_iterator p = hlist.begin() ; p != hlist.end() ; ++p )
173  {
174  if( (*p).match(key) )
175  return p ;
176  }
177  return hlist.end() ;
178 }
179 
180 GNet::HttpClientParser::List::const_iterator GNet::HttpClientParser::hfind( const List & hlist , size_t index )
181 {
182  if( index >= hlist.size() ) throw std::range_error( "invalid http header index" ) ;
183  return hlist.begin() + index ;
184 }
185 
186 std::vector<size_t> GNet::HttpClientParser::headers( const std::string & key ) const
187 {
188  std::vector<size_t> result ;
189  size_t i = 0U ;
190  for( List::const_iterator p = m_hlist.begin() ; p != m_hlist.end() ; ++p , i++ )
191  {
192  if( (*p).match(key) )
193  result.push_back( i ) ;
194  }
195  return result ;
196 }
197 
198 std::string GNet::HttpClientParser::header( const std::string & key , const std::string & default_ ) const
199 {
200  List::const_iterator p = hfind( m_hlist , key ) ;
201  return p == m_hlist.end() ? default_ : (*p).m_rhs ;
202 }
203 
204 std::string GNet::HttpClientParser::header( size_t index , const std::string & default_ ) const
205 {
206  List::const_iterator p = hfind( m_hlist , index ) ;
207  return p == m_hlist.end() ? default_ : (*p).m_rhs ;
208 }
209 
210 int GNet::HttpClientParser::headerValue( const std::string & key , int default_ ) const
211 {
212  List::const_iterator p = hfind( m_hlist , key ) ;
213  return p == m_hlist.end() ? default_ : (*p).value(default_) ;
214 }
215 
216 int GNet::HttpClientParser::headerValue( size_t index , int default_ ) const
217 {
218  List::const_iterator p = hfind( m_hlist , index ) ;
219  return p == m_hlist.end() ? default_ : (*p).value(default_) ;
220 }
221 
223 {
224  return static_cast<size_t>( headerValue("Content-Length",0) ) ;
225 }
226 
227 std::string GNet::HttpClientParser::headerWord( const std::string & key , const std::string & default_ ) const
228 {
229  List::const_iterator p = hfind( m_hlist , key ) ;
230  return p == m_hlist.end() || (*p).m_tokens.empty() ? default_ : (*p).m_tokens.at(0U) ;
231 }
232 
233 std::string GNet::HttpClientParser::headerWord( size_t index , const std::string & default_ ) const
234 {
235  List::const_iterator p = hfind( m_hlist , index ) ;
236  return p == m_hlist.end() || (*p).m_tokens.empty() ? default_ : (*p).m_tokens.at(0U) ;
237 }
238 
239 std::string GNet::HttpClientParser::headerAttribute( const std::string & key , const std::string & attribute_key ,
240  const std::string & default_ ) const
241 {
242  List::const_iterator p = hfind( m_hlist , key ) ;
243  return p == m_hlist.end() ? default_ : headerAttribute( p , attribute_key , default_ ) ;
244 }
245 
246 std::string GNet::HttpClientParser::headerAttribute( size_t index , const std::string & attribute_key ,
247  const std::string & default_ ) const
248 {
249  List::const_iterator p = hfind( m_hlist , index ) ;
250  return p == m_hlist.end() ? default_ : headerAttribute( p , attribute_key , default_ ) ;
251 }
252 
253 std::string GNet::HttpClientParser::headerAttribute( List::const_iterator p , const std::string & attribute_key ,
254  const std::string & default_ ) const
255 {
256  const G::StringArray & attributes = (*p).m_tokens ;
257  for( size_t i = 0U ; i < attributes.size() ; i++ )
258  {
259  if( attributes[i].find(attribute_key+"=") == 0U )
260  {
261  std::string result = attributes[i].substr(attribute_key.length()+1U) ;
262  if( result.find("\"") == 0U && result.length() >= 2U &&
263  (result.rfind("\"")+1U) == result.length() )
264  {
265  result = result.substr(1U,result.length()-2U) ;
266  }
267  return result ;
268  }
269  }
270  return default_ ;
271 }
272 
274 {
275  return headerWord( "Content-Type" ) ;
276 }
277 
279 {
280  std::string content_type = headerContentType() ;
281  bool multipart = content_type.find("multipart") == 0U ;
282  return multipart ;
283 }
284 
285 std::string GNet::HttpClientParser::headerMultipartBoundary() const
286 {
287  if( !headerMultipart() ) return std::string() ;
288  return headerAttribute( "Content-Type" , "boundary" ) ;
289 }
290 
292 {
293  return
294  m_sep != std::string::npos &&
295  m_data.size() >= ( m_sep + 4U + headerContentLength() ) ;
296 }
297 
298 std::string GNet::HttpClientParser::body() const
299 {
300  return std::string( bodyData() , bodySize() ) ;
301 }
302 
304 {
305  return m_sep == std::string::npos ? nullptr : ( m_data.data() + m_sep + 4U ) ;
306 }
307 
309 {
310  return m_sep == std::string::npos ? 0U : ( m_data.size() - m_sep - 4U ) ;
311 }
312 
314 {
315  PartInfo part_info = partInfo() ;
316  m_data.erase( part_info.start , part_info.headersize+part_info.bodysize ) ;
317 }
318 
320 {
321  PartInfo part_info = partInfo() ;
322  return part_info.start != 0U && m_data.size() >= (part_info.start+part_info.headersize+part_info.bodysize) ;
323 }
324 
325 GNet::HttpClientParser::PartInfo GNet::HttpClientParser::partInfo() const
326 {
327  PartInfo part_info ;
328  std::string::size_type npos = std::string::npos ;
329  std::string::size_type pos1 = m_data.find( "\r\n--" + headerMultipartBoundary() + "\r\n" ) ;
330  std::string::size_type pos2 = G::Str::ifind( m_data , "\nContent-Length: " , pos1 ) ;
331  std::string::size_type pos2a = G::Str::ifind( m_data , "\nContent-Type: " , pos1 ) ;
332  std::string::size_type pos3 = m_data.find( "\r\n" , pos2 ) ;
333  std::string::size_type pos3a = m_data.find( "\r\n" , pos2a ) ;
334  std::string::size_type pos4 = m_data.find( "\r\n\r\n" , pos2 ) ;
335  if( pos1 != npos && pos2 != npos && pos2a != npos &&
336  pos3 != npos && pos3a != npos && pos4 != npos &&
337  pos4 >= pos3 && pos4 >= pos3a )
338  {
339  std::string cl = m_data.substr( pos2+17U , pos3-pos2-17U ) ;
340  if( !cl.empty() && G::Str::isUInt(cl) )
341  {
342  part_info.start = pos1 + 2U ;
343  part_info.headersize = pos4 - part_info.start + 4U ;
344  part_info.bodysize = G::Str::toUInt(cl) ;
345  part_info.type = m_data.substr( pos2a+15U , pos3a-pos2a-15U ) ;
346  }
347  }
348  return part_info ;
349 }
350 
352 {
353  PartInfo part_info = partInfo() ;
354  return m_data.data() + part_info.start + part_info.headersize ;
355 }
356 
358 {
359  PartInfo part_info = partInfo() ;
360  return part_info.bodysize ;
361 }
362 
364 {
365  PartInfo part_info = partInfo() ;
366  return part_info.type ;
367 }
368 
369 // ==
370 
371 GNet::HttpClientParser::ResponseStatusInfo::ResponseStatusInfo()
372 {
373  ok = false ;
374  value = -1 ;
375  unauthorised = false ;
376  retry = 0U ;
377 }
378 
379 /// \file ghttpclientparser.cpp
std::string header(const std::string &header_key, const std::string &default_=std::string()) const
Returns the value of the given header.
static std::string printable(const std::string &in, char escape= '\\')
Returns a printable representation of the given input string.
Definition: gstr.cpp:663
bool gotBody() const
Returns true if the body is complete.
std::vector< size_t > headers(const std::string &header_key) const
Returns the indexes for the headers with the given key.
int headerValue(const std::string &header_key, int default_=-1) const
Returns the integer value of a numeric header.
size_t headerCount() const
Returns the number of headers.
static int toInt(const std::string &s)
Converts string 's' to an int.
Definition: gstr.cpp:368
std::string headerContentType() const
Returns the value of the "Content-Type" header.
Synopsis:
static bool imatch(const std::string &, const std::string &)
Returns true if the two strings are the same, ignoring case.
Definition: gstr.cpp:1107
std::vector< std::string > StringArray
A std::vector of std::strings.
Definition: gstrings.h:33
static void splitIntoTokens(const std::string &in, StringArray &out, const std::string &ws)
Splits the string into 'ws'-delimited tokens.
Definition: gstr.cpp:868
bool responseUnauthorised() const
Returns true for a "401 Unauthorized" response.
std::string headerWord(const std::string &header_key, const std::string &default_=std::string()) const
Returns the first part of the header with the given key.
bool gotPart() const
Returns true if a multipart part is complete.
static std::string tail(const std::string &in, std::string::size_type pos, const std::string &default_=std::string())
Returns the last part of the string after the given position.
Definition: gstr.cpp:1051
bool responseOk() const
Returns true for a 2xx response.
std::string partType() const
Returns the content-type of the part.
static unsigned int toUInt(const std::string &s)
Converts string 's' to an unsigned int.
Definition: gstr.cpp:450
bool gotHeaders() const
Returns true if headers are complete.
static std::string head(const std::string &in, std::string::size_type pos, const std::string &default_=std::string())
Returns the first part of the string up to just before the given position.
Definition: gstr.cpp:1037
void clear()
Clears the contents, returning the object to a newly-constructed state.
bool headerMultipart() const
Returns true if the main body is of type "multipart".
bool responseRetry() const
Returns true for a 503 response.
static std::string trimmed(const std::string &s, const std::string &ws)
Returns a trim()med version of s.
Definition: gstr.cpp:213
void clearPart()
Clears the current multipart body part.
size_t headerContentLength() const
Returns the value of the "Content-Length" header.
int responseValue() const
Returns the response value (eg. 200), or minus one.
size_t partSize() const
Returns the part size.
HttpClientParser(const std::string &protocol=std::string())
Constructor. The protocol defaults to "HTTP".
std::string headerAttribute(const std::string &header_key, const std::string &attribute_key, const std::string &default_=std::string()) const
Returns a named attribute of the specified header.
std::string responseSummary() const
Returns a summary of the response for debugging and error reporting.
const char * partData() const
Returns the part data.
static bool isUInt(const std::string &s)
Returns true if the string can be converted into an unsigned integer without throwing an exception...
Definition: gstr.cpp:266
std::string body() const
Returns the body data.
static std::string::size_type ifind(const std::string &s, const std::string &key, std::string::size_type pos=0U)
Does a case-insensitive std::string::find().
Definition: gstr.cpp:1112
void apply(const char *p, size_t n)
Adds some data.
const char * bodyData() const
Returns the body data.
static std::string ws()
A convenience function returning standard whitespace characters.
Definition: gstr.cpp:1027
size_t bodySize() const
Returns the body size.