VideoTools
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
gurl.cpp
Go to the documentation of this file.
1 //
2 // Copyright (C) 2017 Graeme Walker
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 // ===
17 //
18 // gurl.cpp
19 //
20 
21 #include "gdef.h"
22 #include "gurl.h"
23 #include "gstr.h"
24 #include "gassert.h"
25 #include <map>
26 #include <string>
27 
29 {
30  init( std::string() ) ;
31 }
32 
33 G::Url::Url( const std::string & url )
34 {
35  init( url ) ;
36 }
37 
38 std::string G::Url::str() const
39 {
40  return join( m_protocol , m_authorisation , address() , m_path , m_params , m_anchor ) ;
41 }
42 
43 std::string G::Url::join( const std::string & protocol , const std::string & authorisation ,
44  const std::string & address , const std::string & path , const std::string & params ,
45  const std::string & anchor )
46 {
47  std::string result ;
48  result.reserve( 6U + protocol.size() + authorisation.size() +
49  address.size() + path.size() + params.size() + anchor.size() ) ;
50 
51  if( !protocol.empty() )
52  result.append( protocol + "://" ) ;
53 
54  if( !authorisation.empty() )
55  result.append( authorisation + "@" ) ;
56 
57  if( !address.empty() )
58  result.append( address ) ;
59 
60  result.append( path ) ;
61  if( path.empty() )
62  result.append( "/" ) ;
63 
64  if( !params.empty() )
65  result.append( "?" + params ) ;
66 
67  if( !anchor.empty() )
68  result.append( "#" + anchor ) ;
69 
70  return result ;
71 }
72 
73 std::string G::Url::summary() const
74 {
75  std::ostringstream ss ;
76  ss << protocol() << "://" << address() ;
77  if( path() != "/" )
78  ss << path() ;
79  return ss.str() ;
80 }
81 
82 void G::Url::init( std::string url )
83 {
84  typedef std::string::size_type pos_t ;
85  const pos_t npos = std::string::npos ;
86 
87  pos_t pos = 0U ;
88 
89  // "protocol://user:pwd@host:port/path1/path2.ext?key1=value1&key2#anchor".
90 
91  // remove and save the protocol
92  if( (pos = url.find("://")) != npos )
93  {
94  m_protocol = G::Str::head( url , pos ) ;
95  url = G::Str::tail( url , pos+2U ) ;
96  }
97 
98  // remove and save the authorisation and address
99  pos = url.find("/") ;
100  {
101  m_address = G::Str::head( url , pos , url ) ;
102  url = G::Str::tail( url , pos ) ;
103  if( (pos = m_address.find("@")) != npos )
104  {
105  m_authorisation = G::Str::head( m_address , pos ) ;
106  m_address = G::Str::tail( m_address , pos ) ;
107  }
108  if( m_address.find('[') == 0U && m_address.find(']') != std::string::npos )
109  {
110  size_t closepos = m_address.find(']') ;
111  size_t colonpos = m_address.rfind(':') ;
112  if( colonpos != std::string::npos && colonpos < closepos )
113  colonpos = std::string::npos ;
114  m_host = G::Str::head( m_address , colonpos , m_address ) ;
115  m_port = G::Str::tail( m_address , colonpos ) ;
116  G::Str::removeAll( m_host , '[' ) ;
117  G::Str::removeAll( m_host , ']' ) ;
118  }
119  else
120  {
121  size_t colonpos = m_address.rfind(':') ;
122  m_host = G::Str::head( m_address , colonpos , m_address ) ;
123  m_port = G::Str::tail( m_address , colonpos ) ;
124  }
125  if( m_host.empty() )
126  {
127  m_port.clear() ;
128  m_address.clear() ;
129  }
130  }
131 
132  // remove and save the path, leave just the parameters and anchor
133  pos = url.find("?") ;
134  {
135  m_path = "/" + G::Str::head( url , pos , url ) ;
136  url = G::Str::tail( url , pos , "" ) ;
137 
138  m_path = G::Str::unique( m_path , '/' , '/' ) ;
139  if( m_path.size() > 1U && m_path.at(m_path.size()-1U) == '/' )
140  m_path.resize( m_path.size() - 1U ) ;
141  }
142  G_ASSERT( !m_path.empty() ) ;
143 
144  // remove and save the anchor, leave just the parameters
145  if( (pos = url.find("#")) != npos )
146  {
147  m_anchor = G::Str::tail( url , pos ) ;
148  url = G::Str::head( url , pos ) ;
149  }
150 
151  // save the params string verbatim
152  m_params = url ;
153 
154  // decode the params into a multimap
155  G::StringArray parts ;
156  G::Str::splitIntoTokens( m_params , parts , "&" ) ;
157  for( G::StringArray::iterator p = parts.begin() ; p != parts.end() ; ++p )
158  {
159  pos = (*p).find("=") ;
160  std::string key = decode( G::Str::head(*p,pos,*p) ) ;
161  std::string value = decode( G::Str::tail(*p,pos) ) ;
162  m_params_map.insert( Map::value_type(key,value) ) ;
163  }
164 }
165 
166 std::string G::Url::request() const
167 {
168  return path() + (m_params.empty()?"":"?") + m_params ;
169 }
170 
171 std::string G::Url::parameters() const
172 {
173  return m_params ;
174 }
175 
176 G::Url::Map G::Url::pmap() const
177 {
178  return m_params_map ;
179 }
180 
181 std::string G::Url::protocol() const
182 {
183  return m_protocol ;
184 }
185 
186 std::string G::Url::host() const
187 {
188  return m_host ;
189 }
190 
191 std::string G::Url::port( const std::string & default_ ) const
192 {
193  return m_port.empty() ? default_ : m_port ;
194 }
195 
196 std::string G::Url::address() const
197 {
198  return m_address ;
199 }
200 
201 std::string G::Url::address( const std::string & default_port ) const
202 {
203  G_ASSERT( !default_port.empty() ) ;
204  if( default_port.empty() ) return host() + ":0" ;
205  return G::Str::join( ":" , host() , port(default_port) ) ;
206 }
207 
208 bool G::Url::has( const std::string & key ) const
209 {
210  return m_params_map.find(key) != m_params_map.end() ;
211 }
212 
213 std::string G::Url::parameter( std::string key , std::string default_ ) const
214 {
215  Map::const_iterator p = m_params_map.find(key) ;
216  if( p == m_params_map.end() )
217  return default_ ;
218  else
219  return (*p).second ; // first one
220 }
221 
222 std::string G::Url::path() const
223 {
224  return m_path ;
225 }
226 
227 std::string G::Url::authorisation() const
228 {
229  return m_authorisation ;
230 }
231 
232 std::string G::Url::anchor() const
233 {
234  return m_anchor ;
235 }
236 
237 size_t G::Url::colonpos( const std::string & address )
238 {
239  // find trailing colon outside of any ipv6 square backets
240  const std::string::size_type npos = std::string::npos ;
241  if( address.empty() ) return npos ;
242  size_t colon = address.rfind( ":" ) ;
243  if( colon == npos ) return npos ;
244  size_t end = address.at(0U) == '[' ? address.rfind("]") : npos ;
245  return ( end != npos && colon < end ) ? npos : colon ;
246 }
247 
248 std::string G::Url::decode( const std::string & s )
249 {
250  std::string result ;
251  result.reserve( s.size() ) ;
252  size_t n = s.size() ;
253  for( size_t i = 0U ; i < n ; i++ )
254  {
255  if( s.at(i) == '%' && (i+2U) < n )
256  {
257  static std::string map = "00112233445566778899aAbBcCdDeEfF" ;
258  char c1 = s.at(++i) ;
259  char c2 = s.at(++i) ;
260  size_t n1 = map.find(c1) ;
261  size_t n2 = map.find(c2) ;
262  if( n1 == std::string::npos ) n1 = 0U ; // moot
263  if( n2 == std::string::npos ) n2 = 0U ;
264  unsigned int x = ((n1>>1)<<4) | (n2>>1) ;
265  result.append( 1U , static_cast<char>(x) ) ;
266  }
267  else if( s.at(i) == '+' )
268  {
269  result.append( 1U , ' ' ) ;
270  }
271  else
272  {
273  result.append( 1U , s.at(i) ) ;
274  }
275  }
276  return result ;
277 }
278 
279 std::string G::Url::encode( const std::string & s , bool plus_for_space )
280 {
281  std::string result ;
282  result.reserve( s.size() ) ;
283  size_t sn = s.size() ;
284  for( size_t i = 0U ; i < sn ; i++ )
285  {
286  char c = s.at( i ) ;
287  if( ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' ) || ( c >= '0' && c <= '9' ) ||
288  c == '-' || c == '_' || c == '.' || c == '~' )
289  {
290  result.append( 1U , c ) ;
291  }
292  else if( c == ' ' && plus_for_space )
293  {
294  result.append( 1U , '+' ) ;
295  }
296  else
297  {
298  static const char * map = "0123456789ABCDEF" ;
299  unsigned int n = static_cast<unsigned char>(c) ;
300  result.append( 1U , '%' ) ;
301  result.append( 1U , map[(n>>4)%16U] ) ;
302  result.append( 1U , map[n%16U] ) ;
303  }
304  }
305  return result ;
306 }
307 
308 /// \file gurl.cpp
std::string port(const std::string &default_=std::string()) const
Returns the port or service-name, or the specified default if none.
Definition: gurl.cpp:191
std::vector< std::string > StringArray
A std::vector of std::strings.
Definition: gstrings.h:33
std::string host() const
Returns the hostname or network address part.
Definition: gurl.cpp:186
static void splitIntoTokens(const std::string &in, StringArray &out, const std::string &ws)
Splits the string into 'ws'-delimited tokens.
Definition: gstr.cpp:868
static std::string decode(const std::string &)
Does url-decoding (rfc3986 2.1).
Definition: gurl.cpp:248
std::string parameter(std::string key, std::string default_=std::string()) const
Returns the decode()d value of the named parameter, or a default value.
Definition: gurl.cpp:213
static std::string tail(const std::string &in, std::string::size_type pos, const std::string &default_=std::string())
Returns the last part of the string after the given position.
Definition: gstr.cpp:1051
std::string request() const
Returns the path and parameters, suitable for a GET request.
Definition: gurl.cpp:166
std::string path() const
Returns the path part, including the leading slash.
Definition: gurl.cpp:222
bool has(const std::string &key) const
Returns true if the named parameter is present.
Definition: gurl.cpp:208
static std::string head(const std::string &in, std::string::size_type pos, const std::string &default_=std::string())
Returns the first part of the string up to just before the given position.
Definition: gstr.cpp:1037
static std::string unique(std::string s, char c= ' ', char r= ' ')
Returns a string with repeated 'c' characters replaced by one 'r' character.
Definition: gstr.cpp:1144
std::string parameters() const
Returns the parameters string.
Definition: gurl.cpp:171
std::string str() const
Returns the string representation. Returns "/" if default constructed.
Definition: gurl.cpp:38
Url()
Default constructor for a url with a path of "/".
Definition: gurl.cpp:28
Map pmap() const
Returns the decode()d parameters as a multimap.
Definition: gurl.cpp:176
std::string protocol() const
Returns the protocol part eg. "http".
Definition: gurl.cpp:181
static std::string join(const std::string &sep, const StringArray &strings)
Concatenates an array of strings.
Definition: gstr.cpp:972
std::string authorisation() const
Returns the "user:pwd" part.
Definition: gurl.cpp:227
static void removeAll(std::string &, char)
Removes all occurrences of the character from the string.
Definition: gstr.cpp:180
static std::string encode(const std::string &, bool plus_for_space=true)
Does url-encoding.
Definition: gurl.cpp:279
std::string summary() const
Returns a summary of the url for logging purposes, specifically excluding username/password but also ...
Definition: gurl.cpp:73
std::string anchor() const
Returns the "#anchor" part.
Definition: gurl.cpp:232
std::string address() const
Returns the address part, which might include the port, and which might use ipv6 square brackets...
Definition: gurl.cpp:196
static std::string join(const std::string &protocol, const std::string &authorisation, const std::string &address, const std::string &path, const std::string &params, const std::string &anchor=std::string())
Returns a concatenation of the given url parts, with the correct separators inserted.
Definition: gurl.cpp:43