ceph/src/rgw/rgw_rest.cc

   1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
   2 // vim: ts=8 sw=2 smarttab
   3
   4 #include <errno.h>
   5 #include <limits.h>
   6
   7 #include <boost/algorithm/string.hpp>
   8 #include "common/Formatter.h"
   9 #include "common/HTMLFormatter.h"
  10 #include "common/utf8.h"
  11 #include "include/str_list.h"
  12 #include "rgw_common.h"
  13 #include "rgw_rados.h"
  14 #include "rgw_formats.h"
  15 #include "rgw_op.h"
  16 #include "rgw_rest.h"
  17 #include "rgw_rest_swift.h"
  18 #include "rgw_rest_s3.h"
  19 #include "rgw_swift_auth.h"
  20 #include "rgw_cors_s3.h"
  21 #include "rgw_http_errors.h"
  22 #include "rgw_lib.h"
  23
  24 #include "rgw_client_io.h"
  25 #include "rgw_resolve.h"
  26
  27 #include <numeric>
  28
  29 #define dout_subsys ceph_subsys_rgw
  30
  31
  32 struct rgw_http_attr {
  33   const char *rgw_attr;
  34   const char *http_attr;
  35 };
  36
  37 /*
  38  * mapping between rgw object attrs and output http fields
  39  */
  40 static const struct rgw_http_attr base_rgw_to_http_attrs[] = {
  41   { RGW_ATTR_CONTENT_LANG,      "Content-Language" },
  42   { RGW_ATTR_EXPIRES,           "Expires" },
  43   { RGW_ATTR_CACHE_CONTROL,     "Cache-Control" },
  44   { RGW_ATTR_CONTENT_DISP,      "Content-Disposition" },
  45   { RGW_ATTR_CONTENT_ENC,       "Content-Encoding" },
  46   { RGW_ATTR_USER_MANIFEST,     "X-Object-Manifest" },
  47   { RGW_ATTR_X_ROBOTS_TAG ,     "X-Robots-Tag" },
  48   /* RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION header depends on access mode:
  49    * S3 endpoint: x-amz-website-redirect-location
  50    * S3Website endpoint: Location
  51    */
  52   { RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION, "x-amz-website-redirect-location" },
  53 };
  54
  55
  56 struct generic_attr {
  57   const char *http_header;
  58   const char *rgw_attr;
  59 };
  60
  61 /*
  62  * mapping between http env fields and rgw object attrs
  63  */
  64 static const struct generic_attr generic_attrs[] = {
  65   { "CONTENT_TYPE",             RGW_ATTR_CONTENT_TYPE },
  66   { "HTTP_CONTENT_LANGUAGE",    RGW_ATTR_CONTENT_LANG },
  67   { "HTTP_EXPIRES",             RGW_ATTR_EXPIRES },
  68   { "HTTP_CACHE_CONTROL",       RGW_ATTR_CACHE_CONTROL },
  69   { "HTTP_CONTENT_DISPOSITION", RGW_ATTR_CONTENT_DISP },
  70   { "HTTP_CONTENT_ENCODING",    RGW_ATTR_CONTENT_ENC },
  71   { "HTTP_X_ROBOTS_TAG",        RGW_ATTR_X_ROBOTS_TAG },
  72 };
  73
  74 map<string, string> rgw_to_http_attrs;
  75 static map<string, string> generic_attrs_map;
  76 map<int, const char *> http_status_names;
  77
  78 /*
  79  * make attrs look_like_this
  80  * converts dashes to underscores
  81  */
  82 string lowercase_underscore_http_attr(const string& orig)
  83 {
  84   const char *s = orig.c_str();
  85   char buf[orig.size() + 1];
  86   buf[orig.size()] = '\0';
  87
  88   for (size_t i = 0; i < orig.size(); ++i, ++s) {
  89     switch (*s) {
  90       case '-':
  91         buf[i] = '_';
  92         break;
  93       default:
  94         buf[i] = tolower(*s);
  95     }
  96   }
  97   return string(buf);
  98 }
  99
 100 /*
 101  * make attrs LOOK_LIKE_THIS
 102  * converts dashes to underscores
 103  */
 104 string uppercase_underscore_http_attr(const string& orig)
 105 {
 106   const char *s = orig.c_str();
 107   char buf[orig.size() + 1];
 108   buf[orig.size()] = '\0';
 109
 110   for (size_t i = 0; i < orig.size(); ++i, ++s) {
 111     switch (*s) {
 112       case '-':
 113         buf[i] = '_';
 114         break;
 115       default:
 116         buf[i] = toupper(*s);
 117     }
 118   }
 119   return string(buf);
 120 }
 121
 122 /*
 123  * make attrs look-like-this
 124  * converts underscores to dashes
 125  */
 126 string lowercase_dash_http_attr(const string& orig)
 127 {
 128   const char *s = orig.c_str();
 129   char buf[orig.size() + 1];
 130   buf[orig.size()] = '\0';
 131
 132   for (size_t i = 0; i < orig.size(); ++i, ++s) {
 133     switch (*s) {
 134       case '_':
 135         buf[i] = '-';
 136         break;
 137       default:
 138         buf[i] = tolower(*s);
 139     }
 140   }
 141   return string(buf);
 142 }
 143
 144 /*
 145  * make attrs Look-Like-This
 146  * converts underscores to dashes
 147  */
 148 string camelcase_dash_http_attr(const string& orig)
 149 {
 150   const char *s = orig.c_str();
 151   char buf[orig.size() + 1];
 152   buf[orig.size()] = '\0';
 153
 154   bool last_sep = true;
 155
 156   for (size_t i = 0; i < orig.size(); ++i, ++s) {
 157     switch (*s) {
 158       case '_':
 159       case '-':
 160         buf[i] = '-';
 161         last_sep = true;
 162         break;
 163       default:
 164         if (last_sep) {
 165           buf[i] = toupper(*s);
 166         } else {
 167           buf[i] = tolower(*s);
 168         }
 169         last_sep = false;
 170     }
 171   }
 172   return string(buf);
 173 }
 174
 175 /* avoid duplicate hostnames in hostnames lists */
 176 static set<string> hostnames_set;
 177 static set<string> hostnames_s3website_set;
 178
 179 void rgw_rest_init(CephContext *cct, RGWRados *store, RGWZoneGroup& zone_group)
 180 {
 181   store->init_host_id();
 182
 183   for (const auto& rgw2http : base_rgw_to_http_attrs)  {
 184     rgw_to_http_attrs[rgw2http.rgw_attr] = rgw2http.http_attr;
 185   }
 186
 187   for (const auto& http2rgw : generic_attrs) {
 188     generic_attrs_map[http2rgw.http_header] = http2rgw.rgw_attr;
 189   }
 190
 191   list<string> extended_http_attrs;
 192   get_str_list(cct->_conf->rgw_extended_http_attrs, extended_http_attrs);
 193
 194   list<string>::iterator iter;
 195   for (iter = extended_http_attrs.begin(); iter != extended_http_attrs.end(); ++iter) {
 196     string rgw_attr = RGW_ATTR_PREFIX;
 197     rgw_attr.append(lowercase_underscore_http_attr(*iter));
 198
 199     rgw_to_http_attrs[rgw_attr] = camelcase_dash_http_attr(*iter);
 200
 201     string http_header = "HTTP_";
 202     http_header.append(uppercase_underscore_http_attr(*iter));
 203
 204     generic_attrs_map[http_header] = rgw_attr;
 205   }
 206
 207   for (const struct rgw_http_status_code *h = http_codes; h->code; h++) {
 208     http_status_names[h->code] = h->name;
 209   }
 210
 211   hostnames_set.insert(cct->_conf->rgw_dns_name);
 212   hostnames_set.insert(zone_group.hostnames.begin(), zone_group.hostnames.end());
 213   hostnames_set.erase(""); // filter out empty hostnames
 214   ldout(cct, 20) << "RGW hostnames: " << hostnames_set << dendl;
 215   /* TODO: We should have a sanity check that no hostname matches the end of
 216    * any other hostname, otherwise we will get ambigious results from
 217    * rgw_find_host_in_domains.
 218    * Eg:
 219    * Hostnames: [A, B.A]
 220    * Inputs: [Z.A, X.B.A]
 221    * Z.A clearly splits to subdomain=Z, domain=Z
 222    * X.B.A ambigously splits to both {X, B.A} and {X.B, A}
 223    */
 224
 225   hostnames_s3website_set.insert(cct->_conf->rgw_dns_s3website_name);
 226   hostnames_s3website_set.insert(zone_group.hostnames_s3website.begin(), zone_group.hostnames_s3website.end());
 227   hostnames_s3website_set.erase(""); // filter out empty hostnames
 228   ldout(cct, 20) << "RGW S3website hostnames: " << hostnames_s3website_set << dendl;
 229   /* TODO: we should repeat the hostnames_set sanity check here
 230    * and ALSO decide about overlap, if any
 231    */
 232 }
 233
 234 static bool str_ends_with(const string& s, const string& suffix, size_t *pos)
 235 {
 236   size_t len = suffix.size();
 237   if (len > (size_t)s.size()) {
 238     return false;
 239   }
 240
 241   ssize_t p = s.size() - len;
 242   if (pos) {
 243     *pos = p;
 244   }
 245
 246   return s.compare(p, len, suffix) == 0;
 247 }
 248
 249 static bool rgw_find_host_in_domains(const string& host, string *domain, string *subdomain, set<string> valid_hostnames_set)
 250 {
 251   set<string>::iterator iter;
 252   /** TODO, Future optimization
 253    * store hostnames_set elements _reversed_, and look for a prefix match,
 254    * which is much faster than a suffix match.
 255    */
 256   for (iter = valid_hostnames_set.begin(); iter != valid_hostnames_set.end(); ++iter) {
 257     size_t pos;
 258     if (!str_ends_with(host, *iter, &pos))
 259       continue;
 260
 261     if (pos == 0) {
 262       *domain = host;
 263       subdomain->clear();
 264     } else {
 265       if (host[pos - 1] != '.') {
 266         continue;
 267       }
 268
 269       *domain = host.substr(pos);
 270       *subdomain = host.substr(0, pos - 1);
 271     }
 272     return true;
 273   }
 274   return false;
 275 }
 276
 277 static void dump_status(struct req_state *s, int status,
 278                         const char *status_name)
 279 {
 280   s->formatter->set_status(status, status_name);
 281   try {
 282     RESTFUL_IO(s)->send_status(status, status_name);
 283   } catch (rgw::io::Exception& e) {
 284     ldout(s->cct, 0) << "ERROR: s->cio->send_status() returned err="
 285                      << e.what() << dendl;
 286   }
 287 }
 288
 289 void rgw_flush_formatter_and_reset(struct req_state *s, Formatter *formatter)
 290 {
 291   std::ostringstream oss;
 292   formatter->output_footer();
 293   formatter->flush(oss);
 294   std::string outs(oss.str());
 295   if (!outs.empty() && s->op != OP_HEAD) {
 296     dump_body(s, outs);
 297   }
 298
 299   s->formatter->reset();
 300 }
 301
 302 void rgw_flush_formatter(struct req_state *s, Formatter *formatter)
 303 {
 304   std::ostringstream oss;
 305   formatter->flush(oss);
 306   std::string outs(oss.str());
 307   if (!outs.empty() && s->op != OP_HEAD) {
 308     dump_body(s, outs);
 309   }
 310 }
 311
 312 void set_req_state_err(struct rgw_err& err,     /* out */
 313                        int err_no,              /* in  */
 314                        const int prot_flags)    /* in  */
 315 {
 316   const struct rgw_http_errors *r;
 317
 318   if (err_no < 0)
 319     err_no = -err_no;
 320   err.ret = -err_no;
 321   if (prot_flags & RGW_REST_SWIFT) {
 322     r = search_err(err_no, RGW_HTTP_SWIFT_ERRORS,
 323                    ARRAY_LEN(RGW_HTTP_SWIFT_ERRORS));
 324     if (r) {
 325       err.http_ret = r->http_ret;
 326       err.s3_code = r->s3_code;
 327       return;
 328     }
 329   }
 330
 331   r = search_err(err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS));
 332   if (r) {
 333     err.http_ret = r->http_ret;
 334     err.s3_code = r->s3_code;
 335     return;
 336   }
 337   dout(0) << "WARNING: set_req_state_err err_no=" << err_no
 338           << " resorting to 500" << dendl;
 339
 340   err.http_ret = 500;
 341   err.s3_code = "UnknownError";
 342 }
 343
 344 void set_req_state_err(struct req_state * const s, const int err_no)
 345 {
 346   if (s) {
 347     set_req_state_err(s->err, err_no, s->prot_flags);
 348   }
 349 }
 350
 351 void dump_errno(int http_ret, string& out) {
 352   stringstream ss;
 353
 354   ss <<  http_ret << " " << http_status_names[http_ret];
 355   out = ss.str();
 356 }
 357
 358 void dump_errno(const struct rgw_err &err, string& out) {
 359   dump_errno(err.http_ret, out);
 360 }
 361
 362 void dump_errno(struct req_state *s)
 363 {
 364   dump_status(s, s->err.http_ret, http_status_names[s->err.http_ret]);
 365 }
 366
 367 void dump_errno(struct req_state *s, int http_ret)
 368 {
 369   dump_status(s, http_ret, http_status_names[http_ret]);
 370 }
 371
 372 void dump_header(struct req_state* const s,
 373                  const boost::string_ref& name,
 374                  const boost::string_ref& val)
 375 {
 376   try {
 377     RESTFUL_IO(s)->send_header(name, val);
 378   } catch (rgw::io::Exception& e) {
 379     ldout(s->cct, 0) << "ERROR: s->cio->send_header() returned err="
 380                      << e.what() << dendl;
 381   }
 382 }
 383
 384 static inline boost::string_ref get_sanitized_hdrval(ceph::buffer::list& raw)
 385 {
 386   /* std::string and thus boost::string_ref ARE OBLIGED to carry multiple
 387    * 0x00 and count them to the length of a string. We need to take that
 388    * into consideration and sanitize the size of a ceph::buffer::list used
 389    * to store metadata values (x-amz-meta-*, X-Container-Meta-*, etags).
 390    * Otherwise we might send 0x00 to clients. */
 391   const char* const data = raw.c_str();
 392   size_t len = raw.length();
 393
 394   if (len && data[len - 1] == '\0') {
 395     /* That's the case - the null byte has been included at the last position
 396      * of the bufferlist. We need to restore the proper string length we'll
 397      * pass to string_ref. */
 398     len--;
 399   }
 400
 401   return boost::string_ref(data, len);
 402 }
 403
 404 void dump_header(struct req_state* const s,
 405                  const boost::string_ref& name,
 406                  ceph::buffer::list& bl)
 407 {
 408   return dump_header(s, name, get_sanitized_hdrval(bl));
 409 }
 410
 411 void dump_header(struct req_state* const s,
 412                  const boost::string_ref& name,
 413                  const long long val)
 414 {
 415   char buf[32];
 416   const auto len = snprintf(buf, sizeof(buf), "%lld", val);
 417
 418   return dump_header(s, name, boost::string_ref(buf, len));
 419 }
 420
 421 void dump_header(struct req_state* const s,
 422                  const boost::string_ref& name,
 423                  const utime_t& ut)
 424 {
 425   char buf[32];
 426   const auto len = snprintf(buf, sizeof(buf), "%lld.%05d",
 427                             static_cast<long long>(ut.sec()),
 428                             static_cast<int>(ut.usec() / 10));
 429
 430   return dump_header(s, name, boost::string_ref(buf, len));
 431 }
 432
 433 void dump_content_length(struct req_state* const s, const uint64_t len)
 434 {
 435   try {
 436     RESTFUL_IO(s)->send_content_length(len);
 437   } catch (rgw::io::Exception& e) {
 438     ldout(s->cct, 0) << "ERROR: s->cio->send_content_length() returned err="
 439                      << e.what() << dendl;
 440   }
 441   dump_header(s, "Accept-Ranges", "bytes");
 442 }
 443
 444 static void dump_chunked_encoding(struct req_state* const s)
 445 {
 446   try {
 447     RESTFUL_IO(s)->send_chunked_transfer_encoding();
 448   } catch (rgw::io::Exception& e) {
 449     ldout(s->cct, 0) << "ERROR: RESTFUL_IO(s)->send_chunked_transfer_encoding()"
 450                      << " returned err=" << e.what() << dendl;
 451   }
 452 }
 453
 454 void dump_etag(struct req_state* const s,
 455                const boost::string_ref& etag,
 456                const bool quoted)
 457 {
 458   if (etag.empty()) {
 459     return;
 460   }
 461
 462   if (s->prot_flags & RGW_REST_SWIFT && ! quoted) {
 463     return dump_header(s, "etag", etag);
 464   } else {
 465     return dump_header_quoted(s, "ETag", etag);
 466   }
 467 }
 468
 469 void dump_etag(struct req_state* const s,
 470                ceph::buffer::list& bl_etag,
 471                const bool quoted)
 472 {
 473   return dump_etag(s, get_sanitized_hdrval(bl_etag), quoted);
 474 }
 475
 476 void dump_bucket_from_state(struct req_state *s)
 477 {
 478   if (g_conf->rgw_expose_bucket && ! s->bucket_name.empty()) {
 479     if (! s->bucket_tenant.empty()) {
 480       dump_header(s, "Bucket",
 481                   url_encode(s->bucket_tenant + "/" + s->bucket_name));
 482     } else {
 483       dump_header(s, "Bucket", url_encode(s->bucket_name));
 484     }
 485   }
 486 }
 487
 488 void dump_uri_from_state(struct req_state *s)
 489 {
 490   if (strcmp(s->info.request_uri.c_str(), "/") == 0) {
 491
 492     string location = "http://";
 493     string server = s->info.env->get("SERVER_NAME", "<SERVER_NAME>");
 494     location.append(server);
 495     location += "/";
 496     if (!s->bucket_name.empty()) {
 497       if (!s->bucket_tenant.empty()) {
 498         location += s->bucket_tenant;
 499         location += ":";
 500       }
 501       location += s->bucket_name;
 502       location += "/";
 503       if (!s->object.empty()) {
 504         location += s->object.name;
 505         dump_header(s, "Location", location);
 506       }
 507     }
 508   } else {
 509     dump_header_quoted(s, "Location", s->info.request_uri);
 510   }
 511 }
 512
 513 void dump_redirect(struct req_state * const s, const std::string& redirect)
 514 {
 515   return dump_header_if_nonempty(s, "Location", redirect);
 516 }
 517
 518 static size_t dump_time_header_impl(char (&timestr)[TIME_BUF_SIZE],
 519                                     const real_time t)
 520 {
 521   const utime_t ut(t);
 522   time_t secs = static_cast<time_t>(ut.sec());
 523
 524   struct tm result;
 525   const struct tm * const tmp = gmtime_r(&secs, &result);
 526   if (tmp == nullptr) {
 527     return 0;
 528   }
 529
 530   return strftime(timestr, sizeof(timestr),
 531                   "%a, %d %b %Y %H:%M:%S %Z", tmp);
 532 }
 533
 534 void dump_time_header(struct req_state *s, const char *name, real_time t)
 535 {
 536   char timestr[TIME_BUF_SIZE];
 537
 538   const size_t len = dump_time_header_impl(timestr, t);
 539   if (len == 0) {
 540     return;
 541   }
 542
 543   return dump_header(s, name, boost::string_ref(timestr, len));
 544 }
 545
 546 std::string dump_time_to_str(const real_time& t)
 547 {
 548   char timestr[TIME_BUF_SIZE];
 549   dump_time_header_impl(timestr, t);
 550
 551   return timestr;
 552 }
 553
 554
 555 void dump_last_modified(struct req_state *s, real_time t)
 556 {
 557   dump_time_header(s, "Last-Modified", t);
 558 }
 559
 560 void dump_epoch_header(struct req_state *s, const char *name, real_time t)
 561 {
 562   utime_t ut(t);
 563   char buf[65];
 564   const auto len = snprintf(buf, sizeof(buf), "%lld.%09lld",
 565                             (long long)ut.sec(),
 566                             (long long)ut.nsec());
 567
 568   return dump_header(s, name, boost::string_ref(buf, len));
 569 }
 570
 571 void dump_time(struct req_state *s, const char *name, real_time *t)
 572 {
 573   char buf[TIME_BUF_SIZE];
 574   rgw_to_iso8601(*t, buf, sizeof(buf));
 575
 576   s->formatter->dump_string(name, buf);
 577 }
 578
 579 void dump_owner(struct req_state *s, const rgw_user& id, string& name,
 580                 const char *section)
 581 {
 582   if (!section)
 583     section = "Owner";
 584   s->formatter->open_object_section(section);
 585   s->formatter->dump_string("ID", id.to_str());
 586   s->formatter->dump_string("DisplayName", name);
 587   s->formatter->close_section();
 588 }
 589
 590 void dump_access_control(struct req_state *s, const char *origin,
 591                          const char *meth,
 592                          const char *hdr, const char *exp_hdr,
 593                          uint32_t max_age) {
 594   if (origin && (origin[0] != '\0')) {
 595     dump_header(s, "Access-Control-Allow-Origin", origin);
 596     /* If the server specifies an origin host rather than "*",
 597      * then it must also include Origin in the Vary response header
 598      * to indicate to clients that server responses will differ
 599      * based on the value of the Origin request header.
 600      */
 601     if (strcmp(origin, "*") != 0) {
 602       dump_header(s, "Vary", "Origin");
 603     }
 604
 605     if (meth && (meth[0] != '\0')) {
 606       dump_header(s, "Access-Control-Allow-Methods", meth);
 607     }
 608     if (hdr && (hdr[0] != '\0')) {
 609       dump_header(s, "Access-Control-Allow-Headers", hdr);
 610     }
 611     if (exp_hdr && (exp_hdr[0] != '\0')) {
 612       dump_header(s, "Access-Control-Expose-Headers", exp_hdr);
 613     }
 614     if (max_age != CORS_MAX_AGE_INVALID) {
 615       dump_header(s, "Access-Control-Max-Age", max_age);
 616     }
 617   }
 618 }
 619
 620 void dump_access_control(req_state *s, RGWOp *op)
 621 {
 622   string origin;
 623   string method;
 624   string header;
 625   string exp_header;
 626   unsigned max_age = CORS_MAX_AGE_INVALID;
 627
 628   if (!op->generate_cors_headers(origin, method, header, exp_header, &max_age))
 629     return;
 630
 631   dump_access_control(s, origin.c_str(), method.c_str(), header.c_str(),
 632                       exp_header.c_str(), max_age);
 633 }
 634
 635 void dump_start(struct req_state *s)
 636 {
 637   if (!s->content_started) {
 638     s->formatter->output_header();
 639     s->content_started = true;
 640   }
 641 }
 642
 643 void dump_trans_id(req_state *s)
 644 {
 645   if (s->prot_flags & RGW_REST_SWIFT) {
 646     dump_header(s, "X-Trans-Id", s->trans_id);
 647     dump_header(s, "X-Openstack-Request-Id", s->trans_id);
 648   } else if (s->trans_id.length()) {
 649     dump_header(s, "x-amz-request-id", s->trans_id);
 650   }
 651 }
 652
 653 void end_header(struct req_state* s, RGWOp* op, const char *content_type,
 654                 const int64_t proposed_content_length, bool force_content_type,
 655                 bool force_no_error)
 656 {
 657   string ctype;
 658
 659   dump_trans_id(s);
 660
 661   if ((!s->err.is_err()) &&
 662       (s->bucket_info.owner != s->user->user_id) &&
 663       (s->bucket_info.requester_pays)) {
 664     dump_header(s, "x-amz-request-charged", "requester");
 665   }
 666
 667   if (op) {
 668     dump_access_control(s, op);
 669   }
 670
 671   if (s->prot_flags & RGW_REST_SWIFT && !content_type) {
 672     force_content_type = true;
 673   }
 674
 675   /* do not send content type if content length is zero
 676      and the content type was not set by the user */
 677   if (force_content_type ||
 678       (!content_type &&  s->formatter->get_len()  != 0) || s->err.is_err()){
 679     switch (s->format) {
 680     case RGW_FORMAT_XML:
 681       ctype = "application/xml";
 682       break;
 683     case RGW_FORMAT_JSON:
 684       ctype = "application/json";
 685       break;
 686     case RGW_FORMAT_HTML:
 687       ctype = "text/html";
 688       break;
 689     default:
 690       ctype = "text/plain";
 691       break;
 692     }
 693     if (s->prot_flags & RGW_REST_SWIFT)
 694       ctype.append("; charset=utf-8");
 695     content_type = ctype.c_str();
 696   }
 697   if (!force_no_error && s->err.is_err()) {
 698     dump_start(s);
 699     if (s->format != RGW_FORMAT_HTML) {
 700       s->formatter->open_object_section("Error");
 701     }
 702     if (!s->err.s3_code.empty())
 703       s->formatter->dump_string("Code", s->err.s3_code);
 704     if (!s->err.message.empty())
 705       s->formatter->dump_string("Message", s->err.message);
 706     if (!s->bucket_name.empty()) // TODO: connect to expose_bucket
 707       s->formatter->dump_string("BucketName", s->bucket_name);
 708     if (!s->trans_id.empty()) // TODO: connect to expose_bucket or another toggle
 709       s->formatter->dump_string("RequestId", s->trans_id);
 710     s->formatter->dump_string("HostId", s->host_id);
 711     if (s->format != RGW_FORMAT_HTML) {
 712       s->formatter->close_section();
 713     }
 714     s->formatter->output_footer();
 715     dump_content_length(s, s->formatter->get_len());
 716   } else {
 717     if (proposed_content_length == CHUNKED_TRANSFER_ENCODING) {
 718       dump_chunked_encoding(s);
 719     } else if (proposed_content_length != NO_CONTENT_LENGTH) {
 720       dump_content_length(s, proposed_content_length);
 721     }
 722   }
 723
 724   if (content_type) {
 725     dump_header(s, "Content-Type", content_type);
 726   }
 727
 728   try {
 729     RESTFUL_IO(s)->complete_header();
 730   } catch (rgw::io::Exception& e) {
 731     ldout(s->cct, 0) << "ERROR: RESTFUL_IO(s)->complete_header() returned err="
 732                      << e.what() << dendl;
 733   }
 734
 735   ACCOUNTING_IO(s)->set_account(true);
 736   rgw_flush_formatter_and_reset(s, s->formatter);
 737 }
 738
 739 void abort_early(struct req_state *s, RGWOp *op, int err_no,
 740                  RGWHandler* handler)
 741 {
 742   string error_content("");
 743   if (!s->formatter) {
 744     s->formatter = new JSONFormatter;
 745     s->format = RGW_FORMAT_JSON;
 746   }
 747
 748   // op->error_handler is responsible for calling it's handler error_handler
 749   if (op != NULL) {
 750     int new_err_no;
 751     new_err_no = op->error_handler(err_no, &error_content);
 752     ldout(s->cct, 20) << "op->ERRORHANDLER: err_no=" << err_no
 753                       << " new_err_no=" << new_err_no << dendl;
 754     err_no = new_err_no;
 755   } else if (handler != NULL) {
 756     int new_err_no;
 757     new_err_no = handler->error_handler(err_no, &error_content);
 758     ldout(s->cct, 20) << "handler->ERRORHANDLER: err_no=" << err_no
 759                       << " new_err_no=" << new_err_no << dendl;
 760     err_no = new_err_no;
 761   }
 762
 763   // If the error handler(s) above dealt with it completely, they should have
 764   // returned 0. If non-zero, we need to continue here.
 765   if (err_no) {
 766     // Watch out, we might have a custom error state already set!
 767     if (s->err.http_ret && s->err.http_ret != 200) {
 768       dump_errno(s);
 769     } else {
 770       set_req_state_err(s, err_no);
 771       dump_errno(s);
 772     }
 773     dump_bucket_from_state(s);
 774     if (err_no == -ERR_PERMANENT_REDIRECT || err_no == -ERR_WEBSITE_REDIRECT) {
 775       string dest_uri;
 776       if (!s->redirect.empty()) {
 777         dest_uri = s->redirect;
 778       } else if (!s->zonegroup_endpoint.empty()) {
 779         dest_uri = s->zonegroup_endpoint;
 780         /*
 781          * reqest_uri is always start with slash, so we need to remove
 782          * the unnecessary slash at the end of dest_uri.
 783          */
 784         if (dest_uri[dest_uri.size() - 1] == '/') {
 785           dest_uri = dest_uri.substr(0, dest_uri.size() - 1);
 786         }
 787         dest_uri += s->info.request_uri;
 788         dest_uri += "?";
 789         dest_uri += s->info.request_params;
 790       }
 791
 792       if (!dest_uri.empty()) {
 793         dump_redirect(s, dest_uri);
 794       }
 795     }
 796
 797     if (!error_content.empty()) {
 798       /*
 799        * TODO we must add all error entries as headers here:
 800        * when having a working errordoc, then the s3 error fields are
 801        * rendered as HTTP headers, e.g.:
 802        *   x-amz-error-code: NoSuchKey
 803        *   x-amz-error-message: The specified key does not exist.
 804        *   x-amz-error-detail-Key: foo
 805        */
 806       end_header(s, op, NULL, error_content.size(), false, true);
 807       RESTFUL_IO(s)->send_body(error_content.c_str(), error_content.size());
 808     } else {
 809       end_header(s, op);
 810     }
 811   }
 812   perfcounter->inc(l_rgw_failed_req);
 813 }
 814
 815 void dump_continue(struct req_state * const s)
 816 {
 817   try {
 818     RESTFUL_IO(s)->send_100_continue();
 819   } catch (rgw::io::Exception& e) {
 820     ldout(s->cct, 0) << "ERROR: RESTFUL_IO(s)->send_100_continue() returned err="
 821                      << e.what() << dendl;
 822   }
 823 }
 824
 825 void dump_range(struct req_state* const s,
 826                 const uint64_t ofs,
 827                 const uint64_t end,
 828                 const uint64_t total)
 829 {
 830   /* dumping range into temp buffer first, as libfcgi will fail to digest
 831    * %lld */
 832   char range_buf[128];
 833   size_t len;
 834
 835   if (! total) {
 836     len = snprintf(range_buf, sizeof(range_buf), "bytes */%lld",
 837                    static_cast<long long>(total));
 838   } else {
 839     len = snprintf(range_buf, sizeof(range_buf), "bytes %lld-%lld/%lld",
 840                    static_cast<long long>(ofs),
 841                    static_cast<long long>(end),
 842                    static_cast<long long>(total));
 843   }
 844
 845   return dump_header(s, "Content-Range", boost::string_ref(range_buf, len));
 846 }
 847
 848
 849 int dump_body(struct req_state* const s,
 850               const char* const buf,
 851               const size_t len)
 852 {
 853   try {
 854     return RESTFUL_IO(s)->send_body(buf, len);
 855   } catch (rgw::io::Exception& e) {
 856     return -e.code().value();
 857   }
 858 }
 859
 860 int dump_body(struct req_state* const s, /* const */ ceph::buffer::list& bl)
 861 {
 862   return dump_body(s, bl.c_str(), bl.length());
 863 }
 864
 865 int dump_body(struct req_state* const s, const std::string& str)
 866 {
 867   return dump_body(s, str.c_str(), str.length());
 868 }
 869
 870 int recv_body(struct req_state* const s,
 871               char* const buf,
 872               const size_t max)
 873 {
 874   try {
 875     return AWS_AUTHv4_IO(s)->recv_body(buf, max, s->aws4_auth_needs_complete);
 876   } catch (rgw::io::Exception& e) {
 877     return -e.code().value();
 878   }
 879 }
 880
 881 int RGWGetObj_ObjStore::get_params()
 882 {
 883   range_str = s->info.env->get("HTTP_RANGE");
 884   if_mod = s->info.env->get("HTTP_IF_MODIFIED_SINCE");
 885   if_unmod = s->info.env->get("HTTP_IF_UNMODIFIED_SINCE");
 886   if_match = s->info.env->get("HTTP_IF_MATCH");
 887   if_nomatch = s->info.env->get("HTTP_IF_NONE_MATCH");
 888
 889   if (s->system_request) {
 890     mod_zone_id = s->info.env->get_int("HTTP_DEST_ZONE_SHORT_ID", 0);
 891     mod_pg_ver = s->info.env->get_int("HTTP_DEST_PG_VER", 0);
 892     rgwx_stat = s->info.args.exists(RGW_SYS_PARAM_PREFIX "stat");
 893     get_data &= (!rgwx_stat);
 894   }
 895
 896   /* start gettorrent */
 897   bool is_torrent = s->info.args.exists(GET_TORRENT);
 898   bool torrent_flag = s->cct->_conf->rgw_torrent_flag;
 899   if (torrent_flag && is_torrent)
 900   {
 901     int ret = 0;
 902     ret = torrent.get_params();
 903     if (ret < 0)
 904     {
 905       return ret;
 906     }
 907   }
 908   /* end gettorrent */
 909
 910   return 0;
 911 }
 912
 913 int RESTArgs::get_string(struct req_state *s, const string& name,
 914                          const string& def_val, string *val, bool *existed)
 915 {
 916   bool exists;
 917   *val = s->info.args.get(name, &exists);
 918
 919   if (existed)
 920     *existed = exists;
 921
 922   if (!exists) {
 923     *val = def_val;
 924     return 0;
 925   }
 926
 927   return 0;
 928 }
 929
 930 int RESTArgs::get_uint64(struct req_state *s, const string& name,
 931                          uint64_t def_val, uint64_t *val, bool *existed)
 932 {
 933   bool exists;
 934   string sval = s->info.args.get(name, &exists);
 935
 936   if (existed)
 937     *existed = exists;
 938
 939   if (!exists) {
 940     *val = def_val;
 941     return 0;
 942   }
 943
 944   int r = stringtoull(sval, val);
 945   if (r < 0)
 946     return r;
 947
 948   return 0;
 949 }
 950
 951 int RESTArgs::get_int64(struct req_state *s, const string& name,
 952                         int64_t def_val, int64_t *val, bool *existed)
 953 {
 954   bool exists;
 955   string sval = s->info.args.get(name, &exists);
 956
 957   if (existed)
 958     *existed = exists;
 959
 960   if (!exists) {
 961     *val = def_val;
 962     return 0;
 963   }
 964
 965   int r = stringtoll(sval, val);
 966   if (r < 0)
 967     return r;
 968
 969   return 0;
 970 }
 971
 972 int RESTArgs::get_uint32(struct req_state *s, const string& name,
 973                          uint32_t def_val, uint32_t *val, bool *existed)
 974 {
 975   bool exists;
 976   string sval = s->info.args.get(name, &exists);
 977
 978   if (existed)
 979     *existed = exists;
 980
 981   if (!exists) {
 982     *val = def_val;
 983     return 0;
 984   }
 985
 986   int r = stringtoul(sval, val);
 987   if (r < 0)
 988     return r;
 989
 990   return 0;
 991 }
 992
 993 int RESTArgs::get_int32(struct req_state *s, const string& name,
 994                         int32_t def_val, int32_t *val, bool *existed)
 995 {
 996   bool exists;
 997   string sval = s->info.args.get(name, &exists);
 998
 999   if (existed)
1000     *existed = exists;
1001
1002   if (!exists) {
1003     *val = def_val;
1004     return 0;
1005   }
1006
1007   int r = stringtol(sval, val);
1008   if (r < 0)
1009     return r;
1010
1011   return 0;
1012 }
1013
1014 int RESTArgs::get_time(struct req_state *s, const string& name,
1015                        const utime_t& def_val, utime_t *val, bool *existed)
1016 {
1017   bool exists;
1018   string sval = s->info.args.get(name, &exists);
1019
1020   if (existed)
1021     *existed = exists;
1022
1023   if (!exists) {
1024     *val = def_val;
1025     return 0;
1026   }
1027
1028   uint64_t epoch, nsec;
1029
1030   int r = utime_t::parse_date(sval, &epoch, &nsec);
1031   if (r < 0)
1032     return r;
1033
1034   *val = utime_t(epoch, nsec);
1035
1036   return 0;
1037 }
1038
1039 int RESTArgs::get_epoch(struct req_state *s, const string& name, uint64_t def_val, uint64_t *epoch, bool *existed)
1040 {
1041   bool exists;
1042   string date = s->info.args.get(name, &exists);
1043
1044   if (existed)
1045     *existed = exists;
1046
1047   if (!exists) {
1048     *epoch = def_val;
1049     return 0;
1050   }
1051
1052   int r = utime_t::parse_date(date, epoch, NULL);
1053   if (r < 0)
1054     return r;
1055
1056   return 0;
1057 }
1058
1059 int RESTArgs::get_bool(struct req_state *s, const string& name, bool def_val, bool *val, bool *existed)
1060 {
1061   bool exists;
1062   string sval = s->info.args.get(name, &exists);
1063
1064   if (existed)
1065     *existed = exists;
1066
1067   if (!exists) {
1068     *val = def_val;
1069     return 0;
1070   }
1071
1072   const char *str = sval.c_str();
1073
1074   if (sval.empty() ||
1075       strcasecmp(str, "true") == 0 ||
1076       sval.compare("1") == 0) {
1077     *val = true;
1078     return 0;
1079   }
1080
1081   if (strcasecmp(str, "false") != 0 &&
1082       sval.compare("0") != 0) {
1083     *val = def_val;
1084     return -EINVAL;
1085   }
1086
1087   *val = false;
1088   return 0;
1089 }
1090
1091
1092 void RGWRESTFlusher::do_start(int ret)
1093 {
1094   set_req_state_err(s, ret); /* no going back from here */
1095   dump_errno(s);
1096   dump_start(s);
1097   end_header(s, op);
1098   rgw_flush_formatter_and_reset(s, s->formatter);
1099 }
1100
1101 void RGWRESTFlusher::do_flush()
1102 {
1103   rgw_flush_formatter(s, s->formatter);
1104 }
1105
1106 int RGWPutObj_ObjStore::verify_params()
1107 {
1108   if (s->length) {
1109     off_t len = atoll(s->length);
1110     if (len > (off_t)(s->cct->_conf->rgw_max_put_size)) {
1111       return -ERR_TOO_LARGE;
1112     }
1113   }
1114
1115   return 0;
1116 }
1117
1118 int RGWPutObj_ObjStore::get_params()
1119 {
1120   /* start gettorrent */
1121   if (s->cct->_conf->rgw_torrent_flag)
1122   {
1123     int ret = 0;
1124     ret = torrent.get_params();
1125     ldout(s->cct, 5) << "NOTICE:  open produce torrent file " << dendl;
1126     if (ret < 0)
1127     {
1128       return ret;
1129     }
1130     torrent.set_info_name((s->object).name);
1131   }
1132   /* end gettorrent */
1133   supplied_md5_b64 = s->info.env->get("HTTP_CONTENT_MD5");
1134
1135   return 0;
1136 }
1137
1138 int RGWPutObj_ObjStore::get_padding_last_aws4_chunk_encoded(bufferlist &bl, uint64_t chunk_size) {
1139
1140   const int chunk_str_min_len = 1 + 17 + 64 + 2; /* len('0') = 1 */
1141
1142   char *chunk_str = bl.c_str();
1143   int budget = bl.length();
1144
1145   unsigned int chunk_data_size;
1146   unsigned int chunk_offset = 0;
1147
1148   while (1) {
1149
1150     /* check available metadata */
1151     if (budget < chunk_str_min_len) {
1152       return -ERR_SIGNATURE_NO_MATCH;
1153     }
1154
1155     chunk_offset = 0;
1156
1157     /* grab chunk size */
1158     while ((*(chunk_str+chunk_offset) != ';') && (chunk_offset < chunk_str_min_len))
1159       chunk_offset++;
1160     string str = string(chunk_str, chunk_offset);
1161     stringstream ss;
1162     ss << std::hex << str;
1163     ss >> chunk_data_size;
1164
1165     /* next chunk */
1166     chunk_offset += 17 + 64 + 2 + chunk_data_size;
1167
1168     /* last chunk? */
1169     budget -= chunk_offset;
1170     if (budget < 0) {
1171       budget *= -1;
1172       break;
1173     }
1174
1175     chunk_str += chunk_offset;
1176   }
1177
1178   return budget;
1179 }
1180
1181 int RGWPutObj_ObjStore::get_data(bufferlist& bl)
1182 {
1183   size_t cl;
1184   uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
1185   if (s->length) {
1186     cl = atoll(s->length) - ofs;
1187     if (cl > chunk_size)
1188       cl = chunk_size;
1189   } else {
1190     cl = chunk_size;
1191   }
1192
1193   int len = 0;
1194   if (cl) {
1195     ACCOUNTING_IO(s)->set_account(true);
1196     bufferptr bp(cl);
1197
1198     const auto read_len  = recv_body(s, bp.c_str(), cl);
1199     if (read_len < 0) {
1200       return read_len;
1201     }
1202
1203     len = read_len;
1204     bl.append(bp, 0, len);
1205
1206     /* read last aws4 chunk padding */
1207     if (s->aws4_auth_streaming_mode && len == (int)chunk_size) {
1208       int ret_auth = get_padding_last_aws4_chunk_encoded(bl, chunk_size);
1209       if (ret_auth < 0) {
1210         return ret_auth;
1211       }
1212       int len_padding = ret_auth;
1213       if (len_padding) {
1214         bufferptr bp_extra(len_padding);
1215         const auto read_len = recv_body(s, bp_extra.c_str(), len_padding);
1216         if (read_len < 0) {
1217           return read_len;
1218         }
1219         if (read_len != len_padding) {
1220           return -ERR_SIGNATURE_NO_MATCH;
1221         }
1222         bl.append(bp_extra.c_str(), len_padding);
1223         bl.rebuild();
1224       }
1225     }
1226     ACCOUNTING_IO(s)->set_account(false);
1227   }
1228
1229   if ((uint64_t)ofs + len > s->cct->_conf->rgw_max_put_size) {
1230     return -ERR_TOO_LARGE;
1231   }
1232
1233   if (!ofs)
1234     supplied_md5_b64 = s->info.env->get("HTTP_CONTENT_MD5");
1235
1236   return len;
1237 }
1238
1239
1240 /*
1241  * parses params in the format: 'first; param1=foo; param2=bar'
1242  */
1243 void RGWPostObj_ObjStore::parse_boundary_params(const std::string& params_str,
1244                                                 std::string& first,
1245                                                 std::map<std::string,
1246                                                 std::string>& params)
1247 {
1248   size_t pos = params_str.find(';');
1249   if (std::string::npos == pos) {
1250     first = rgw_trim_whitespace(params_str);
1251     return;
1252   }
1253
1254   first = rgw_trim_whitespace(params_str.substr(0, pos));
1255   pos++;
1256
1257   while (pos < params_str.size()) {
1258     size_t end = params_str.find(';', pos);
1259     if (std::string::npos == end) {
1260       end = params_str.size();
1261     }
1262
1263     std::string param = params_str.substr(pos, end - pos);
1264     size_t eqpos = param.find('=');
1265
1266     if (std::string::npos != eqpos) {
1267       std::string param_name = rgw_trim_whitespace(param.substr(0, eqpos));
1268       std::string val = rgw_trim_quotes(param.substr(eqpos + 1));
1269       params[std::move(param_name)] = std::move(val);
1270     } else {
1271       params[rgw_trim_whitespace(param)] = "";
1272     }
1273
1274     pos = end + 1;
1275   }
1276 }
1277
1278 int RGWPostObj_ObjStore::parse_part_field(const std::string& line,
1279                                           std::string& field_name,  /* out */
1280                                           post_part_field& field)   /* out */
1281 {
1282   size_t pos = line.find(':');
1283   if (pos == string::npos)
1284     return -EINVAL;
1285
1286   field_name = line.substr(0, pos);
1287   if (pos >= line.size() - 1)
1288     return 0;
1289
1290   parse_boundary_params(line.substr(pos + 1), field.val, field.params);
1291
1292   return 0;
1293 }
1294
/*
 * True when the two bytes starting at `s` are "\r\n". The second byte is
 * only examined when the first one is '\r'.
 */
static bool is_crlf(const char *s)
{
  if (s[0] != '\r') {
    return false;
  }
  return s[1] == '\n';
}
1299
1300 /*
1301  * find the index of the boundary, if exists, or optionally the next end of line
1302  * also returns how many bytes to skip
1303  */
1304 static int index_of(ceph::bufferlist& bl,
1305                     uint64_t max_len,
1306                     const std::string& str,
1307                     const bool check_crlf,
1308                     bool& reached_boundary,
1309                     int& skip)
1310 {
1311   reached_boundary = false;
1312   skip = 0;
1313
1314   if (str.size() < 2) // we assume boundary is at least 2 chars (makes it easier with crlf checks)
1315     return -EINVAL;
1316
1317   if (bl.length() < str.size())
1318     return -1;
1319
1320   const char *buf = bl.c_str();
1321   const char *s = str.c_str();
1322
1323   if (max_len > bl.length())
1324     max_len = bl.length();
1325
1326   for (uint64_t i = 0; i < max_len; i++, buf++) {
1327     if (check_crlf &&
1328         i >= 1 &&
1329         is_crlf(buf - 1)) {
1330       return i + 1; // skip the crlf
1331     }
1332     if ((i < max_len - str.size() + 1) &&
1333         (buf[0] == s[0] && buf[1] == s[1]) &&
1334         (strncmp(buf, s, str.size()) == 0)) {
1335       reached_boundary = true;
1336       skip = str.size();
1337
1338       /* oh, great, now we need to swallow the preceding crlf
1339        * if exists
1340        */
1341       if ((i >= 2) &&
1342           is_crlf(buf - 2)) {
1343         i -= 2;
1344         skip += 2;
1345       }
1346       return i;
1347     }
1348   }
1349
1350   return -1;
1351 }
1352
1353 int RGWPostObj_ObjStore::read_with_boundary(ceph::bufferlist& bl,
1354                                             uint64_t max,
1355                                             const bool check_crlf,
1356                                             bool& reached_boundary,
1357                                             bool& done)
1358 {
1359   uint64_t cl = max + 2 + boundary.size();
1360
1361   if (max > in_data.length()) {
1362     uint64_t need_to_read = cl - in_data.length();
1363
1364     bufferptr bp(need_to_read);
1365
1366     const auto read_len = recv_body(s, bp.c_str(), need_to_read);
1367     if (read_len < 0) {
1368       return read_len;
1369     }
1370     in_data.append(bp, 0, read_len);
1371   }
1372
1373   done = false;
1374   int skip;
1375   const int index = index_of(in_data, cl, boundary, check_crlf,
1376                              reached_boundary, skip);
1377   if (index >= 0) {
1378     max = index;
1379   }
1380
1381   if (max > in_data.length()) {
1382     max = in_data.length();
1383   }
1384
1385   bl.substr_of(in_data, 0, max);
1386
1387   ceph::bufferlist new_read_data;
1388
1389   /*
1390    * now we need to skip boundary for next time, also skip any crlf, or
1391    * check to see if it's the last final boundary (marked with "--" at the end
1392    */
1393   if (reached_boundary) {
1394     int left = in_data.length() - max;
1395     if (left < skip + 2) {
1396       int need = skip + 2 - left;
1397       bufferptr boundary_bp(need);
1398       const int r = recv_body(s, boundary_bp.c_str(), need);
1399       if (r < 0) {
1400         return r;
1401       }
1402       in_data.append(boundary_bp);
1403     }
1404     max += skip; // skip boundary for next time
1405     if (in_data.length() >= max + 2) {
1406       const char *data = in_data.c_str();
1407       if (is_crlf(data + max)) {
1408         max += 2;
1409       } else {
1410         if (*(data + max) == '-' &&
1411             *(data + max + 1) == '-') {
1412           done = true;
1413           max += 2;
1414         }
1415       }
1416     }
1417   }
1418
1419   new_read_data.substr_of(in_data, max, in_data.length() - max);
1420   in_data = new_read_data;
1421
1422   return 0;
1423 }
1424
1425 int RGWPostObj_ObjStore::read_line(ceph::bufferlist& bl,
1426                                    const uint64_t max,
1427                                    bool& reached_boundary,
1428                                    bool& done)
1429 {
1430   return read_with_boundary(bl, max, true, reached_boundary, done);
1431 }
1432
1433 int RGWPostObj_ObjStore::read_data(ceph::bufferlist& bl,
1434                                    const uint64_t max,
1435                                    bool& reached_boundary,
1436                                    bool& done)
1437 {
1438   return read_with_boundary(bl, max, false, reached_boundary, done);
1439 }
1440
1441
1442 int RGWPostObj_ObjStore::read_form_part_header(struct post_form_part* const part,
1443                                                bool& done)
1444 {
1445   bufferlist bl;
1446   bool reached_boundary;
1447   uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
1448   int r = read_line(bl, chunk_size, reached_boundary, done);
1449   if (r < 0) {
1450     return r;
1451   }
1452
1453   if (done) {
1454     return 0;
1455   }
1456
1457   if (reached_boundary) { // skip the first boundary
1458     r = read_line(bl, chunk_size, reached_boundary, done);
1459     if (r < 0) {
1460       return r;
1461     } else if (done) {
1462       return 0;
1463     }
1464   }
1465
1466   while (true) {
1467   /*
1468    * iterate through fields
1469    */
1470     std::string line = rgw_trim_whitespace(string(bl.c_str(), bl.length()));
1471
1472     if (line.empty()) {
1473       break;
1474     }
1475
1476     struct post_part_field field;
1477
1478     string field_name;
1479     r = parse_part_field(line, field_name, field);
1480     if (r < 0) {
1481       return r;
1482     }
1483
1484     part->fields[field_name] = field;
1485
1486     if (stringcasecmp(field_name, "Content-Disposition") == 0) {
1487       part->name = field.params["name"];
1488     }
1489
1490     if (reached_boundary) {
1491       break;
1492     }
1493
1494     r = read_line(bl, chunk_size, reached_boundary, done);
1495   }
1496
1497   return 0;
1498 }
1499
1500 bool RGWPostObj_ObjStore::part_str(parts_collection_t& parts,
1501                                    const std::string& name,
1502                                    std::string* val)
1503 {
1504   const auto iter = parts.find(name);
1505   if (std::end(parts) == iter) {
1506     return false;
1507   }
1508
1509   ceph::bufferlist& data = iter->second.data;
1510   std::string str = string(data.c_str(), data.length());
1511   *val = rgw_trim_whitespace(str);
1512   return true;
1513 }
1514
1515 std::string RGWPostObj_ObjStore::get_part_str(parts_collection_t& parts,
1516                                               const std::string& name,
1517                                               const std::string& def_val)
1518 {
1519   std::string val;
1520
1521   if (part_str(parts, name, &val)) {
1522     return val;
1523   } else {
1524     return rgw_trim_whitespace(def_val);
1525   }
1526 }
1527
1528 bool RGWPostObj_ObjStore::part_bl(parts_collection_t& parts,
1529                                   const std::string& name,
1530                                   ceph::bufferlist* pbl)
1531 {
1532   const auto iter = parts.find(name);
1533   if (std::end(parts) == iter) {
1534     return false;
1535   }
1536
1537   *pbl = iter->second.data;
1538   return true;
1539 }
1540
1541 int RGWPostObj_ObjStore::verify_params()
1542 {
1543   /*  check that we have enough memory to store the object
1544   note that this test isn't exact and may fail unintentionally
1545   for large requests is */
1546   if (!s->length) {
1547     return -ERR_LENGTH_REQUIRED;
1548   }
1549   off_t len = atoll(s->length);
1550   if (len > (off_t)(s->cct->_conf->rgw_max_put_size)) {
1551     return -ERR_TOO_LARGE;
1552   }
1553
1554   return 0;
1555 }
1556
1557 int RGWPostObj_ObjStore::get_params()
1558 {
1559   if (s->expect_cont) {
1560     /* OK, here it really gets ugly. With POST, the params are embedded in the
1561      * request body, so we need to continue before being able to actually look
1562      * at them. This diverts from the usual request flow. */
1563     dump_continue(s);
1564     s->expect_cont = false;
1565   }
1566
1567   std::string req_content_type_str = s->info.env->get("CONTENT_TYPE", "");
1568   std::string req_content_type;
1569   std::map<std::string, std::string> params;
1570   parse_boundary_params(req_content_type_str, req_content_type, params);
1571
1572   if (req_content_type.compare("multipart/form-data") != 0) {
1573     err_msg = "Request Content-Type is not multipart/form-data";
1574     return -EINVAL;
1575   }
1576
1577   if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) {
1578     ldout(s->cct, 20) << "request content_type_str="
1579                       << req_content_type_str << dendl;
1580     ldout(s->cct, 20) << "request content_type params:" << dendl;
1581
1582     for (const auto& pair : params) {
1583       ldout(s->cct, 20) << " " << pair.first << " -> " << pair.second
1584                         << dendl;
1585     }
1586   }
1587
1588   const auto iter = params.find("boundary");
1589   if (std::end(params) == iter) {
1590     err_msg = "Missing multipart boundary specification";
1591     return -EINVAL;
1592   }
1593
1594   /* Create the boundary. */
1595   boundary = "--";
1596   boundary.append(iter->second);
1597
1598   return 0;
1599 }
1600
1601
1602 int RGWPutACLs_ObjStore::get_params()
1603 {
1604   const auto max_size = s->cct->_conf->rgw_max_put_param_size;
1605   op_ret = rgw_rest_read_all_input(s, &data, &len, max_size, false);
1606   return op_ret;
1607 }
1608
1609 int RGWPutLC_ObjStore::get_params()
1610 {
1611   const auto max_size = s->cct->_conf->rgw_max_put_param_size;
1612   op_ret = rgw_rest_read_all_input(s, &data, &len, max_size, false);
1613   return op_ret;
1614 }
1615
1616 static int read_all_chunked_input(req_state *s, char **pdata, int *plen, const uint64_t max_read)
1617 {
1618 #define READ_CHUNK 4096
1619 #define MAX_READ_CHUNK (128 * 1024)
1620   int need_to_read = READ_CHUNK;
1621   int total = need_to_read;
1622   char *data = (char *)malloc(total + 1);
1623   if (!data)
1624     return -ENOMEM;
1625
1626   int read_len = 0, len = 0;
1627   do {
1628     read_len = recv_body(s, data + len, need_to_read);
1629     if (read_len < 0) {
1630       free(data);
1631       return read_len;
1632     }
1633
1634     len += read_len;
1635
1636     if (read_len == need_to_read) {
1637       if (need_to_read < MAX_READ_CHUNK)
1638         need_to_read *= 2;
1639
1640       if ((unsigned)total > max_read) {
1641         free(data);
1642         return -ERANGE;
1643       }
1644       total += need_to_read;
1645
1646       void *p = realloc(data, total + 1);
1647       if (!p) {
1648         free(data);
1649         return -ENOMEM;
1650       }
1651       data = (char *)p;
1652     } else {
1653       break;
1654     }
1655
1656   } while (true);
1657   data[len] = '\0';
1658
1659   *pdata = data;
1660   *plen = len;
1661
1662   return 0;
1663 }
1664
1665 int rgw_rest_read_all_input(struct req_state *s, char **pdata, int *plen,
1666                             const uint64_t max_len, const bool allow_chunked)
1667 {
1668   size_t cl = 0;
1669   int len = 0;
1670   char *data = NULL;
1671
1672   if (s->length)
1673     cl = atoll(s->length);
1674   else if (!allow_chunked)
1675     return -ERR_LENGTH_REQUIRED;
1676
1677   if (cl) {
1678     if (cl > (size_t)max_len) {
1679       return -ERANGE;
1680     }
1681     data = (char *)malloc(cl + 1);
1682     if (!data) {
1683       return -ENOMEM;
1684     }
1685     len = recv_body(s, data, cl);
1686     if (len < 0) {
1687       free(data);
1688       return len;
1689     }
1690     data[len] = '\0';
1691   } else if (allow_chunked && !s->length) {
1692     const char *encoding = s->info.env->get("HTTP_TRANSFER_ENCODING");
1693     if (!encoding || strcmp(encoding, "chunked") != 0)
1694       return -ERR_LENGTH_REQUIRED;
1695
1696     int ret = read_all_chunked_input(s, &data, &len, max_len);
1697     if (ret < 0)
1698       return ret;
1699   }
1700
1701   *plen = len;
1702   *pdata = data;
1703
1704   return 0;
1705 }
1706
1707 int RGWCompleteMultipart_ObjStore::get_params()
1708 {
1709   upload_id = s->info.args.get("uploadId");
1710
1711   if (upload_id.empty()) {
1712     op_ret = -ENOTSUP;
1713     return op_ret;
1714   }
1715
1716 #define COMPLETE_MULTIPART_MAX_LEN (1024 * 1024) /* api defines max 10,000 parts, this should be enough */
1717   op_ret = rgw_rest_read_all_input(s, &data, &len, COMPLETE_MULTIPART_MAX_LEN);
1718   if (op_ret < 0)
1719     return op_ret;
1720
1721   return 0;
1722 }
1723
1724 int RGWListMultipart_ObjStore::get_params()
1725 {
1726   upload_id = s->info.args.get("uploadId");
1727
1728   if (upload_id.empty()) {
1729     op_ret = -ENOTSUP;
1730   }
1731   string marker_str = s->info.args.get("part-number-marker");
1732
1733   if (!marker_str.empty()) {
1734     string err;
1735     marker = strict_strtol(marker_str.c_str(), 10, &err);
1736     if (!err.empty()) {
1737       ldout(s->cct, 20) << "bad marker: "  << marker << dendl;
1738       op_ret = -EINVAL;
1739       return op_ret;
1740     }
1741   }
1742
1743   string str = s->info.args.get("max-parts");
1744   if (!str.empty())
1745     max_parts = atoi(str.c_str());
1746
1747   return op_ret;
1748 }
1749
1750 int RGWListBucketMultiparts_ObjStore::get_params()
1751 {
1752   delimiter = s->info.args.get("delimiter");
1753   prefix = s->info.args.get("prefix");
1754   string str = s->info.args.get("max-parts");
1755   if (!str.empty())
1756     max_uploads = atoi(str.c_str());
1757   else
1758     max_uploads = default_max;
1759
1760   string key_marker = s->info.args.get("key-marker");
1761   string upload_id_marker = s->info.args.get("upload-id-marker");
1762   if (!key_marker.empty())
1763     marker.init(key_marker, upload_id_marker);
1764
1765   return 0;
1766 }
1767
1768 int RGWDeleteMultiObj_ObjStore::get_params()
1769 {
1770
1771   if (s->bucket_name.empty()) {
1772     op_ret = -EINVAL;
1773     return op_ret;
1774   }
1775
1776   // everything is probably fine, set the bucket
1777   bucket = s->bucket;
1778
1779   const auto max_size = s->cct->_conf->rgw_max_put_param_size;
1780   op_ret = rgw_rest_read_all_input(s, &data, &len, max_size, false);
1781   return op_ret;
1782 }
1783
1784
1785 void RGWRESTOp::send_response()
1786 {
1787   if (!flusher.did_start()) {
1788     set_req_state_err(s, http_ret);
1789     dump_errno(s);
1790     end_header(s, this);
1791   }
1792   flusher.flush();
1793 }
1794
1795 int RGWRESTOp::verify_permission()
1796 {
1797   return check_caps(s->user->caps);
1798 }
1799
1800 RGWOp* RGWHandler_REST::get_op(RGWRados* store)
1801 {
1802   RGWOp *op;
1803   switch (s->op) {
1804    case OP_GET:
1805      op = op_get();
1806      break;
1807    case OP_PUT:
1808      op = op_put();
1809      break;
1810    case OP_DELETE:
1811      op = op_delete();
1812      break;
1813    case OP_HEAD:
1814      op = op_head();
1815      break;
1816    case OP_POST:
1817      op = op_post();
1818      break;
1819    case OP_COPY:
1820      op = op_copy();
1821      break;
1822    case OP_OPTIONS:
1823      op = op_options();
1824      break;
1825    default:
1826      return NULL;
1827   }
1828
1829   if (op) {
1830     op->init(store, s, this);
1831   }
1832   return op;
1833 } /* get_op */
1834
1835 void RGWHandler_REST::put_op(RGWOp* op)
1836 {
1837   delete op;
1838 } /* put_op */
1839
1840 int RGWHandler_REST::allocate_formatter(struct req_state *s,
1841                                         int default_type,
1842                                         bool configurable)
1843 {
1844   s->format = default_type;
1845   if (configurable) {
1846     string format_str = s->info.args.get("format");
1847     if (format_str.compare("xml") == 0) {
1848       s->format = RGW_FORMAT_XML;
1849     } else if (format_str.compare("json") == 0) {
1850       s->format = RGW_FORMAT_JSON;
1851     } else if (format_str.compare("html") == 0) {
1852       s->format = RGW_FORMAT_HTML;
1853     } else {
1854       const char *accept = s->info.env->get("HTTP_ACCEPT");
1855       if (accept) {
1856         char format_buf[64];
1857         unsigned int i = 0;
1858         for (; i < sizeof(format_buf) - 1 && accept[i] && accept[i] != ';'; ++i) {
1859           format_buf[i] = accept[i];
1860         }
1861         format_buf[i] = 0;
1862         if ((strcmp(format_buf, "text/xml") == 0) || (strcmp(format_buf, "application/xml") == 0)) {
1863           s->format = RGW_FORMAT_XML;
1864         } else if (strcmp(format_buf, "application/json") == 0) {
1865           s->format = RGW_FORMAT_JSON;
1866         } else if (strcmp(format_buf, "text/html") == 0) {
1867           s->format = RGW_FORMAT_HTML;
1868         }
1869       }
1870     }
1871   }
1872
1873   const string& mm = s->info.args.get("multipart-manifest");
1874   const bool multipart_delete = (mm.compare("delete") == 0);
1875   const bool swift_bulkupload = s->prot_flags & RGW_REST_SWIFT &&
1876                                 s->info.args.exists("extract-archive");
1877   switch (s->format) {
1878     case RGW_FORMAT_PLAIN:
1879       {
1880         const bool use_kv_syntax = s->info.args.exists("bulk-delete") ||
1881                                    multipart_delete || swift_bulkupload;
1882         s->formatter = new RGWFormatter_Plain(use_kv_syntax);
1883         break;
1884       }
1885     case RGW_FORMAT_XML:
1886       {
1887         const bool lowercase_underscore = s->info.args.exists("bulk-delete") ||
1888                                           multipart_delete || swift_bulkupload;
1889
1890         s->formatter = new XMLFormatter(false, lowercase_underscore);
1891         break;
1892       }
1893     case RGW_FORMAT_JSON:
1894       s->formatter = new JSONFormatter(false);
1895       break;
1896     case RGW_FORMAT_HTML:
1897       s->formatter = new HTMLFormatter(s->prot_flags & RGW_REST_WEBSITE);
1898       break;
1899     default:
1900       return -EINVAL;
1901
1902   };
1903   //s->formatter->reset(); // All formatters should reset on create already
1904
1905   return 0;
1906 }
1907
1908 int RGWHandler_REST::validate_tenant_name(string const& t)
1909 {
1910   struct tench {
1911     static bool is_good(char ch) {
1912       return isalnum(ch) || ch == '_';
1913     }
1914   };
1915   std::string::const_iterator it =
1916     std::find_if_not(t.begin(), t.end(), tench::is_good);
1917   return (it == t.end())? 0: -ERR_INVALID_TENANT_NAME;
1918 }
1919
1920 // This function enforces Amazon's spec for bucket names.
1921 // (The requirements, not the recommendations.)
1922 int RGWHandler_REST::validate_bucket_name(const string& bucket)
1923 {
1924   int len = bucket.size();
1925   if (len < 3) {
1926     if (len == 0) {
1927       // This request doesn't specify a bucket at all
1928       return 0;
1929     }
1930     // Name too short
1931     return -ERR_INVALID_BUCKET_NAME;
1932   }
1933   else if (len > MAX_BUCKET_NAME_LEN) {
1934     // Name too long
1935     return -ERR_INVALID_BUCKET_NAME;
1936   }
1937
1938   return 0;
1939 }
1940
1941 // "The name for a key is a sequence of Unicode characters whose UTF-8 encoding
1942 // is at most 1024 bytes long."
1943 // However, we can still have control characters and other nasties in there.
1944 // Just as long as they're utf-8 nasties.
1945 int RGWHandler_REST::validate_object_name(const string& object)
1946 {
1947   int len = object.size();
1948   if (len > MAX_OBJ_NAME_LEN) {
1949     // Name too long
1950     return -ERR_INVALID_OBJECT_NAME;
1951   }
1952
1953   if (check_utf8(object.c_str(), len)) {
1954     // Object names must be valid UTF-8.
1955     return -ERR_INVALID_OBJECT_NAME;
1956   }
1957   return 0;
1958 }
1959
1960 static http_op op_from_method(const char *method)
1961 {
1962   if (!method)
1963     return OP_UNKNOWN;
1964   if (strcmp(method, "GET") == 0)
1965     return OP_GET;
1966   if (strcmp(method, "PUT") == 0)
1967     return OP_PUT;
1968   if (strcmp(method, "DELETE") == 0)
1969     return OP_DELETE;
1970   if (strcmp(method, "HEAD") == 0)
1971     return OP_HEAD;
1972   if (strcmp(method, "POST") == 0)
1973     return OP_POST;
1974   if (strcmp(method, "COPY") == 0)
1975     return OP_COPY;
1976   if (strcmp(method, "OPTIONS") == 0)
1977     return OP_OPTIONS;
1978
1979   return OP_UNKNOWN;
1980 }
1981
1982 int RGWHandler_REST::init_permissions(RGWOp* op)
1983 {
1984   if (op->get_type() == RGW_OP_CREATE_BUCKET)
1985     return 0;
1986
1987   return do_init_permissions();
1988 }
1989
1990 int RGWHandler_REST::read_permissions(RGWOp* op_obj)
1991 {
1992   bool only_bucket;
1993
1994   switch (s->op) {
1995   case OP_HEAD:
1996   case OP_GET:
1997     only_bucket = false;
1998     break;
1999   case OP_PUT:
2000   case OP_POST:
2001   case OP_COPY:
2002     /* is it a 'multi-object delete' request? */
2003     if (s->info.args.exists("delete")) {
2004       only_bucket = true;
2005       break;
2006     }
2007     if (is_obj_update_op()) {
2008       only_bucket = false;
2009       break;
2010     }
2011     /* is it a 'create bucket' request? */
2012     if (op_obj->get_type() == RGW_OP_CREATE_BUCKET)
2013       return 0;
2014     only_bucket = true;
2015     break;
2016   case OP_DELETE:
2017     only_bucket = true;
2018     break;
2019   case OP_OPTIONS:
2020     only_bucket = true;
2021     break;
2022   default:
2023     return -EINVAL;
2024   }
2025
2026   return do_read_permissions(op_obj, only_bucket);
2027 }
2028
2029 void RGWRESTMgr::register_resource(string resource, RGWRESTMgr *mgr)
2030 {
2031   string r = "/";
2032   r.append(resource);
2033
2034   /* do we have a resource manager registered for this entry point? */
2035   map<string, RGWRESTMgr *>::iterator iter = resource_mgrs.find(r);
2036   if (iter != resource_mgrs.end()) {
2037     delete iter->second;
2038   }
2039   resource_mgrs[r] = mgr;
2040   resources_by_size.insert(pair<size_t, string>(r.size(), r));
2041
2042   /* now build default resource managers for the path (instead of nested entry points)
2043    * e.g., if the entry point is /auth/v1.0/ then we'd want to create a default
2044    * manager for /auth/
2045    */
2046
2047   size_t pos = r.find('/', 1);
2048
2049   while (pos != r.size() - 1 && pos != string::npos) {
2050     string s = r.substr(0, pos);
2051
2052     iter = resource_mgrs.find(s);
2053     if (iter == resource_mgrs.end()) { /* only register it if one does not exist */
2054       resource_mgrs[s] = new RGWRESTMgr; /* a default do-nothing manager */
2055       resources_by_size.insert(pair<size_t, string>(s.size(), s));
2056     }
2057
2058     pos = r.find('/', pos + 1);
2059   }
2060 }
2061
2062 void RGWRESTMgr::register_default_mgr(RGWRESTMgr *mgr)
2063 {
2064   delete default_mgr;
2065   default_mgr = mgr;
2066 }
2067
2068 RGWRESTMgr* RGWRESTMgr::get_resource_mgr(struct req_state* const s,
2069                                          const std::string& uri,
2070                                          std::string* const out_uri)
2071 {
2072   *out_uri = uri;
2073
2074   multimap<size_t, string>::reverse_iterator iter;
2075
2076   for (iter = resources_by_size.rbegin(); iter != resources_by_size.rend(); ++iter) {
2077     string& resource = iter->second;
2078     if (uri.compare(0, iter->first, resource) == 0 &&
2079         (uri.size() == iter->first ||
2080          uri[iter->first] == '/')) {
2081       std::string suffix = uri.substr(iter->first);
2082       return resource_mgrs[resource]->get_resource_mgr(s, suffix, out_uri);
2083     }
2084   }
2085
2086   if (default_mgr) {
2087     return default_mgr->get_resource_mgr_as_default(s, uri, out_uri);
2088   }
2089
2090   return this;
2091 }
2092
2093 void RGWREST::register_x_headers(const string& s_headers)
2094 {
2095   std::vector<std::string> hdrs = get_str_vec(s_headers);
2096   for (auto& hdr : hdrs) {
2097     boost::algorithm::to_upper(hdr); // XXX
2098     (void) x_headers.insert(hdr);
2099   }
2100 }
2101
2102 RGWRESTMgr::~RGWRESTMgr()
2103 {
2104   map<string, RGWRESTMgr *>::iterator iter;
2105   for (iter = resource_mgrs.begin(); iter != resource_mgrs.end(); ++iter) {
2106     delete iter->second;
2107   }
2108   delete default_mgr;
2109 }
2110
2111 static int64_t parse_content_length(const char *content_length)
2112 {
2113   int64_t len = -1;
2114
2115   if (*content_length == '\0') {
2116     len = 0;
2117   } else {
2118     string err;
2119     len = strict_strtoll(content_length, 10, &err);
2120     if (!err.empty()) {
2121       len = -1;
2122     }
2123   }
2124
2125   return len;
2126 }
2127
2128 int RGWREST::preprocess(struct req_state *s, rgw::io::BasicClient* cio)
2129 {
2130   req_info& info = s->info;
2131
2132   /* save the request uri used to hash on the client side. request_uri may suffer
2133      modifications as part of the bucket encoding in the subdomain calling format.
2134      request_uri_aws4 will be used under aws4 auth */
2135   s->info.request_uri_aws4 = s->info.request_uri;
2136
2137   s->cio = cio;
2138
2139   // We need to know if this RGW instance is running the s3website API with a
2140   // higher priority than regular S3 API, or possibly in place of the regular
2141   // S3 API.
2142   // Map the listing of rgw_enable_apis in REVERSE order, so that items near
2143   // the front of the list have a higher number assigned (and -1 for items not in the list).
2144   list<string> apis;
2145   get_str_list(g_conf->rgw_enable_apis, apis);
2146   int api_priority_s3 = -1;
2147   int api_priority_s3website = -1;
2148   auto api_s3website_priority_rawpos = std::find(apis.begin(), apis.end(), "s3website");
2149   auto api_s3_priority_rawpos = std::find(apis.begin(), apis.end(), "s3");
2150   if (api_s3_priority_rawpos != apis.end()) {
2151     api_priority_s3 = apis.size() - std::distance(apis.begin(), api_s3_priority_rawpos);
2152   }
2153   if (api_s3website_priority_rawpos != apis.end()) {
2154     api_priority_s3website = apis.size() - std::distance(apis.begin(), api_s3website_priority_rawpos);
2155   }
2156   ldout(s->cct, 10) << "rgw api priority: s3=" << api_priority_s3 << " s3website=" << api_priority_s3website << dendl;
2157   bool s3website_enabled = api_priority_s3website >= 0;
2158
2159   if (info.host.size()) {
2160     ssize_t pos = info.host.find(':');
2161     if (pos >= 0) {
2162       info.host = info.host.substr(0, pos);
2163     }
2164     ldout(s->cct, 10) << "host=" << info.host << dendl;
2165     string domain;
2166     string subdomain;
2167     bool in_hosted_domain_s3website = false;
2168     bool in_hosted_domain = rgw_find_host_in_domains(info.host, &domain, &subdomain, hostnames_set);
2169
2170     string s3website_domain;
2171     string s3website_subdomain;
2172
2173     if (s3website_enabled) {
2174       in_hosted_domain_s3website = rgw_find_host_in_domains(info.host, &s3website_domain, &s3website_subdomain, hostnames_s3website_set);
2175       if (in_hosted_domain_s3website) {
2176         in_hosted_domain = true; // TODO: should hostnames be a strict superset of hostnames_s3website?
2177         domain = s3website_domain;
2178         subdomain = s3website_subdomain;
2179       }
2180     }
2181
2182     ldout(s->cct, 20)
2183       << "subdomain=" << subdomain
2184       << " domain=" << domain
2185       << " in_hosted_domain=" << in_hosted_domain
2186       << " in_hosted_domain_s3website=" << in_hosted_domain_s3website
2187       << dendl;
2188
2189     if (g_conf->rgw_resolve_cname
2190         && !in_hosted_domain
2191         && !in_hosted_domain_s3website) {
2192       string cname;
2193       bool found;
2194       int r = rgw_resolver->resolve_cname(info.host, cname, &found);
2195       if (r < 0) {
2196         ldout(s->cct, 0)
2197           << "WARNING: rgw_resolver->resolve_cname() returned r=" << r
2198           << dendl;
2199       }
2200
2201       if (found) {
2202         ldout(s->cct, 5) << "resolved host cname " << info.host << " -> "
2203                          << cname << dendl;
2204         in_hosted_domain =
2205           rgw_find_host_in_domains(cname, &domain, &subdomain, hostnames_set);
2206
2207         if (s3website_enabled
2208             && !in_hosted_domain_s3website) {
2209           in_hosted_domain_s3website =
2210             rgw_find_host_in_domains(cname, &s3website_domain,
2211                                      &s3website_subdomain,
2212                                      hostnames_s3website_set);
2213           if (in_hosted_domain_s3website) {
2214             in_hosted_domain = true; // TODO: should hostnames be a
2215                                      // strict superset of hostnames_s3website?
2216             domain = s3website_domain;
2217             subdomain = s3website_subdomain;
2218           }
2219         }
2220
2221         ldout(s->cct, 20)
2222           << "subdomain=" << subdomain
2223           << " domain=" << domain
2224           << " in_hosted_domain=" << in_hosted_domain
2225           << " in_hosted_domain_s3website=" << in_hosted_domain_s3website
2226           << dendl;
2227       }
2228     }
2229
2230     // Handle A/CNAME records that point to the RGW storage, but do match the
2231     // CNAME test above, per issue http://tracker.ceph.com/issues/15975
2232     // If BOTH domain & subdomain variables are empty, then none of the above
2233     // cases matched anything, and we should fall back to using the Host header
2234     // directly as the bucket name.
2235     // As additional checks:
2236     // - if the Host header is an IP, we're using path-style access without DNS
2237     // - Also check that the Host header is a valid bucket name before using it.
2238     // - Don't enable virtual hosting if no hostnames are configured
2239     if (subdomain.empty()
2240         && (domain.empty() || domain != info.host)
2241         && !looks_like_ip_address(info.host.c_str())
2242         && RGWHandler_REST::validate_bucket_name(info.host) == 0
2243         && !(hostnames_set.empty() && hostnames_s3website_set.empty())) {
2244       subdomain.append(info.host);
2245       in_hosted_domain = 1;
2246     }
2247
2248     if (s3website_enabled && api_priority_s3website > api_priority_s3) {
2249       in_hosted_domain_s3website = 1;
2250     }
2251
2252     if (in_hosted_domain_s3website) {
2253       s->prot_flags |= RGW_REST_WEBSITE;
2254     }
2255
2256
2257     if (in_hosted_domain && !subdomain.empty()) {
2258       string encoded_bucket = "/";
2259       encoded_bucket.append(subdomain);
2260       if (s->info.request_uri[0] != '/')
2261         encoded_bucket.append("/");
2262       encoded_bucket.append(s->info.request_uri);
2263       s->info.request_uri = encoded_bucket;
2264     }
2265
2266     if (!domain.empty()) {
2267       s->info.domain = domain;
2268     }
2269
2270     ldout(s->cct, 20)
2271       << "final domain/bucket"
2272       << " subdomain=" << subdomain
2273       << " domain=" << domain
2274       << " in_hosted_domain=" << in_hosted_domain
2275       << " in_hosted_domain_s3website=" << in_hosted_domain_s3website
2276       << " s->info.domain=" << s->info.domain
2277       << " s->info.request_uri=" << s->info.request_uri
2278       << dendl;
2279   }
2280
2281   if (s->info.domain.empty()) {
2282     s->info.domain = s->cct->_conf->rgw_dns_name;
2283   }
2284
2285   url_decode(s->info.request_uri, s->decoded_uri);
2286
2287   /* FastCGI specification, section 6.3
2288    * http://www.fastcgi.com/devkit/doc/fcgi-spec.html#S6.3
2289    * ===
2290    * The Authorizer application receives HTTP request information from the Web
2291    * server on the FCGI_PARAMS stream, in the same format as a Responder. The
2292    * Web server does not send CONTENT_LENGTH, PATH_INFO, PATH_TRANSLATED, and
2293    * SCRIPT_NAME headers.
2294    * ===
2295    * Ergo if we are in Authorizer role, we MUST look at HTTP_CONTENT_LENGTH
2296    * instead of CONTENT_LENGTH for the Content-Length.
2297    *
2298    * There is one slight wrinkle in this, and that's older versions of
2299    * nginx/lighttpd/apache setting BOTH headers. As a result, we have to check
2300    * both headers and can't always simply pick A or B.
2301    */
2302   const char* content_length = info.env->get("CONTENT_LENGTH");
2303   const char* http_content_length = info.env->get("HTTP_CONTENT_LENGTH");
2304   if (!http_content_length != !content_length) {
2305     /* Easy case: one or the other is missing */
2306     s->length = (content_length ? content_length : http_content_length);
2307   } else if (s->cct->_conf->rgw_content_length_compat &&
2308              content_length && http_content_length) {
2309     /* Hard case: Both are set, we have to disambiguate */
2310     int64_t content_length_i, http_content_length_i;
2311
2312     content_length_i = parse_content_length(content_length);
2313     http_content_length_i = parse_content_length(http_content_length);
2314
2315     // Now check them:
2316     if (http_content_length_i < 0) {
2317       // HTTP_CONTENT_LENGTH is invalid, ignore it
2318     } else if (content_length_i < 0) {
2319       // CONTENT_LENGTH is invalid, and HTTP_CONTENT_LENGTH is valid
2320       // Swap entries
2321       content_length = http_content_length;
2322     } else {
2323       // both CONTENT_LENGTH and HTTP_CONTENT_LENGTH are valid
2324       // Let's pick the larger size
2325       if (content_length_i < http_content_length_i) {
2326         // prefer the larger value
2327         content_length = http_content_length;
2328       }
2329     }
2330     s->length = content_length;
2331     // End of: else if (s->cct->_conf->rgw_content_length_compat &&
2332     //   content_length &&
2333     // http_content_length)
2334   } else {
2335     /* no content length was defined */
2336     s->length = NULL;
2337   }
2338
2339   if (s->length) {
2340     if (*s->length == '\0') {
2341       s->content_length = 0;
2342     } else {
2343       string err;
2344       s->content_length = strict_strtoll(s->length, 10, &err);
2345       if (!err.empty()) {
2346         ldout(s->cct, 10) << "bad content length, aborting" << dendl;
2347         return -EINVAL;
2348       }
2349     }
2350   }
2351
2352   if (s->content_length < 0) {
2353     ldout(s->cct, 10) << "negative content length, aborting" << dendl;
2354     return -EINVAL;
2355   }
2356
2357   map<string, string>::iterator giter;
2358   for (giter = generic_attrs_map.begin(); giter != generic_attrs_map.end();
2359        ++giter) {
2360     const char *env = info.env->get(giter->first.c_str());
2361     if (env) {
2362       s->generic_attrs[giter->second] = env;
2363     }
2364   }
2365
2366   s->http_auth = info.env->get("HTTP_AUTHORIZATION");
2367
2368   if (g_conf->rgw_print_continue) {
2369     const char *expect = info.env->get("HTTP_EXPECT");
2370     s->expect_cont = (expect && !strcasecmp(expect, "100-continue"));
2371   }
2372   s->op = op_from_method(info.method);
2373
2374   info.init_meta_info(&s->has_bad_meta);
2375
2376   return 0;
2377 }
2378
2379 RGWHandler_REST* RGWREST::get_handler(
2380   RGWRados * const store,
2381   struct req_state* const s,
2382   const rgw::auth::StrategyRegistry& auth_registry,
2383   const std::string& frontend_prefix,
2384   RGWRestfulIO* const rio,
2385   RGWRESTMgr** const pmgr,
2386   int* const init_error
2387 ) {
2388   *init_error = preprocess(s, rio);
2389   if (*init_error < 0) {
2390     return nullptr;
2391   }
2392
2393   RGWRESTMgr *m = mgr.get_manager(s, frontend_prefix, s->decoded_uri,
2394                                   &s->relative_uri);
2395   if (! m) {
2396     *init_error = -ERR_METHOD_NOT_ALLOWED;
2397     return nullptr;
2398   }
2399
2400   if (pmgr) {
2401     *pmgr = m;
2402   }
2403
2404   RGWHandler_REST* handler = m->get_handler(s, auth_registry, frontend_prefix);
2405   if (! handler) {
2406     *init_error = -ERR_METHOD_NOT_ALLOWED;
2407     return NULL;
2408   }
2409   *init_error = handler->init(store, s, rio);
2410   if (*init_error < 0) {
2411     m->put_handler(handler);
2412     return nullptr;
2413   }
2414
2415   return handler;
2416 } /* get stream handler */