ceph/src/common/ConfUtils.cc

   1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
   2 // vim: ts=8 sw=2 smarttab
   3 /*
   4  * Ceph - scalable distributed file system
   5  *
   6  * Copyright (C) 2011 New Dream Network
   7  *
   8  * This is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License version 2.1, as published by the Free Software
  11  * Foundation.  See file COPYING.
  12  *
  13  */
  14
  15 #include <algorithm>
  16 #include <errno.h>
  17 #include <map>
  18 #include <sstream>
  19 #include <stdio.h>
  20 #include <stdlib.h>
  21 #include <string.h>
  22 #include <string>
  23 #include <sys/stat.h>
  24 #include <iostream>
  25
  26 #include "include/buffer.h"
  27 #include "common/errno.h"
  28 #include "common/utf8.h"
  29 #include "common/ConfUtils.h"
  30
  31 using std::cerr;
  32 using std::ostringstream;
  33 using std::pair;
  34 using std::string;
  35
  36 #define MAX_CONFIG_FILE_SZ 0x40000000
  37
  38 ////////////////////////////// ConfLine //////////////////////////////
  39 ConfLine::
  40 ConfLine(const std::string &key_, const std::string &val_,
  41       const std::string &newsection_, const std::string &comment_, int line_no_)
  42   : key(key_), val(val_), newsection(newsection_)
  43 {
  44   // If you want to implement writable ConfFile support, you'll need to save
  45   // the comment and line_no arguments here.
  46 }
  47
  48 bool ConfLine::
  49 operator<(const ConfLine &rhs) const
  50 {
  51   // We only compare keys.
  52   // If you have more than one line with the same key in a given section, the
  53   // last one wins.
  54   if (key < rhs.key)
  55     return true;
  56   else
  57     return false;
  58 }
  59
  60 std::ostream &operator<<(std::ostream& oss, const ConfLine &l)
  61 {
  62   oss << "ConfLine(key = '" << l.key << "', val='"
  63       << l.val << "', newsection='" << l.newsection << "')";
  64   return oss;
  65 }
  66 ///////////////////////// ConfFile //////////////////////////
  67 ConfFile::
  68 ConfFile()
  69 {
  70 }
  71
  72 ConfFile::
  73 ~ConfFile()
  74 {
  75 }
  76
  77 void ConfFile::
  78 clear()
  79 {
  80   sections.clear();
  81 }
  82
  83 /* We load the whole file into memory and then parse it.  Although this is not
  84  * the optimal approach, it does mean that most of this code can be shared with
  85  * the bufferlist loading function. Since bufferlists are always in-memory, the
  86  * load_from_buffer interface works well for them.
  87  * In general, configuration files should be a few kilobytes at maximum, so
  88  * loading the whole configuration into memory shouldn't be a problem.
  89  */
  90 int ConfFile::
  91 parse_file(const std::string &fname, std::deque<std::string> *errors,
  92            std::ostream *warnings)
  93 {
  94   clear();
  95
  96   int ret = 0;
  97   size_t sz;
  98   char *buf = NULL;
  99   FILE *fp = fopen(fname.c_str(), "r");
 100   if (!fp) {
 101     ostringstream oss;
 102     oss << __func__ << ": cannot open " << fname << ": " << cpp_strerror(errno);
 103     errors->push_back(oss.str());
 104     ret = -errno;
 105     return ret;
 106   }
 107
 108   struct stat st_buf;
 109   if (fstat(fileno(fp), &st_buf)) {
 110     ret = -errno;
 111     ostringstream oss;
 112     oss << __func__ << ": failed to fstat '" << fname << "': " << cpp_strerror(ret);
 113     errors->push_back(oss.str());
 114     goto done;
 115   }
 116
 117   if (st_buf.st_size > MAX_CONFIG_FILE_SZ) {
 118     ostringstream oss;
 119     oss << __func__ << ": config file '" << fname << "' is " << st_buf.st_size
 120         << " bytes, but the maximum is " << MAX_CONFIG_FILE_SZ;
 121     errors->push_back(oss.str());
 122     ret = -EINVAL;
 123     goto done;
 124   }
 125
 126   sz = (size_t)st_buf.st_size;
 127   buf = (char*)malloc(sz);
 128   if (!buf) {
 129     ret = -ENOMEM;
 130     goto done;
 131   }
 132
 133   if (fread(buf, 1, sz, fp) != sz) {
 134     if (ferror(fp)) {
 135       ret = -errno;
 136       ostringstream oss;
 137       oss << __func__ << ": fread error while reading '" << fname << "': "
 138           << cpp_strerror(ret);
 139       errors->push_back(oss.str());
 140       goto done;
 141     }
 142     else {
 143       ostringstream oss;
 144       oss << __func__ << ": unexpected EOF while reading '" << fname << "': "
 145           << "possible concurrent modification?";
 146       errors->push_back(oss.str());
 147       ret = -EIO;
 148       goto done;
 149     }
 150   }
 151
 152   load_from_buffer(buf, sz, errors, warnings);
 153   ret = 0;
 154
 155 done:
 156   free(buf);
 157   fclose(fp);
 158   return ret;
 159 }
 160
 161 int ConfFile::
 162 parse_bufferlist(ceph::bufferlist *bl, std::deque<std::string> *errors,
 163                  std::ostream *warnings)
 164 {
 165   clear();
 166
 167   load_from_buffer(bl->c_str(), bl->length(), errors, warnings);
 168   return 0;
 169 }
 170
 171 int ConfFile::
 172 read(const std::string &section, const std::string &key, std::string &val) const
 173 {
 174   string k(normalize_key_name(key));
 175
 176   const_section_iter_t s = sections.find(section);
 177   if (s == sections.end())
 178     return -ENOENT;
 179   ConfLine exemplar(k, "", "", "", 0);
 180   ConfSection::const_line_iter_t l = s->second.lines.find(exemplar);
 181   if (l == s->second.lines.end())
 182     return -ENOENT;
 183   val = l->val;
 184   return 0;
 185 }
 186
 187 ConfFile::const_section_iter_t ConfFile::
 188 sections_begin() const
 189 {
 190   return sections.begin();
 191 }
 192
 193 ConfFile::const_section_iter_t ConfFile::
 194 sections_end() const
 195 {
 196   return sections.end();
 197 }
 198
 199 void ConfFile::
 200 trim_whitespace(std::string &str, bool strip_internal)
 201 {
 202   // strip preceding
 203   const char *in = str.c_str();
 204   while (true) {
 205     char c = *in;
 206     if ((!c) || (!isspace(c)))
 207       break;
 208     ++in;
 209   }
 210   char output[strlen(in) + 1];
 211   strcpy(output, in);
 212
 213   // strip trailing
 214   char *o = output + strlen(output);
 215   while (true) {
 216     if (o == output)
 217       break;
 218     --o;
 219     if (!isspace(*o)) {
 220       ++o;
 221       *o = '\0';
 222       break;
 223     }
 224   }
 225
 226   if (!strip_internal) {
 227     str.assign(output);
 228     return;
 229   }
 230
 231   // strip internal
 232   char output2[strlen(output) + 1];
 233   char *out2 = output2;
 234   bool prev_was_space = false;
 235   for (char *u = output; *u; ++u) {
 236     char c = *u;
 237     if (isspace(c)) {
 238       if (!prev_was_space)
 239         *out2++ = c;
 240       prev_was_space = true;
 241     }
 242     else {
 243       *out2++ = c;
 244       prev_was_space = false;
 245     }
 246   }
 247   *out2++ = '\0';
 248   str.assign(output2);
 249 }
 250
 251 /* Normalize a key name.
 252  *
 253  * Normalized key names have no leading or trailing whitespace, and all
 254  * whitespace is stored as underscores.  The main reason for selecting this
 255  * normal form is so that in common/config.cc, we can use a macro to stringify
 256  * the field names of md_config_t and get a key in normal form.
 257  */
 258 std::string ConfFile::
 259 normalize_key_name(const std::string &key)
 260 {
 261   string k(key);
 262   ConfFile::trim_whitespace(k, true);
 263   std::replace(k.begin(), k.end(), ' ', '_');
 264   return k;
 265 }
 266
 267 std::ostream &operator<<(std::ostream &oss, const ConfFile &cf)
 268 {
 269   for (ConfFile::const_section_iter_t s = cf.sections_begin();
 270        s != cf.sections_end(); ++s) {
 271     oss << "[" << s->first << "]\n";
 272     for (ConfSection::const_line_iter_t l = s->second.lines.begin();
 273          l != s->second.lines.end(); ++l) {
 274       if (!l->key.empty()) {
 275         oss << "\t" << l->key << " = \"" << l->val << "\"\n";
 276       }
 277     }
 278   }
 279   return oss;
 280 }
 281
 282 void ConfFile::
 283 load_from_buffer(const char *buf, size_t sz, std::deque<std::string> *errors,
 284                  std::ostream *warnings)
 285 {
 286   errors->clear();
 287
 288   section_iter_t::value_type vt("global", ConfSection());
 289   pair < section_iter_t, bool > vr(sections.insert(vt));
 290   assert(vr.second);
 291   section_iter_t cur_section = vr.first;
 292   std::string acc;
 293
 294   const char *b = buf;
 295   int line_no = 0;
 296   size_t line_len = -1;
 297   size_t rem = sz;
 298   while (1) {
 299     b += line_len + 1;
 300     if ((line_len + 1) > rem)
 301       break;
 302     rem -= line_len + 1;
 303     if (rem == 0)
 304       break;
 305     line_no++;
 306
 307     // look for the next newline
 308     const char *end = (const char*)memchr(b, '\n', rem);
 309     if (!end) {
 310       ostringstream oss;
 311       oss << "read_conf: ignoring line " << line_no << " because it doesn't "
 312           << "end with a newline! Please end the config file with a newline.";
 313       errors->push_back(oss.str());
 314       break;
 315     }
 316
 317     // find length of line, and search for NULLs
 318     line_len = 0;
 319     bool found_null = false;
 320     for (const char *tmp = b; tmp != end; ++tmp) {
 321       line_len++;
 322       if (*tmp == '\0') {
 323         found_null = true;
 324       }
 325     }
 326
 327     if (found_null) {
 328       ostringstream oss;
 329       oss << "read_conf: ignoring line " << line_no << " because it has "
 330           << "an embedded null.";
 331       errors->push_back(oss.str());
 332       acc.clear();
 333       continue;
 334     }
 335
 336     if (check_utf8(b, line_len)) {
 337       ostringstream oss;
 338       oss << "read_conf: ignoring line " << line_no << " because it is not "
 339           << "valid UTF8.";
 340       errors->push_back(oss.str());
 341       acc.clear();
 342       continue;
 343     }
 344
 345     if ((line_len >= 1) && (b[line_len-1] == '\\')) {
 346       // A backslash at the end of a line serves as a line continuation marker.
 347       // Combine the next line with this one.
 348       // Remove the backslash itself from the text.
 349       acc.append(b, line_len - 1);
 350       continue;
 351     }
 352
 353     acc.append(b, line_len);
 354
 355     //cerr << "acc = '" << acc << "'" << std::endl;
 356     ConfLine *cline = process_line(line_no, acc.c_str(), errors);
 357     acc.clear();
 358     if (!cline)
 359       continue;
 360     const std::string &csection(cline->newsection);
 361     if (!csection.empty()) {
 362       std::map <std::string, ConfSection>::value_type nt(csection, ConfSection());
 363       pair < section_iter_t, bool > nr(sections.insert(nt));
 364       cur_section = nr.first;
 365     }
 366     else {
 367       if (cur_section->second.lines.count(*cline)) {
 368         // replace an existing key/line in this section, so that
 369         //  [mysection]
 370         //    foo = 1
 371         //    foo = 2
 372         // will result in foo = 2.
 373         cur_section->second.lines.erase(*cline);
 374         if (cline->key.length() && warnings)
 375           *warnings << "warning: line " << line_no << ": '" << cline->key << "' in section '"
 376                     << cur_section->first << "' redefined " << std::endl;
 377       }
 378       // add line to current section
 379       //std::cerr << "cur_section = " << cur_section->first << ", " << *cline << std::endl;
 380       cur_section->second.lines.insert(*cline);
 381     }
 382     delete cline;
 383   }
 384
 385   if (!acc.empty()) {
 386     ostringstream oss;
 387     oss << "read_conf: don't end with lines that end in backslashes!";
 388     errors->push_back(oss.str());
 389   }
 390 }
 391
 392 /*
 393  * A simple state-machine based parser.
 394  * This probably could/should be rewritten with something like boost::spirit
 395  * or yacc if the grammar ever gets more complex.
 396  */
 397 ConfLine* ConfFile::
 398 process_line(int line_no, const char *line, std::deque<std::string> *errors)
 399 {
 400   enum acceptor_state_t {
 401     ACCEPT_INIT,
 402     ACCEPT_SECTION_NAME,
 403     ACCEPT_KEY,
 404     ACCEPT_VAL_START,
 405     ACCEPT_UNQUOTED_VAL,
 406     ACCEPT_QUOTED_VAL,
 407     ACCEPT_COMMENT_START,
 408     ACCEPT_COMMENT_TEXT,
 409   };
 410   const char *l = line;
 411   acceptor_state_t state = ACCEPT_INIT;
 412   string key, val, newsection, comment;
 413   bool escaping = false;
 414   while (true) {
 415     char c = *l++;
 416     switch (state) {
 417       case ACCEPT_INIT:
 418         if (c == '\0')
 419           return NULL; // blank line. Not an error, but not interesting either.
 420         else if (c == '[')
 421           state = ACCEPT_SECTION_NAME;
 422         else if ((c == '#') || (c == ';'))
 423           state = ACCEPT_COMMENT_TEXT;
 424         else if (c == ']') {
 425           ostringstream oss;
 426           oss << "unexpected right bracket at char " << (l - line)
 427               << ", line " << line_no;
 428           errors->push_back(oss.str());
 429           return NULL;
 430         }
 431         else if (isspace(c)) {
 432           // ignore whitespace here
 433         }
 434         else {
 435           // try to accept this character as a key
 436           state = ACCEPT_KEY;
 437           --l;
 438         }
 439         break;
 440       case ACCEPT_SECTION_NAME:
 441         if (c == '\0') {
 442           ostringstream oss;
 443           oss << "error parsing new section name: expected right bracket "
 444               << "at char " << (l - line) << ", line " << line_no;
 445           errors->push_back(oss.str());
 446           return NULL;
 447         }
 448         else if ((c == ']') && (!escaping)) {
 449           trim_whitespace(newsection, true);
 450           if (newsection.empty()) {
 451             ostringstream oss;
 452             oss << "error parsing new section name: no section name found? "
 453                 << "at char " << (l - line) << ", line " << line_no;
 454             errors->push_back(oss.str());
 455             return NULL;
 456           }
 457           state = ACCEPT_COMMENT_START;
 458         }
 459         else if (((c == '#') || (c == ';')) && (!escaping)) {
 460           ostringstream oss;
 461           oss << "unexpected comment marker while parsing new section name, at "
 462               << "char " << (l - line) << ", line " << line_no;
 463           errors->push_back(oss.str());
 464           return NULL;
 465         }
 466         else if ((c == '\\') && (!escaping)) {
 467           escaping = true;
 468         }
 469         else {
 470           escaping = false;
 471           newsection += c;
 472         }
 473         break;
 474       case ACCEPT_KEY:
 475         if ((((c == '#') || (c == ';')) && (!escaping)) || (c == '\0')) {
 476           ostringstream oss;
 477           if (c == '\0') {
 478             oss << "end of key=val line " << line_no
 479                 << " reached, no \"=val\" found...missing =?";
 480           } else {
 481             oss << "unexpected character while parsing putative key value, "
 482                 << "at char " << (l - line) << ", line " << line_no;
 483           }
 484           errors->push_back(oss.str());
 485           return NULL;
 486         }
 487         else if ((c == '=') && (!escaping)) {
 488           key = normalize_key_name(key);
 489           if (key.empty()) {
 490             ostringstream oss;
 491             oss << "error parsing key name: no key name found? "
 492                 << "at char " << (l - line) << ", line " << line_no;
 493             errors->push_back(oss.str());
 494             return NULL;
 495           }
 496           state = ACCEPT_VAL_START;
 497         }
 498         else if ((c == '\\') && (!escaping)) {
 499           escaping = true;
 500         }
 501         else {
 502           escaping = false;
 503           key += c;
 504         }
 505         break;
 506       case ACCEPT_VAL_START:
 507         if (c == '\0')
 508           return new ConfLine(key, val, newsection, comment, line_no);
 509         else if ((c == '#') || (c == ';'))
 510           state = ACCEPT_COMMENT_TEXT;
 511         else if (c == '"')
 512           state = ACCEPT_QUOTED_VAL;
 513         else if (isspace(c)) {
 514           // ignore whitespace
 515         }
 516         else {
 517           // try to accept character as a val
 518           state = ACCEPT_UNQUOTED_VAL;
 519           --l;
 520         }
 521         break;
 522       case ACCEPT_UNQUOTED_VAL:
 523         if (c == '\0') {
 524           if (escaping) {
 525             ostringstream oss;
 526             oss << "error parsing value name: unterminated escape sequence "
 527                 << "at char " << (l - line) << ", line " << line_no;
 528             errors->push_back(oss.str());
 529             return NULL;
 530           }
 531           trim_whitespace(val, false);
 532           return new ConfLine(key, val, newsection, comment, line_no);
 533         }
 534         else if (((c == '#') || (c == ';')) && (!escaping)) {
 535           trim_whitespace(val, false);
 536           state = ACCEPT_COMMENT_TEXT;
 537         }
 538         else if ((c == '\\') && (!escaping)) {
 539           escaping = true;
 540         }
 541         else {
 542           escaping = false;
 543           val += c;
 544         }
 545         break;
 546       case ACCEPT_QUOTED_VAL:
 547         if (c == '\0') {
 548           ostringstream oss;
 549           oss << "found opening quote for value, but not the closing quote. "
 550               << "line " << line_no;
 551           errors->push_back(oss.str());
 552           return NULL;
 553         }
 554         else if ((c == '"') && (!escaping)) {
 555           state = ACCEPT_COMMENT_START;
 556         }
 557         else if ((c == '\\') && (!escaping)) {
 558           escaping = true;
 559         }
 560         else {
 561           escaping = false;
 562           // Add anything, including whitespace.
 563           val += c;
 564         }
 565         break;
 566       case ACCEPT_COMMENT_START:
 567         if (c == '\0') {
 568           return new ConfLine(key, val, newsection, comment, line_no);
 569         }
 570         else if ((c == '#') || (c == ';')) {
 571           state = ACCEPT_COMMENT_TEXT;
 572         }
 573         else if (isspace(c)) {
 574           // ignore whitespace
 575         }
 576         else {
 577           ostringstream oss;
 578           oss << "unexpected character at char " << (l - line) << " of line "
 579               << line_no;
 580           errors->push_back(oss.str());
 581           return NULL;
 582         }
 583         break;
 584       case ACCEPT_COMMENT_TEXT:
 585         if (c == '\0')
 586           return new ConfLine(key, val, newsection, comment, line_no);
 587         else
 588           comment += c;
 589         break;
 590       default:
 591         ceph_abort();
 592         break;
 593     }
 594     assert(c != '\0'); // We better not go past the end of the input string.
 595   }
 596 }