src/lib/INIReader.cpp
branchv_0
changeset 23 b497140b0b63
parent 22 29d673a54ecf
child 24 07e0a2edf3bc
equal deleted inserted replaced
22:29d673a54ecf 23:b497140b0b63
    31 private:
    31 private:
    32 	std::istream& input;
    32 	std::istream& input;
    33 	std::vector<INIContentHandler*> handlers;
    33 	std::vector<INIContentHandler*> handlers;
    34 
    34 
    35 	/** 
    35 	/** 
    36 	 * This might be configurable.
       
    37 	 * 
       
    38 	 * By default, we ignore all leading whitespace on continuing lines.
    36 	 * By default, we ignore all leading whitespace on continuing lines.
    39 	 * If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
    37 	 * If there should be some spaces or tabs, they should be placed on the previous line before the „\“.
    40 	 * If a line break is desired, it should be written as \n (escaped) or the value should be quoted in " or '.
    38 	 * If a line break is desired, it should be written as \n (escaped) or the value should be quoted in " or '.
    41 	 * 
    39 	 * 
    42 	 * Related specifications:
    40 	 * Related specifications:
    43 	 *  - https://docs.oracle.com/javase/8/docs/api/index.html?java/util/Properties.html
    41 	 *  - https://docs.oracle.com/javase/8/docs/api/index.html?java/util/Properties.html
    44 	 */
    42 	 */
    45 	bool consumeLeadingSpacesOnContinuingLines = true;
    43 	bool trimLeadingSpacesOnContinuingLines = true;
    46 
    44 
    47 	/**
    45 	/**
    48 	 * This might be configurable.
       
    49 	 * 
       
    50 	 * KDE uses some weird INI dialect that allows [section][x] syntax where „x“ is a kind of „tag“ that signals some properties of the given section.
    46 	 * KDE uses some weird INI dialect that allows [section][x] syntax where „x“ is a kind of „tag“ that signals some properties of the given section.
    51 	 * Line „[section_1][$i]“ means that the „section_1“ is „locked“.
    47 	 * Line „[section_1][$i]“ means that the „section_1“ is „locked“.
    52 	 * We may emit this information somehow later, but for now, it is just ignored.
    48 	 * We may emit this information somehow later, but for now, it is just ignored.
    53 	 * 
    49 	 * 
    54 	 * TODO: Is „section tag“ right name?
    50 	 * TODO: Is „section tag“ right name?
    57 	 *  - https://userbase.kde.org/KDE_System_Administration/Configuration_Files#Lock_Down
    53 	 *  - https://userbase.kde.org/KDE_System_Administration/Configuration_Files#Lock_Down
    58 	 */
    54 	 */
    59 	bool allowSectionTags = true;
    55 	bool allowSectionTags = true;
    60 
    56 
    61 	/**
    57 	/**
    62 	 * This might be configurable.
       
    63 	 * 
       
    64 	 * If whole key is „aaa[bbb]“ then „aaa“ is considered to be the key and „bbb“ the sub-key.
    58 	 * If whole key is „aaa[bbb]“ then „aaa“ is considered to be the key and „bbb“ the sub-key.
    65 	 * No \[ escaping is currently supported, so the key might not contain the bracket character.
    59 	 * No \[ escaping is currently supported, so the key might not contain the bracket character.
    66 	 * 
    60 	 * 
    67 	 * Related specifications:
    61 	 * Related specifications:
    68 	 *  - https://userbase.kde.org/KDE_System_Administration/Configuration_Files#Shell_Expansion
    62 	 *  - https://userbase.kde.org/KDE_System_Administration/Configuration_Files#Shell_Expansion
    69 	 *  - https://specifications.freedesktop.org/desktop-entry-spec/latest/ar01s05.html
    63 	 *  - https://specifications.freedesktop.org/desktop-entry-spec/latest/ar01s05.html
    70 	 */
    64 	 */
    71 	bool allowSubKeys = true;
    65 	bool allowSubKeys = true;
    72 
    66 
    73 	/**
    67 	/**
    74 	 * This might be configurable.
       
    75 	 * 
       
    76 	 * Classic INI uses „key=value“ syntax.
    68 	 * Classic INI uses „key=value“ syntax.
    77 	 * But some other formats/dialects might use key:value.
    69 	 * But some other formats/dialects might use key:value.
    78 	 * 
    70 	 * 
    79 	 * Only single character separators are supported.
    71 	 * Only single character separators are supported.
    80 	 * If multiple separators should be recognized (e.g. both „=“ and „:“), this string will contain all of them,
    72 	 * If multiple separators should be recognized (e.g. both „=“ and „:“), this string will contain all of them,
    81 	 * i.e. „:=“ does not mean the „key:=value“ syntax, but either „key=value“ or „key:value“.
    73 	 * i.e. „:=“ does not mean the „key:=value“ syntax, but either „key=value“ or „key:value“.
    82 	 */
    74 	 */
    83 	std::string keyValueSeparators = "=";
    75 	std::string keyValueSeparators = "=";
    84 
    76 
    85 	/**
    77 	/**
    86 	 * This might be configurable.
       
    87 	 * 
       
    88 	 * Classic INI uses „; comment“ syntax.
    78 	 * Classic INI uses „; comment“ syntax.
    89 	 * But many existing files contain „# comment“ lines.
    79 	 * But many existing files contain „# comment“ lines.
    90 	 * 
    80 	 * 
    91 	 * Only single character separators are supported (works same as keyValueSeparators).
    81 	 * Only single character separators are supported (works same as keyValueSeparators).
    92 	 */
    82 	 */
    93 	std::string commentSeparators = ";#";
    83 	std::string commentSeparators = ";#";
    94 
    84 
    95 	/**
    85 	/**
    96 	 * This might be configurable.
       
    97 	 * 
       
    98 	 * INI files often support both "quotes" and 'apostrophes' styles.
    86 	 * INI files often support both "quotes" and 'apostrophes' styles.
    99 	 * But some dialects may support only one of them or not support quoting at all.
    87 	 * But some dialects may support only one of them or not support quoting at all.
   100 	 * 
    88 	 * 
   101 	 * In such case e.g. „key="some value"“ would mean that the value is „"some value"“ (including the quotes).
    89 	 * In such case e.g. „key="some value"“ would mean that the value is „"some value"“ (including the quotes).
   102 	 * Thus it is important to allow disabling quote recognizing (which is done by setting this parameter to empty string).
    90 	 * Thus it is important to allow disabling quote recognizing (which is done by setting this parameter to empty string).
   144 		for (char ch = peek(); input.good() && (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); ch = peek()) result.put(get());
   132 		for (char ch = peek(); input.good() && (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'); ch = peek()) result.put(get());
   145 		return result.str();
   133 		return result.str();
   146 	}
   134 	}
   147 
   135 
   148 	void processContinuingLine(std::stringstream& result) {
   136 	void processContinuingLine(std::stringstream& result) {
   149 		if (consumeLeadingSpacesOnContinuingLines) readSpacesAndTabs();
   137 		if (trimLeadingSpacesOnContinuingLines) readSpacesAndTabs();
   150 		else result.put('\n');
   138 		else result.put('\n');
   151 	}
   139 	}
   152 
   140 
   153 	std::string readUntil(const char until, bool* found = nullptr) {
   141 	std::string readUntil(const char until, bool* found = nullptr) {
   154 		return readUntil(std::string(1, until), found);
   142 		return readUntil(std::string(1, until), found);
   235 
   223 
   236 	std::string trim(std::string s) {
   224 	std::string trim(std::string s) {
   237 		return std::regex_replace(s, std::regex("^\\s+|\\s+$"), "");
   225 		return std::regex_replace(s, std::regex("^\\s+|\\s+$"), "");
   238 	}
   226 	}
   239 
   227 
       
   228 	/**
       
   229 	 * TODO: use a common method
       
   230 	 */
       
   231 	bool parseBoolean(const std::string& value) {
       
   232 		if (value == "true") return true;
       
   233 		else if (value == "false") return false;
       
   234 		else throw std::invalid_argument(std::string("Unable to parse boolean value: ") + value + " (expecting true or false)");
       
   235 	}
       
   236 
       
   237 	void setDialect(const std::string& name) {
       
   238 		if (name == "default-ini") {
       
   239 			// already set
       
   240 		} else if (name == "java-properties") {
       
   241 			trimLeadingSpacesOnContinuingLines = true;
       
   242 			allowSectionTags = false;
       
   243 			allowSubKeys = false;
       
   244 			commentSeparators = "#";
       
   245 			keyValueSeparators = "=:";
       
   246 			quotes = "";
       
   247 			// TODO: allowSections = false;
       
   248 			// TODO: enable unicode unescaping
       
   249 		} else {
       
   250 			throw std::invalid_argument(std::string("Unsupported INI dialect: ") + name);
       
   251 		}
       
   252 	}
       
   253 
   240 public:
   254 public:
   241 
   255 
   242 	INIReaderImpl(std::istream& input) : input(input) {
   256 	INIReaderImpl(std::istream& input) : input(input) {
       
   257 	}
       
   258 
       
   259 	void setOption(const std::string& uri, const std::string& value) override {
       
   260 		if (uri == "trim-continuing-lines") trimLeadingSpacesOnContinuingLines = parseBoolean(value); // TODO: continuing lines modes (enum), not just boolean
       
   261 		// TODO: else if (uri == "allow-sections") allowSections = parseBoolean(value);
       
   262 		else if (uri == "allow-section-tags") allowSectionTags = parseBoolean(value);
       
   263 		else if (uri == "allow-sub-keys") allowSubKeys = parseBoolean(value);
       
   264 		else if (uri == "comment-separators") commentSeparators = value;
       
   265 		else if (uri == "key-value-separators") keyValueSeparators = value;
       
   266 		else if (uri == "quotes") quotes = value;
       
   267 		else if (uri == "dialect") setDialect(value);
       
   268 		else throw std::invalid_argument(std::string("Invalid parser option: „") + uri + "“ with value: „" + value + "“");
   243 	}
   269 	}
   244 
   270 
   245 	void addHandler(INIContentHandler* handler) override {
   271 	void addHandler(INIContentHandler* handler) override {
   246 		handlers.push_back(handler);
   272 		handlers.push_back(handler);
   247 	}
   273 	}