Added a simple, non-optimized StringTokenizer class for tokenizing strings. Also added a contains(char) method to the String class, because it was handy for implementing the StringTokenizer.

svn-id: r30828
2008-02-08 04:11:20 +00:00 · 2008-02-08 04:11:20 +00:00 · b6cad0f0ce
commit b6cad0f0ce
parent e3852c92a7
4 changed files with 58 additions and 0 deletions
--- a/common/str.cpp
+++ b/common/str.cpp
@ -226,6 +226,10 @@ bool String::contains(const char *x) const {
 	return strstr(c_str(), x) != NULL;
 }

+/**
+ * Checks whether the character x occurs in this string.
+ *
+ * The search covers exactly the _len characters of the string: the
+ * terminating NUL is never reported as a match, and characters located
+ * after an embedded NUL are still found.
+ *
+ * @param x The character to look for.
+ * @return true if x is one of the string's characters, false otherwise.
+ */
+bool String::contains(char x) const {
+	// strchr() treats the terminator as part of the string, which would make
+	// contains('\0') true for every string and stop at the first embedded NUL.
+	// memchr() is bounded by the real length instead.
+	return memchr(c_str(), x, _len) != NULL;
+}
+
 void String::deleteLastChar() {
 	deleteChar(_len - 1);
 }
--- a/common/str.h
+++ b/common/str.h
@ -133,6 +133,7 @@ public:
 	bool hasPrefix(const char *x) const;

 	bool contains(const char *x) const;
+	bool contains(char x) const;	// true if the character x occurs in the string

 	inline const char *c_str() const		{ return _str; }
 	inline uint size() const				{ return _len; }
--- a/common/util.cpp
+++ b/common/util.cpp
@ -96,6 +96,33 @@ bool matchString(const char *str, const char *pat) {
 	}
 }

+/**
+ * Builds a tokenizer that keeps its own copies of str and delimiters and
+ * starts out positioned at the beginning of the string.
+ */
+StringTokenizer::StringTokenizer(const String &str, const String &delimiters)
+	: _str(str), _delimiters(delimiters), _tokenBegin(0), _tokenEnd(0) {
+}
+
+/**
+ * Rewinds the tokenizer: both token markers go back to index zero, so the
+ * next call to nextToken() scans from the start of the string again.
+ */
+void StringTokenizer::reset() {
+	_tokenBegin = 0;
+	_tokenEnd = 0;
+}
+
+/**
+ * Tells whether any token remains after the most recently returned one.
+ * Does not move the tokenizer.
+ *
+ * @return true if only delimiters (or nothing at all) are left, false otherwise.
+ */
+bool StringTokenizer::empty() const {
+	const uint length = _str.size();
+	uint pos = _tokenEnd;
+	// Walk over the delimiters that follow the last returned token
+	while (pos < length && _delimiters.contains(_str[pos]))
+		pos++;
+	// Stopping short of the end means a non-delimiter, i.e. another token, was hit
+	return pos >= length;
+}
+
+/**
+ * Extracts the next token and leaves the tokenizer positioned right after it.
+ *
+ * @return The token, or a zero-length string when no tokens are left.
+ */
+String StringTokenizer::nextToken() {
+	const uint length = _str.size();
+
+	// Skip the delimiters in front of the token, starting where the previous token ended
+	_tokenBegin = _tokenEnd;
+	while (_tokenBegin < length && _delimiters.contains(_str[_tokenBegin]))
+		_tokenBegin++;
+
+	// Extend the token up to the next delimiter or the end of the string
+	_tokenEnd = _tokenBegin;
+	while (_tokenEnd < length && !_delimiters.contains(_str[_tokenEnd]))
+		_tokenEnd++;
+
+	// Copy out the characters in [_tokenBegin, _tokenEnd)
+	return String(_str.c_str() + _tokenBegin, _tokenEnd - _tokenBegin);
+}
+
 //
 // Print hexdump of the data passed in
 //
--- a/common/util.h
+++ b/common/util.h
@ -74,6 +74,32 @@ namespace Common {
 */
 bool matchString(const char *str, const char *pat);

+/**
+ * A simple non-optimized string tokenizer.
+ *
+ * The tokenizer stores its own copy of the input string and of the delimiter
+ * set. A token is a maximal run of non-delimiter characters, so leading,
+ * trailing and consecutive delimiters never produce empty tokens.
+ *
+ * Example of use:
+ * StringTokenizer("Now, this is a test!", " ,!") gives tokens "Now", "this", "is", "a" and "test" using nextToken().
+ */
+class StringTokenizer {
+public:
+	/**
+	 * Creates a StringTokenizer.
+	 * @param str The string to be tokenized.
+	 * @param delimiters String containing all the delimiter characters (i.e. the characters to be ignored).
+	 * @note Uses space, horizontal tab, carriage return, newline, form feed and vertical tab as delimiters by default.
+	 */
+	StringTokenizer(const String &str, const String &delimiters = " \t\r\n\f\v");
+	void reset();       //!< Resets the tokenizer to its initial state (tokenizing restarts from the beginning of the string)
+	bool empty() const; //!< Returns true if there are no more tokens left in the string, false otherwise (does not advance the tokenizer)
+	String nextToken(); //!< Returns the next token from the string and advances past it (Or an empty string if there are no more tokens)
+
+private:
+	const String _str;        //!< The string to be tokenized (a copy, fixed for the tokenizer's lifetime)
+	const String _delimiters; //!< String containing all the delimiter characters (a copy, fixed for the tokenizer's lifetime)
+	uint         _tokenBegin; //!< Index of the latest found token's first character (Valid after a call to nextToken(), zero otherwise)
+	uint         _tokenEnd;   //!< Index one past the latest found token's last character (Valid after a call to nextToken(), zero otherwise)
+};
+
 /**
 * Print a hexdump of the data passed in. The number of bytes per line is
 * customizable.