/* LICENCE see end of file! */ /* Environemental note The program is created for Arduino/ESP8266 and is tested on WIN10 only! */ /* --->>> READ THIS: <<<--- Developer, don't forget to update the version number here! */ // File main control ---------------------------------------------------------- #pragma once // Use this file only once to avoid redefinition problems class SplitText { /** * @class SplitText * @brief Utility class for identifying start and end indices of "words" * within a text using protected and non-protected delimiters. * * This class is a helper class to OledPrint and processes a text and computes the positions of substrings * (referred to as "words") based on user-specified delimiters. * * Features: * - **Protected delimiters**: * Two consecutive delimiters (e.g., " " or "xx") act as protection * markers. Substrings enclosed by such pairs are treated as a single * unit, and delimiters within the protected section are ignored. * Protected delimiters must always appear in pairs. * * - **Non-protected delimiters**: * Single occurrences of the delimiter act as regular separators. * * - **Delimiter constraints**: * - Delimiters can be one or two characters long. * - Two-character delimiters must consist of identical characters * repeated (e.g., "xx"). * - Sequences of more than two identical delimiters are invalid. * * Example usage: * SplitText splitText; * splitText.freshSplitData("one protected two three", " "); * * std::pair pos; * do { * pos = splitText.getNextSplitPositions(); * if (pos.first != -1) { * // pos.first and pos.second define the indices of the "word" * // You can extract the substring like this (optional): * // String word = text.substring(pos.first, pos.second); * } * } while (pos.first != -1); * ``` */ #ifndef DUMMY // private: =================================================== private: /***************** FILE AND VERSION ****************/ /* ! ! ! ! ! ! ! UPDATE VERSION NO ! ! ! ! ! ! ! */ // Values visible in THIS file only! // namespace SelfValues { // static constexpr char FILE_AND_VERSION[] = "10.0061"; // } // Identifiers are sorted in alphabetical order /* coreIndex */ int coreIndex ; /* delimiter */ String delimiter ; /* delimiterChar */ char delimiterChar ; /* freshJobDataReceived */ bool freshJobDataReceived ; /* textToSplitLen */ int textToSplitLen ; int retStart ; // Start-Index to be returned int retEnd ; // End-Index to be returned #endif // private: #ifndef DUMMY // public: ==================================================== public: #ifndef DUMMY // initialize() =============================================== static void initialize() { // textMsg_XXL = ""; } #endif // initialize() #ifndef DUMMY // freshSplitData() =========================================== void freshSplitData(const String& textToSplit, String delimiter) { /** * @brief Prepares a new split operation by providing the text to process * and the delimiter to use. * * This method initializes the internal state of the SplitText instance * with a fresh text and delimiter. It also performs validation checks * on the input to ensure that: * - The delimiter is valid (1 or 2 identical characters). * - No sequences of more than two identical delimiters exist. * - All protected (double) delimiters in the text are properly paired. * * @param text The text to be split into "words". * @param delimiter The delimiter used for splitting. Can be a single * character or a two-character sequence of identical chars * (e.g., " " or "xx"). * * @note Sets an internal flag (`freshJobDataReceived`) to indicate * that a new split-job is active. Validation errors set * `errorType = 1`. */ freshJobDataReceived = true; // Refer to const String& text as textToSplit // const String& textToSplit = text; // Disambiguate identical names of class member-values and parameters this->delimiter = delimiter; // Dependables delimiterChar = delimiter.charAt(0); textToSplitLen = textToSplit.length(); #ifndef DUMMY // Validate delimiters passed int doubleDelimiterCount = 0; /* * 0) Error if length of String is near the maximum reserved size -- * */ if (textToSplitLen >= SIZE_RESERVED_STRING_XXL -3) { /* Arbitrary number */ // error condition } /* * 1) Error if delimiter sequences of length > 2 are detected ------ * */ if (textToSplit.indexOf(delimiter + delimiter + delimiter) != -1) { // error condition // errorType = 1; // WARN #if _10_SPLIT_TEXT_IS_STANDALONE Serial << endl; Serial << "Error: " << __FILE__ << " " << FILE_AND_VERSION << " line " << __LINE__ << endl; Serial << "Test: Triple delimiters found!" << endl; #endif // _10_SPLIT_TEXT_IS_STANDALONE } /* * 2) Error if protected delimiters are not paired ----------------- * */ for (int i = 0; i < textToSplitLen - 1; i++) { if (textToSplit.charAt(i) == delimiterChar && textToSplit.charAt(i + 1) == delimiterChar) { doubleDelimiterCount++; // i++; // skip the next one to avoid overlap } } if (doubleDelimiterCount % 2 != 0) { // ==> not paired! // error condition // errorType = 1; // WARN #if _10_SPLIT_TEXT_IS_STANDALONE Serial << endl; Serial << "Error: " << __FILE__ << " " << FILE_AND_VERSION << " line " << __LINE__ << endl; Serial << "Test: Protected delimiters not paired!" << endl; #endif // _10_SPLIT_TEXT_IS_STANDALONE } #endif // Validate delimiters passed } #endif // freshSplitData() #ifndef DUMMY // getNextSplitPositions() ==================================== std::pair getNextSplitPositions() { /** * @brief Returns the start and end indices of the next "word" in the text. * * This method scans the text from the current position and determines * the boundaries (start and end indices) of the next substring * (referred to as a "word"). It distinguishes between: * - **Protected words**: Enclosed by paired double delimiters (e.g., " "). * Delimiters inside such sections are ignored. * - **Non-protected words**: Separated by single delimiters. * * After returning a result, the internal index advances to the next * candidate "word" for subsequent calls. * * @return A std::pair: * - `.first`: Start index of the word (inclusive). * - `.second`: End index of the word (exclusive). * Returns `(-1, -1)` if no more words are found. * * @note On the first call after freshSplitData(), the internal * cursor resets to the beginning of the text. */ bool isDoubleDelimiter = false; String doubleDelimiter = String(delimiterChar) + String(delimiterChar); // Method initial call handling if (freshJobDataReceived) { freshJobDataReceived = false; // Is set true in freshSplitData coreIndex = 0 ; // Set coreIndex to zero } // Return (-1, -1,) if end of String is reached if (coreIndex >= textToSplitLen) { return std::make_pair(-1, -1); } /* General note: * a) Any entry to getNextSplitPositions() here means: * We are at the beginning of a new word * * b) Once an ordinary word has been processed, the coreIndex needs to be * positioned AT the leading DELIMITER of the next "word"! * * c) Once a protected "word" has been processed, the coreIndex needs to be * positioned AFTER the leading DELIMITER of the next "word"! * (ending and leading delimiter relates to the same "word"-boarder here) */ /* * 1) Move index to first char of new 'word' ----------------------- * * * The first char of a word may be found: * - At the very beginning of textMsg_XXL * - At a delimiter anywhere in the 'middle' of textMsg_XXL */ // If a delimiter is found ... if (textMsg_XXL.charAt(coreIndex) == delimiterChar) { coreIndex++; // Advance by one // If it is a double delimiter (doubleDelimiter -s are paired!) if (textMsg_XXL.charAt(coreIndex) == delimiterChar) { isDoubleDelimiter = true; // Beginning of a protected "word" coreIndex++; // Advance one more } } // ... the start-index of a "word" is found retStart = coreIndex; /* * 2) Progress char by char to find the end of the 'word' ---------- * * * - 'end' is defined as the next split position OR the * very end of textMsg_XXL * * - return the start and end indices */ while (true) { // Increment coreIndex until end of "word" is found if (isDoubleDelimiter == true) { coreIndex = textMsg_XXL.indexOf(doubleDelimiter, coreIndex); } else if (textMsg_XXL.charAt(coreIndex) != delimiterChar && coreIndex < textToSplitLen) { coreIndex++; continue; } // coreIndex here is positioned directly at the end of a "word" retEnd = coreIndex; if (isDoubleDelimiter) { retStart = retStart -2; retEnd = retEnd +2; coreIndex = retEnd; } else { // NOOP } // Avoid returning an empty word // (possible in case of a trailing single delimiter in textMsg_XXL) if (retStart == retEnd && retStart == textToSplitLen) { return std::make_pair(-1, -1); } // Return "word"-indices return std::make_pair(retStart, retEnd); } // error condition // errorType = 1; // WARN return std::make_pair(-1, -1); } #endif // getNextSplitPositions() #endif // public: }; // Instantiation of class ===================================================== SplitText splitText; /* History: * 2025-07-27: * - In freshSplitData(...), changed the parameter 'String text' to 'const * String& text'. This syntax now is passing the String by reference * only and the String itself securely is not altered! * 2025-07-22: * - Renamed textToSplit to textMsg_XXL * 2025-07-21: * - Revised .getNextSplitPositions() to * - 1-pass only is required for both, protected and ordinary sections * - protected sections are to be enclosed in double-delimiters * 2025-06-25: * - Revised tests and error-checks in preparation for revision of * .getNextSplitPositions() * 2025-06-18: * - Added licence (#046) * - Corrected various logical pitfalls (#045) * 2025-06-15 (#040-42): * - Moved textToSplit to global scope, accessible for all * - Corrected endless loops, simplified error and invalid checks logic * - Basic setup, including freshSplitData() */ /***** LICENCE by Uli / ulisblog.info / ulrich-hauser.de *****/ /***** Fermenting Box Control © 2024 *****/ /* This licence is including related header- and .cpp files and is licensed * under: * * Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International * * CC BY-NC-SA 4.0 * * To view a copy of this license, please visit: * https://creativecommons.org/licenses/by-nc-sa/4.0// * */