/* LICENCE see end of file! */ /* Environemental note The program is created for Arduino/ESP8266 and is tested on WIN10 only! */ /* --->>> READ THIS: <<<--- Developer, don't forget to update the version number in the private section of the class! */ #ifndef DUMMY // Mandatory Preprocessor Items ================================= // Meta items --------------------------------------------------------------- #ifndef _10_SPLIT_TEXT_IS_STANDALONE #define _10_SPLIT_TEXT_IS_STANDALONE true // If running as a standalone #endif // _10_SPLIT_TEXT_IS_STANDALONE // REQUIRED IN GLOBAL SPACE ================================================! String textToSplit; // Reserved long String to avoid heap fragmentation #endif // Mandatory Preprocessor Items #include "shared_includes.h" #include "shared_globals.h" class SplitText { /** * @class SplitText * @brief Utility class for identifying start and end indices of "words" * within a text using protected and non-protected delimiters. * * This class processes a text and computes the positions of substrings * (referred to as "words") based on user-specified delimiters. * * Features: * - **Protected delimiters**: * Two consecutive delimiters (e.g., " " or "xx") act as protection * markers. Substrings enclosed by such pairs are treated as a single * unit, and delimiters within the protected section are ignored. * Protected delimiters must always appear in pairs. * * - **Non-protected delimiters**: * Single occurrences of the delimiter act as regular separators. * * - **Delimiter constraints**: * - Delimiters can be one or two characters long. * - Two-character delimiters must consist of identical characters * repeated (e.g., "xx"). * - Sequences of more than two identical delimiters are invalid. * * Example usage: * SplitText splitText; * splitText.freshSplitData("one protected two three", " "); * * std::pair pos; * do { * pos = splitText.getNextSplitPositions(); * if (pos.first != -1) { * // pos.first and pos.second define the indices of the "word" * // You can extract the substring like this (optional): * // String word = text.substring(pos.first, pos.second); * } * } while (pos.first != -1); * ``` */ #ifndef DUMMY // private: =================================================== private: /***************** FILE AND VERSION ****************/ /* ! ! ! ! ! ! ! UPDATE VERSION NO ! ! ! ! ! ! ! */ static constexpr char FILE_AND_VERSION[8] = "10.0058"; // Identifiers are listed in alphabetical order /* coreIndex */ int coreIndex ; /* delimiter */ String delimiter ; /* delimiterChar */ char delimiterChar ; /* freshJobDataReceived */ bool freshJobDataReceived ; /* textToSplitLen */ int textToSplitLen ; int retStart ; // Start-Index to be returned int retEnd ; // End-Index to be returned #endif // private: #ifndef DUMMY // public: ==================================================== public: #ifndef DUMMY // initialize() =============================================== static void initialize() { textToSplit = ""; } #endif // initialize() #ifndef DUMMY // freshSplitData() =========================================== void freshSplitData(String text, String delimiter) { /** * @brief Prepares a new split operation by providing the text to process * and the delimiter to use. * * This method initializes the internal state of the SplitText instance * with a fresh text and delimiter. It also performs validation checks * on the input to ensure that: * - The delimiter is valid (1 or 2 identical characters). * - No sequences of more than two identical delimiters exist. * - All protected (double) delimiters in the text are properly paired. * * @param text The text to be split into "words". * @param delimiter The delimiter used for splitting. Can be a single * character or a two-character sequence of identical chars * (e.g., " " or "xx"). * * @note Sets an internal flag (`freshJobDataReceived`) to indicate * that a new split-job is active. Validation errors set * `errorType = 1`. */ freshJobDataReceived = true; // Populate textToSplit with parameter text textToSplit = text; // Disambiguate identical names of class member-values and parameters this->delimiter = delimiter; // Dependables delimiterChar = delimiter.charAt(0); textToSplitLen = textToSplit.length(); #ifndef DUMMY // Validate delimiters passed int doubleDelimiterCount = 0; bool runTests = true; /* * 1) Error if delimiter sequences of length > 2 are detected ------- */ if (textToSplit.indexOf(delimiter + delimiter + delimiter) != -1) { // error condition errorType = 1; // WARN #if _10_SPLIT_TEXT_IS_STANDALONE Serial << endl; Serial << "Error: " << __FILE__ << " " << FILE_AND_VERSION << " line " << __LINE__ << endl; Serial << "Test: Triple delimiters found!" << endl; #endif // _10_SPLIT_TEXT_IS_STANDALONE } /* * 2) Error if protected delimiters are not paired ----------------- */ for (int i = 0; i < textToSplitLen - 1; i++) { if (textToSplit.charAt(i) == delimiterChar && textToSplit.charAt(i + 1) == delimiterChar) { doubleDelimiterCount++; // i++; // skip the next one to avoid overlap } } if (doubleDelimiterCount % 2 != 0) { // ==> not paired! // error condition errorType = 1; // WARN #if _10_SPLIT_TEXT_IS_STANDALONE Serial << endl; Serial << "Error: " << __FILE__ << " " << FILE_AND_VERSION << " line " << __LINE__ << endl; Serial << "Test: Protected delimiters not paired!" << endl; #endif // _10_SPLIT_TEXT_IS_STANDALONE } #endif // Validate delimiters passed } #endif // freshSplitData() #ifndef DUMMY // getNextSplitPositions() ==================================== std::pair getNextSplitPositions() { /** * @brief Returns the start and end indices of the next "word" in the text. * * This method scans the text from the current position and determines * the boundaries (start and end indices) of the next substring * (referred to as a "word"). It distinguishes between: * - **Protected words**: Enclosed by paired double delimiters (e.g., " "). * Delimiters inside such sections are ignored. * - **Non-protected words**: Separated by single delimiters. * * After returning a result, the internal index advances to the next * candidate "word" for subsequent calls. * * @return A std::pair: * - `.first`: Start index of the word (inclusive). * - `.second`: End index of the word (exclusive). * Returns `(-1, -1)` if no more words are found. * * @note On the first call after freshSplitData(), the internal * cursor resets to the beginning of the text. */ bool isDoubleDelimiter = false; String doubleDelimiter = String(delimiterChar) + String(delimiterChar); // Method initial call handling if (freshJobDataReceived) { freshJobDataReceived = false; // Is set true in freshSplitData coreIndex = 0 ; // Set coreIndex to zero } // Return (-1, -1,) if end of String is reached if (coreIndex >= textToSplitLen) { return std::make_pair(-1, -1); } /* General note: * * 1) Once an ordinary word has been processed, the coreIndex needs to be * positioned ON the leading DELIMITER of the next "word"! * * 2) Once a protected "word" has been processed, the coreIndex needs to be * positioned AFTER the leading DELIMITER of the next "word"! * (ending and leading delimiter relates to the same "word"-boarder here) */ // If a delimiter is found if (textToSplit.charAt(coreIndex) == delimiterChar) { coreIndex++; // Advance by one /* If it is a double delimiter (doubleDelimiter -s are guaranteed to be paired) */ if (textToSplit.charAt(coreIndex) == delimiterChar) { isDoubleDelimiter = true; // Beginning of a protected "word" coreIndex++; // Advance one more } } // Start-index of "word" retStart = coreIndex; /* Progress char by char to find the end of the "word" * (or next split position) and return the indices. */ while (true) { // Increment coreIndex until end of "word" is found if (isDoubleDelimiter == true) { coreIndex = textToSplit.indexOf(doubleDelimiter, coreIndex); } else if (textToSplit.charAt(coreIndex) != delimiterChar && coreIndex < textToSplitLen) { coreIndex++; continue; } // coreIndex here is positioned directly at the end of a "word" retEnd = coreIndex; if (isDoubleDelimiter) { retStart = retStart -2; retEnd = retEnd +2; coreIndex = retEnd; } else { // NOOP } // Avoid returning an empty word (possible in case of a trailing simple delimiter in textToSplit) if (retStart == retEnd && retStart == textToSplitLen) { return std::make_pair(-1, -1); } // Return "word"-indices return std::make_pair(retStart, retEnd); } // error condition errorType = 1; // WARN return std::make_pair(-1, -1); } #endif // getNextSplitPositions() #endif // public: }; // Instantiation of class ===================================================== SplitText splitText; #if _10_SPLIT_TEXT_IS_STANDALONE // setup() and loop() void setup() { #include "shared_setup.h" // slpitText() test-calls ================================================= // slpitText.freshSplitData() // Parameter format: text, delimiter, bool (protected=true) #ifndef DUMMY // Tests freshSplitData() // Trigger 1.1 Error if delimiter sequences of length > 2 are existing - */ splitText.freshSplitData("Ein Text, der einen dreifachen Delimiter enthält.", " "); // tested OK // Trigger 1.2) Error if protected delimiters are not paired splitText.freshSplitData("Ein Text mit ungepaarten geschützten Delimitern.", " "); #endif // Tests freshSplitData() #ifndef DUMMY // Tests getNextSplitPositions() String tmpText ; String tmpDelimiter ; String tmpWord ; String tmpLeadPadding1 ; String tmpLeadPadding2 ; std::pair tmpPos ; #ifndef DUMMY // 2.1 A simple phrase Serial << endl; Serial << endl; // Init tmpPos = std::make_pair(0, 0); // Input: tmpText = "Ein einfacher Satz."; tmpDelimiter = " "; splitText.freshSplitData(tmpText, tmpDelimiter); // Output: Serial << "Input (2.1):" << endl; Serial << "Text >>>" << tmpText << "<<<" << endl; Serial << "Delimiter >>>" << tmpDelimiter << "<<<" << endl; Serial << endl; Serial << "Output:" << endl; while (true) { // Check for end of textToSplit if (tmpPos.first == -1 && tmpPos.second == -1) { Serial << endl << "No more words to extract." << endl; break; } // Get the next word positions tmpPos = splitText.getNextSplitPositions(); tmpLeadPadding1=""; if (tmpPos.first > -1 && tmpPos.first < 10) {tmpLeadPadding1=" ";} tmpLeadPadding2=""; if (tmpPos.second > -1 && tmpPos.second < 10) {tmpLeadPadding2=" ";} Serial << "Indices " << tmpLeadPadding1 << tmpPos.first << " to " << tmpLeadPadding2 << tmpPos.second; tmpWord = textToSplit.substring(tmpPos.first, tmpPos.second); Serial << ": >>" << tmpWord << "<<" << endl; } #endif // 2.1 A simple phrase #ifndef DUMMY // 2.2 Delimiter am Anfang und am Ende Serial << endl; Serial << endl; // Init tmpPos = std::make_pair(0, 0); // Input: tmpText = " Mit Delimiter am Anfang und am Ende. "; tmpDelimiter = " "; splitText.freshSplitData(tmpText, tmpDelimiter); // Output: Serial << "Input (2.2):" << endl; Serial << "Text >>>" << tmpText << "<<<" << endl; Serial << "Delimiter >>>" << tmpDelimiter << "<<<" << endl; Serial << endl; Serial << "Output:" << endl; while (true) { // Check for end of textToSplit if (tmpPos.first == -1 && tmpPos.second == -1) { Serial << endl << "No more words to extract." << endl; break; } // Get the next word positions tmpPos = splitText.getNextSplitPositions(); tmpLeadPadding1=""; if (tmpPos.first > -1 && tmpPos.first < 10) {tmpLeadPadding1=" ";} tmpLeadPadding2=""; if (tmpPos.second > -1 && tmpPos.second < 10) {tmpLeadPadding2=" ";} Serial << "Indices " << tmpLeadPadding1 << tmpPos.first << " to " << tmpLeadPadding2 << tmpPos.second; tmpWord = textToSplit.substring(tmpPos.first, tmpPos.second); Serial << ": >>" << tmpWord << "<<" << endl; } #endif // 2.2 Delimiter am Anfang und am Ende #ifndef DUMMY // 2.3 Protected 'word' in the middle Serial << endl; Serial << endl; // Init tmpPos = std::make_pair(0, 0); // Input: tmpText = "Text mit gepaartem Doppel-Delimiter bzw. geschütztem 'Wort' ."; tmpDelimiter = " "; splitText.freshSplitData(tmpText, tmpDelimiter); // Output: Serial << "Input (2.3):" << endl; Serial << "Text >>>" << tmpText << "<<<" << endl; Serial << "Delimiter >>>" << tmpDelimiter << "<<<" << endl; Serial << endl; Serial << "Output:" << endl; while (true) { // Check for end of textToSplit if (tmpPos.first == -1 && tmpPos.second == -1) { Serial << endl << "No more words to extract." << endl; break; } // Get the next word positions tmpPos = splitText.getNextSplitPositions(); tmpLeadPadding1=""; if (tmpPos.first > -1 && tmpPos.first < 10) {tmpLeadPadding1=" ";} tmpLeadPadding2=""; if (tmpPos.second > -1 && tmpPos.second < 10) {tmpLeadPadding2=" ";} Serial << "Indices " << tmpLeadPadding1 << tmpPos.first << " to " << tmpLeadPadding2 << tmpPos.second; tmpWord = textToSplit.substring(tmpPos.first, tmpPos.second); Serial << ": >>" << tmpWord << "<<" << endl; } #endif // 2.3 Protected 'word' in the middle #ifndef DUMMY // 2.4 Protected section at the beginning and at the end Serial << endl; Serial << endl; // Init tmpPos = std::make_pair(0, 0); // Input: tmpText = " Geschützte Sektion am Anfang und am Ende "; tmpDelimiter = " "; splitText.freshSplitData(tmpText, tmpDelimiter); // Output: Serial << "Input (2.4):" << endl; Serial << "Text >>>" << tmpText << "<<<" << endl; Serial << "Delimiter >>>" << tmpDelimiter << "<<<" << endl; Serial << endl; Serial << "Output:" << endl; while (true) { // Check for end of textToSplit if (tmpPos.first == -1 && tmpPos.second == -1) { Serial << endl << "No more words to extract." << endl; break; } // Get the next word positions tmpPos = splitText.getNextSplitPositions(); tmpLeadPadding1=""; if (tmpPos.first > -1 && tmpPos.first < 10) {tmpLeadPadding1=" ";} tmpLeadPadding2=""; if (tmpPos.second > -1 && tmpPos.second < 10) {tmpLeadPadding2=" ";} Serial << "Indices " << tmpLeadPadding1 << tmpPos.first << " to " << tmpLeadPadding2 << tmpPos.second; tmpWord = textToSplit.substring(tmpPos.first, tmpPos.second); Serial << ": >>" << tmpWord << "<<" << endl; } #endif // 2.4 Protected section at the beginning and at the end #endif // Tests getNextSplitPositions() } void loop() { // NOOP } #endif // setup() and loop() /*** History *** * 2025-07-21 : * - Revised .getNextSplitPositions() to * - 1-pass only is required for both, protected and ordinary sections * - protected sections are to be enclosed in double-delimiters * 2025-06-25: * - Revised tests and error-checks in preparation for revision of * .getNextSplitPositions() * 2025-06-18: * - Added licence (#046) * - Corrected various logical pitfalls (#045) * 2025-06-15 (#040-42): * - Moved textToSplit to global scope, accessible for all * - Corrected endless loops, simplified error and invalid checks logic * - Basic setup, including freshSplitData() */ /***** LICENCE CC BY-NC-SA 4.0 ***** Fermenting Box Control © 2024 by Uli / ulisblog.info / ulrich-hauser.de ======================================================================= including its header-files and custom-libraries is licensed under Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International CC BY-NC-SA 4.0 To view a copy of this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0// */