/* LICENCE see end of file! */

/* Environmental note

                The program is created for Arduino/ESP8266
                    and is tested on WIN10 only! 

*/

/*                    --->>> READ THIS: <<<---


           Developer, don't forget to update the version number 
                 in the private section of the class!


*/

// NOTE(review): DUMMY is never defined in this file, so the `#ifndef DUMMY`
// guards appear to serve only as named, foldable regions - confirm that no
// build configuration defines DUMMY.
#ifndef DUMMY // Mandatory Preprocessor Items =================================
  // Meta items ---------------------------------------------------------------
  // Defaults to true unless the including sketch defined the macro beforehand.
  // In this file it enables the Serial diagnostics inside SplitText and the
  // setup()/loop() test driver at the end of the file.
  #ifndef   _10_SPLIT_TEXT_IS_STANDALONE
  #define   _10_SPLIT_TEXT_IS_STANDALONE true // If running as a standalone
  #endif // _10_SPLIT_TEXT_IS_STANDALONE

  // REQUIRED IN GLOBAL SPACE ================================================!
  // Shared buffer holding the text that is currently being split. It is
  // written by SplitText::initialize() and SplitText::freshSplitData() and
  // read by SplitText::getNextSplitPositions().
  // NOTE(review): no reserve() call is visible in this file - confirm where
  // (or whether) the capacity mentioned below is actually reserved.
  String textToSplit; // Reserved long String to avoid heap fragmentation

#endif // Mandatory Preprocessor Items

#include "shared_includes.h"
#include "shared_globals.h"

class SplitText {
  /**
   * @class SplitText
   * @brief Utility class for identifying start and end indices of "words" 
   *        within a text using protected and non-protected delimiters.
   *
   * This class processes a text and computes the positions of substrings 
   * (referred to as "words") based on user-specified delimiters.
   *
   * Features:
   * - **Protected delimiters**: 
   *   Two consecutive delimiters (e.g., "  " or "xx") act as protection 
   *   markers. Substrings enclosed by such pairs are treated as a single 
   *   unit, and delimiters within the protected section are ignored. 
   *   Protected delimiters must always appear in pairs.
   *
   * - **Non-protected delimiters**: 
   *   Single occurrences of the delimiter act as regular separators.
   *
   * - **Delimiter constraints**:
   *   - Delimiters can be one or two characters long.
   *   - Two-character delimiters must consist of identical characters 
   *     repeated (e.g., "xx").
   *   - Sequences of more than two identical delimiters are invalid.
   *
   * Example usage:
   * SplitText splitText;
   * splitText.freshSplitData("one  protected  two three", " ");
   * 
   * std::pair<int, int> pos;
   * do {
   *     pos = splitText.getNextSplitPositions();
   *     if (pos.first != -1) {
   *         // pos.first and pos.second define the indices of the "word"
   *         // You can extract the substring like this (optional):
   *         // String word = text.substring(pos.first, pos.second);
   *     }
   * } while (pos.first != -1);
   * ```
   */

  #ifndef DUMMY // private: ===================================================
  private:
     /***************** FILE AND VERSION ****************/ 



     /* ! ! ! ! ! ! !  UPDATE VERSION NO  ! ! ! ! ! ! ! */
     static constexpr char FILE_AND_VERSION[8] = "10.0058";
  



  // Identifiers are listed in alphabetical order
  /* coreIndex                */  int    coreIndex            ;
  /* delimiter                */  String delimiter            ;
  /* delimiterChar            */  char   delimiterChar        ;
  /* freshJobDataReceived     */  bool   freshJobDataReceived ;
  /* textToSplitLen           */  int    textToSplitLen       ;

  int  retStart ;  // Start-Index to be returned
  int  retEnd   ;  // End-Index to be returned

  #endif // private:

  #ifndef DUMMY // public: ====================================================
  public:

  #ifndef DUMMY // initialize() ===============================================
  static void initialize() {
    textToSplit = "";
  }
  #endif // initialize()

  #ifndef DUMMY // freshSplitData() ===========================================
  void freshSplitData(String text, String delimiter) {
    /**
     * @brief Prepares a new split operation by providing the text to process
     *        and the delimiter to use.
     *
     * This method initializes the internal state of the SplitText instance
     * with a fresh text and delimiter. It also performs validation checks
     * on the input to ensure that:
     * - The delimiter is valid (1 or 2 identical characters).
     * - No sequences of more than two identical delimiters exist.
     * - All protected (double) delimiters in the text are properly paired.
     *
     * @param text The text to be split into "words".
     * @param delimiter The delimiter used for splitting. Can be a single
     *        character or a two-character sequence of identical chars
     *        (e.g., " " or "xx").
     *
     * @note Sets an internal flag (`freshJobDataReceived`) to indicate
     *       that a new split-job is active. Validation errors set 
     *       `errorType = 1`.
     */

    freshJobDataReceived = true;

    // Populate textToSplit with parameter text
    textToSplit             = text;

    // Disambiguate identical names of class member-values and parameters
    this->delimiter         = delimiter;

    // Dependables
    delimiterChar           = delimiter.charAt(0);
    textToSplitLen          = textToSplit.length();
    
    #ifndef DUMMY // Validate delimiters passed
    int doubleDelimiterCount = 0;
    bool runTests = true;
    
    /* * 1) Error if delimiter sequences of length > 2 are detected ------- */
    if (textToSplit.indexOf(delimiter + delimiter + delimiter) != -1) {
      // error condition
      errorType = 1; // WARN
      #if _10_SPLIT_TEXT_IS_STANDALONE
      Serial << endl;
      Serial << "Error: " 
             << __FILE__ << " " << FILE_AND_VERSION 
             << " line " << __LINE__ << endl;
      Serial << "Test: Triple delimiters found!" << endl;
      #endif // _10_SPLIT_TEXT_IS_STANDALONE
    }

    /* * 2) Error if protected delimiters are not paired ----------------- */
    for (int i = 0; i < textToSplitLen - 1; i++) {
      if (textToSplit.charAt(i) == delimiterChar &&
          textToSplit.charAt(i + 1) == delimiterChar) {
        doubleDelimiterCount++;
        // i++; // skip the next one to avoid overlap
      }
    }
    
    if (doubleDelimiterCount % 2 != 0) { // ==> not paired!
      // error condition
      errorType = 1; // WARN
      #if _10_SPLIT_TEXT_IS_STANDALONE
      Serial << endl;
      Serial << "Error: " 
             << __FILE__ << " " << FILE_AND_VERSION 
             << " line " << __LINE__ << endl;
      Serial << "Test: Protected delimiters not paired!" << endl;
      #endif // _10_SPLIT_TEXT_IS_STANDALONE
    }
    #endif // Validate delimiters passed
  }
  #endif // freshSplitData()

  #ifndef DUMMY // getNextSplitPositions() ====================================
  std::pair<int, int> getNextSplitPositions() {
    /**
     * @brief Returns the start and end indices of the next "word" in the text.
     *
     * This method scans the text from the current position and determines
     * the boundaries (start and end indices) of the next substring 
     * (referred to as a "word"). It distinguishes between:
     * - **Protected words**: Enclosed by paired double delimiters (e.g., "  ").
     *   Delimiters inside such sections are ignored.
     * - **Non-protected words**: Separated by single delimiters.
     *
     * After returning a result, the internal index advances to the next 
     * candidate "word" for subsequent calls.
     *
     * @return A std::pair<int, int>:
     *         - `.first`: Start index of the word (inclusive).
     *         - `.second`: End index of the word (exclusive).
     *         Returns `(-1, -1)` if no more words are found.
     *
     * @note On the first call after freshSplitData(), the internal
     *       cursor resets to the beginning of the text.
     */

    bool isDoubleDelimiter = false;
    String doubleDelimiter = String(delimiterChar) + String(delimiterChar);

    // Method initial call handling
    if (freshJobDataReceived) { 
      freshJobDataReceived = false;  // Is set true in freshSplitData
      coreIndex            = 0    ;  // Set coreIndex to zero
    }

    // Return (-1, -1,) if end of String is reached
    if (coreIndex >= textToSplitLen) {
      return std::make_pair(-1, -1);
    }

    /* General note:
     *
     * 1) Once an ordinary word has been processed, the coreIndex needs to be
     * positioned ON the leading DELIMITER of the next "word"!
     *
     * 2) Once a protected "word" has been processed, the coreIndex needs to be
     * positioned AFTER the leading DELIMITER of the next "word"!
     * (ending and leading delimiter relates to the same "word"-boarder here)
     */

    // If a delimiter is found
    if (textToSplit.charAt(coreIndex) == delimiterChar) {
      coreIndex++; // Advance by one

      /* If it is a double delimiter (doubleDelimiter -s are guaranteed to be paired) */
      if (textToSplit.charAt(coreIndex) == delimiterChar) {
        isDoubleDelimiter = true; // Beginning of a protected "word"
        coreIndex++;              // Advance one more
      }
    }

    // Start-index of "word"
    retStart = coreIndex;

    /* Progress char by char to find the end of the "word" 
     * (or next split position) and return the indices.
     */
    while (true) {
      // Increment coreIndex until end of "word" is found
      if (isDoubleDelimiter == true) {
        coreIndex = textToSplit.indexOf(doubleDelimiter, coreIndex);
      } else if (textToSplit.charAt(coreIndex) != delimiterChar && 
        coreIndex < textToSplitLen) {
        coreIndex++;
        continue;
      }

      // coreIndex here is positioned directly at the end of a "word"
      retEnd = coreIndex;

      if (isDoubleDelimiter) {
        retStart  = retStart -2;
        retEnd    = retEnd   +2;
        coreIndex = retEnd;
      } else {
        // NOOP
      }

      // Avoid returning an empty word (possible in case of a trailing simple delimiter in textToSplit)
      if (retStart == retEnd && retStart == textToSplitLen) {
        return std::make_pair(-1, -1);
      }

      // Return "word"-indices
      return  std::make_pair(retStart, retEnd);
    }

    // error condition
    errorType = 1; // WARN
    return std::make_pair(-1, -1);
  }

  #endif // getNextSplitPositions()

  #endif // public:
};

// Instantiation of class =====================================================
// Global instance; the standalone test driver in setup() below uses it for
// all freshSplitData() / getNextSplitPositions() calls.
SplitText splitText;

#if _10_SPLIT_TEXT_IS_STANDALONE // setup() and loop()
/**
 * @brief Standalone test driver for SplitText (only compiled when
 *        _10_SPLIT_TEXT_IS_STANDALONE is true).
 *
 * First feeds two deliberately invalid texts into freshSplitData() to trigger
 * its validation messages, then runs four split scenarios (2.1 - 2.4) and
 * prints every index pair returned by getNextSplitPositions() together with
 * the corresponding substring to Serial.
 */
void setup() {

  // NOTE(review): presumably performs the common board/Serial set-up -
  // the content of this header is not visible here, confirm.
  #include "shared_setup.h"

  // splitText test-calls ===================================================

  // splitText.freshSplitData()
  // Parameter format: text, delimiter
  #ifndef DUMMY // Tests freshSplitData()
  // Trigger 1.1) Error if delimiter sequences of length > 2 exist
  splitText.freshSplitData("Ein   Text, der einen dreifachen Delimiter enthält.", " "); // tested OK

  // Trigger 1.2) Error if protected delimiters are not paired
  splitText.freshSplitData("Ein  Text  mit ungepaarten  geschützten Delimitern.", " ");
  #endif // Tests freshSplitData()

  #ifndef DUMMY // Tests getNextSplitPositions()
  /* Runs one split scenario and logs input and output to Serial.
   *
   * testId    : label printed in the "Input (...)" headline, e.g. "2.1"
   * text      : text to split
   * delimiter : delimiter handed to freshSplitData()
   *
   * Every result of getNextSplitPositions() is printed, including the
   * terminating (-1, -1) pair, so the log shows where the iteration ended.
   */
  auto runSplitTest = [](const char* testId,
                         const String& text,
                         const String& delimiter) {
    Serial << endl;
    Serial << endl;

    // Input:
    splitText.freshSplitData(text, delimiter);

    Serial << "Input (" << testId << "):" << endl;
    Serial <<  "Text >>>" << text << "<<<" << endl;
    Serial <<  "Delimiter >>>" << delimiter << "<<<" << endl;

    // Output:
    Serial << endl;
    Serial << "Output:" << endl;

    std::pair<int, int> pos;
    do {
      // Get the next word positions
      pos = splitText.getNextSplitPositions();

      // Right-align single-digit indices for a tidy column layout
      const char* padFirst  = (pos.first  > -1 && pos.first  < 10) ? " " : "";
      const char* padSecond = (pos.second > -1 && pos.second < 10) ? " " : "";

      Serial << "Indices " << padFirst << pos.first << " to " << padSecond << pos.second;
      const String word = textToSplit.substring(pos.first, pos.second);
      Serial << ": >>" << word << "<<" << endl;
    } while (!(pos.first == -1 && pos.second == -1));

    Serial << endl << "No more words to extract." << endl;
  };

  // 2.1 A simple phrase
  runSplitTest("2.1", "Ein einfacher Satz.", " ");

  // 2.2 Delimiter at the beginning and at the end
  runSplitTest("2.2", " Mit Delimiter am Anfang und am Ende. ", " ");

  // 2.3 Protected 'word' in the middle
  runSplitTest("2.3", "Text mit gepaartem Doppel-Delimiter bzw.  geschütztem 'Wort'  .", " ");

  // 2.4 Protected section at the beginning and at the end
  runSplitTest("2.4", "  Geschützte Sektion  am Anfang und  am Ende  ", " ");

  #endif // Tests getNextSplitPositions()
}

/**
 * @brief Main loop of the standalone build; intentionally empty because all
 *        test calls are made once in setup().
 */
void loop() {}
#endif // setup() and loop()


/*** History ***
 *  2025-07-21 :
 *  - Revised .getNextSplitPositions() to
 *    - 1-pass only is required for both, protected and ordinary sections
 *    - protected sections are to be enclosed in double-delimiters
 *  2025-06-25:
 *  - Revised tests and error-checks in preparation for revision of
 *    .getNextSplitPositions()
 *  2025-06-18:
 *  - Added licence (#046)
 *  - Corrected various logical pitfalls (#045)
 *  2025-06-15 (#040-42):
 *  - Moved textToSplit to global scope, accessible for all
 *  - Corrected endless loops, simplified error and invalid checks logic
 *  - Basic setup, including freshSplitData()
 */

/***** LICENCE CC BY-NC-SA 4.0 *****

Fermenting Box Control © 2024 by Uli / ulisblog.info / ulrich-hauser.de
=======================================================================

including its header-files and custom-libraries is licensed under 
Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International

                 CC BY-NC-SA 4.0

To view a copy of this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0//

*/