/**
 * Splits `inputText` into windows and awaits `processFn` on each one, in order.
 *
 * Consecutive windows start `chunkLength - overlapLength` characters apart, so
 * each main chunk shares its first `overlapLength` characters with the end of
 * the previous one. The `overlapChunk` argument is the (up to) `overlapLength`
 * characters that immediately precede the window's `startIndex`; it is empty
 * for the first window.
 *
 * @param {string} inputText - Text to split.
 * @param {Function} [processFn] - Called as
 *   `processFn(mainChunk, overlapChunk, { startIndex, endIndex, isLastChunk, chunkIndex, numChunks })`
 *   and awaited before the next window is processed. Defaults to logging the chunk.
 * @param {object} [config]
 * @param {number} [config.chunkLength=1000] - Maximum length of each main chunk.
 * @param {number} [config.overlapLength=0] - Characters shared between consecutive windows.
 * @returns {Promise<Array|string>} The values returned by `processFn` that are
 *   not `undefined`, or `inputText` itself when no such value was collected.
 * @throws {Error} If `chunkLength` is not greater than `overlapLength`.
 */
export async function processTextInChunks(inputText, processFn = async (text) => console.log(text), config = {}) {
  const {
    chunkLength = 1000,
    overlapLength = 0
  } = config;

  // A non-positive stride would either divide by zero below or never advance
  // the window, so reject it up front (same rule as preprocessTextChunks).
  if (chunkLength <= overlapLength) {
    throw new Error("chunkLength must be greater than overlapLength");
  }

  const textLength = inputText.length;
  const stride = chunkLength - overlapLength;

  // Non-empty text always needs at least one window, even when it is no longer
  // than the overlap (the raw formula would yield 0 in that case).
  const numChunks = textLength === 0
    ? 0
    : Math.max(1, Math.ceil((textLength - overlapLength) / stride));

  const results = [];
  let startIndex = 0;

  for (let chunkIndex = 0; chunkIndex < numChunks; chunkIndex++) {
    const endIndex = Math.min(startIndex + chunkLength, textLength);
    const mainChunk = inputText.slice(startIndex, endIndex);

    // Text just before this window; nothing precedes the first one.
    const overlapChunk = startIndex > 0
      ? inputText.slice(Math.max(0, startIndex - overlapLength), startIndex)
      : '';

    const result = await processFn(mainChunk, overlapChunk, {
      startIndex,
      endIndex,
      isLastChunk: endIndex === textLength,
      chunkIndex,
      numChunks
    });

    // Callbacks that return nothing (e.g. the logging default) contribute no result.
    if (result !== undefined) {
      results.push(result);
    }

    startIndex = endIndex - overlapLength;
    if (startIndex >= textLength) break;
  }

  return results.length > 0 ? results : inputText;
}
/**
 * Synchronously cuts `inputText` into overlapping windows and returns their
 * descriptors without processing them.
 *
 * Each window after the first begins `overlapLength` characters before the end
 * of the previous one. `overlapChunk` holds the (up to) `overlapLength`
 * characters that immediately precede the window's `startIndex`, and is empty
 * for the first window.
 *
 * @param {string} inputText - Text to split.
 * @param {object} [config]
 * @param {number} [config.chunkLength=1000] - Maximum length of each main chunk.
 * @param {number} [config.overlapLength=0] - Characters shared between consecutive windows.
 * @returns {{numChunks: number, chunks: Array<{mainChunk: string, overlapChunk: string, startIndex: number, endIndex: number, isLastChunk: boolean}>}}
 *   The window descriptors in order, plus their count.
 * @throws {Error} If `chunkLength` is not greater than `overlapLength`.
 */
export function preprocessTextChunks(inputText, config = {}) {
  const { chunkLength = 1000, overlapLength = 0 } = config;

  if (overlapLength >= chunkLength) {
    throw new Error("chunkLength must be greater than overlapLength");
  }

  const totalLength = inputText.length;
  const chunks = [];

  // The window start is advanced manually at the bottom of the loop body.
  for (let from = 0; from < totalLength; ) {
    const to = Math.min(from + chunkLength, totalLength);
    const reachedEnd = to === totalLength;

    // Nothing precedes the very first window.
    const preceding = from > 0
      ? inputText.slice(Math.max(0, from - overlapLength), from)
      : '';

    chunks.push({
      mainChunk: inputText.slice(from, to),
      overlapChunk: preceding,
      startIndex: from,
      endIndex: to,
      isLastChunk: reachedEnd
    });

    if (reachedEnd) {
      break;
    }

    // Step back by the overlap so the next window re-covers the tail of this one.
    from = to - overlapLength;
  }

  return { numChunks: chunks.length, chunks };
}