/////////////////////////////////////////////////////////
//TextChunker.m
//PodReader
////////////////////////////////////////////////////////

#import "TextChunker.h"

//Upper bound (in UTF-16 code units) that the sizing pass aims for per chunk.
static const NSUInteger kPRChunkLength = 4000;

//Step by which the candidate cut point is moved back while looking for a
//line ending to split an oversized chunk at.
static const NSUInteger kPRCutDelta = 50;

@implementation TextChunker

//Splits `text' into an array of NSMutableStrings, cutting at the start of
//the line located via each occurrence of `separator'. Leading and trailing
//whitespace/newlines of the not-yet-consumed remainder are dropped after
//every cut. Returns an empty array for empty text.
- (NSMutableArray *)chunksBySplittingText:(NSString *)text
                              atSeparator:(NSString *)separator
{
    NSMutableArray *chunks = [NSMutableArray array];
    NSCharacterSet *blanks = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSString *remaining = text;

    while ([remaining length] != 0)
    {
        //Search from index 1 so that a separator sitting at the very start
        //of the remaining text cannot produce an empty chunk.
        NSRange found = [[remaining substringFromIndex:1] rangeOfString:separator];

        //Compare inside the NSRange's own type: storing the location in a
        //32-bit `unsigned' first truncates NSNotFound on 64-bit builds and
        //the check never fires.
        NSUInteger probeIndex;
        if (found.location == NSNotFound)
        {
            probeIndex = [remaining length];
        }
        else
        {
            //NOTE(review): found.location is relative to the substring that
            //starts at index 1, so this probes the character just BEFORE
            //the separator. That is what the original code did; it is kept
            //as is because the intended alignment cannot be confirmed here.
            probeIndex = found.location;
        }

        //The probed line is ideally the heading such as `CHAPTER 1' that
        //introduces the next chapter, section, act, etc.
        NSRange probedLine = [remaining lineRangeForRange:NSMakeRange(probeIndex, 0)];

        //If the probed line starts the remaining text, use that whole line
        //as the chunk; otherwise cut right before the probed line.
        NSUInteger endOfChunk = (probedLine.location == 0) ? probedLine.length
                                                           : probedLine.location;

        [chunks addObject:[NSMutableString stringWithString:
            [remaining substringToIndex:endOfChunk]]];

        remaining = [[remaining substringFromIndex:endOfChunk]
            stringByTrimmingCharactersInSet:blanks];
    }

    return chunks;
}

//Returns the index at which a chunk longer than kPRChunkLength should be
//split. Prefers a line ending below kPRChunkLength; falls back to a cut at
//kPRChunkLength when even the first line is too long.
- (NSUInteger)splitIndexForOversizedChunk:(NSString *)chunk
{
    NSUInteger cutPoint = kPRChunkLength - kPRCutDelta;

    for (;;)
    {
        //The range starts at 0, so the returned length is the index just
        //past the end of the line that contains the cut point.
        NSRange lines = [chunk lineRangeForRange:NSMakeRange(0, cutPoint)];
        if (lines.length < kPRChunkLength)
        {
            return lines.length;
        }
        if (cutPoint == 0)
        {
            break;
        }
        //Back up and try again; clamp so the unsigned value can never wrap.
        cutPoint = (cutPoint > kPRCutDelta) ? (cutPoint - kPRCutDelta) : 0;
    }

    //Arbitrary segmentation. Snap back to the start of the composed
    //character sequence so a surrogate pair is never cut in half.
    NSUInteger hardCut =
        [chunk rangeOfComposedCharacterSequenceAtIndex:kPRChunkLength].location;
    return (hardCut > 0) ? hardCut : kPRChunkLength;
}

//Sizes chunks as close to kPRChunkLength as possible, in place:
//segments a chunk that is too big, merges a chunk into its predecessor when
//the two together stay below the limit, and leaves it `as is' otherwise.
//`chunks' must contain NSMutableStrings only.
- (void)resizeChunks:(NSMutableArray *)chunks
{
    NSUInteger index = 0;

    while (index < [chunks count])
    {
        NSMutableString *chunk = [chunks objectAtIndex:index];
        NSMutableString *previous = (index > 0) ? [chunks objectAtIndex:(index - 1)] : nil;

        if ([chunk length] > kPRChunkLength)
        {
            //Keep the head in place and queue the tail right behind it; the
            //tail is examined (and possibly split again) on the next pass.
            NSUInteger splitAt = [self splitIndexForOversizedChunk:chunk];
            NSMutableString *tail = [NSMutableString stringWithString:
                [chunk substringFromIndex:splitAt]];
            [chunks insertObject:tail atIndex:(index + 1)];
            [chunk deleteCharactersInRange:
                NSMakeRange(splitAt, [chunk length] - splitAt)];
            index++;
        }
        else if (previous != nil &&
                 [chunk length] + [previous length] < kPRChunkLength)
        {
            //Fold this chunk into its predecessor. Don't advance: after the
            //removal, index already points to the next chunk.
            [previous appendString:chunk];
            [chunks removeObjectAtIndex:index];
        }
        else
        {
            index++;
        }
    }
}

//Reads the text file at `fileName', splits it into chunks at `separator',
//sizes the chunks towards kPRChunkLength and writes each one into `destDir'.
//
//Output files are named after the input file (without its extension)
//followed by a 1-based chunk number, zero-padded so that all names have the
//same length and the files appear in order. The literal text "NEXT" is
//appended to every chunk except the last one.
//
//Does nothing (besides logging) when an argument is nil or the file cannot
//be read, and nothing at all when the file is empty.
- (void)chunkIt:(NSString*)fileName toDir:(NSString*)destDir withSeparator:(NSString*)separator
{
    if (fileName == nil || destDir == nil || separator == nil)
    {
        NSLog(@"TextChunker: nil argument, nothing to do");
        return;
    }

    //NOTE(review): stringWithContentsOfFile: and writeToFile:atomically:
    //are deprecated. They are kept on purpose so that the encoding of the
    //files read and written does not change; migrate both together.
    NSString *fileContents = [NSString stringWithContentsOfFile:fileName];
    if (fileContents == nil)
    {
        NSLog(@"TextChunker: could not read %@", fileName);
        return;
    }

    //Autoreleased collections: nothing to release by hand, so no leak if
    //an exception is raised part-way through.
    NSMutableArray *chunks = [self chunksBySplittingText:fileContents
                                             atSeparator:separator];
    [self resizeChunks:chunks];

    NSUInteger chunkCount = [chunks count];
    if (chunkCount == 0)
    {
        //Empty input: previously this underflowed `count - 1' and raised.
        return;
    }

    NSUInteger numDigits =
        [[NSString stringWithFormat:@"%lu", (unsigned long)chunkCount] length];
    NSString *filePrefix = [[fileName lastPathComponent] stringByDeletingPathExtension];
    NSString *basePath = [destDir stringByAppendingPathComponent:filePrefix];

    NSUInteger position;
    for (position = 0; position < chunkCount; position++)
    {
        NSString *number =
            [NSString stringWithFormat:@"%lu", (unsigned long)(position + 1)];
        NSString *padding = [@"" stringByPaddingToLength:(numDigits - [number length])
                                              withString:@"0"
                                         startingAtIndex:0];
        NSString *path = [[basePath stringByAppendingString:padding]
            stringByAppendingString:number];

        //Every chunk but the last one gets the trailing marker.
        NSString *output = [chunks objectAtIndex:position];
        if (position + 1 < chunkCount)
        {
            output = [output stringByAppendingString:@"NEXT"];
        }

        if (![output writeToFile:path atomically:YES])
        {
            NSLog(@"TextChunker: could not write %@", path);
        }
    }
}

@end