.NET 4.0 makes parallel programming easy. Below is an example of how to replace text across many .docx documents in parallel.
This example contains 4 functions.
1) Replace: This function opens a document and does the text replace.
2) NonParallel_ReplaceText: This is how you would replace text across multiple documents without using parallel execution. It is included for comparison's sake.
3) Parallel_ReplaceText: This is how you would replace text across multiple documents in parallel.
4) Main: This function does the work sequentially and then in parallel and prints the time taken for both.
Before running this code, replace the line:
DirectoryInfo di = new DirectoryInfo(@"C:\Users\Cathal\Desktop\multiple");
with a directory on your machine that contains many .docx documents.
Note(s):
1) There is overhead when executing code in parallel. Make sure you're doing enough work to justify parallel execution. For example, if you run this code on 4 small documents, the function NonParallel_ReplaceText may run faster than its parallel equivalent.
2) Run this example without the debugger; the debugger adds overhead that makes this code run significantly slower.
3) You can download and build the latest version of DocX.dll from here http://docx.codeplex.com/SourceControl/list/changesets#.
Code Snippet
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using Novacode;
- using System.Drawing;
- using System.Threading.Tasks;
- using System.IO;
- using System.Diagnostics;
namespace testDocX
{
    /// <summary>
    /// Console demo that replaces text across every .docx document in a directory,
    /// first sequentially and then in parallel, and prints how long each pass took.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Runs the sequential pass followed by the parallel pass and prints both timings.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Directory containing many .docx documents.
            DirectoryInfo di = new DirectoryInfo(@"C:\Users\Cathal\Desktop\multiple");

            // Report a missing directory with a readable message rather than
            // letting GetFiles throw an unhandled DirectoryNotFoundException.
            if (!di.Exists)
            {
                Console.WriteLine("Directory not found: " + di.FullName);
                Console.ReadKey();
                return;
            }

            // Print out the time taken in milliseconds.
            Console.WriteLine("Non-Parallel took " + NonParallel_ReplaceText(di, "pear", "raep") + " miliseconds.");

            // Print out the time taken in milliseconds.
            // This pass swaps the arguments, so it reverses the replacement made by the first pass.
            Console.WriteLine("Parallel took " + Parallel_ReplaceText(di, "raep", "pear") + " miliseconds.");

            // Wait until the user presses a key before exiting.
            Console.ReadKey();
        }

        /// <summary>
        /// Replaces text across all .docx documents in a directory, one document at a time.
        /// </summary>
        /// <param name="di">Directory whose .docx files are processed.</param>
        /// <param name="a">Text to search for.</param>
        /// <param name="b">Text to replace it with.</param>
        /// <returns>Elapsed time in milliseconds, including the directory listing.</returns>
        private static long NonParallel_ReplaceText(DirectoryInfo di, string a, string b)
        {
            // Start timing.
            Stopwatch sw = Stopwatch.StartNew();

            // Loop through each document in the specified directory.
            // Only *.docx files are taken so that unrelated files are never handed to DocX.Load.
            foreach (FileInfo fi in di.GetFiles("*.docx"))
            {
                // Replace text in this document.
                Replace(fi.FullName, a, b);
            }

            // Stop timing.
            sw.Stop();

            // Return the time taken in milliseconds.
            return sw.ElapsedMilliseconds;
        }

        /// <summary>
        /// Replaces text across all .docx documents in a directory using Parallel.ForEach.
        /// </summary>
        /// <param name="di">Directory whose .docx files are processed.</param>
        /// <param name="a">Text to search for.</param>
        /// <param name="b">Text to replace it with.</param>
        /// <returns>Elapsed time in milliseconds, including the directory listing.</returns>
        private static long Parallel_ReplaceText(DirectoryInfo di, string a, string b)
        {
            // Start timing.
            Stopwatch sw = Stopwatch.StartNew();

            // Process each document in the specified directory in parallel.
            // Each iteration calls Replace, which loads and saves its own document.
            // Only *.docx files are taken so that unrelated files are never handed to DocX.Load.
            Parallel.ForEach
            (
                di.GetFiles("*.docx"),
                currentFile =>
                {
                    Replace(currentFile.FullName, a, b);
                }
            );

            // Stop timing.
            sw.Stop();

            // Return the time taken in milliseconds.
            return sw.ElapsedMilliseconds;
        }

        /// <summary>
        /// Replaces the string <paramref name="a"/> with the string <paramref name="b"/>
        /// in the document at <paramref name="filename"/> and saves the changes.
        /// </summary>
        /// <param name="filename">Full path of the document to modify.</param>
        /// <param name="a">Text to search for.</param>
        /// <param name="b">Text to replace it with.</param>
        static void Replace(string filename, string a, string b)
        {
            // Load the document.
            using (DocX document = DocX.Load(filename))
            {
                // Replace text in this document.
                document.ReplaceText(a, b);

                // Save changes made to this document.
                document.Save();
            } // Dispose the document when the using block ends.
        }
    }
}