From 844599b61c630d7fa3c80c6d949de08bd0a9cfac Mon Sep 17 00:00:00 2001 From: cxl Date: Mon, 19 Dec 2016 15:00:48 +0000 Subject: [PATCH] .tutorial git-svn-id: svn://ultimatepp.org/upp/trunk@10558 f0d560ea-af0d-0410-9eb7-867de7ffcac7 --- tutorial/CoreTutorial/CoPartition.cpp | 19 +++++++++--- tutorial/CoreTutorial/CoWork.cpp | 32 ++++++++++++++++----- tutorial/CoreTutorial/ConditionVariable.cpp | 2 +- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/tutorial/CoreTutorial/CoPartition.cpp b/tutorial/CoreTutorial/CoPartition.cpp index 7959b555b..7071ba24d 100644 --- a/tutorial/CoreTutorial/CoPartition.cpp +++ b/tutorial/CoreTutorial/CoPartition.cpp @@ -4,6 +4,10 @@ void CoPartitionTutorial() { /// .CoPartition + /// There is some overhead associated with CoWork worker threads. That is why e.g. + /// performing a simple operation on the array spawning a worker thread for each element is + /// not a good idea performance-wise: + Vector data; for(int i = 0; i < 10000; i++) data.Add(i); @@ -16,7 +20,12 @@ void CoPartitionTutorial() co.Finish(); DUMP(sum); - /// + /// The above code computes the sum of all elements in the `Vector`, using a CoWork job for + /// each element. While producing the correct result, it is likely to run much slower than + /// a single-threaded version. + + /// The solution to the problem is to split the array into a small number of larger subranges + /// that are processed in parallel. This is what the `CoPartition` template algorithm does: sum = 0; CoPartition(data, [&sum](const auto& subrange) { @@ -28,7 +37,8 @@ void CoPartitionTutorial() }); DUMP(sum); - /// + /// Note that CoWork is still internally used, so `CoWork::FinLock` is available. 
+ /// Instead of working on subranges, it is also possible to use iterators: sum = 0; CoPartition(data.begin(), data.end(), [&sum] (auto l, auto h) { @@ -40,7 +50,8 @@ void CoPartitionTutorial() }); DUMP(sum); - /// + /// There is no requirement on the type of iterators, so it is even possible to use just + /// indices: sum = 0; CoPartition(0, data.GetCount(), [&sum, &data] (int l, int h) { @@ -53,4 +64,4 @@ void CoPartitionTutorial() DUMP(sum); /// -} \ No newline at end of file +} diff --git a/tutorial/CoreTutorial/CoWork.cpp b/tutorial/CoreTutorial/CoWork.cpp index b222f6f40..222251eac 100644 --- a/tutorial/CoreTutorial/CoWork.cpp +++ b/tutorial/CoreTutorial/CoWork.cpp @@ -3,6 +3,17 @@ void CoWorkTutorial() { /// .`CoWork` + + /// `CoWork` is intended to be used when threads are used to speed up code by distributing tasks + /// over multiple CPU cores. `CoWork` spawns a single set of worker threads that exist for the + /// whole duration of the program run. `CoWork` instances then manage assigning jobs to these + /// worker threads and waiting for all the work to finish. + + /// Job units to `CoWork` are represented by `Function` and thus can be written + /// inline as lambdas. + + /// As an example, the following code reads an input file by lines, splits lines into words (this + /// is the parallelized work) and then adds the resulting words to `Index`: FileIn in(GetDataFile("test.txt")); // let us open some tutorial testing data @@ -13,18 +24,22 @@ void CoWorkTutorial() while(!in.IsEof()) { String ln = in.GetLine(); co & [ln, &w, &m] { - for(const auto& s : Split(ln, [](int c) { return IsAlpha(c) ? 0 : c; })) { - Mutex::Lock __(m); + Vector h = Split(ln, [](int c) { return IsAlpha(c) ? 0 : c; }); + Mutex::Lock __(m); + for(const auto& s : h) w.FindAdd(s); - } }; } co.Finish(); DUMP(w); - /// - + /// Adding words to `w` requires a `Mutex`. An alternative to this 'result gathering' `Mutex` is + /// CoWork::FinLock. 
The idea behind this is that CoWork requires an internal `Mutex` to + /// serialize access to common data, which is why `FinLock` locks this internal mutex a bit + /// earlier, saving CPU cycles required to lock and unlock a dedicated mutex. From an API + /// contract perspective, you can consider `FinLock` to serialize code till the end of + /// the worker job. in.Seek(0); while(!in.IsEof()) { @@ -36,8 +51,11 @@ void CoWorkTutorial() w.FindAdd(s); }; } - + co.Finish(); + DUMP(w); - /// + /// Of course, the code performed after FinLock should not take long, otherwise there is + /// a negative impact on all CoWork instances. In fact, from this perspective, the above code is + /// probably past this threshold... } diff --git a/tutorial/CoreTutorial/ConditionVariable.cpp b/tutorial/CoreTutorial/ConditionVariable.cpp index 15eb39d2e..93f5c8653 100644 --- a/tutorial/CoreTutorial/ConditionVariable.cpp +++ b/tutorial/CoreTutorial/ConditionVariable.cpp @@ -44,7 +44,7 @@ void ConditionVariableTutorial() t.Wait(); /// Important note: rarely thread can be resumed from `Wait` even if no other called - /// `Signal`. This is not a bud, but ^https://en.wikipedia.org/wiki/Spurious_wakeup:design + /// `Signal`. This is not a bug, but ^https://en.wikipedia.org/wiki/Spurious_wakeup:design /// decision for performance reason^. In practice it only means that situation has to be /// (re)checked after resume. } \ No newline at end of file