C++ example for Adapter Design Pattern

The Adapter pattern is used to convert the programming interface of one class into that of another. We use adapters whenever we want unrelated classes to work together in a single program. The concept of an adapter is thus pretty simple; we write a class that has the desired interface and then make it communicate with the class that has a different interface.

There are two ways to do this: by inheritance, and by object composition. In the first case, we derive a new class from the nonconforming one and add the methods we need to make the new derived class match the desired interface. The other way is to include the original class inside the new one and create methods in the new class that translate calls into the original class's interface. These two approaches are termed class adapters and object adapters, respectively.
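To make the distinction concrete, here is a minimal sketch of the first approach (a class adapter), using made-up LegacyRectangle and Shape classes purely for illustration: the adapter inherits both the desired interface and the nonconforming class, and translates calls.

//A minimal class-adapter sketch (illustrative names only)
#include <iostream>

using namespace std;

//The existing class with a nonconforming interface
class LegacyRectangle
{
public:
    void OldDraw(int x, int y, int w, int h)
    {
        cout << "LegacyRectangle at (" << x << "," << y << ") size " << w << "x" << h << endl;
    }
};

//The desired interface
class Shape
{
public:
    virtual ~Shape() {}
    virtual void Draw() = 0;
};

//Class adapter: derives from both, and translates Draw() into the legacy call
class RectangleAdapter : public Shape, private LegacyRectangle
{
public:
    RectangleAdapter(int x, int y, int w, int h) : x_(x), y_(y), w_(w), h_(h) {}
    void Draw()
    {
        OldDraw(x_, y_, w_, h_);
    }

private:
    int x_, y_, w_, h_;
};

int main()
{
    Shape* shape = new RectangleAdapter(1, 2, 30, 40);
    shape->Draw();
    delete shape;
    return 0;
}

The full example below uses the second approach (an object adapter), where the adapter holds the adaptee as a member.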

The Adapter pattern is used fairly frequently in practice.

An example of the Adapter pattern follows:



//Program tested on Microsoft Visual Studio 2008 - Zahid Ghadialy
//Adapter is part of Structural Patterns
//Structural Patterns deal with decoupling the interface and implementation of classes and objects
//Adapter pattern is used to convert the programming interface of one class into that of another.

//We take an example where chemical compound objects access the databank
//through an adapter interface

#include <iostream>
#include <string>
#include <cctype>   //for tolower()
#include <cstring>  //for _stricmp() (Microsoft-specific case-insensitive compare)

using namespace std;

//The 'Target' class
class Compound
{
public:
    Compound(string chemical) : chemical_(chemical) {}
    virtual ~Compound() {}

    void Display()
    {
        cout << "\nCompound: " << chemical_ << "----------------" << endl;
    }

protected:
    string chemical_;
    float boilingPoint_;
    float meltingPoint_;
    double molecularWeight_;
    string molecularFormula_;

private:
    Compound(); //disallow default constructor
};

//The 'Adaptee' class
class ChemicalDatabank
{
public:
    virtual ~ChemicalDatabank() {}

    //The databank 'legacy API'
    float GetCriticalPoint(string compound, string point)
    {
        string lowerCompound = toLowerString(compound);

        //Melting point
        if (point == "M")
        {
            if (_stricmp(lowerCompound.c_str(), "water") == 0)
                return 0.0;
            else if (_stricmp(lowerCompound.c_str(), "benzene") == 0)
                return 5.5;
            else if (_stricmp(lowerCompound.c_str(), "ethanol") == 0)
                return (float) -114.1;
            else
                return 0.0;
        }
        //Boiling point
        else
        {
            if (_stricmp(lowerCompound.c_str(), "water") == 0)
                return (float) 100.0;
            else if (_stricmp(lowerCompound.c_str(), "benzene") == 0)
                return (float) 80.1;
            else if (_stricmp(lowerCompound.c_str(), "ethanol") == 0)
                return (float) 78.3;
            else
                return 0.0;
        }
    }

    string GetMolecularStructure(string compound)
    {
        string lowerCompound = toLowerString(compound);

        if (_stricmp(lowerCompound.c_str(), "water") == 0)
            return "H2O";
        else if (_stricmp(lowerCompound.c_str(), "benzene") == 0)
            return "C6H6";
        else if (_stricmp(lowerCompound.c_str(), "ethanol") == 0)
            return "C2H5OH";
        else
            return "";
    }

    double GetMolecularWeight(string compound)
    {
        string lowerCompound = toLowerString(compound);

        if (_stricmp(lowerCompound.c_str(), "water") == 0)
            return 18.015;
        else if (_stricmp(lowerCompound.c_str(), "benzene") == 0)
            return 78.1134;
        else if (_stricmp(lowerCompound.c_str(), "ethanol") == 0)
            return 46.0688;
        else
            return 0.0;
    }

private:
    //Helper function
    string toLowerString(string& input)
    {
        int length = input.length();
        string output;
        for (int i = 0; i < length; i++)
        {
            output += tolower(input[i]);
        }
        return output;
    }
};


//The 'Adapter' class
class RichCompound : public Compound
{
public:
    RichCompound(string name) : Compound(name)
    {
        bank_ = new ChemicalDatabank();
    }

    virtual ~RichCompound()
    {
        delete bank_;
    }

    void Display()
    {
        boilingPoint_ = bank_->GetCriticalPoint(chemical_, "B");
        meltingPoint_ = bank_->GetCriticalPoint(chemical_, "M");
        molecularWeight_ = bank_->GetMolecularWeight(chemical_);
        molecularFormula_ = bank_->GetMolecularStructure(chemical_);

        Compound::Display();
        cout << " Formula : " << molecularFormula_ << endl;
        cout << " Weight : " << molecularWeight_ << endl;
        cout << " Melting Pt : " << meltingPoint_ << endl;
        cout << " Boiling Pt : " << boilingPoint_ << endl;
    }

private:
    ChemicalDatabank* bank_;
    RichCompound(); //disallow default constructor
};

int main()
{
    Compound unknown("Unknown");
    unknown.Display();

    RichCompound water("Water");
    water.Display();

    RichCompound benzene("Benzene");
    benzene.Display();

    RichCompound ethanol("Ethanol");
    ethanol.Display();

    RichCompound zahid("Zahid");
    zahid.Display();

    return 0;
}






The output is as follows:



For more details, please see:

http://www.dofactory.com/Patterns/PatternAdapter.aspx
http://www.patterndepot.com/put/8/adapter.pdf

Nested if-else statements in C++



/* A C++ program example that takes username and password input from the user and then makes a decision. */


// Program 1

#include <iostream>
#include <string>

using namespace std;

const string userName = "computergeek";
const string passWord = "break_codes";


int main ()
{
    string name, pass;

    cout << "Username: ";
    cin >> name;

    cout << "Password: ";
    cin >> pass;

    if (name == userName)
    {
        if (pass == passWord)
            cout << "You are logged in!" << endl;
        else
            cout << "Incorrect username or password." << endl;
    }
    else
        cout << "Incorrect username or password." << endl;

    return 0;
}


/* A C++ program example that takes the username from the user, asks for the password only if the username is correct, and then makes a decision. */


// Program 2

#include <iostream>
#include <string>

using namespace std;

const string userName = "computergeek";
const string passWord = "break_codes";

int main ()
{
    string name, pass;

    cout << "Username: ";
    cin >> name;

    if (name == userName)
    {
        cout << "Password: ";
        cin >> pass;

        if (pass == passWord)
            cout << "You are logged in!" << endl;
        else
            cout << "Incorrect password." << endl;
    }
    else
        cout << "Incorrect username." << endl;

    return 0;
}


Programming Advice & Explanation:


Whenever possible, avoid nesting (especially deep nesting), as it makes a program difficult to read. We should write programs so that they can be read and understood easily by us and by other programmers. For example, in "Program 1" the nesting can be avoided by using a logical operator (a sketch appears at the end of this section). However, in "Program 2" the nesting is required because we want to ask for the password only if the username is correct.

In the next chapter we will learn about logical operators in detail and will rewrite "Program 1" using them.
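As a preview, here is one way "Program 1" can be rewritten with the logical AND operator (&&) to avoid the nesting; it reuses the same constants as Program 1.

// Program 1 rewritten without nesting, using the logical AND operator (&&)

#include <iostream>
#include <string>

using namespace std;

const string userName = "computergeek";
const string passWord = "break_codes";

int main ()
{
    string name, pass;

    cout << "Username: ";
    cin >> name;

    cout << "Password: ";
    cin >> pass;

    // Both conditions must be true for the login to succeed
    if (name == userName && pass == passWord)
        cout << "You are logged in!" << endl;
    else
        cout << "Incorrect username or password." << endl;

    return 0;
}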

Designing algorithms for Map Reduce

Since the emergence of the Hadoop implementation, I have been trying to morph existing algorithms from various areas into the map/reduce model. The results are pretty encouraging, and I've found that Map/Reduce is applicable in a wide spectrum of application scenarios.

So I wanted to write down my findings, but then found the scope was too broad and that I hadn't spent enough time exploring different problem domains. Finally, I realized there is no way for me to completely cover what Map/Reduce can do in all areas, so I am just dumping out what I know at this moment, over the long weekend when I have an extra day.

Notice that Map/Reduce is good for "data parallelism", which is different from "task parallelism". Here is a description of their difference and a general parallel processing design methodology.

I'll cover the abstract Map/Reduce processing model below. For a detailed description of the Hadoop framework's implementation, please refer to my earlier blog post here.


Abstract Processing Model
There is no formal definition of the Map/Reduce model. Based on the Hadoop implementation, we can think of it as a "distributed merge-sort engine". The general processing flow is as follows.
  • Input data is "split" across multiple mapper processes, which execute in parallel
  • The result of each mapper is partitioned by key and locally sorted
  • Mapper results with the same key land on the same reducer and are consolidated there
  • A merge sort happens at the reducer, so all keys arriving at the same reducer are sorted

Within the processing flow, user-defined functions can be plugged into the framework.
  • map(key1, value1) -> emit(key2, value2)
  • reduce(key2, value2_list) -> emit(key2, aggregated_value2)
  • combine(key2, value2_list) -> emit(key2, combined_value2)
  • partition(key2) return reducerNo
Designing an algorithm for map/reduce is about morphing your problem into a distributed sorting problem and fitting your algorithm into the user-defined functions above.
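As a concrete, single-process illustration of these signatures, here is a minimal word-count sketch in C++. It simulates the map, shuffle/sort, and reduce phases in memory; in the real framework each phase would of course be distributed across machines.

#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

// map(key1, value1) -> emit(key2, value2): emit (word, 1) for each word in a line
void map_fn(const string& line, vector<pair<string, int> >& emitted)
{
    istringstream iss(line);
    string word;
    while (iss >> word)
        emitted.push_back(make_pair(word, 1));
}

// reduce(key2, value2_list) -> emit(key2, aggregated_value2): sum the counts
int reduce_fn(const string& /*word*/, const vector<int>& counts)
{
    int total = 0;
    for (size_t i = 0; i < counts.size(); i++)
        total += counts[i];
    return total;
}

int main()
{
    vector<string> input;
    input.push_back("the quick brown fox");
    input.push_back("the lazy dog");

    // Map phase
    vector<pair<string, int> > emitted;
    for (size_t i = 0; i < input.size(); i++)
        map_fn(input[i], emitted);

    // Shuffle/sort phase: group values by key (std::map keeps keys sorted)
    map<string, vector<int> > grouped;
    for (size_t i = 0; i < emitted.size(); i++)
        grouped[emitted[i].first].push_back(emitted[i].second);

    // Reduce phase
    for (map<string, vector<int> >::iterator it = grouped.begin(); it != grouped.end(); ++it)
        cout << it->first << " : " << reduce_fn(it->first, it->second) << endl;

    return 0;
}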

To analyze the complexity of the algorithm, we need to understand the processing cost, especially the cost of network communication in such a highly distributed system.

Let's first consider the communication between the input data split and the Mapper. To minimize this overhead, we need to run the mapper logic at the data split (without moving the data). How well we can do this depends on how the input data is stored and whether we can run the mapper code there. For HDFS and Cassandra, we can run the mapper at the storage node, and the JobTracker's scheduling algorithm will assign the mapper to the data split it is collocated with, significantly reducing data movement. Other data stores such as Amazon S3 don't allow execution of mapper logic at the storage node and therefore incur more data traffic.

The communication between Mapper and Reducer cannot be avoided by collocation, because which reducer receives a record depends on the emitted key. The only mechanism available is the combine() function, which can perform a local consolidation and hence reduce the data sent to the reducer.

Finally, the communication between the reducer and the output data store depends on the store's implementation. For HDFS, the data is triply replicated and hence the cost of writing can be high. Cassandra (a NoSQL data store) allows configurable latency with various degrees of data-consistency trade-off. Fortunately, in most cases the volume of result data after Map/Reduce processing is not high.

Now, let's see how to fit various kinds of algorithms into the Map/Reduce model ...


Map-Only
"Embarrassing parallel" problems are those that the same processing is applied in each data element in a pretty independent way, in other words, there is no need to consolidate or aggregate individual results.

These kinds of problems can be expressed as a map-only job (by setting the number of reducers to zero). In this case, the Mapper's emitted result goes directly to the output format.

Some examples of map-only jobs are ...
  • Distributed grep
  • Document format conversion
  • ETL
  • Input data sampling

Sorting
As we described above, Hadoop is fundamentally a distributed sorting engine, so using it for sorting is a natural fit.

For example, we can use an identity function for both map() and reduce(); the output is then equivalent to sorting the input data. Notice that we are using a single reducer here, so the merge is still sequential although the sorting is done at the mappers in parallel.

We can perform the merge in parallel by using multiple reducers. In this case, the output of each reducer is sorted, and we may need to do a final merge of all the reducers' output. Another way is to use a customized partition() function such that the keys are partitioned by range. In this case, each reducer sorts a particular range and the final result is just the concatenation of each reducer's sorted output.
partition(key) {
  range = (KEY_MAX - KEY_MIN) / NUM_OF_REDUCERS
  reducer_no = (key - KEY_MIN) / range
  return reducer_no
}


Inverted Indexes
The map/reduce model originated from Google, which has many scenarios that require building large-scale inverted indexes. Building an inverted index is about parsing different documents to build a word -> document index for keyword search.

In fact, the inverted index is pretty general and can be applied in many scenarios. To build an inverted index, we can feed the mapper each document (or lines within a document). The Mapper will parse the words in the document and emit [word, doc] pairs along with other metadata, such as where in the document the word occurs, etc. The reducer can simply be an identity function that just dumps out the list, or it can perform some statistical aggregation per word.

In a more general form of inverted index, there is a "container" and "element" concept. The Map and Reduce functions are organized in the following pattern.
map(key, container) {
  for each element in container {
    element_meta = extract_metadata(element, container)
    emit(element, [container_id, element_meta])
  }
}

reduce(element, container_ids) {
  element_stat = compute_stat(container_ids)
  emit(element, [element_stat, container_ids])
}

In text indexing, we are not just counting the actual frequency of the terms but also adjusting their weighting based on their frequency distribution, so common words have less significance when they appear in a document. The final value after normalization is called TF-IDF (term frequency times inverse document frequency) and can be computed using Map/Reduce as well.
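A common formulation (one of several variants) is tf-idf(t, d) = tf(t, d) × log(N / df(t)), where tf(t, d) is the number of times term t appears in document d, N is the total number of documents, and df(t) is the number of documents that contain t.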


Simple Statistics Computation
Computing max, min, and count is very straightforward since these operations are commutative and associative. Each mapper performs a local computation and sends the result to a single reducer, which does the final computation.

The combine function is typically used to reduce network traffic. Notice that the input to the combine function must look the same as the input to the reduce function, and the output of the combine function must look the same as the output of the map function. There is also no guarantee that the combiner function will be invoked at all.

class Mapper {
  buffer

  map(key, number) {
    buffer.append(number)
    if (buffer.is_full) {
      max = compute_max(buffer)
      emit(1, max)
      buffer.clear
    }
  }

  close() {
    # Flush whatever is left in the buffer
    if (!buffer.empty) {
      emit(1, compute_max(buffer))
    }
  }
}

class Reducer {
  reduce(key, list_of_local_max) {
    global_max = 0
    for local_max in list_of_local_max {
      if local_max > global_max {
        global_max = local_max
      }
    }
    emit(1, global_max)
  }
}

class Combiner {
  combine(key, list_of_local_max) {
    local_max = maximum(list_of_local_max)
    emit(1, local_max)
  }
}
Computing the average is done in a similar way, except that instead of computing a local average, we compute the local sum and local count. The reducer divides the final sum by the final count to come up with the final average.
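Here is a minimal single-process sketch in C++ of that idea: each "mapper" emits a local (sum, count) pair instead of a local average, and the "reducer" combines them. This is just an illustration of the algebra, not Hadoop code.

#include <iostream>
#include <utility>
#include <vector>

using namespace std;

// Each mapper emits a partial (sum, count) pair for its own split of the data
pair<double, long> local_sum_count(const vector<double>& split)
{
    double sum = 0.0;
    for (size_t i = 0; i < split.size(); i++)
        sum += split[i];
    return make_pair(sum, (long)split.size());
}

int main()
{
    // Two "splits" processed by two hypothetical mappers (toy data)
    vector<double> split1, split2;
    split1.push_back(2.0); split1.push_back(4.0);
    split2.push_back(6.0); split2.push_back(8.0); split2.push_back(10.0);

    // Reducer: add up the partial sums and counts, then divide once at the end
    pair<double, long> p1 = local_sum_count(split1);
    pair<double, long> p2 = local_sum_count(split2);
    double globalAvg = (p1.first + p2.first) / (p1.second + p2.second);

    cout << "Global average: " << globalAvg << endl;  // prints 6
    return 0;
}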

Computing a histogram is pretty common in statistics and can give a quick idea of the data distribution. A typical approach is to divide the numbers into different intervals. The mapper computes a count per interval and emits it, and the reducer computes the sum for each interval.
class Mapper {
  interval_start = [0, 20, 40, 60, 80]

  map(key, number) {
    # Find the last interval whose start is <= number
    i = NO_OF_INTERVALS - 1
    while (i >= 0) {
      if (number >= interval_start[i]) {
        emit(i, 1)
        break
      }
      i = i - 1
    }
  }
}

class Reducer {
  reduce(interval, counts) {
    total_counts = 0
    for each count in counts {
      total_counts += count
    }
    emit(interval, total_counts)
  }
}

class Combiner {
  combine(interval, occurrences) {
    emit(interval, occurrences.size)
  }
}
Notice that a non-uniform distribution of values across intervals may cause an unbalanced workload among reducers and hence undermine the degree of parallelism. We'll address this in the later part of this post.


In-Mapper Combine
Jimmy Lin, in his excellent book, talks about a technique called "in-mapper combine", which regains application-level control over when the combine takes place. The general idea is to maintain a HashMap to buffer the intermediate results and have separate logic to determine when to actually emit the data from the buffer. The general code structure is as follows ...
class Mapper {
  buffer

  init() {
    buffer = HashMap.new
  }

  map(key, data) {
    elements = process(data)
    for each element {
      ....
      check_and_put(buffer, k2, v2)
    }
  }

  check_and_put(buffer, k2, v2) {
    # Fold the new value into the buffered partial result (e.g. a sum or a max)
    buffer[k2] = combine(buffer[k2], v2)
    if buffer.full {
      flush(buffer)
    }
  }

  flush(buffer) {
    for each k2 in buffer.keys {
      emit(k2, buffer[k2])
    }
    buffer.clear
  }

  close() {
    flush(buffer)
  }
}

SQL Model
The SQL model can be used to extract data from the data source. It contains a number of primitives.

Projection / Filter
This logic is typically implemented in the Mapper
  • result = SELECT c1, c2, c3, c4 FROM source WHERE conditions
Aggregation / Group by / Having
This logic is typically implemented in the Reducer
  • SELECT sum(c3) as s1, avg(c4) as s2 ... FROM result GROUP BY c1, c2 HAVING conditions
The above example can be realized by the following map/reduce job
class Mapper {
  map(k, rec) {
    select_fields = [rec.c1, rec.c2, rec.c3, rec.c4]
    group_fields = [rec.c1, rec.c2]
    if (filter_condition == true) {
      emit(group_fields, select_fields)
    }
  }
}

class Reducer {
  reduce(group_fields, list_of_rec) {
    s1 = 0
    s2 = 0
    for each rec in list_of_rec {
      s1 += rec.c3
      s2 += rec.c4
    }
    s2 = s2 / list_of_rec.size
    if (having_condition == true) {
      emit(group_fields, [s1, s2])
    }
  }
}

Data Joins
Joining two data sets is a very common operation in the relational data model and is very mature in RDBMS implementations. The common join mechanisms in a centralized DB architecture are as follows.
  1. Nested loop join -- This is the most basic and naive mechanism, organized as two loops. The outer loop reads from data set1; the inner loop scans through the whole of data set2 and compares each record with the record just read from data set1.
  2. Indexed join -- An index (e.g. a B-Tree index) is built for one of the data sets (say data set2, which is the smaller one). The join scans through data set1 and looks up the index to find the matching records of data set2.
  3. Merge join -- Pre-sort both data sets so they are arranged physically in increasing order. The join is realized by just merging the two data sets. a) Locate the first record in both data set1 & set2, which is their corresponding minimum key. b) In the one with the smaller minimum key (say data set1), keep scanning until finding the next key that is bigger than the minimum key of the other data set (i.e. data set2); call this the next minimum key of data set1. c) Switch positions and repeat the whole thing until one of the data sets is exhausted.
  4. Hash / Partition join -- Partition data set1 and data set2 into smaller pieces and apply another join algorithm on the smaller pieces (see the sketch after this list). A linear scan with a hash() function is typically performed to partition the data sets such that data in set1 and data in set2 with the same key land on the same partition.
  5. Semi join -- This is mainly used to join two sets of data that are stored at different locations, where the goal is to reduce the amount of data transfer so that only the full records appearing in the final join result are sent through. a) Data set2 sends its key set to the machine holding data set1. b) The machine holding data set1 does a join and sends back the records in data set1 that match one of the sent-over keys. c) The machine holding data set2 does a final join on the data sent back.
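To make one of these concrete, here is a small in-memory hash join sketch in C++ (with made-up records): it builds a table on the smaller data set keyed by the join key and probes it while scanning the larger one (std::multimap stands in for the hash table here). This build-and-probe structure is also the essence of the map-side partition join described later.

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

using namespace std;

// Hypothetical records: (join key, payload) pairs from two data sets
typedef pair<int, string> Record;

int main()
{
    vector<Record> set1, set2;
    set1.push_back(make_pair(1, "order-A")); set1.push_back(make_pair(2, "order-B"));
    set1.push_back(make_pair(2, "order-C")); set1.push_back(make_pair(3, "order-D"));
    set2.push_back(make_pair(1, "customer-X")); set2.push_back(make_pair(2, "customer-Y"));

    // Build phase: index the smaller data set (set2) by join key
    multimap<int, string> table;
    for (size_t i = 0; i < set2.size(); i++)
        table.insert(set2[i]);

    // Probe phase: scan set1 and look up each key in the table
    for (size_t i = 0; i < set1.size(); i++)
    {
        pair<multimap<int, string>::iterator, multimap<int, string>::iterator> range =
            table.equal_range(set1[i].first);
        for (multimap<int, string>::iterator it = range.first; it != range.second; ++it)
            cout << set1[i].first << " : " << set1[i].second << " x " << it->second << endl;
    }
    return 0;
}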
The map/reduce environment has corresponding joins.

General reducer-side join
This is the most basic one: records from data set1 and set2 with the same key land on the same reducer, which then computes a cartesian product. The downside of this model is that the reducer needs to have enough memory to hold all records for each key.
map(k1, rec) {
  emit(rec.key, [rec.type, rec])
}

reduce(k2, list_of_rec) {
  list_of_typeA = []
  list_of_typeB = []
  for each rec in list_of_rec {
    if (rec.type == 'A') {
      list_of_typeA.append(rec)
    } else {
      list_of_typeB.append(rec)
    }
  }

  # Compute the cartesian product
  for recA in list_of_typeA {
    for recB in list_of_typeB {
      emit(k2, [recA, recB])
    }
  }
}

Optimized reducer-side join
You can "secondary sort" the data type for each key by defining a customized partition function. In this model, you arrange the data type (which has less records per key to arrive first) and you only need to store these types.
map(k1, rec) {
  emit([rec.key, rec.type], rec)
}

partition(key_pair) {
  # Partition by the natural key only, so both types for the same key
  # reach the same reducer
  return super.partition(key_pair[0])
}

reduce(k2, list_of_rec) {
  list_of_typeA = []
  for each rec in list_of_rec {
    if (rec.type == 'A') {
      list_of_typeA.append(rec)
    } else { # type B records arrive after all type A records
      for recA in list_of_typeA {
        emit(k2, [recA, rec])
      }
    }
  }
}

While very flexible, the downside of the reducer-side join is that all data needs to be transferred from the mapper to the reducer, and the result is then written to HDFS. Map-side joins exploit a special arrangement of the input files such that the join is performed at the mapper. The advantage of doing it in the mapper is that we can exploit the collocation of the map/reduce framework: the mapper is allocated an input split on its local machine, which reduces the data transfer from disk to the mapper. After the map-side join, the result is written directly to the output HDFS files, eliminating the data transfer between the mapper and the reducer.

Map-side partition join
This model requires the two data sets to be partitioned into two sets of partition files (the same number of partitions for each set, using the same partition function on the join key). Each partition must be small enough to fit into the memory of the mapper machine. We also need to configure the Map/Reduce job such that the partition file is not split; in other words, the whole partition is assigned to one mapper task.

The mapper detects the partition of its input file and then reads the corresponding partition file of the other data set into an in-memory hashtable. After that, the mapper looks up the hashtable to do the join.
class Mapper {
  hashtable = Hashtable.new

  init() {
    partition = detect_input_filename()
    hashtable = load("hdfs://dataset2/" + partition)
  }

  map(k1, rec1) {
    rec2 = hashtable[rec1.key]
    if (rec2 != nil) {
      emit(rec1.key, [rec1, rec2])
    }
  }
}

Map-side partition merge join
In addition, if the partition files are also sorted, then the mapper can use a merge join, which has an even smaller memory footprint.
class Mapper {
  rec2_key = nil
  next_rec2 = nil
  list_of_rec2 = []
  file = nil

  init() {
    partition = detect_input_filename()
    file = open("hdfs://dataset2/" + partition, "r")
    next_rec2 = file.read()
    fill_rec2_list()
  }

  # Fill up the list of rec2 records that share the same key
  fill_rec2_list() {
    list_of_rec2 = []
    rec2_key = next_rec2.key
    while (next_rec2 != nil && next_rec2.key == rec2_key) {
      list_of_rec2.append(next_rec2)
      next_rec2 = file.read()
    }
  }

  map(k1, rec1) {
    # Advance the dataset2 cursor until its key catches up with rec1
    while (rec1.key > rec2_key) {
      fill_rec2_list()
    }

    if (rec1.key == rec2_key) {
      for rec2 in list_of_rec2 {
        emit(rec1.key, [rec1, rec2])
      }
    }
  }
}

Memcache join
The model is very straightforward: the second data set is loaded into a distributed hash table (like memcached), which has effectively unlimited size. The mapper receives an input split from the first data set and then looks up the memcache for the corresponding record of the other data set.

There are also other, more sophisticated join mechanisms, such as the semi-join described in this paper.

Graph Algorithms
Many problems can be modeled as a graph of nodes and edges. In the search engine environment, computing the rank of a document using PageRank or HITS can be modeled as a sequence of iterations of Map/Reduce jobs.

In the past, I have blogged about a number of very basic graph algorithms in map/reduce, including topological sort, finding the shortest path, minimum spanning tree, etc., and also how to recommend people connections using Map/Reduce.

Because graph traversal is inherently sequential, I am not sure Map/Reduce is the best parallel processing model for graph processing. Another problem is that, due to the "stateless" nature of the map() and reduce() functions, the whole graph needs to be transferred between mapper and reducer, which incurs significant communication cost. Jimmy Lin has described a clever technique called Shimmy which uses a special partitioning function that lets the reducer retain ownership of nodes across map/reduce jobs. I have described this technique, as well as a general model of Map/Reduce graph processing, in a previous blog post.

I think a parallel programming model designed specifically for graph processing will perform much better. Google's Pregel model is a good example of that.


Machine Learning
Many machine learning algorithms involve multiple iterations of parallel processing, which fits very well into the Map/Reduce model.

For example, we can use map reduce to calculate the statistics for probabilistic methods such as naive Bayes.

A simple example is computing K-Means clusters, which can be done in the following way (a single-iteration sketch follows the list).
  • Input: a set of points, with k initial centroids
  • Output: k final centroids
Iterate until there is no more change of membership:
  1. For each point, assign it to be a member of the closest centroid
  2. Re-compute each centroid from its assigned member points
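As an illustration, here is a minimal single-process sketch in C++ of one such iteration on 1-D points with toy data; in the real job, the assignment step would run in the mappers (keyed by centroid) and the re-computation in the reducers.

#include <cmath>
#include <iostream>
#include <vector>

using namespace std;

int main()
{
    // 1-D points and two initial centroids (toy data)
    double points[] = {1.0, 2.0, 9.0, 10.0, 11.0};
    vector<double> centroids;
    centroids.push_back(0.0);
    centroids.push_back(12.0);

    // "Map" step: assign each point to the closest centroid
    vector<double> sums(centroids.size(), 0.0);
    vector<int> counts(centroids.size(), 0);
    for (int i = 0; i < 5; i++)
    {
        size_t best = 0;
        for (size_t c = 1; c < centroids.size(); c++)
            if (fabs(points[i] - centroids[c]) < fabs(points[i] - centroids[best]))
                best = c;
        sums[best] += points[i];
        counts[best]++;
    }

    // "Reduce" step: re-compute each centroid from its assigned members
    for (size_t c = 0; c < centroids.size(); c++)
    {
        if (counts[c] > 0)
            centroids[c] = sums[c] / counts[c];
        cout << "Centroid " << c << " : " << centroids[c] << endl;  // 1.5 and 10
    }
    return 0;
}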


For a complete list of Machine learning algorithms and how they can be implemented using the Map/Reduce model, here is a very good paper.


Matrix arithmetic
A lot of real-life relationships can be represented as a matrix. One example is the vector space model of information retrieval, where columns represent documents and rows represent terms. Another example is the social network graph, where both the columns and the rows represent people, and a binary value in each cell represents a "friend" relationship. In this case, M + M.M represents all the people I can reach within 2 degrees.
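Here is a small sketch in C++ of that observation on a toy graph: with a boolean adjacency matrix M, the boolean product M.M marks 2-step paths, and OR-ing it with M gives everyone reachable within 2 degrees.

#include <iostream>

using namespace std;

const int N = 4;  // toy social graph: 0-1, 1-2, 2-3 are friends

int main()
{
    bool M[N][N] = {
        {0, 1, 0, 0},
        {1, 0, 1, 0},
        {0, 1, 0, 1},
        {0, 0, 1, 0}
    };

    // Boolean matrix product M.M: MM[i][j] is true if i can reach j in exactly 2 steps
    bool MM[N][N] = {{false}};
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
            for (int k = 0; k < N; k++)
                if (M[i][k] && M[k][j])
                    MM[i][j] = true;

    // M + M.M (boolean OR): reachable within 2 degrees (self-loops excluded when printing)
    cout << "Reachable within 2 degrees:" << endl;
    for (int i = 0; i < N; i++)
    {
        for (int j = 0; j < N; j++)
            cout << ((i != j && (M[i][j] || MM[i][j])) ? 1 : 0) << " ";
        cout << endl;
    }
    return 0;
}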

Processing a dense matrix is very easy to parallelize. But since the sequential version is O(N^3), it is not practical for matrices of large size (millions of rows and columns).

A lot of real-world graph problems can be represented as sparse matrices, so my interest is to focus more on sparse matrix processing. I don't have much to share at this moment, but I hope this is something I will blog about in the future.

The Limitations of SPARQL

Recently, I have been looking at the RDF model and trying to compare it with the property graph model that I mentioned in a previous post. I have also looked at the SPARQL query model. While I think it is a very powerful query language based on variable bindings, I have also observed a couple of cases that it doesn't handle well.

Note that I have only used SPARQL in very simple examples and don't claim to be an expert in this area. I am hoping my post here can invite SPARQL experts to share their experience.

Here are the limitations that I have seen.

Support of Negation

Because of the "Open World" assumption, SPARQL doesn't support "negation" well; this means expressing "negation" in SPARQL is not easy. For example:
  • Find all persons who are Bob's friends but don't know Java
  • Find all persons who know Bob but don't know Alice
Support of Path Expression

In SPARQL, expressing a variable-length path is not easy. For example:
  • Find all posts written by Bob’s direct and indirect friends (everyone reachable from Bob)
Predicates cannot have Properties

This may be an RDF limitation that SPARQL inherits, since RDF represents everything as triples. It is easy to implement properties of a node using extra triples, but it is very difficult to implement properties on edges.

In SPARQL, there is no way to attach a property to a "predicate". For example:
  • Bob knows Peter for 5 years
RDF inference Rule

Inference rules are built around RDFS and OWL, which focus mainly on type and set relationships and are implemented using rules of the form (conditions => derived triple). But it is not easy to express a derived triple whose object's value is an expression over existing triples. For example:
  • Family income is the sum of all individual member’s income
Support of Fuzzy Matches with Ranked results

SPARQL is based on a boolean query model designed for exact matches. Expressing a fuzzy match with ranked results is very difficult. For example:
  • Find the top 20 posts that are "similar" to this post, ranked by degree of similarity (let's say similarity is measured by the number of common tags that the two posts share)

I am also very interested to see whether there are any large-scale deployments of RDF graphs in real-life scenarios. I am not aware of any popular social network site using RDF to store its social graph or social activities. I guess this may be due to the scalability of today's RDF implementations. I may be wrong, though.

An example of replacing part of strings

Taking a bit of a break from the Design Patterns this week, we look at a simple example of replacing parts of strings. For example, you may have a program which prints out a customised letter, and you may want to replace the default name with the name of a person that can be input on the command line.



The example is as follows:





//Program tested on Microsoft Visual Studio 2008 - Zahid Ghadialy
#include <iostream>
#include <string>

using namespace std;

int main()
{
    string s1 = "Hello is that Tom? Are you ok Tom? Take Care tom!";
    string s2 = "Tom";
    string s3 = "William";

    cout << "s1 = " << s1 << endl;

    //Find s2 and replace by s3
    bool flag = true;
    while (flag)
    {
        size_t found = s1.find(s2);
        if (found != string::npos)
        {
            s1.replace(found, s2.length(), s3);
        }
        else
        {
            flag = false;
        }
    }

    cout << "s1 = " << s1 << endl;

    return 0;
}






The output is as follows:







C++ example for Singleton Design Pattern


The Singleton Design Pattern, which is widely used, ensures that there is only a single instance of a class and also provides a global point of access to it.
I have covered a couple of examples of Singletons in the past and you are welcome to refer to them as examples of this design pattern. The simple example is here and a more involved one is here.
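For quick reference, here is a minimal sketch of the pattern, a Meyers-style singleton that keeps the single instance in a function-local static (the Logger class here is just an illustrative example):

#include <iostream>
#include <string>

using namespace std;

// A minimal Meyers-style singleton: the single instance is a function-local
// static, created on first use, and the constructor is private so no other
// instance can be made.
class Logger
{
public:
    static Logger& Instance()
    {
        static Logger instance;   // created once, on first call
        return instance;
    }

    void Log(const string& message)
    {
        cout << "LOG: " << message << endl;
    }

private:
    Logger() {}                       // private constructor
    Logger(const Logger&);            // copying disallowed
    Logger& operator=(const Logger&); // assignment disallowed
};

int main()
{
    Logger::Instance().Log("Only one Logger instance ever exists");
    return 0;
}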
To learn more about Singletons, see the two examples linked above.

C++ example for Prototype Design Pattern

The Prototype pattern is used when creating an instance of a class is very time-consuming or complex in some way. Then, rather than creating more instances, you make copies of the original instance, modifying them as appropriate.


Prototypes can also be used whenever you need classes that differ only in the type of processing they offer, for example in parsing of strings representing numbers in different radixes. This design pattern is not used very frequently.

The following is an example of Prototype Design Pattern:

//Program tested on Microsoft Visual Studio 2008 - Zahid Ghadialy
//Prototype is part of Creational Patterns
//Creational Patterns deal with initializing and configuring classes and objects
//Prototype creates a fully initialized instance to be copied or cloned

//We will take an example of creating Colour class.
//There are three parts to colour - Red, Green and Blue
//Simple colours like Red only contain the red component
//Complex colours like Angry and Peace contains all three components

#include <iostream>
#include <string>
#include <iomanip>
#include <map>

using namespace std;

//The abstract 'Prototype' class
class ColourPrototype
{
public:
    virtual ~ColourPrototype() {}  //virtual destructor so derived objects can be deleted via a base pointer
    virtual ColourPrototype* Clone(void) = 0;
};


//The 'ConcretePrototype' class
class Colour : public ColourPrototype
{
public:
    Colour(int red, int green, int blue)
    {
        red_ = red, green_ = green, blue_ = blue;
    }

    ColourPrototype* Clone(void)
    {
        cout << "Cloning colour RGB: " << setw(3) << red_ << ", " << setw(3) << green_ << ", " << setw(3) << blue_ << endl;
        ColourPrototype* colourPrototype = new Colour(red_, green_, blue_);
        return colourPrototype;
    }

    void SetRed(int red)
    {
        red_ = red;
    }

    void SetGreen(int green)
    {
        green_ = green;
    }

    void SetBlue(int blue)
    {
        blue_ = blue;
    }

    int GetRed(void)
    {
        return red_;
    }

    int GetGreen(void)
    {
        return green_;
    }

    int GetBlue(void)
    {
        return blue_;
    }

private:
    Colour(); //default constructor not allowed
    int red_, green_, blue_;
};


//Prototype manager
class ColourManager
{
public:
    virtual ~ColourManager()
    {
        while (!coloursMap_.empty())
        {
            map<string, ColourPrototype*>::iterator it = coloursMap_.begin();
            delete it->second;
            coloursMap_.erase(it);
        }
    }

    void AddColour(const string& colour, ColourPrototype* prototype)
    {
        coloursMap_[colour] = prototype;
    }

    ColourPrototype* GetColour(const string& colour)
    {
        map<string, ColourPrototype*>::const_iterator it = coloursMap_.find(colour);
        if (it != coloursMap_.end())
            return it->second;
        return NULL;
    }

    void PrintColours(void)
    {
        cout << "\nAvailable Colours and their values " << endl;
        map<string, ColourPrototype*>::const_iterator it = coloursMap_.begin();
        while (it != coloursMap_.end())
        {
            cout << setw(20) << it->first << " : ";
            cout << setw(3) << dynamic_cast<Colour*>(it->second)->GetRed() << ", ";
            cout << setw(3) << dynamic_cast<Colour*>(it->second)->GetGreen() << ", ";
            cout << setw(3) << dynamic_cast<Colour*>(it->second)->GetBlue() << endl;
            ++it;
        }
    }

private:
    map<string, ColourPrototype*> coloursMap_;
};



//The Main method
int main()
{
    ColourManager* colourManager = new ColourManager();

    //Add simple colours
    colourManager->AddColour("Red", new Colour(255, 0, 0));
    colourManager->AddColour("Green", new Colour(0, 255, 0));
    colourManager->AddColour("Blue", new Colour(0, 0, 255));

    //Add complex colours
    colourManager->AddColour("Angry", new Colour(255, 54, 0));
    colourManager->AddColour("Peace", new Colour(128, 211, 128));
    colourManager->AddColour("Flame", new Colour(211, 34, 20));

    //Clone existing colours, modify and add them to the manager
    ColourPrototype* colour1 = (colourManager->GetColour("Red"))->Clone();
    (dynamic_cast<Colour*>(colour1))->SetRed(200);
    colourManager->AddColour("Light Red", colour1);

    ColourPrototype* colour2 = (colourManager->GetColour("Peace"))->Clone();
    (dynamic_cast<Colour*>(colour2))->SetRed(150);
    (dynamic_cast<Colour*>(colour2))->SetBlue(150);
    colourManager->AddColour("Extreme Peace", colour2);

    ColourPrototype* colour3 = (colourManager->GetColour("Flame"))->Clone();
    (dynamic_cast<Colour*>(colour3))->SetRed(255);
    colourManager->AddColour("Hot Flame", colour3);

    colourManager->PrintColours();

    //clean up the memory
    delete colourManager;

    return 0;
}


The output is as follows:



Other good examples of the Prototype pattern are available here and here.

Selection Statement (if-else if-else) in C++


To understand this chapter you should know about comparison operators, which are explained in the previous chapter. Selection statements are very important in programming because we make decisions using them.

/* A simple c++ program example to demonstrate the if statement. */


// Program 1

#include <iostream>
#include <iomanip>

using namespace std;

int main ()
{
    int first, second;

    cout << "Enter two integers." << endl;

    cout << "First " << setw (3) << ": ";
    cin >> first;

    cout << "Second "<< setw (2) << ": ";
    cin >> second;

    if (first > second)
        cout << "first is greater than second." << endl;

    return 0;
}


/* A c++ program example to demonstrate if-else statements */


// Program 2

#include <iostream>
#include <iomanip>

using namespace std;

int main ()
{
    int first, second;

    cout << "Enter two integers." << endl;

    cout << "First " << setw (3) << ": ";
    cin >> first;

    cout << "Second "<< setw (2) << ": ";
    cin >> second;

    if (first > second)
        cout << "first is greater than second." << endl;

    else
        cout << "first is less than or equal to second." << endl;

    return 0;
}


/* A c++ program example to demonstrate if-else if-else statements. */


// Program 3

#include <iostream>
#include <iomanip>

using namespace std;

int main ()
{
    int first, second;

    cout << "Enter two integers." << endl;

    cout << "First " << setw (3) << ": ";
    cin >> first;

    cout << "Second "<< setw (2) << ": ";
    cin >> second;

    if (first > second)
        cout << "first is greater than second." << endl;

    else if (first < second)
        cout << "first is less than second" << endl;

    else
        cout << "first and second are equal." << endl;

    return 0;
}


Note: All three programs above satisfy the requirement that if the first value
is greater, the message "first is greater than second" is displayed.

Question: When should you use if, if-else and if-else if-else?
Answer: It depends upon your program, specifically the number of possibilities it has. For example, in the above program there are three possibilities: the value could be greater, smaller or equal. All these possibilities are covered using if-else if-else statements. Depending upon the possibilities you can add more "else if" statements before the final else statement. It is good programming practice to cover all the possibilities to make your program robust.

In the next chapter we will learn about nested if statements.

C++ example for Factory Method Design Pattern

The Factory Method Design Pattern defines an interface for creating an object, but lets subclasses decide which class to instantiate. Factory Method lets a class defer instantiation to subclasses. This design pattern is used very frequently in practice.

The following is an example of Factory Method Design Pattern:

//Program tested on Microsoft Visual Studio 2008 - Zahid Ghadialy
//Factory Method is part of Creational Patterns
//Creational Patterns deal with initializing and configuring classes and objects
//Factory Method creates an instance of several derived classes

//We will take an example of creating Pages for Document.
//There are 2 types of Document: Resume and Report
//Different Document can instantiate different Pages based on their requirements

#include <iostream>
#include <string>
#include <list>

using namespace std;

//Create the abstract 'Product' class
class Page
{
public:
    virtual string GetPageName(void) = 0;
};


//'ConcreteProduct'#1 class
class SkillsPage : public Page
{
public:
    string GetPageName(void)
    {
        return "SkillsPage";
    }
};

//'ConcreteProduct'#2 class
class EducationPage : public Page
{
public:
    string GetPageName(void)
    {
        return "EducationPage";
    }
};

//'ConcreteProduct'#3 class
class ExperiencePage : public Page
{
public:
    string GetPageName(void)
    {
        return "ExperiencePage";
    }
};

//'ConcreteProduct'#4 class
class IntroductionPage : public Page
{
public:
    string GetPageName(void)
    {
        return "IntroductionPage";
    }
};

//'ConcreteProduct'#5 class
class ResultsPage : public Page
{
public:
    string GetPageName(void)
    {
        return "ResultsPage";
    }
};

//'ConcreteProduct'#6 class
class ConclusionPage : public Page
{
public:
    string GetPageName(void)
    {
        return "ConclusionPage";
    }
};

//'ConcreteProduct'#7 class
class SummaryPage : public Page
{
public:
    string GetPageName(void)
    {
        return "SummaryPage";
    }
};

//'ConcreteProduct'#8 class
class BibliographyPage : public Page
{
public:
    string GetPageName(void)
    {
        return "BibliographyPage";
    }
};


//Create the abstract 'Creator' class
class Document
{
public:
    //constructor
    Document()
    {
        //CreatePages(); - cannot be called directly in the constructor because it is virtual;
        //it should be called in the derived class constructor instead
    }

    void AddPages(Page* page)
    {
        pages_.push_back(page);
    }

    const list<Page*>& GetPages(void)
    {
        return pages_;
    }

    //Factory Method
    virtual void CreatePages(void) = 0;

private:
    list<Page*> pages_;
};


//Create the 'ConcreteCreator' # 1 class
class Resume : public Document
{
public:
    Resume()
    {
        CreatePages();
    }

    void CreatePages(void)
    {
        AddPages(new SkillsPage());
        AddPages(new EducationPage());
        AddPages(new ExperiencePage());
    }
};

//Create the 'ConcreteCreator' # 2 class
class Report : public Document
{
public:
    Report()
    {
        CreatePages();
    }

    void CreatePages(void)
    {
        AddPages(new SummaryPage());
        AddPages(new IntroductionPage());
        AddPages(new ResultsPage());
        AddPages(new ConclusionPage());
        AddPages(new BibliographyPage());
    }
};



//The Main method
int main()
{
    //Create two types of documents - constructors call the Factory Method
    Document* doc1 = new Resume();
    Document* doc2 = new Report();

    //Get and print the pages of the first document
    const list<Page*>& doc1Pages = doc1->GetPages();
    cout << "\nResume Pages -------------" << endl;
    for (list<Page*>::const_iterator it = doc1Pages.begin(); it != doc1Pages.end(); it++)
    {
        cout << "\t" << (*it)->GetPageName() << endl;
    }

    //Get and print the pages of the second document
    const list<Page*>& doc2Pages = doc2->GetPages();
    cout << "\nReport Pages -------------" << endl;
    for (list<Page*>::const_iterator it = doc2Pages.begin(); it != doc2Pages.end(); it++)
    {
        cout << "\t" << (*it)->GetPageName() << endl;
    }

    return 0;
}


The output is as follows:

Map/Reduce to recommend people connection

One common feature in social network sites is to recommend people connections, e.g. "People you may know" on LinkedIn. The basic idea is very simple: if person A and person B don't know each other but have a lot of common friends, then the system should recommend person B to person A and vice versa.

From a graph theory perspective, for each person who is 2-degree reachable from person A, we count how many distinct paths (with 2 connecting edges) exist between this person and person A. We rank this list by the number of paths and show the top 10 persons that person A should connect with.

We show how we can use Map/Reduce to compute this top-10 connection list for every person. The problem can be stated as: for every person X, determine a list of persons X1, X2 ... X10, which are the top 10 persons with whom person X has the most common friends.

The social network graph is generally very sparse. Here we assume the input records are an adjacency list sorted by name.
"ricky" => ["jay", "peter", "phyllis"]
"peter" => ["dave", "jack", "ricky", "susan"]
We use two rounds of Map/Reduce jobs to compute the top-10 list.

First Round MR Job

The purpose of this MR job is to compute the number of distinct paths between all pairs of people who are separated by 2 degrees.
  • In Map(), we do a cartesian product over all pairs of friends (since these friends may be connected within 2 degrees). We also need to eliminate pairs that already have a direct connection; therefore, the Map() function should also emit pairs of directly connected persons. We need to order the key space such that all keys for the same pair of people go to the same reducer, and such that the direct-connection pair comes before the pairs with 2 degrees of separation.
  • In Reduce(), we know all the key pairs reaching the same reducer will be sorted, so the direct-connection pair will come before the 2-degree pairs. The reducer just needs to check whether the first pair is a direct connection and, if so, skip the rest.
Input record ...  person -> connection_list
e.g. "ricky" => ["jay", "john", "mitch", "peter"]
also the connection list is sorted by alphabetical order

def map(person, connection_list)
  # Emit each direct connection first so it can be used to
  # eliminate 2-degree pairs who already know each other
  for each friend1 in connection_list
    emit([person, friend1, 0], 1)
    # Compute a cartesian product of the friends using nested loops
    for each friend2 > friend1 in connection_list
      emit([friend1, friend2, 1], 1)

def partition(key)
  # Use the first two elements of the key to choose a reducer
  return super.partition([key[0], key[1]])

def reduce(person_pair, frequency_list)
  # Check if this is a new pair
  if @current_pair != [person_pair[0], person_pair[1]]
    @current_pair = [person_pair[0], person_pair[1]]
    # Skip all subsequent pairs if these two persons
    # already know each other (the direct pair sorts first)
    @skip = (person_pair[2] == 0)

  if !@skip
    path_count = 0
    for each count in frequency_list
      path_count += count
    emit(@current_pair, path_count)

Output record ... person_pair => path_count
e.g. ["jay", "john"] => 5



Second Round MR Job

The purpose of this MR job is to rank the connections of every person by the number of distinct paths between them.
  • In Map(), we rearrange the input records so that they arrive at the reducer sorted by path count
  • In Reduce(), all the connections of a person arrive sorted, so we just need to aggregate the top 10 into a list and then write the list out
Input record = Output record of round 1

def map(person_pair, path_count)
  emit([person_pair[0], path_count], person_pair[1])

def partition(key)
  # Use the first element of the key to choose a reducer
  return super.partition(key[0])

def reduce(connection_count_pair, candidate_list)
  # Check if this is a new person
  if @current_person != connection_count_pair[0]
    emit(@current_person, @top_ten) if @current_person != nil
    @top_ten = []
    @current_person = connection_count_pair[0]

  # Pick the top ten candidates to connect with
  # (assumes keys reach the reducer sorted by descending path_count)
  for each candidate in candidate_list
    break if @top_ten.size >= 10
    @top_ten.append([candidate, connection_count_pair[1]])

def close()
  # Emit the list for the last person
  emit(@current_person, @top_ten)

Output record ... person -> candidate_count_list
e.g. "ricky" => [["jay", 5], ["peter", 3] ...]
