Here are the steps for creating an end-to-end application in PlinyCompute. We recommend following this tutorial to become familiar with the system and some of its key concepts. The tutorial assumes that you have:
- successfully installed and deployed an instance of PlinyCompute.
- created, compiled, and built a shared library named libDoubleVectorAggregation.so, with the following definition and declaration. See here for detailed steps on building shared libraries. When developing your own applications, the name given to your shared libraries is arbitrary.
Step 1: Create a connection to the PlinyCompute manager node, create storage, and register shared libraries.
// Creates a connection to PlinyCompute's manager node, if this client // is connecting to a remote manager node replace "localhost" with the // correct IP address of that node. PDBClient pdbClient(8108, "localhost"); // Creates a new database for storing data pdbClient.createDatabase("test_db"); // Creates a new set in that database for storing data pdbClient.createSet<DoubleVector>("test_db", "test_set"); // Creates a new set in that database for storing output data pdbClient.createSet<DoubleVector>("test_db", "output_set1"); // Registers a query class provided in a shared library pdbClient.registerType("libraries/libDoubleVectorAggregation.so");
Step 2: Storing data
// Adds data to storage double total = 0; if (numOfMb > 0) { int numIterations = numOfMb / 64; int remainder = numOfMb - 64 * numIterations; if (remainder > 0) { numIterations = numIterations + 1; } for (int num = 0; num < numIterations; num++) { int blockSize = 64; if ((num == numIterations - 1) && (remainder > 0)) { blockSize = remainder; } // creates a 64 Megabytes allocation block where objects // will be allocated makeObjectAllocatorBlock(blockSize * 1024 * 1024, true); // creates a Handle to a container of type Vector, where // each element is a Vector of doubles Handle<Vector<Handle<DoubleVector>>> storeMe = makeObject<Vector<Handle<DoubleVector>>>(); try { for (int i = 0; true; i++) { // creates a Handle to a container of type DoubleVector, // which in essence is a Vector of doubles Handle<DoubleVector> myData = makeObject<DoubleVector>(10); // populates the Vector with 10 doubles for (int j = 0; j < 10; j++) { (*(myData->data))[j] = 1; } // adds this element to the Vector storeMe->push_back(myData); total = total + 1; } } catch (pdb::NotEnoughSpace& n) { for (int i = 0; i < storeMe->size(); i++) { if (i % 10000 == 0) { std::cout << i << ": "; ((*storeMe)[i])->print(); } } // when there is no more memory in the allocator, // sends data to be stored in the cluster pdbClient.sendData<DoubleVector>( pair<string, string>("test83_set", "test83_db"), storeMe); } } std::cout << "Total=" << total << std::endl; // forces to write all buffered records pdbClient.flushData(); }
Step 3: Running computations and printing results
// creates a 128 Megabytes allocation block where objects // will be allocated const UseTemporaryAllocationBlock tempBlock{1024 * 1024 * 128}; // creates the computation objects Handle<Computation> myScanSet = makeObject<ScanDoubleVectorSet>("test83_db", "test83_set"); Handle<Computation> myAgg = makeObject<DoubleVectorAggregation>("test83_db", "output_set1"); myAgg->setInput(myScanSet); // executes computations pdbClient.executeComputations(myAgg); // prints input data SetIterator<DoubleVector> result = pdbClient.getSetIterator<DoubleVector>("test83_db", "test83_set"); std::cout << "Print input data: "; int count = 0; for (auto a : result) { count++; if (count % 10000 == 0) { std::cout << count << ":"; a->print(); } } std::cout << "Number of input items:" << count << "\n"; std::cout << "Print query results:" << std::endl; SetIterator<DoubleVectorResult> result = pdbClient.getSetIterator<DoubleVectorResult>("test83_db", "output_set1"); std::cout << "Query results: "; int count = 0; for (auto a : result) { count++; std::cout << count << ":"; a->print(); } std::cout << "Aggregation output count:" << count << "\n"; }