Here are the steps for creating an end-to-end application in PlinyCompute. We recommend following this tutorial to become familiar with the system and some of its key concepts. The tutorial assumes that you have:

  • successfully installed and deployed an instance of PlinyCompute.
  • created, compiled, and built a shared library named libDoubleVectorAggregation.so, with the following definition and declaration. See the detailed steps for building shared libraries. When developing your own applications, the name given to your shared libraries is arbitrary.
Step 1: Create a connection to the PlinyCompute manager node, create storage, and register shared libraries.
  // Creates a connection to PlinyCompute's manager node. If this client
  // is connecting to a remote manager node, replace "localhost" with the
  // correct IP address of that node.
  PDBClient pdbClient(8108, "localhost");

  // Creates the database for storing data. The database and set names
  // created here must match the names used when the data is stored and
  // when the computations are built ("test83_db" / "test83_set").
  pdbClient.createDatabase("test83_db");

  // Creates the set in that database that stores the input data
  pdbClient.createSet<DoubleVector>("test83_db", "test83_set");

  // Creates the set in that database that stores the output data.
  // NOTE(review): this set is created with type DoubleVector, but the
  // results are read back from it as DoubleVectorResult -- confirm which
  // type the output set is meant to be created with.
  pdbClient.createSet<DoubleVector>("test83_db", "output_set1");

  // Registers a query class provided in a shared library
  pdbClient.registerType("libraries/libDoubleVectorAggregation.so");

Step 2: Storing data
  
  // Adds data to storage. The data is generated in allocation blocks of at
  // most 64 Megabytes each, numOfMb Megabytes in total; `total` counts the
  // number of DoubleVector objects that were created.
  double total = 0;
  if (numOfMb > 0) {
    const int maxBlockMb = 64;
    const int fullBlocks = numOfMb / maxBlockMb;
    const int remainder = numOfMb - maxBlockMb * fullBlocks;

    // one extra, smaller block is needed when numOfMb is not a multiple
    // of 64
    const int numIterations = (remainder > 0) ? fullBlocks + 1 : fullBlocks;

    for (int num = 0; num < numIterations; num++) {
      // every block is 64 Megabytes, except for the last one, which only
      // holds the remaining Megabytes (if any)
      const bool isLastBlock = (num == numIterations - 1);
      const int blockSize =
          (isLastBlock && remainder > 0) ? remainder : maxBlockMb;

      // creates the allocation block where objects will be allocated
      makeObjectAllocatorBlock(blockSize * 1024 * 1024, true);

      // creates a Handle to a container of type Vector, where
      // each element is a Vector of doubles
      Handle<Vector<Handle<DoubleVector>>> storeMe =
          makeObject<Vector<Handle<DoubleVector>>>();

      try {
        // keeps adding objects until the allocation block is exhausted,
        // which is signalled by a pdb::NotEnoughSpace exception
        while (true) {
          // creates a Handle to a container of type DoubleVector,
          // which in essence is a Vector of doubles
          Handle<DoubleVector> myData = makeObject<DoubleVector>(10);

          // populates the Vector with 10 doubles
          for (int j = 0; j < 10; j++) {
            (*(myData->data))[j] = 1;
          }

          // adds this element to the Vector
          storeMe->push_back(myData);
          total = total + 1;
        }
      } catch (const pdb::NotEnoughSpace&) {
        // prints every 10000th stored element as a progress sample
        auto numStored = storeMe->size();
        for (decltype(numStored) i = 0; i < numStored; i += 10000) {
          std::cout << i << ": ";
          ((*storeMe)[i])->print();
        }

        // when there is no more memory in the allocator,
        // sends data to be stored in the cluster
        pdbClient.sendData<DoubleVector>(
            std::pair<std::string, std::string>("test83_set", "test83_db"),
            storeMe);
      }
    }
    std::cout << "Total=" << total << std::endl;

    // forces to write all buffered records
    pdbClient.flushData();
  }
Step 3: Running computations and printing results
  
 // creates a 128 Megabytes allocation block where objects
 // will be allocated
 const UseTemporaryAllocationBlock tempBlock{1024 * 1024 * 128};

 // creates the computation objects: a scan over the input set and an
 // aggregation that writes to the output set
 Handle<Computation> myScanSet =
    makeObject<ScanDoubleVectorSet>("test83_db", "test83_set");
 Handle<Computation> myAgg =
    makeObject<DoubleVectorAggregation>("test83_db", "output_set1");

 // the aggregation takes the scan as its input
 myAgg->setInput(myScanSet);

 // executes computations
 pdbClient.executeComputations(myAgg);

 // prints input data (only every 10000th item, to keep the output short)
 SetIterator<DoubleVector> inputData =
    pdbClient.getSetIterator<DoubleVector>("test83_db", "test83_set");

 std::cout << "Print input data: ";
 int inputCount = 0;
 for (auto a : inputData) {
   inputCount++;
   if (inputCount % 10000 == 0) {
     std::cout << inputCount << ":";
     a->print();
   }
 }
 std::cout << "Number of input items:" << inputCount << "\n";

 // prints every item of the aggregation output. The iterator and counter
 // use their own names because they live in the same scope as the ones
 // used for the input data above.
 std::cout << "Print query results:" << std::endl;
 SetIterator<DoubleVectorResult> queryResults =
    pdbClient.getSetIterator<DoubleVectorResult>("test83_db", "output_set1");

 std::cout << "Query results: ";
 int outputCount = 0;
 for (auto a : queryResults) {
   outputCount++;
   std::cout << outputCount << ":";
   a->print();
 }
 std::cout << "Aggregation output count:" << outputCount << "\n";
}