/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
/*
*
*	Author: Yu Cao <caoyu08@csnet1.cs.tsinghua.edu.cn>
 */


#include <ctype.h>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <cassert>

#include "ns3/core-module.h"
#include "ns3/ipv4-global-routing.h"
#include "ns3/nampt-l4-protocol.h"

#include "ns3/applications-module.h"
#include "ns3/network-module.h"
#include "ns3/internet-module.h"
#include "ns3/point-to-point-module.h"


/*
 Note: before building this scenario, uncomment the macro
 #define NAMPT_LARGE_SCALE
 which is located in "nampt-l4-protocol.h".
*/


// Scenario name; passed to NS_LOG_COMPONENT_DEFINE further down in this file.
#define APPNAME				"FatTree"
// When defined, main() opens one "Node.<name>" output stream per node;
// when undefined, all nodes of a tier (tors/hosts/aggres/cores) share one stream.
#define						PER_NODE_LOGS

// Uplink routing scheme. Exactly one of the two is expected to be defined:
// ROUTING_SUFFIX installs routes keyed on the low address bytes (masks 0.0.0.255 / 0.0.255.255),
// ROUTING_ECMP installs equal 10.0.0.0/8 routes and enables "RandomEcmpRouting".
#define						ROUTING_SUFFIX
//#define						ROUTING_ECMP

// Traffic pattern started by main() and continued from TransferExit().
#define						PERMUTATION
//#define						RANDOM_FLOWS
//#define						DISTRIBUTED_READ

// Topology dimensions: main() creates kAry pods, kAry/2 TORs per pod,
// upLinksOfTOR aggregation switches per pod and kAry-upLinksOfTOR hosts per TOR.
static const uint32_t		kAry				= 8;
static const uint32_t		upLinksOfTOR		= 4;
// Number of consecutive IPv4 addresses assigned to each host's access interface.
static const uint32_t		nHostAddr		= 2;
// Number of access points (ports 22, 23, ...) added per flow; only read under ROUTING_ECMP.
static const uint32_t		nSubflows		= 2;
// Data rate and propagation delay of the access (TOR-host), aggregation (aggre-TOR)
// and core (core-aggre) point-to-point links.
static const char *			bwAccess = "1Gbps";
static const char *			bwAggre  = "1Gbps";
static const char *			bwCore   = "1Gbps";
static const char *			delayAccess	= "20us"; 
static const char *			delayAggre	= "30us";
static const char *			delayCore	= "40us";
// 1Gbps * 105us / (8*1500)  = 9 pkts
// 1Gbps * 250us / (8*1500) = 21 pkts
// 1Gbps * 435us / (8*1500) = 37 pkts

// Defaults pushed through Config::SetDefault in main():
// DropTailQueue MaxPackets / MarkLine, NaMPTSocket gamma / beta,
// RttEstimator MinRTO (seconds), TCP segment size (also the app SendSize),
// and Queue SampleInterval.
static const uint32_t		queueSize	= 100;
static const uint32_t		markLine		= 10;
static const uint32_t		mygamma		= 1;
static const uint32_t		mybeta		= 4;
static const double			minRTO		= 0.2;
static const uint32_t		pktSize		= 1400;
static const int32_t			linkSample	= 100; // ms

static const uint32_t		flowSizeThreshold	= 262144; // 256KB, watershed between small flows and large flows
static const uint32_t		readRequestFlowSize	=  2048; // 2KB
static const uint32_t		readResponseFlowSize	= 65536; // 64KB
static const uint32_t		readFlows			= 8; //  512KB
static const double			readArrivalInterval	= 50.0;  // ms, doc: 10us ~ 1ms
static const uint32_t		readJobs				= 8; // the number of jobs
static const uint32_t		largeFlowCap			= 4; // each node receives no more than # of large flows

// Once this many large flows have completed, the traffic generators stop issuing new ones.
static const uint64_t		maxTotalLargeFlows	= 2000;
// Settable with the "st" command-line option; the Simulator::Stop / SetStopTime calls
// that would consume it are commented out in this file.
static double				simTime		= 0; // seconds
static const uint32_t		sndBufSize	= 400000; //1Gbps * 80us = 10 pkts
static const uint32_t		rcvBufSize	= 800000;
// Congestion control chosen per flow in TransferData(): ccLargeFlow for large flows,
// ccJobFlow for flows no larger than flowSizeThreshold.
ns3::NaMPTAlgo_t				ccLargeFlow	= ns3::NAMPT_CC_XMP;
ns3::NaMPTAlgo_t				ccJobFlow	= ns3::NAMPT_CC_LIA;
// Defaults for the NaMPTSocket "SchedulingAlgo" and "AssemblingAlgo" attributes.
ns3::NaMPTAlgo_t				schedulingAlgo = ns3::NAMPT_SOD;
ns3::NaMPTAlgo_t				assemblingAlgo = ns3::NAMPT_NA;
// Path-management flags OR-ed into every application's m_pathManagement.
static const uint32_t		pmManagement = ns3::NAMPT_PM_SRC_ROUNDROBIN;// | ns3::NAMPT_PM_DETECT_LAN;


using namespace ns3;

NS_LOG_COMPONENT_DEFINE (APPNAME);

// Run-wide counters, updated by TransferData() (flow start), IssueDistributedRead()
// (job start) and TransferExit() (flow/job completion).
uint64_t					totalLargeFlows	= 0; // large flows that have completed
uint64_t					totalJobs		= 0; // read jobs that have completed
uint32_t					activeLargeFlows	= 0; // large flows currently in flight
uint32_t					activeJobs		= 0; // read jobs currently in flight
uint32_t					peakActiveLarges	= 0; // maximum of activeLargeFlows seen so far
uint32_t					peakActiveJobs	= 0; // maximum of activeJobs seen so far
// Flat list of every host, filled by main() in pod / TOR / host order.
std::vector<Ptr<Node> >	hostArray;
Ptr<Node> 				coreArray[upLinksOfTOR][kAry/2]; // indexed by aggre and uplink
NodeContainer Hosts[kAry][kAry/2]; // indexed by pod and TOR
NodeContainer Cores;
NodeContainer Aggres[kAry]; // indexed by pod
NodeContainer TORs[kAry]; // indexed by pod
// Forward declarations for functions used before their definitions.
uint32_t TransferData(Ptr<Node> src, Ptr<Node> des, uint32_t size);
Ipv4Address GetNodeAddress(Ptr<Node> node, bool bRandom = true);
void LocateHostCoordinates(Ptr<Node> node, uint32_t& ipod, uint32_t& itor, uint32_t& ihost);


// Note: flow size must be large than flowSizeThreshold
uint32_t LargeFlowSize()
{
#ifdef PERMUTATION
  static UniformVariable uniformFlowSize(64, 512); // 64M ~ 512M, or lasting for 0.5s ~ 4s
  return static_cast<uint32_t> (uniformFlowSize.GetValue()*1024*1024);
#else
  static ParetoVariable paretoFlowSize(2.0, 1.5, 8.0); // 2/3*96M=64M,  2*96M=192M,  8*96M=768M
  double pareto = paretoFlowSize.GetValue();
  return static_cast<uint32_t> (pareto*96*1024*1024);
#endif
}

// Returns an address of `node`.
// With bRandom == false (or when the node is not a multi-address host) this is the
// node's main address. Otherwise a random offset drawn from [0, nHostAddr) is added,
// selecting one of the consecutive addresses main() assigned to the host.
// A node counts as a host when its last address byte is > 1 (switches use .1) and
// its second byte is a pod index < kAry (cores use kAry there).
Ipv4Address GetNodeAddress(Ptr<Node> node, bool bRandom)
{
  static UniformVariable addrOffsetRng(0, nHostAddr);
  const uint32_t mainAddr = node->m_mainAddr.Get();

  if (!bRandom || !(nHostAddr>1))
    return Ipv4Address(mainAddr);

  const uint32_t lastByte = mainAddr & 0x000000FF;
  const uint32_t podByte = (mainAddr & 0x00FF0000) >> 16;
  if (!(lastByte>1) || !(podByte<kAry))
    return Ipv4Address(mainAddr);

  const uint32_t shift = static_cast<uint32_t>(addrOffsetRng.GetValue() );
  return Ipv4Address(mainAddr + shift);
}

// Decodes a host's (pod, TOR, host) indices from its main address 10.<pod>.<tor>.<n>.
// Host addresses start at .2 and each host owns nHostAddr consecutive addresses,
// hence the "- 2" and the division.
void LocateHostCoordinates(Ptr<Node> node, uint32_t& ipod, uint32_t& itor, uint32_t& ihost)
{
  const uint32_t mainAddr = GetNodeAddress(node, false).Get();
  const uint32_t lastByte = mainAddr & 0x000000FF;

  ipod = (mainAddr >> 16) & 0xFF;
  itor = (mainAddr >> 8) & 0xFF;
  ihost = (lastByte - 2) / nHostAddr;
}

// Picks a random host and reports its coordinates through ipod/itor/ihost.
//  - If none of the three coordinates is (uint32_t)-1, their incoming values are
//    ignored: a host is drawn uniformly from hostArray and the coordinates are
//    overwritten with that host's position.
//  - Otherwise every coordinate equal to (uint32_t)-1 is replaced by a random
//    index, the others are kept, and the host at that position is returned.
Ptr<Node> RandomPickHost(uint32_t& ipod, uint32_t& itor, uint32_t& ihost)
{
  static UniformVariable podRng(0, kAry);
  static UniformVariable torRng(0, kAry/2);
  static UniformVariable hostRng(0, kAry-upLinksOfTOR);
  static UniformVariable flatRng(0, kAry*(kAry/2)*(kAry-upLinksOfTOR));
  const uint32_t unset = (uint32_t)-1;

  const bool anyUnset = (ipod == unset) || (itor == unset) || (ihost == unset);
  if (!anyUnset)
  {
    // draw from the flat host list
    const uint32_t slot = static_cast<uint32_t>(flatRng.GetValue() );
    Ptr<Node> picked = hostArray.at(slot);
    LocateHostCoordinates(picked, ipod, itor, ihost);
    return picked;
  }

  // fill in only the missing coordinates
  if (ipod == unset)
  {
    ipod = static_cast<uint32_t>(podRng.GetValue() );
  }
  if (itor == unset)
  {
    itor = static_cast<uint32_t>(torRng.GetValue() );
  }
  if (ihost == unset)
  {
    ihost = static_cast<uint32_t>(hostRng.GetValue() );
  }
  return Hosts[ipod][itor].Get(ihost);
}

// Starts one large flow from src to des with probability `prob`.
// A null src or des (or src == des) is replaced by a randomly picked host; a randomly
// picked destination already receiving largeFlowCap large flows is rejected and redrawn.
// Returns false when the probability draw decides not to issue a flow, true otherwise.
bool IssueLargeFlows(Ptr<Node> src, Ptr<Node> des, double prob)
{
  static UniformVariable coin(0, 1.0);
  if (coin.GetValue() > prob)
  {
    return false;
  }

  uint32_t pod, tor, host;

  // choose a source if none was supplied (or it equals the destination)
  for (; !src || src == des; )
  {
    pod = 0; tor = 0; host = 0;
    src = RandomPickHost(pod, tor, host);
  }

  // choose a destination different from the source and below the cap
  for (; !des || src == des; )
  {
    pod = 0; tor = 0; host = 0;
    des = RandomPickHost(pod, tor, host);
    const bool saturated = (largeFlowCap>0 && des->m_locked>=largeFlowCap);
    if (saturated)
    {
      des = 0; // re-select one
    }
  }
  NS_ASSERT(src != des);

  const uint32_t bytes = LargeFlowSize();
  const uint32_t appId = TransferData(src, des, bytes);
  des->m_locked++; // one more large flow is destined to des
  src->m_largeFlows[appId] = des; // remembered so TransferExit() can release the lock
  return true;
}

// Starts one large flow from `src` to a random host outside src's own rack
// (i.e. not under the same pod/TOR pair), preferring destinations that currently
// receive fewer than largeFlowCap large flows.
//
// The original loop used `continue` inside do { } while (cond) to reject a saturated
// destination, but `continue` in a do-while jumps to the condition, so a saturated
// host in another rack was accepted anyway. The cap is now part of the loop condition.
// To avoid spinning forever when every eligible host is saturated, the cap is only
// enforced for a bounded number of draws; after that any other-rack host is accepted
// (which is what the original code effectively always did).
void IssueNotInnerRackFlows(Ptr<Node> src)
{
  uint32_t ipod, itor, ihost, dpod, dtor, dhost;
  LocateHostCoordinates(src, ipod, itor, ihost);

  const uint32_t maxCappedDraws = 100 * (static_cast<uint32_t>(hostArray.size()) + 1);
  uint32_t draws = 0;
  Ptr<Node> des = 0;
  bool sameRack = false;
  bool overCap = false;
  do {
    // (uint32_t)-1 asks RandomPickHost to randomise that coordinate
    dpod = (uint32_t)-1, dtor = (uint32_t)-1, dhost = (uint32_t)-1;
    des = RandomPickHost(dpod, dtor, dhost);
    sameRack = (ipod==dpod && itor==dtor);
    overCap = (draws < maxCappedDraws) &&
              (largeFlowCap>0 && des->m_locked>=largeFlowCap);
    draws++;
  } while (sameRack || overCap);

  IssueLargeFlows (src, des, 1.0);
}

void IssueDistributedRead()
{
  static UniformVariable uniformVar;
  //static ExponentialVariable expArrival(readArrivalInterval, 10*readArrivalInterval); // ms
  static LogNormalVariable logNormalArrival(
  		log(readArrivalInterval)-0.5*log(1.0+10.0*readArrivalInterval/(readArrivalInterval*readArrivalInterval)),
  		sqrt(log(1.0+10.0*readArrivalInterval/(readArrivalInterval*readArrivalInterval))) );
  
  std::vector<Ptr<Node> > hosts = hostArray;
  uint32_t size = hosts.size();
  // randomly pick a client node
  uint32_t idx = uniformVar.GetInteger(0, size-1);
  Ptr<Node> client = hosts[idx];
  hosts[idx] = hosts[size-1];
  size--;
  // create a read job
  DatacenterJob_t* job = new DatacenterJob_t;
  job->tmBegin = Simulator::Now();
  job->requestNode = client;
  job->flowNum = readFlows;
  job->nextJobArrival = std::min(10.0*readArrivalInterval, logNormalArrival.GetValue() ); // ms
  // randomly pick some server nodes
  for (uint32_t i = 0; i < job->flowNum; i++)
  {
    idx = uniformVar.GetInteger(0, size-1);
	Ptr<Node> server = hosts[idx];
	hosts[idx] = hosts[size-1];
	size--;
	// issue requests
	uint32_t appid = TransferData(client, server, readRequestFlowSize);
	NS_ASSERT(appid != (uint32_t)-1 && 
		job->requestFlows.find(appid) == job->requestFlows.end() && 
		client->m_dcJobs.find(appid) == client->m_dcJobs.end() );
	job->requestFlows[appid] = server;
	client->m_dcJobs[appid] = job;
  }
  
  activeJobs++;
  peakActiveJobs = std::max(peakActiveJobs, activeJobs);
  std::clog << 
    Simulator::Now().GetSeconds() << " " << 
    "Jobs: " << totalJobs << "/" << peakActiveJobs << "/" << activeJobs << " " << 
    "NextArrival: " << job->nextJobArrival << std::endl;
}

void PermutationTraffic()
{
  static UniformVariable uniformPermutation;
  std::vector<Ptr<Node> > des = hostArray;
  uint32_t size = des.size();
  uint32_t s = 0, d = 0;
  while (size)
  {
    d = uniformPermutation.GetInteger(0, size-1);
	TransferData(hostArray[s], des[d], LargeFlowSize() );
	s++;
	Ptr<Node> tmp = des[d];
	des[d] = des[size-1];
	des[size-1] = tmp;
	size--;
  }
}

// Adds address `addr`/`mask` to the IPv4 interface bound to `device`, creating that
// interface first if the device has none, then sets its metric to 1 and brings it up.
void AssignAddress(Ptr<NetDevice> device, Ipv4Address addr, Ipv4Mask mask)
{
  Ptr<Ipv4> ipStack = device->GetNode ()->GetObject<Ipv4> ();

  int32_t ifIndex = ipStack->GetInterfaceForDevice (device);
  if (ifIndex == -1)
  {
    // the device is not yet attached to the IPv4 stack
    ifIndex = ipStack->AddInterface (device);
  }

  ipStack->AddAddress (ifIndex, Ipv4InterfaceAddress (addr, mask));
  ipStack->SetMetric (ifIndex, 1);
  ipStack->SetUp (ifIndex);
}

// Returns the first Ipv4GlobalRouting protocol found in the node's list-routing
// protocol, or a null pointer when the node's routing protocol is not an
// Ipv4ListRouting or the list holds no global-routing entry.
Ptr<Ipv4GlobalRouting> GetGlobalRouting (Ptr<Ipv4> ipv4)
{
  Ptr<Ipv4ListRouting> routingList = DynamicCast<Ipv4ListRouting>(ipv4->GetRoutingProtocol());
  if (routingList)
  {
    int16_t unusedPriority;
    for (uint32_t slot = 0; slot < routingList->GetNRoutingProtocols(); ++slot)
    {
      Ptr<Ipv4GlobalRouting> global =
        DynamicCast<Ipv4GlobalRouting>(routingList->GetRoutingProtocol(slot, unusedPriority));
      if (global)
      {
        return global;
      }
    }
  }
  return 0;
}

// Deletes every static route that leaves through a non-loopback interface
// (interface index != 0) and has no gateway (0.0.0.0). All other routes are kept.
void RemoveUselessStaticRoutes(Ptr<Ipv4StaticRouting> staticRouting)
{
  // The index only advances when the current route is kept: removing a route
  // shifts the following ones down into the same index.
  for (uint32_t idx = 0; idx < staticRouting->GetNRoutes(); )
  {
    const Ipv4RoutingTableEntry& route = staticRouting->GetRoute(idx);
    const bool keep =
      (route.GetInterface() == 0) || !(route.GetGateway() == Ipv4Address("0.0.0.0"));
    if (keep)
    {
      idx++;
    }
    else
    {
      staticRouting->RemoveRoute(idx);
    }
  }
}

// Completion callback installed on every application by TransferData() (app->m_Notify).
//   node   - node the finished application was installed on (the flow's source)
//   index  - application id, as returned by node->AddApplication()
//   size   - flow size in bytes; compared against flowSizeThreshold to classify the flow
//   elapse, rate - only printed here; NOTE(review): units are not visible in this file
// It advances distributed-read jobs, releases the destination's large-flow lock,
// updates the global counters, removes the application, and then either issues
// follow-up traffic or stops the simulator, depending on the traffic-pattern macros.
void TransferExit(Ptr<Node> node, uint32_t index, uint32_t size, double elapse, double rate)
{
  // Jobs
  std::map<uint32_t, DatacenterJob_t*>::iterator itJob;
  itJob = node->m_dcJobs.find(index);
  if (itJob != node->m_dcJobs.end() )
  {
    DatacenterJob_t* job = (DatacenterJob_t*)itJob->second;
	node->m_dcJobs.erase(itJob);
	if (job->requestNode == node) // request exits
	{
	  // The finished flow was a request sent by the job's client: look up which
	  // server it went to and let that server send the response back.
	  std::map<uint32_t, Ptr<Node> >::iterator itRequest;
	  itRequest = job->requestFlows.find(index);
	  NS_ASSERT(itRequest != job->requestFlows.end() );
	  Ptr<Node> server = (Ptr<Node>)itRequest->second;
	  job->requestFlows.erase(itRequest);
	  // issue respond flows
	  uint32_t appid = TransferData(server, node, readResponseFlowSize);
	  NS_ASSERT(appid != (uint32_t)-1 && server->m_dcJobs.find(appid) == server->m_dcJobs.end() );
	  server->m_dcJobs[appid] = job;
	}
	else //  respond exits
	{
	  // The finished flow was a response sent by a server; the job is complete
	  // once all of its responses have finished.
	  job->flowNum--;
	  if (job->flowNum == 0) // job exits
	  {
	    NS_ASSERT(job->requestFlows.size() == 0);
		job->requestNode = 0;
		double completionTime = Simulator::Now().GetSeconds() - job->tmBegin.GetSeconds();
		completionTime *= 1000.0;
		// copy out before the job is deleted
		double nextArrival = job->nextJobArrival;
		delete job;
		activeJobs--;
		totalJobs++;
		// output
		*(node->m_outputStream->GetStream()) << 
		  Simulator::Now().GetSeconds() << " " << 
		  "JOB " << completionTime << std::endl; // ms
		std::clog << 
		  Simulator::Now().GetSeconds() << " " << 
		  "Jobs: " << totalJobs << "/" << peakActiveJobs << "/" << activeJobs << " " << 
		  "JobTime: " << completionTime << std::endl;
		// issue the next job
		if (totalLargeFlows+activeLargeFlows < maxTotalLargeFlows)
		   Simulator::Schedule(Seconds(nextArrival/1000.0), &IssueDistributedRead);
	  }
	}
  }

  // large flows
  // Flows started through IssueLargeFlows() are recorded in m_largeFlows so the
  // destination's m_locked count can be decremented here.
  std::map<uint32_t, Ptr<Node> >::iterator itLarge;
  itLarge = node->m_largeFlows.find(index);
  if (itLarge != node->m_largeFlows.end() )
  {
    Ptr<Node> desNode = itLarge->second;
	node->m_largeFlows.erase(itLarge);
	desNode->m_locked--;
  }
  if (size > flowSizeThreshold)
  {
  	activeLargeFlows--;
	totalLargeFlows++;
	std::clog << 
	  Simulator::Now().GetSeconds() << " " << 
	  "LargeFlows: " << totalLargeFlows << "/" << peakActiveLarges << "/" << activeLargeFlows << " " << 
	  "Exit: " << size << " " << 
	  elapse << " " << 
	  rate << std::endl;
  }
  
  // cleanup app
  node->RemoveApplication(index);

  // What happens next depends on the traffic pattern selected at compile time.
#ifdef DISTRIBUTED_READ
  if (totalLargeFlows+activeLargeFlows < maxTotalLargeFlows)
  {
    // issue a new large flow
    if (size > flowSizeThreshold)
	  Simulator::ScheduleNow(&IssueNotInnerRackFlows, node);
  }
  else
  {
    // simulator exits
    if (activeJobs+activeLargeFlows == 0)
	  Simulator::Stop (Seconds(0.0) );
  }
#else
#ifdef PERMUTATION  
  // Start the next permutation round only after the whole current round has finished.
  if (activeLargeFlows == 0)
  {
    if (totalLargeFlows >= maxTotalLargeFlows)
	  Simulator::Stop (Seconds(0.0) );
	else
	  Simulator::ScheduleNow(&PermutationTraffic);
  }
#else
#ifdef RANDOM_FLOWS
  // Replace the finished flow with a new one from the same source to a random destination.
  if (totalLargeFlows+activeLargeFlows < maxTotalLargeFlows)
  	IssueLargeFlows(node, 0, 1.0);
  else if (activeLargeFlows == 0)
	Simulator::Stop (Seconds(0.0) );
#else
  // No traffic pattern selected: stop once nothing is in flight.
if (activeJobs+activeLargeFlows == 0)
  Simulator::Stop (Seconds(0.0) );
#endif
#endif
#endif
}

// Creates a BulkSendApplication on `src` that sends `size` bytes to `des`
// (port 22; size 0 means unlimited) and starts it at time 0.
// Flows with 0 < size <= flowSizeThreshold are small flows: single subflow, ccJobFlow.
// All other flows are large flows: ccLargeFlow, counted in activeLargeFlows and logged.
// TransferExit() is registered as the completion callback.
// Returns the application id on `src`, or (uint32_t)-1 when src == des (nothing is created).
uint32_t TransferData(Ptr<Node> src, Ptr<Node> des, uint32_t size)
{
  if (src == des)
  {
    return static_cast<uint32_t>(-1);
  }

  static ObjectFactory bulkSendFactory("ns3::BulkSendApplication");
  //bulkSendFactory.Set("Protocol", StringValue("ns3::TcpSocketFactory")); // default attributes
  Ptr<BulkSendApplication> app = bulkSendFactory.Create<BulkSendApplication> ();

  // remote end point(s)
#ifdef ROUTING_ECMP
  for(uint32_t sub = 0; sub < nSubflows; sub++)
  {
    app->m_accessPoints.push_back(InetSocketAddress(GetNodeAddress(des), 22+sub));
  }
  app->m_pathManagement |= NAMPT_PM_ACCESS_POINTS;
#else
  app->SetAttribute("Remote", AddressValue(InetSocketAddress (GetNodeAddress(des), 22)) );
#endif

  // local end point and transfer parameters
  app->m_localAddress = GetNodeAddress(src);
  app->SetAttribute("SendSize", UintegerValue(pktSize) );
  app->SetAttribute("MaxBytes", UintegerValue (size)); // Zero is unlimited.

  const bool isSmallFlow = (size > 0 && size <= flowSizeThreshold);
  app->m_pathManagement |= pmManagement;
  if (isSmallFlow)
  {
    app->m_pathManagement |= ns3::NAMPT_PM_SINGLE_SUBFLOW;
    app->m_ccAlgo = ccJobFlow;
  }
  else
  {
    app->m_ccAlgo = ccLargeFlow;
    activeLargeFlows++;
    peakActiveLarges = std::max(peakActiveLarges, activeLargeFlows);
    std::clog << Simulator::Now().GetSeconds() << " "
              << "LargeFlows: " << totalLargeFlows << "/" << peakActiveLarges << "/" << activeLargeFlows << " "
              << "Transfer: " << size << " "
              << Names::FindName(src) << " "
              << Names::FindName(des) << " "
              << des->m_locked << std::endl;
  }

  app->SetStartTime(Seconds(0.0) );
  //app->SetStopTime(Seconds (simTime));
  app->m_Notify = MakeCallback(TransferExit);
  const uint32_t appId = src->AddApplication(app); //  this node will generate start/stop events.
  app->m_index = appId;
  return app->m_index;
}


// Entry point. Sets ns-3 defaults from the constants at the top of the file, builds the
// topology (kAry pods, each with kAry/2 TORs, upLinksOfTOR aggregation switches and
// kAry-upLinksOfTOR hosts per TOR, plus upLinksOfTOR*kAry/2 core switches), assigns
// addresses and routes by hand, installs a packet sink on every host, starts the
// traffic pattern selected by the macros, and runs the simulation until one of the
// TransferExit() branches calls Simulator::Stop.
//
// Addressing used below:
//   host  10.<pod>.<tor>.<2..>      (nHostAddr consecutive addresses per host)
//   TOR   10.<pod>.<tor>.1
//   aggre 10.<pod>.<aggre + TORs per pod>.1
//   core  10.<kAry>.<aggre+1>.<uplink+1>
// Interface numbers passed to the routing calls rely on the order in which links are
// installed (interface 0 is loopback), so the construction order must not be changed.
int main (int argc, char *argv[])
{
	/* Enable log components */
	//LogComponentEnable ("PacketSink", LOG_LEVEL_ALL);
	//LogComponentEnableAll (LOG_LEVEL_ALL);

	/* Parse parameters */
	CommandLine cmd;
	cmd.AddValue ("st", "Simulation Time", simTime);
	cmd.Parse (argc, argv);

	/* Global configurations */
	Config::SetDefault ("ns3::TcpL4Protocol::SocketType", StringValue ("ns3::TcpReno") );
	Config::SetDefault ("ns3::TcpSocket::SegmentSize", UintegerValue (pktSize) );
	Config::SetDefault ("ns3::TcpSocket::DelAckCount", UintegerValue(1) );
	Config::SetDefault ("ns3::TcpSocket::SlowStartThreshold", UintegerValue(100) );
	Config::SetDefault ("ns3::TcpSocket::InitialCwnd", UintegerValue(2) );
	Config::SetDefault ("ns3::TcpSocket::RcvBufSize", UintegerValue(rcvBufSize) );
	Config::SetDefault ("ns3::TcpSocket::SndBufSize", UintegerValue(sndBufSize) );
	//Config::SetDefault ("ns3::NaMPTSocket::CongestionAlgo", UintegerValue(ccAlgo) );
    Config::SetDefault ("ns3::NaMPTSocket::SchedulingAlgo", UintegerValue(schedulingAlgo) );
	Config::SetDefault ("ns3::NaMPTSocket::AssemblingAlgo", UintegerValue(assemblingAlgo) );
	Config::SetDefault ("ns3::NaMPTSocket::gamma", UintegerValue(mygamma) );
	Config::SetDefault ("ns3::NaMPTSocket::beta", UintegerValue(mybeta) );
	Config::SetDefault ("ns3::DropTailQueue::MaxPackets", UintegerValue(queueSize) );
	Config::SetDefault ("ns3::DropTailQueue::MarkLine", UintegerValue(markLine) );
	Config::SetDefault ("ns3::Queue::SampleInterval", IntegerValue(linkSample) );
	Config::SetDefault ("ns3::RttEstimator::MinRTO", TimeValue(Seconds(minRTO)) );


	/* Construct topology */
	// scratch buffer for formatting node names and dotted-quad addresses
	static const uint32_t buflen = 512;
	char buf[buflen];

	// create all nodes and install the internet stack (with the NaMPT transport) on them
	InternetStackHelper stack;
	stack.SetTcp ("ns3::NaMPTL4Protocol");
	for (uint32_t ipod = 0; ipod < kAry; ipod ++)
	{
	  TORs[ipod].Create(kAry/2);
	  stack.Install(TORs[ipod]);
	  Aggres[ipod].Create(upLinksOfTOR);
	  stack.Install(Aggres[ipod]);
	  for (uint32_t itor = 0; itor < TORs[ipod].GetN(); itor ++)
	  {
	  	Hosts[ipod][itor].Create(kAry-upLinksOfTOR);
		stack.Install(Hosts[ipod][itor]);
	  }
	}
	Cores.Create(upLinksOfTOR * kAry / 2);
	stack.Install(Cores);

	// one link helper per tier, configured from the bw*/delay* constants
	PointToPointHelper linkAccess, linkAggre, linkCore;
	Ipv4AddressHelper address;
	NetDeviceContainer devs;
	linkAccess.SetChannelAttribute("Delay", StringValue(delayAccess) );
	linkAccess.SetDeviceAttribute ("DataRate", StringValue(bwAccess) );
	linkAggre.SetChannelAttribute("Delay", StringValue(delayAggre) );
	linkAggre.SetDeviceAttribute ("DataRate", StringValue(bwAggre) );
	linkCore.SetChannelAttribute("Delay", StringValue(delayCore) );
	linkCore.SetDeviceAttribute ("DataRate", StringValue(bwCore) );

#ifndef PER_NODE_LOGS
	// shared output streams, one per tier
	Ptr<OutputStreamWrapper> torOutput = Create<OutputStreamWrapper> (NAMPT_PATH_PREFIX"Node.tors", std::ios::out);
	Ptr<OutputStreamWrapper> hostOutput = Create<OutputStreamWrapper> (NAMPT_PATH_PREFIX"Node.hosts", std::ios::out);
	Ptr<OutputStreamWrapper> aggreOutput = Create<OutputStreamWrapper> (NAMPT_PATH_PREFIX"Node.aggres", std::ios::out);
	Ptr<OutputStreamWrapper> coreOutput = Create<OutputStreamWrapper> (NAMPT_PATH_PREFIX"Node.cores", std::ios::out);
#endif
	for (uint32_t ipod = 0; ipod < kAry; ipod++)
	{
	  // tors to hosts
	  for (uint32_t itor = 0; itor < TORs[ipod].GetN(); itor++)
	  {
		std::string torname, hostname;
	    // naming tors
	    snprintf(buf, buflen, "tor-%u-%u", ipod, itor);
		torname = buf;
		Names::Add (torname, TORs[ipod].Get(itor) );
#ifndef PER_NODE_LOGS
        TORs[ipod].Get(itor)->m_outputStream = torOutput;
#else
        TORs[ipod].Get(itor)->m_outputStream = Create<OutputStreamWrapper> (NAMPT_PATH_PREFIX"Node."+torname, std::ios::out);
#endif
		//  access links
		// iaddr is the last address byte handed to the next host address; .1 is the TOR itself
	    uint32_t iaddr = 2;
	    for (uint32_t ihost = 0; ihost < Hosts[ipod][itor].GetN(); ihost++)
	    {
	      devs = linkAccess.Install(TORs[ipod].Get(itor), Hosts[ipod][itor].Get(ihost) );
		  // naming hosts
		  snprintf(buf, buflen, "host-%u-%u-%u", ipod, itor, ihost);
		  hostname = buf;
		  Names::Add (hostname, Hosts[ipod][itor].Get(ihost) );
#ifndef PER_NODE_LOGS
		  Hosts[ipod][itor].Get(ihost)->m_outputStream = hostOutput;
#else
		  Hosts[ipod][itor].Get(ihost)->m_outputStream = Create<OutputStreamWrapper> (NAMPT_PATH_PREFIX"Node."+hostname, std::ios::out);
#endif
		  hostArray.push_back(Hosts[ipod][itor].Get(ihost) );
          Hosts[ipod][itor].Get(ihost)->m_locked = 0;
		  // addressing hosts
		  Ipv4StaticRoutingHelper ipv4StaticRoutingHelper;
		  Ptr<Ipv4> ipv4 = TORs[ipod].Get(itor)->GetObject<Ipv4> ();
		  Ptr<Ipv4StaticRouting> staticRouting = ipv4StaticRoutingHelper.GetStaticRouting(ipv4);
		  for (uint32_t i = 0; i < nHostAddr; i++, iaddr++)
		  {
		    NS_ASSERT(iaddr < 255);
		    snprintf(buf, buflen, "10.%u.%u.%u", ipod, itor, iaddr); // host
		    AssignAddress(devs.Get(1), Ipv4Address(buf), Ipv4Mask("255.255.255.0") );
			// the first address of a host is its main address
			if (i == 0)
			  Hosts[ipod][itor].Get(ihost)->m_mainAddr = Ipv4Address(buf);
			// add a static host route for each tor
			staticRouting->AddHostRouteTo(Ipv4Address(buf), Ipv4Address(buf), ihost+1);// interface 0 is loopback.
		  }
		  // addressing tors
		  snprintf(buf, buflen, "10.%u.%u.1", ipod, itor); // tor
		  AssignAddress(devs.Get(0), Ipv4Address(buf), Ipv4Mask("255.255.255.0") );
		  TORs[ipod].Get(itor)->m_mainAddr = Ipv4Address(buf);
		  RemoveUselessStaticRoutes(staticRouting);
		  // add a default route (gateway) for each host
		  // (buf still holds the TOR address formatted just above)
		  ipv4 = Hosts[ipod][itor].Get(ihost)->GetObject<Ipv4> ();
		  staticRouting = ipv4StaticRoutingHelper.GetStaticRouting(ipv4);
		  staticRouting->SetDefaultRoute(Ipv4Address(buf), 1); // interface 0 is loopback.
		  RemoveUselessStaticRoutes(staticRouting);
		  // naming access links
		  PointerValue pv;
		  devs.Get(0)->GetObject<PointToPointNetDevice>()->GetAttribute("TxQueue", pv);
		  pv.Get<Queue>()->SetAttribute("QueueName", StringValue(torname+"->"+hostname) );
		  Names::Add (torname+"->"+hostname, pv.Get<Queue>() );
		  devs.Get(1)->GetObject<PointToPointNetDevice>()->GetAttribute("TxQueue", pv);
		  pv.Get<Queue>()->SetAttribute("QueueName", StringValue(hostname+"->"+torname) );
		  Names::Add (hostname+"->"+torname, pv.Get<Queue>() );
		}
	  }

	  // aggres to tors
	  for (uint32_t iaggre = 0; iaggre < Aggres[ipod].GetN(); iaggre++)
	  {
	    // naming aggres
	    std::string aggrename, torname;
	    snprintf(buf, buflen, "aggre-%u-%u", ipod, iaggre);
		aggrename = buf;
		Names::Add (aggrename, Aggres[ipod].Get(iaggre) );
#ifndef PER_NODE_LOGS
		Aggres[ipod].Get(iaggre)->m_outputStream = aggreOutput;
#else
		Aggres[ipod].Get(iaggre)->m_outputStream = Create<OutputStreamWrapper> (NAMPT_PATH_PREFIX"Node."+aggrename, std::ios::out);
#endif
		// aggregation links
		Ipv4StaticRoutingHelper ipv4StaticRoutingHelper;
		Ptr<Ipv4> ipv4 = Aggres[ipod].Get(iaggre)->GetObject<Ipv4> ();
		Ptr<Ipv4StaticRouting> staticRouting = ipv4StaticRoutingHelper.GetStaticRouting(ipv4);
	    for (uint32_t itor = 0; itor < TORs[ipod].GetN(); itor++)
	    {
	      devs = linkAggre.Install(Aggres[ipod].Get(iaggre), TORs[ipod].Get(itor) );
		  // addressing aggres
		  // aggregation switches take the third-byte values that follow the TOR indices
		  snprintf(buf, buflen, "10.%u.%u.1", ipod, iaggre+TORs[ipod].GetN() ); // aggre
		  AssignAddress(devs.Get(0), Ipv4Address(buf), Ipv4Mask("255.255.0.0") );
		  Aggres[ipod].Get(iaggre)->m_mainAddr = Ipv4Address(buf);
		  // addressing tors
		  snprintf(buf, buflen, "10.%u.%u.1", ipod, itor); // tor
		  AssignAddress(devs.Get(1), Ipv4Address(buf), Ipv4Mask("255.255.0.0") );
		  // add a static network route for each aggre
		  snprintf(buf, buflen, "10.%u.%u.1", ipod, itor);
		  staticRouting->AddNetworkRouteTo(Ipv4Address(buf), Ipv4Mask("255.255.255.0"), 
									Ipv4Address(buf), itor+1); // interface0 is the loop dev.
		  RemoveUselessStaticRoutes(staticRouting);
		  // naming aggregation links
		  torname = Names::FindName(TORs[ipod].Get(itor) );
		  PointerValue pv;
		  devs.Get(0)->GetObject<PointToPointNetDevice>()->GetAttribute("TxQueue", pv);
		  pv.Get<Queue>()->SetAttribute("QueueName", StringValue(aggrename+"->"+torname) );
		  Names::Add (aggrename+"->"+torname, pv.Get<Queue>() );
		  devs.Get(1)->GetObject<PointToPointNetDevice>()->GetAttribute("TxQueue", pv);
		  pv.Get<Queue>()->SetAttribute("QueueName", StringValue(torname+"->"+aggrename) );
		  Names::Add (torname+"->"+aggrename, pv.Get<Queue>() );
	    }
	  }
#ifdef ROUTING_SUFFIX
	  // add global routes for tors (uplinks)
	  // One route per possible host address byte (mask 0.0.0.255): the destination's last
	  // byte, shifted by the TOR index, selects which aggregation uplink is used.
	  for (uint32_t itor = 0; itor < TORs[ipod].GetN(); itor++)
	  {
	    Ipv4StaticRoutingHelper ipv4StaticRoutingHelper;
		Ptr<Ipv4> ipv4 = TORs[ipod].Get(itor)->GetObject<Ipv4> ();
		Ptr<Ipv4StaticRouting> staticRouting = ipv4StaticRoutingHelper.GetStaticRouting(ipv4);
		RemoveUselessStaticRoutes(staticRouting);
		Ptr<Ipv4GlobalRouting> globalRouting = GetGlobalRouting (ipv4);
	    for (uint32_t ip = 2; ip < nHostAddr*(kAry-upLinksOfTOR)+2; ip++)
	    {
	      uint32_t nextHopInterface = (ip+itor)%upLinksOfTOR;
		  std::string nextHop;
		  snprintf(buf, buflen, "10.%u.%u.1", ipod, nextHopInterface+TORs[ipod].GetN() );
		  nextHop = buf;
	      snprintf(buf, buflen, "0.0.0.%u", ip);
	      // TOR interfaces: 0 loopback, then one per host, then the uplinks
	      globalRouting->AddNetworkRouteTo(Ipv4Address(buf), Ipv4Mask("0.0.0.255"),
		  	Ipv4Address(nextHop.c_str()), nextHopInterface+(kAry-upLinksOfTOR+1));
	    }
	  }
#endif
#ifdef ROUTING_ECMP
      // enable the ecmp function of tors
      for (uint32_t itor = 0; itor < TORs[ipod].GetN(); itor++)
      {
        Ipv4StaticRoutingHelper ipv4StaticRoutingHelper;
		Ptr<Ipv4> ipv4 = TORs[ipod].Get(itor)->GetObject<Ipv4> ();
		Ptr<Ipv4StaticRouting> staticRouting = ipv4StaticRoutingHelper.GetStaticRouting(ipv4);
		RemoveUselessStaticRoutes(staticRouting);
		Ptr<Ipv4GlobalRouting> globalRouting = GetGlobalRouting (ipv4);
		globalRouting->SetAttribute("RandomEcmpRouting", BooleanValue(true) );
		// one equal 10.0.0.0/8 route per aggregation uplink
		for (uint32_t iaggre = 0; iaggre < Aggres[ipod].GetN(); iaggre++)
		{
		  snprintf(buf, buflen, "10.%u.%u.1", ipod, iaggre+TORs[ipod].GetN() );
		  globalRouting->AddNetworkRouteTo(Ipv4Address("10.0.0.0"), Ipv4Mask("255.0.0.0"),
			Ipv4Address(buf), iaggre+(kAry-upLinksOfTOR+1) );
		}
      }
#endif
	}

	// cores to aggres
	// Core icore is attached to aggregation switch number icore/(kAry/2) of every pod.
	for (uint32_t icore = 0; icore < Cores.GetN(); icore++)
	{
	  uint32_t iaggre = icore / (kAry/2);
      // naming cores
      std::string corename, aggrename;
      snprintf(buf, buflen, "core-%u-%u", iaggre, icore%(kAry/2) );
      corename = buf;
      Names::Add (corename, Cores.Get(icore) );
	  coreArray[iaggre][icore%(kAry/2)] = Cores.Get(icore);
#ifndef PER_NODE_LOGS
	  Cores.Get(icore)->m_outputStream = coreOutput;
#else
	  Cores.Get(icore)->m_outputStream = Create<OutputStreamWrapper> (NAMPT_PATH_PREFIX"Node."+corename, std::ios::out);
#endif
	  // core links
	  Ipv4StaticRoutingHelper ipv4StaticRoutingHelper;
	  Ptr<Ipv4> ipv4 = Cores.Get(icore)->GetObject<Ipv4> ();
	  Ptr<Ipv4StaticRouting> staticRouting = ipv4StaticRoutingHelper.GetStaticRouting(ipv4);
	  for (uint32_t ipod = 0; ipod < kAry; ipod++)
	  {
	    devs = linkCore.Install(Cores.Get(icore), Aggres[ipod].Get(iaggre) );
		// addressing cores
		snprintf(buf, buflen, "10.%u.%u.%u", kAry, iaggre+1, (icore%(kAry/2))+1 ); // core
		AssignAddress(devs.Get(0), Ipv4Address(buf), Ipv4Mask("255.0.0.0") );
		Cores.Get(icore)->m_mainAddr = Ipv4Address(buf);
		// addressing aggres
		snprintf(buf, buflen, "10.%u.%u.1", ipod, iaggre+TORs[ipod].GetN() ); //aggre
		AssignAddress(devs.Get(1), Ipv4Address(buf), Ipv4Mask("255.0.0.0") );
		// add a static network route to each pod
		snprintf(buf, buflen, "10.%u.%u.1", ipod, iaggre+TORs[ipod].GetN() );
		staticRouting->AddNetworkRouteTo(Ipv4Address(buf), Ipv4Mask("255.255.0.0"), 
								  Ipv4Address(buf), ipod+1); // interface0 is the loop dev.
		RemoveUselessStaticRoutes(staticRouting);
		// naming core links
		aggrename = Names::FindName(Aggres[ipod].Get(iaggre) );
		PointerValue pv;
		devs.Get(0)->GetObject<PointToPointNetDevice>()->GetAttribute("TxQueue", pv);
		pv.Get<Queue>()->SetAttribute("QueueName", StringValue(corename+"->"+aggrename) );
		Names::Add (corename+"->"+aggrename, pv.Get<Queue>() );
		devs.Get(1)->GetObject<PointToPointNetDevice>()->GetAttribute("TxQueue", pv);
		pv.Get<Queue>()->SetAttribute("QueueName", StringValue(aggrename+"->"+corename) );
		Names::Add (aggrename+"->"+corename, pv.Get<Queue>() );
	  }
	}

#ifdef ROUTING_SUFFIX
	// add global routes for aggres (uplinks)
	// Aggregation interfaces: 0 loopback, then one per TOR (kAry/2), then the core uplinks,
	// hence the "+1+kAry/2" interface offset below.
/* =======  Scheme 1  ========= */
#ifdef PERMUTATION
	// Routes keyed on the destination's TOR byte and host byte (mask 0.0.255.255).
	for (uint32_t ipod = 0; ipod < kAry; ipod++)
	{
	  Ipv4StaticRoutingHelper ipv4StaticRoutingHelper;
	  for (uint32_t iaggre = 0; iaggre < Aggres[ipod].GetN(); iaggre++)
	  {
		Ptr<Ipv4> ipv4 = Aggres[ipod].Get(iaggre)->GetObject<Ipv4> ();
		Ptr<Ipv4StaticRouting> staticRouting = ipv4StaticRoutingHelper.GetStaticRouting(ipv4);
		RemoveUselessStaticRoutes(staticRouting);
		Ptr<Ipv4GlobalRouting> globalRouting = GetGlobalRouting (ipv4);
		for (uint32_t itor = 0; itor < TORs[ipod].GetN(); itor++)
		{
		  for (uint32_t ip = 2; ip < nHostAddr*(kAry-upLinksOfTOR)+2; ip++)
		  {
			uint32_t nextHopInterface = ((ip/upLinksOfTOR)+iaggre+itor)%(kAry/2);
			std::string nextHop;
			snprintf(buf, buflen, "10.%u.%u.%u", kAry, iaggre+1, nextHopInterface+1);
			nextHop = buf;
			snprintf(buf, buflen, "0.0.%u.%u", itor, ip);
			globalRouting->AddNetworkRouteTo(Ipv4Address(buf), Ipv4Mask("0.0.255.255"),
							Ipv4Address(nextHop.c_str()), nextHopInterface+1+kAry/2);
		  }
		}
	  }
	}
#else
/* =======  Scheme 2  ========= */
	// Routes keyed on the destination's host byte only (mask 0.0.0.255).
	for (uint32_t ipod = 0; ipod < kAry; ipod++)
	{
      Ipv4StaticRoutingHelper ipv4StaticRoutingHelper;
	  for (uint32_t iaggre = 0; iaggre < Aggres[ipod].GetN(); iaggre++)
	  {
        Ptr<Ipv4> ipv4 = Aggres[ipod].Get(iaggre)->GetObject<Ipv4> ();
        Ptr<Ipv4StaticRouting> staticRouting = ipv4StaticRoutingHelper.GetStaticRouting(ipv4);
        RemoveUselessStaticRoutes(staticRouting);
        Ptr<Ipv4GlobalRouting> globalRouting = GetGlobalRouting (ipv4);
	    for (uint32_t ip = 2; ip < nHostAddr*(kAry-upLinksOfTOR)+2; ip++)
	    {
	      //uint32_t nextHopInterface = ((ip/upLinksOfTOR)+iaggre)%(kAry/2);
	      uint32_t nextHopInterface = (ip+iaggre)%(kAry/2);
		  std::string nextHop;
		  snprintf(buf, buflen, "10.%u.%u.%u", kAry, iaggre+1, nextHopInterface+1);
		  nextHop = buf;
	      snprintf(buf, buflen, "0.0.0.%u", ip);
	      globalRouting->AddNetworkRouteTo(Ipv4Address(buf), Ipv4Mask("0.0.0.255"),
		  	Ipv4Address(nextHop.c_str()), nextHopInterface+1+kAry/2);
	    }
	  }
	}
#endif
#endif
#ifdef ROUTING_ECMP
    // enable the ecmp function of aggres
    for (uint32_t ipod = 0; ipod < kAry; ipod++)
    {
      Ipv4StaticRoutingHelper ipv4StaticRoutingHelper;
	  for (uint32_t iaggre = 0; iaggre < Aggres[ipod].GetN(); iaggre++)
	  {
	    Ptr<Ipv4> ipv4 = Aggres[ipod].Get(iaggre)->GetObject<Ipv4> ();
		Ptr<Ipv4StaticRouting> staticRouting = ipv4StaticRoutingHelper.GetStaticRouting(ipv4);
		RemoveUselessStaticRoutes(staticRouting);
		Ptr<Ipv4GlobalRouting> globalRouting = GetGlobalRouting (ipv4);
		globalRouting->SetAttribute("RandomEcmpRouting", BooleanValue(true) );
		// one equal 10.0.0.0/8 route per core uplink
		for (uint32_t i = 0; i < kAry/2; i++)
		{
		  snprintf(buf, buflen, "10.%u.%u.%u", kAry, iaggre+1, i+1);
		  globalRouting->AddNetworkRouteTo(Ipv4Address("10.0.0.0"), Ipv4Mask("255.0.0.0"),
			Ipv4Address(buf), i+1+kAry/2);
		}
	  }
    }
#endif

	/* Setup global routing tables */
	//Ipv4GlobalRoutingHelper::PopulateRoutingTables ();


	/* Sinks */
	// every host listens on TCP port 22, the port TransferData() connects to
	ApplicationContainer sinkApps;
	PacketSinkHelper ftpSink ("ns3::TcpSocketFactory", 
	  InetSocketAddress (Ipv4Address::GetAny (), 22) );
	for (uint32_t ipod = 0; ipod < kAry; ipod++)
	{
	  for (uint32_t itor = 0; itor < TORs[ipod].GetN(); itor++)
		sinkApps.Add(ftpSink.Install(Hosts[ipod][itor]) );
	}
	sinkApps.Start(Seconds(0.0));
	//sinkApps.Stop(Seconds(simTime));


	/* Sources */
	// note: app is withdrawed by callback functions
	// Only the initial traffic is started here; TransferExit() issues the follow-up flows.
#ifdef PERMUTATION
	PermutationTraffic();
#endif
#ifdef DISTRIBUTED_READ
	for (uint32_t i = 0; i < readJobs; i++)
	{
	  IssueDistributedRead();
	}
	for (uint32_t i = 0; i < hostArray.size(); i++)
	{
	  IssueNotInnerRackFlows(hostArray[i]);
	}
#endif
#ifdef RANDOM_FLOWS
	for (uint32_t i = 0; i < hostArray.size(); i++)
	{
	  IssueLargeFlows(hostArray[i], 0, 1.0);
	}
#endif
	/* Simulation begins */
	//Simulator::Stop (Seconds(simTime) );
	Simulator::Run ();
	Simulator::Destroy (); 
	return 0;

}

