Hi Jianhua,
You can add extra profiling directly into your SLICC descriptions.
Here's an example of how I did it to get more information on cache
timing.
Alternatively, you could modify the existing code: Ruby already reports statistics for machine events.
Chip Stats
----------
--- L1Cache ---
- Event Counts -
Load 12499169
Ifetch 74071283
Store 1191455
Replacement 527989
....
- Transitions -
M Load 1515098
M Ifetch 83
M Store 1071614
M Replacement 52587
M INV 0 <--
M BCAST_AC_S 0 <--
M BCAST_AS_S 0 <--
M BCAST_AM_S 0 <--
...
The transitions show all the event + cache state combinations that
occur in your coherency system. It doesn't give individual results
for each processor though, just the combined coherency results.
Good luck with your work.
John Shield
#############################
Quick guide to adding profiling functions in your cache
coherency description:
File: /home/john/opt/gems/protocols/ASY_SMP-cache.sm
sequencer.updateTiming(in_msg, in_msg.Address, 0); // profiling
This is the profiling line. It calls the "sequencer" object and uses
a function called "updateTiming".
It's a function I made to separate timing information for line
replacement time, from the miss time.
File: /home/john/opt/gems/protocols/RubySlicc_Types.sm
external_type(Sequencer) {
.... // other functions
void updateTiming(CacheMsg, Address, int); // your new
function
}
You need to add the function to the RubySlicc_Types so it's
recognised.
File: /home/john/opt/gems/ruby/system/Sequencer.h
void updateTiming(const CacheMsg& request, const Address&
address, int UpdateType);
Declaration of function in sequencer
File: /home/john/opt/gems/ruby/system/Sequencer.C
void Sequencer::updateTiming(const CacheMsg& request, const
Address& address, int UpdateType) {
CacheMsg *request_ptr = 0;
int timematch = request.getTime();
int thread = -1;
int smt_threads = RubyConfig::numberofSMTThreads();
for(int t=0; t < smt_threads; ++t){
if (m_writeRequestTable_ptr[t]->exist(address)) {
CacheMsg & requestReal =
m_writeRequestTable_ptr[t]->lookup(address);
if (requestReal.getTime() == timematch) {
request_ptr = &requestReal;
break;
}
} else if (m_readRequestTable_ptr[t]->exist(address)) {
CacheMsg & requestReal =
m_readRequestTable_ptr[t]->lookup(address);
if (requestReal.getTime() == timematch) {
request_ptr = &requestReal;
break;
}
}
}
if (request_ptr == 0) {
ERROR_MSG("UpdateTiming cannot find request");
}
if (UpdateType) {
// update , 1 = hit, 2 = miss
request_ptr->m_cacheStartTime =
g_eventQueue_ptr->getTime();
if (request_ptr->m_cacheMiss == 0) {
request_ptr->m_cacheMiss = UpdateType;
}
} else {
// replacement
if (request_ptr->m_cacheMiss == 0) {
request_ptr->m_cacheMiss = 2;
request_ptr->m_replacementTime =
g_eventQueue_ptr->getTime();
} else {
// Sometimes multiple replacements are triggered,
ignore later replacements
}
}
}
This function is where you implement whatever profiling logic you want
to run in the sequencer.
File: /home/john/opt/gems/slicc_interface/Message.h:
// Message base-class constructor (excerpt): stamps the message with the
// current event-queue time and zero-initialises the added profiling fields.
Message() : RefCountable() { m_time =
g_eventQueue_ptr->getTime(); m_LastEnqueueTime =
g_eventQueue_ptr->getTime(); m_DelayedCycles =
0;m_replacementTime=0; m_cacheStartTime=0; m_cacheMiss=0;}
... // other code
// Added profiling state carried with each cache message:
int m_cacheMiss; // 0 = unset, 1 = hit, 2 = miss (set by Sequencer::updateTiming)
Time m_cacheStartTime; // cycle at which the cache began servicing the request
Time m_replacementTime; // cycle at which a line replacement was triggered (0 = none)
Example of adding variables so the profiling data can be stored in the
message buffer. (You probably don't need to do this.)
File: /home/john/opt/gems/ruby/system/Sequencer.C (the call below uses Sequencer members such as m_chip_ptr and m_version):
g_system_ptr->getProfiler()->recordLatency(request.getTime(),
request.m_cacheStartTime, request.m_replacementTime, type,
request.m_cacheMiss, write,
(m_chip_ptr->getID()*RubyConfig::numberOfProcsPerChip()+m_version));
After the cache request has been processed, it calls a function
called "recordLatency" in the profiler to store the information in
the profiler.
File: /home/john/opt/gems/ruby/profiler/Profiler.C
// Store per-processor latency statistics for a completed cache request.
//
// queueStart   - time the request was enqueued (unused here; kept for callers)
// cacheStart   - time the cache began servicing the request (0 = never set)
// replaceStart - time a line replacement was triggered (0 = no replacement)
// type         - the cache request type
// miss         - 0 = unset (error), 1 = hit, 2 = miss (see updateTiming)
// write        - true if this was a write request
// cpu          - global processor index used to bucket the statistics
void Profiler::recordLatency(Time queueStart, Time cacheStart,
    Time replaceStart, CacheRequestType type, int miss, bool write,
    int cpu) {
  Time current = g_eventQueue_ptr->getTime();
  int replace;
  int cachefetch;
  int category = 0;
  // Cycles spent on the line replacement, if one occurred.
  if (replaceStart == 0) {
    replace = 0;
  } else {
    replace = (cacheStart-replaceStart+1);
  }
  // Cycles spent servicing the request itself.
  if (cacheStart == 0) {
    ERROR_MSG("ERROR: The cache request was never initiated!");
    cachefetch = 0;
  } else {
    cachefetch = (current-cacheStart+1);
  }
  // Categories: 0 = replacement; 1 = read hit; 2 = write hit;
  // 3 = read miss; 4 = write miss; 5 = asymmetric/remote update.
  if (replace) { // 0 = replacement
    m_perProcCacheTiming[cpu][0] += replace;
    m_perProcRequestCount[cpu][0] += 1;
  }
  if (miss == 0) {
    ERROR_MSG("ERROR: The cache request hit/miss status was never set!");
  } else if (miss == 1 && cachefetch > 10) {
    // Asymmetric write/read update condition, or lacking ownership:
    // a "hit" that nonetheless took more than 10 cycles.
    category = 5;
  } else if (miss == 1) {
    if (write) { // 2 = write hit
      category = 2;
    } else { // 1 = read hit
      category = 1;
    }
  } else {
    if (write) { // 4 = write miss
      category = 4;
    } else { // 3 = read miss
      category = 3;
    }
  }
  m_perProcCacheTiming[cpu][category] += cachefetch;
  m_perProcRequestCount[cpu][category] += 1;
}
Data is now stored in variables in the Profiler object.
File: /home/john/opt/gems/ruby/profiler/Profiler.h
Vector< Vector< long long int > >
m_perProcCacheTiming;
Vector< Vector< long long int > >
m_perProcRequestCount;
Declare the variables in the profiler for storage. (also don't
forget to initialise them, not shown)
File: /home/john/opt/gems/ruby/profiler/Profiler.C
// Excerpt from Profiler::printStats showing how the per-processor cache
// timing statistics are printed.  For each category the three output
// columns are: average latency | request count | contribution to the
// processor's total (latency / total requests).
// NOTE(review): this is an e-mail excerpt — several string literals below
// were wrapped across lines by the mail client and must be re-joined
// onto one line before this will compile.
void Profiler::printStats(ostream& out, bool short_stats)
{
... // other code
// Separator line before the per-processor table.
out <<
"----------------------------------------------------" <<
endl;
for (int ii = 0; ii < RubyConfig::numberOfProcessors(); ii++)
{
// Header row: overall average latency and request count for CPU ii.
out << "Processor[" << ii << "]\tAverage
Latency[" << left << setw(9) <<
perProcTotalMemoryLatency[ii] << "]\tRequests[" <<
perProTotalRequests[ii] << "]" << endl;
// Category indices: 0 = replacement; 1 = read hit; 2 = write hit;
// 3 = read miss; 4 = write miss
out << "\tReplacement: | " << left <<
setw(10) <<
m_perProcCacheTiming[ii][0]/double(m_perProcRequestCount[ii][0])
<< " | " << setw(12) <<
m_perProcRequestCount[ii][0] << " | " << setw(9)
<<
m_perProcCacheTiming[ii][0]/double(perProTotalRequests[ii])
<< endl;
// Read miss and read hit share one output row (no endl in between).
out << "\tRead Miss: | " << left <<
setw(10) <<
m_perProcCacheTiming[ii][3]/double(m_perProcRequestCount[ii][3])
<< " | " << setw(12) <<
m_perProcRequestCount[ii][3] << " | " << setw(9)
<<
m_perProcCacheTiming[ii][3]/double(perProTotalRequests[ii]);
out << "Read Hit: | " << left <<
setw(10) <<
m_perProcCacheTiming[ii][1]/double(m_perProcRequestCount[ii][1])
<< " | " << setw(12) <<
m_perProcRequestCount[ii][1] << " | " << setw(9)
<<
m_perProcCacheTiming[ii][1]/double(perProTotalRequests[ii])
<< endl;
// Write miss and write hit likewise share one output row.
out << "\tWrite Miss: | " << left <<
setw(10) <<
m_perProcCacheTiming[ii][4]/double(m_perProcRequestCount[ii][4])
<< " | " << setw(12) <<
m_perProcRequestCount[ii][4] << " | " << setw(9)
<<
m_perProcCacheTiming[ii][4]/double(perProTotalRequests[ii]);
out << "Write Hit: | " << left <<
setw(10) <<
m_perProcCacheTiming[ii][2]/double(m_perProcRequestCount[ii][2])
<< " | " << setw(12) <<
m_perProcRequestCount[ii][2] << " | " << setw(9)
<<
m_perProcCacheTiming[ii][2]/double(perProTotalRequests[ii])
<< endl;
// Category 5: asymmetric/remote updates ("hits" that took > 10 cycles).
out << "\tAsym Remote: | " << left <<
setw(10) <<
m_perProcCacheTiming[ii][5]/double(m_perProcRequestCount[ii][5])
<< " | " << setw(12) <<
m_perProcRequestCount[ii][5] << " | " << setw(9)
<<
m_perProcCacheTiming[ii][5]/double(perProTotalRequests[ii])
<< endl;
}
out << endl;
out <<
"----------------------------------------------------" <<
endl;
... // other code
}
The profiler prints its information out to the Ruby.stats file; you
need to modify printStats to get your information out with the rest.
|
|