format ELF64

; ---------------------------------------------------------------------------
; Cycle-count micro-benchmark harness.
; Syntax: flat assembler (fasm 1), Intel operand order.
; ABI:    System V AMD64 (Linux); links against libc for printf.
; ---------------------------------------------------------------------------

extrn printf
extrn puts
extrn scanf

public main

; read_tsc -- rax = 64-bit time stamp counter.
; cpuid is used as a serialising fence so earlier instructions have retired
; before rdtsc samples the counter. eax/ecx are zeroed first so that every
; invocation executes the same cpuid leaf (its latency depends on the leaf),
; keeping the calibration and measurement sequences identical.
; Clobbers: rax, rbx, rcx, rdx, flags.
macro read_tsc
{
        xor     eax, eax                ; cpuid leaf 0
        xor     ecx, ecx                ; sub-leaf 0
        cpuid                           ; serialise; writes eax, ebx, ecx, edx
        rdtsc                           ; edx:eax = time stamp counter
        shl     rdx, 32
        or      rax, rdx                ; rax = full 64-bit timestamp
}

section '.text' executable

;-----------------------------------------------------------------------
; int main(void)
; Measures the clocks spent between two timestamp reads around the code
; under test, subtracts the cost of the timestamp read itself, and prints
; the result with printf.
; Out:    eax = 0
; Uses:   r10 = timestamp taken before the code under test
;         r11 = clocks consumed by one back-to-back timestamp pair
;         (the code under test must leave r10 and r11 intact)
; Stack:  rbp and rbx are saved, plus 8 bytes of padding so rsp is
;         16-byte aligned at the call to printf.
;-----------------------------------------------------------------------
main:
        push    rbp
        mov     rbp, rsp
        push    rbx                     ; cpuid overwrites rbx (callee-saved)
        sub     rsp, 8                  ; restore 16-byte alignment for calls

        finit                           ; reset x87 state before measuring

        ; --- calibrate: cost of two back-to-back timestamp reads ---
        read_tsc
        mov     r11, rax                ; first calibration timestamp
        read_tsc
        sub     rax, r11
        mov     r11, rax                ; r11 = measurement overhead in clocks

        ; --- begin benchmarking ---
        read_tsc
        mov     r10, rax                ; r10 = start timestamp

        ; code to benchmark goes here
        nop
        nop
        ; end code to benchmark

        ; --- end benchmarking ---
        read_tsc
        sub     rax, r10                ; elapsed clocks including overhead
        sub     rax, r11                ; remove the measurement overhead
                                        ; (may go slightly negative from jitter)

        ; --- print result: printf(msg, clocks) ---
        mov     rsi, rax                ; 2nd argument: clock count
        lea     rdi, [msg]              ; 1st argument: format string (RIP-relative)
        xor     eax, eax                ; al = 0 vector registers used (variadic call)
        call    PLT printf              ; via PLT so the object links as PIE too

        xor     eax, eax                ; return 0 rather than printf's result
        mov     rbx, [rbp-8]            ; restore callee-saved rbx
        leave
        ret

section '.data' writable align 16

; Signed conversion: the overhead-corrected count can be a small negative
; number, which an unsigned conversion would print as a value near 2^64.
msg db "%ld clock cycles",0xA,0