2008-03-14 Exam Solutions Datorarkitektur DA7009, DD221V, DD2377 page 1 Problem 1. number decimal binary ----------------------------------- Zero | 0 | 0 000000 n/a | -3 | 1 111101 n/a | 11 | 0 001011 n/a | -17 | 1 101111 n/a | 51 | 0 110011 n/a | -46 | 1 010010 TMax | 63 | 0 111111 TMin | -64 | 1 000000 TMin+TMin | 0 | 0 000000 TMin+1 | -63 | 1 000001 TMax+1 | -64(Tmin) | 1 000000 -TMax | -63 | 1 000001 -TMin | -64(Tmin) | 1 000000 Problem 3. char residents[FLOORS][ROOMS][LEN]; means that residents[floor][room] = residents + LEN*(ROOMS*floor + room) LEN=12; A) reserve_room: pushl %ebp movl %esp,%ebp movl 12(%ebp),%eax # room movl 16(%ebp),%edx # custname pushl %edx # arg 2 to strcpy movl 8(%ebp),%edx # floor sall $4,%edx # 16*floor subl 8(%ebp),%edx # 15*floor leal (%eax,%eax,2),%eax # 3*room leal residents(,%eax,4),%eax # residents + 12*room leal (%eax,%edx,4),%edx # residents + 12*room + 60*floor pushl %edx # arg 1 to strcpy call strcpy movl %ebp,%esp popl %ebp ret so residents[floor][room] = residents + 12*room + 60*floor = residents + 12*(room + 5*floor) which means that ROOMS=5 B) residents[0][1][-2] = residents + 12*(0*ROOMS + 1) - 2 = residents + 12*(0*ROOMS + 0) + 10 = residents[0][0][10] C) FLOORS=6 One way to think of this problem: There are five rooms per floor. The first scheme allocates 12*5 bytes per floor. The second scheme allocates 4 bytes for empty rooms (null ptr) and 4+12=16 bytes for reserved rooms. With 20% utilization, that means 4*4 + 1*16 = 32 bytes per floor. Thus, the second scheme saves 28 bytes per floor, or 168 bytes for 6 floors. Problem 5. Program Value Decimal Offset a -12 a[2] -4 x +8 buf -16 buf[3] -13 Saved value of register %ebx -20 Problem 6. A) Loop1 does not have any loop-carried dependency. It can therefore make better use of pipelining. B) It reduces the overhead for each iteration. C) Still limited by latency of integer multiply (4 cycles). 2008-03-14 Exam Solutions Datorarkitektur DA7009, DD221V, DD2377 page 2 Problem 2. int mat1[M][N] means that mat1[i][j] = mat1[i*N + j] = mat1 + 4*(i*N + j) int mat2[N][M] means that mat2[j][i] = mat2[j*M + i] = mat2 + 4*(j*M + i) copy_element: pushl %ebp movl %esp, %ebp movl 12(%ebp), %edx # j leal 0(,%edx,8), %eax # 8*j movl 8(%ebp), %ecx # i subl %edx, %eax # 7*j pushl %ebx leal (%ecx,%eax,2), %eax # i + 14*j leal (%ecx,%ecx,8), %ebx # 9*i movl mat2(,%eax,4), %eax # mat2 + 4*(i + 14*j) so M=14 addl %edx, %ebx # j + 9*i movl %eax, mat1(,%ebx,4) # mat1 + 4*(j + 9*i) so N=9 popl %ebx leave ret Answer: M=14 N=9 Problem 4. loop: pushl %ebp movl %esp,%ebp movl 0x8(%ebp),%edx # h movl %edx,%eax # h addl 0xc(%ebp),%eax # h + len leal 0xffffffff(%eax),%ecx # t = h + len - 1 cmpl %ecx,%edx # if h >= t jae .L4 # finshed .L6: movb (%edx),%al # *h xorb (%ecx),%al # *t ^ *h movb %al,(%edx) # *h = *t ^ *h xorb (%ecx),%al # *t ^ *h movb %al,(%ecx) # *t = *t ^ *h xorb %al,(%edx) # *h = *t ^ *h incl %edx # h++ decl %ecx # t-- cmpl %ecx,%edx # if h < t jb .L6 # go again .L4: movl %ebp,%esp popl %ebp ret Answer: void loop(char *h, int len) { /* loop reverses the string h */ char *t; for(t = h + len - 1; h < t; h++, t--){ *h = *h ^ *t; *t = *h ^ *t; *h = *h ^ *t; } } Problem 7. Part 1) CT: [12-5] CI: [4-2] CO: [1-0] Part 2 A) 0 1110 0011 0100 = 0111 0001 101 00 B) CO: 0x0 CI: 0x5 CT: 0x71 cache hit? Y cache byte? 0x0B