ARM PROGRAMMING Bùi Quốc Bảo When to use assembly: functions that cannot be implemented in C, such as special register accesses and exclusive accesses; timing-critical routines; tight memory requirements, causing part of the program to be written in assembly to get the smallest memory size. 1
Example of assembly program STACK_TOP EQU 0x20002000 ; constant for SP starting value AREA RESET, CODE THUMB DCD STACK_TOP ; Stack top DCD Start ; Reset vector AREA Vect, CODE ENTRY ; Indicate program execution start here Start ; Start of main program MOV r0, #10 ; Starting loop counter value MOV r1, #0 ; starting result ; Calculated 10+9+8+...+1 loop ADD r1, r0 ; R1=R1 + R0 SUBS r0, #1 ; Decrement R0, update flag (S suffix) BNE loop ; If result not zero jump to loop, Result is now in R1 deadloop B deadloop ; Infinite loop END C Programming 2
C programming #include "inc/lm3s9b96.h" int main(void) volatile unsigned long ulloop; SYSCTL_RCGC2_R = SYSCTL_RCGC2_GPIOF; ulloop = SYSCTL_RCGC2_R; GPIO_PORTF_DIR_R = 0x08; GPIO_PORTF_DEN_R = 0x08; while(1) GPIO_PORTF_DATA_R = 0x08; for(ulloop = 0; ulloop < 200000; ulloop++); GPIO_PORTF_DATA_R &= ~(0x08); for(ulloop = 0; ulloop < 200000; ulloop++); Startup code AREA RESET, CODE, READONLY THUMB EXPORT Vectors Vectors DCD StackMem + Stack ; Top of Stack DCD Reset_Handler ; Reset Handler DCD NmiSR ; NMI Handler DCD FaultISR ; Hard Fault Handler DCD IntDefaultHandler ; The MPU fault handler DCD IntDefaultHandler ; The bus fault handler DCD IntDefaultHandler ; The usage fault handler DCD 0 ; Reserved DCD 0 ; Reserved DCD 0 ; Reserved DCD 0 ; Reserved DCD IntDefaultHandler ; SVCall handler DCD IntDefaultHandler ; Debug monitor handler 3
Startup code EXPORT Reset_Handler Reset_Handler IMPORT main B main Calling assembler from C First four arguments to a function are passed in registers r0 to r3 (any further parameters being passed on the stack) A single word result is returned in r0 4
Main.c #include <stdio.h> extern void strcopy(char *d, char *s); int main() char *srcstr = "First string - source "; char *dststr = "Second string - destination "; strcopy(dststr,srcstr); return (0); 5
Func.s AREA asm_func, CODE, READONLY THUMB EXPORT strcopy strcopy ; copy first string over second LDRB r2, [r1],#1 ; load byte and update address STRB r2, [r0],#1 ; store byte and update address CMP r2, #0 ; check for zero terminator BNE strcopy ; keep going if not stop BX LR ALIGN END Local variable type int checksum_v1(int *data) char i; int sum = 0; for (i = 0; i < 64; i++) sum += data[i]; return sum; ARM registers are 32 bits. Stack entries are at least 32 bits. To implement the i++ operation, the compiler has to check whether i = 255 or not. So, declaring it as char is inefficient; i should be declared as an int. 6
Function argument type short add_v1(short a, short b) return a+(b>>1); MOV r2,r0 ADD r0,r2,r1,asr #1 SXTH r0,r0 BX LR Function argument type int add_v2(int a, int b) return a+(b>>1); MOV r2,r0 ADD r0,r2,r1,asr #1 BX LR It is more efficient to use the int type for function arguments and return values, even if you are only passing an 8-bit value. 7
Loop structure int checksum_v5(int *data) unsigned int i; int sum=0; for (i=0; i<64; i++) sum += *(data++); return sum; An ADD to increment i; a compare to check if i is less than 64; a conditional branch to continue the loop if i < 64. PUSH r4,lr MOV r1,r0 MOV r3,#0x00 MOV r0,r3 MOV r2,r3 B check LOOP ADD r3,r1,#0x04 LDR r4,[r1,#0x00] MOV r1,r3 ADD r3,r4,r0 MOV r0,r3 ADD r3,r2,#0x01 MOV r2,r3 check CMP r2,#0x40 BCC LOOP POP r4,pc Loop structure int checksum_v5(int *data) unsigned int i; int sum=0; for (i=64; i!=0; i--) sum += *(data++); return sum; For loop structures, we should use a decrementing loop that counts down to zero. 8
Arguments passing char *queue_bytes_v1 ( char *Q_start, char *Q_end, char *Q_ptr, char *data, unsigned int N) do *(Q_ptr++) = *(data++); if (Q_ptr == Q_end) Q_ptr = Q_start; while (--N); return Q_ptr; Arguments passing queue_bytes_v1 STR r14,[r13,#-4]! LDR r12,[r13,#4] queue_v1_loop LDRB r14,[r3],#1 STRB r14,[r2],#1 CMP r2,r1 MOVEQ r2,r0 SUBS r12,r12,#1 BNE queue_v1_loop MOV r0,r2 LDR pc,[r13],#4 ; save lr on the stack ; r12 = N ; r14 = *(data++) ; *(Q_ptr++) = r14 ; if (Q_ptr == Q_end) ; Q_ptr = Q_start; ; --N and set flags ; if (N!=0) goto loop ; r0 = Q_ptr ; return r0 9
Arguments passing typedef struct char *Q_start; /* Queue buffer start address */ char *Q_end; /* Queue buffer end address */ char *Q_ptr; /* Current queue pointer position */ Queue; void queue_bytes_v2(queue *queue, char *data, unsigned int N) char *Q_ptr = queue->q_ptr; char *Q_end = queue->q_end; Arguments passing do *(Q_ptr++) = *(data++); if (Q_ptr == Q_end) Q_ptr = queue->q_start; while (--N); queue->q_ptr = Q_ptr; To avoid the cost of pushing arguments onto the stack and reading them back from it, one should try to limit the number of arguments to four. 10