
#include "macros.inc"

; Register aliases for the three arguments, one 8-bit register per byte
; (hi/lo halves of each 16-bit value).
; NOTE(review): r25:r24, r23:r22 and r21:r20 look like the avr-gcc argument
; registers for the first three 16-bit parameters -- confirm against the ABI.
#define dest_hi r25
#define dest_lo r24
#define src_hi r23
#define src_lo r22
#define len_hi r21
#define len_lo r20

; void *memcpy_P(void *dest, const void flash *src, size_t len)
;
; Copy len bytes from src to dest and return dest.
; Source bytes are fetched through a load macro from macros.inc (not visible
; in this file); destination bytes are written with plain st instructions.
;
; In:    r25:r24 = dest, r23:r22 = src, r21:r20 = len
; Out:   r25:r24 = dest (no instruction in this file writes these registers)
; Uses:  X (write pointer), r0 (byte in transit), r21:r20 (counted down),
;        status flags; presumably Z as read pointer -- see the notes below.
; len of 0 copies nothing: both variants reach the count-down test before
; any byte is moved.

	.text
	.global	_U(memcpy_P)
	.type	_U(memcpy_P), @function
_U(memcpy_P):
	; Pointer setup through macros defined in macros.inc.
	; NOTE(review): presumably Z = src and X = dest -- confirm in macros.inc.
	LOAD_Z(src_lo, src_hi)
	LOAD_X(dest_lo, dest_hi)
#if OPTIMIZE_SPEED
; Speed variant: two bytes are copied per count-down test. An odd length
; enters in the middle of the loop body so that the first pass moves a
; single byte.
; 17 words, (14 + len * 9 - (len & 1)) cycles
; NOTE(review): size and cycle figures depend on how the macros expand for
; the target core -- confirm before relying on them.
	sbrs	len_lo, 0	; bit 0 set (odd length): skip the next rjmp
	rjmp	.memcpy_P_start	; even length: go straight to the count-down
	rjmp	.memcpy_P_odd	; odd length: copy one byte first
.memcpy_P_loop:
	; Load macro from macros.inc; presumably reads the next source byte
	; into r0 and advances the read pointer -- confirm.
	LPM_R0_ZP
	st	X+, r0	; store the byte, then advance the write pointer
.memcpy_P_odd:
	; Same load macro as above, second byte of the pair.
	LPM_R0_ZP
	st	X+, r0	; store the byte, then advance the write pointer
.memcpy_P_start:
	; 16-bit count-down by 2; carry is set once the counter underflows.
	subi	len_lo, lo8(2)
	sbci	len_hi, hi8(2)	; high byte only absorbs the borrow
#else
; Size variant: one byte is copied per count-down test.
; 12 words, (13 + len * 11) cycles
; NOTE(review): size and cycle figures depend on how the macros expand for
; the target core -- confirm before relying on them.
	rjmp	.memcpy_P_start	; test the counter before the first copy
.memcpy_P_loop:
	; Load macro from macros.inc; presumably reads the next source byte
	; into r0 and advances the read pointer -- confirm.
	LPM_R0_ZP
	st	X+, r0	; store the byte, then advance the write pointer
.memcpy_P_start:
	; 16-bit count-down by 1; carry is set once the counter underflows.
	subi	len_lo, lo8(1)
	sbci	len_hi, hi8(1)	; high byte only absorbs the borrow
#endif
	; Shared tail of both variants: carry clear means the subtraction did
	; not borrow, so there is still data to copy.
	brcc	.memcpy_P_loop
; return dest (unchanged)
	ret
; End marker used only to compute the symbol size below.
.memcpy_P_end:
	.size	_U(memcpy_P), .memcpy_P_end - _U(memcpy_P)

