From 3acb107b9ae4e3d38d3fcfd29b455ebcfb444696 Mon Sep 17 00:00:00 2001
From: drath <drath@b42882b7-edfa-0310-969c-e2dbd0fdcd60>
Date: Thu, 31 Aug 2006 12:41:49 +0000
Subject: - endianess fixes everywhere but in the flash code. flashing might
 still be broken on big-endian targets and/or hosts - added access to ARM920T
 vector catch register (via generic register mechanism) - don't disable
 linefills on ARM920T cores - this lead to lockups when accessing lines
 already contained in cache - read content of ARM920T cache and tlb into file
 (arm920t read_flash/read_mmu commands) - memory reading improved on ARM7/9,
 can be further accelerated with new "arm7_9 fast_memory_access enable"
 command (renamed from fast_writes) - made in_handler independent from in
 field (makes the handler more flexible) - added timeout to ft2232 when using
 D2XX library - fixed STR7x protection bit handling on second bank (thanks to
 Bernard) - added support for using the OpenOCD on AT91RM9200 systems (thanks
 to Anders Larsen) - fixed AT91SAM7 flash handling when not running from 32kHz
 clock (thanks to Anders Larsen)

git-svn-id: svn://svn.berlios.de/openocd/trunk@90 b42882b7-edfa-0310-969c-e2dbd0fdcd60
---
 src/flash/at91sam7.c       |  13 +-
 src/flash/at91sam7.h       |   2 +-
 src/flash/str7x.c          |   4 +-
 src/jtag/Makefile.am       |   8 +-
 src/jtag/ft2232.c          |   6 +
 src/jtag/jtag.c            |  47 ++-
 src/jtag/jtag.h            |   2 +-
 src/target/arm720t.c       |   8 +-
 src/target/arm7_9_common.c | 131 +++++----
 src/target/arm7_9_common.h |   5 +-
 src/target/arm7tdmi.c      | 141 +++++++--
 src/target/arm920t.c       | 715 ++++++++++++++++++++++++++++++++++++++-------
 src/target/arm920t.h       |  14 +
 src/target/arm9tdmi.c      | 178 +++++++++--
 src/target/arm_jtag.c      |  86 +++++-
 src/target/arm_jtag.h      |  17 +-
 src/target/armv4_5.c       |   4 +-
 src/target/armv4_5.h       |  21 ++
 src/target/armv4_5_cache.h |   8 +
 src/target/armv4_5_mmu.c   |  29 +-
 src/target/armv4_5_mmu.h   |   8 +
 src/target/embeddedice.c   |   1 +
 src/target/embeddedice.h   |   6 +-
 src/target/target.c        |  70 ++++-
 src/target/target.h        |   7 +
 25 files changed, 1275 insertions(+), 256 deletions(-)

(limited to 'src')

diff --git a/src/flash/at91sam7.c b/src/flash/at91sam7.c
index 59ed2aa0..dd8bdeb5 100644
--- a/src/flash/at91sam7.c
+++ b/src/flash/at91sam7.c
@@ -133,9 +133,10 @@ u32 at91sam7_get_flash_status(flash_bank_t *bank)
 {
 	at91sam7_flash_bank_t *at91sam7_info = bank->driver_priv;
 	target_t *target = at91sam7_info->target;
-	long fsr;
+	u32 fsr;
 	
 	target->type->read_memory(target, MC_FSR, 4, 1, (u8 *)&fsr);
+	fsr = target_buffer_get_u32(target, (u8 *)&fsr);
 	
 	return fsr;
 }
@@ -206,7 +207,7 @@ void at91sam7_read_clock_info(flash_bank_t *bank)
 /* Setup the timimg registers for nvbits or normal flash */
 void at91sam7_set_flash_mode(flash_bank_t *bank,int mode)
 {
-	u32 fmcn, fmr;
+	u32 fmr, fmcn = 0, fws = 0;
 	at91sam7_flash_bank_t *at91sam7_info = bank->driver_priv;
 	target_t *target = at91sam7_info->target;
 	
@@ -220,12 +221,14 @@ void at91sam7_set_flash_mode(flash_bank_t *bank,int mode)
 			fmcn = (at91sam7_info->mck_freq/666666ul)+1;
 
 		/* Only allow fmcn=0 if clock period is > 30 us. */
-		if (at91sam7_info->mck_freq <= 33333)
+		if (at91sam7_info->mck_freq <= 33333333ul)
 			fmcn = 0;
+		else
+			fws = 1;
 
 		DEBUG("fmcn: %i", fmcn); 
-		fmr = fmcn<<16;
-		target->type->write_memory(target, MC_FSR, 4, 1, (u8 *)&fmr);
+		fmr = fmcn << 16 | fws << 8;
+		target->type->write_memory(target, MC_FMR, 4, 1, (u8 *)&fmr);
 	}
 	at91sam7_info->flashmode = mode;		
 }
diff --git a/src/flash/at91sam7.h b/src/flash/at91sam7.h
index c65600eb..e2412f8c 100644
--- a/src/flash/at91sam7.h
+++ b/src/flash/at91sam7.h
@@ -48,7 +48,7 @@ typedef struct at91sam7_flash_bank_s
 	u8 num_erase_regions;
 	u32 *erase_region_info;
 
-        /* nv memory bits */
+	/* nv memory bits */
 	u16 num_lockbits;
 	u16 lockbits;
 	u16 num_nvmbits;
diff --git a/src/flash/str7x.c b/src/flash/str7x.c
index b7081208..2e04f6b1 100644
--- a/src/flash/str7x.c
+++ b/src/flash/str7x.c
@@ -44,8 +44,8 @@ str7x_mem_layout_t mem_layout[] = {
 	{0x00010000, 0x10000, 0x01},
 	{0x00020000, 0x10000, 0x01},
 	{0x00030000, 0x10000, 0x01},
-	{0x000C0000, 0x02000, 0x10},
-	{0x000C2000, 0x02000, 0x10},
+	{0x000C0000, 0x02000, 0x100},
+	{0x000C2000, 0x02000, 0x100},
 	{0,0},
 };
 
diff --git a/src/jtag/Makefile.am b/src/jtag/Makefile.am
index ec46111e..c63c734a 100644
--- a/src/jtag/Makefile.am
+++ b/src/jtag/Makefile.am
@@ -49,6 +49,12 @@ else
 EP93XXFILES =
 endif
 
-libjtag_a_SOURCES = jtag.c $(BITBANGFILES) $(PARPORTFILES) $(FT2232FILES) $(AMTJTAGACCELFILES) $(EP93XXFILES)
+if AT91RM9200
+AT91RM9200FILES = at91rm9200.c
+else
+AT91RM9200FILES =
+endif
+
+libjtag_a_SOURCES = jtag.c $(BITBANGFILES) $(PARPORTFILES) $(FT2232FILES) $(AMTJTAGACCELFILES) $(EP93XXFILES) $(AT91RM9200FILES)
 
 noinst_HEADERS = bitbang.h jtag.h
diff --git a/src/jtag/ft2232.c b/src/jtag/ft2232.c
index 7d2fac08..c1973a3c 100644
--- a/src/jtag/ft2232.c
+++ b/src/jtag/ft2232.c
@@ -1009,6 +1009,12 @@ int ft2232_init(void)
 	{
 		DEBUG("current latency timer: %i", latency_timer);
 	}
+	
+	if ((status = FT_SetTimeouts(ftdih, 5000, 5000)) != FT_OK)
+	{
+		ERROR("unable to set timeouts: %i", status);
+		return ERROR_JTAG_INIT_FAILED;
+	}
 
 	if ((status = FT_SetBitMode(ftdih, 0x0b, 2)) != FT_OK)
 	{
diff --git a/src/jtag/jtag.c b/src/jtag/jtag.c
index 95818f07..b05f19d9 100644
--- a/src/jtag/jtag.c
+++ b/src/jtag/jtag.c
@@ -140,6 +140,10 @@ jtag_event_callback_t *jtag_event_callbacks;
 	extern jtag_interface_t ep93xx_interface;
 #endif
 
+#if BUILD_AT91RM9200 == 1
+	extern jtag_interface_t at91rm9200_interface;
+#endif
+
 jtag_interface_t *jtag_interfaces[] = {
 #if BUILD_PARPORT == 1
 	&parport_interface,
@@ -155,6 +159,9 @@ jtag_interface_t *jtag_interfaces[] = {
 #endif
 #if BUILD_EP93XX == 1
 	&ep93xx_interface,
+#endif
+#if BUILD_AT91RM9200 == 1
+	&at91rm9200_interface,
 #endif
 	NULL,
 };
@@ -974,20 +981,26 @@ int jtag_read_buffer(u8 *buffer, scan_command_t *cmd)
 
 	for (i=0; i < cmd->num_fields; i++)
 	{
-		/* if neither in_value nor in_check_value are specified we don't have to examine this field */
-		if (cmd->fields[i].in_value || cmd->fields[i].in_check_value)
+		/* if neither in_value, in_check_value nor in_handler
+		 * are specified we don't have to examine this field
+		 */
+		if (cmd->fields[i].in_value || cmd->fields[i].in_check_value || cmd->fields[i].in_handler)
 		{
 			int num_bits = cmd->fields[i].num_bits;
-
-			if (cmd->fields[i].in_value)
-			{
+			u8 *captured = buf_set_buf(buffer, bit_count, malloc(CEIL(num_bits, 8)), 0, num_bits);
+			#ifdef _DEBUG_JTAG_IO_
 				char *char_buf;
-				buf_set_buf(buffer, bit_count, cmd->fields[i].in_value, 0, num_bits);
-				char_buf = buf_to_char(cmd->fields[i].in_value, num_bits);
-#ifdef _DEBUG_JTAG_IO_
+
+				char_buf = buf_to_char(captured, num_bits);
 				DEBUG("fields[%i].in_value: %s", i, char_buf);
-#endif
 				free(char_buf);
+			#endif
+
+			
+			if (cmd->fields[i].in_value)
+			{
+				buf_cpy(captured, cmd->fields[i].in_value, num_bits);
+				
 				if (cmd->fields[i].in_handler)
 				{
 					if (cmd->fields[i].in_handler(cmd->fields[i].in_value, cmd->fields[i].in_handler_priv) != ERROR_OK)
@@ -998,6 +1011,18 @@ int jtag_read_buffer(u8 *buffer, scan_command_t *cmd)
 					}
 				}
 			}
+			
+			/* no in_value specified, but a handler takes care of the scanned data */
+			if (cmd->fields[i].in_handler && (!cmd->fields[i].in_value))
+			{
+				if (cmd->fields[i].in_handler(captured, cmd->fields[i].in_handler_priv) != ERROR_OK)
+				{
+					/* TODO: error reporting */
+					WARNING("in_handler reported a failed check");
+					retval = ERROR_JTAG_QUEUE_FAILED;
+				}
+				
+			}
 
 			if (cmd->fields[i].in_check_value)
 			{
@@ -1015,8 +1040,8 @@ int jtag_read_buffer(u8 *buffer, scan_command_t *cmd)
 					free(in_check_value_char);
 					free(in_check_mask_char);
 				}
-				free(captured);
 			}
+			free(captured);
 		}
 		bit_count += cmd->fields[i].num_bits;
 	}
@@ -1031,7 +1056,7 @@ enum scan_type jtag_scan_type(scan_command_t *cmd)
 	
 	for (i=0; i < cmd->num_fields; i++)
 	{
-		if (cmd->fields[i].in_check_value || cmd->fields[i].in_value)
+		if (cmd->fields[i].in_check_value || cmd->fields[i].in_value || cmd->fields[i].in_handler)
 			type |= SCAN_IN;
 		if (cmd->fields[i].out_value)
 			type |= SCAN_OUT;
diff --git a/src/jtag/jtag.h b/src/jtag/jtag.h
index 19f81ce6..7ad38278 100644
--- a/src/jtag/jtag.h
+++ b/src/jtag/jtag.h
@@ -25,7 +25,7 @@
 
 #include "command.h"
 
-#if 1
+#if 0
 #define _DEBUG_JTAG_IO_
 #endif
 
diff --git a/src/target/arm720t.c b/src/target/arm720t.c
index 677aa458..0a95e1c5 100644
--- a/src/target/arm720t.c
+++ b/src/target/arm720t.c
@@ -28,7 +28,7 @@
 #include <stdlib.h>
 #include <string.h>
 
-#if 1
+#if 0
 #define _DEBUG_INSTRUCTION_EXECUTION_
 #endif
 
@@ -92,8 +92,7 @@ int arm720t_scan_cp15(target_t *target, u32 out, u32 *in, int instruction, int c
 	u8 out_buf[4];
 	u8 instruction_buf = instruction;
 	
-	out = flip_u32(out, 32);
-	buf_set_u32(out_buf, 0, 32, out);
+	buf_set_u32(out_buf, 0, 32, flip_u32(out, 32));
 	
 	jtag_add_end_state(TAP_PD);
 	arm_jtag_scann(jtag_info, 0xf);
@@ -113,14 +112,13 @@ int arm720t_scan_cp15(target_t *target, u32 out, u32 *in, int instruction, int c
 	fields[1].num_bits = 32;
 	fields[1].out_value = out_buf;
 	fields[1].out_mask = NULL;
+	fields[1].in_value = NULL;
 	if (in)
 	{
-		fields[1].in_value = (u8*)in;
 		fields[1].in_handler = arm_jtag_buf_to_u32_flip;
 		fields[1].in_handler_priv = in;
 	} else
 	{
-		fields[1].in_value = NULL;
 		fields[1].in_handler = NULL;
 		fields[1].in_handler_priv = NULL;
 	}
diff --git a/src/target/arm7_9_common.c b/src/target/arm7_9_common.c
index 904d0ca0..40049e6b 100644
--- a/src/target/arm7_9_common.c
+++ b/src/target/arm7_9_common.c
@@ -52,7 +52,7 @@ int handle_arm7_9_write_core_reg_command(struct command_context_s *cmd_ctx, char
 int handle_arm7_9_sw_bkpts_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
 int handle_arm7_9_force_hw_bkpts_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
 int handle_arm7_9_dbgrq_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
-int handle_arm7_9_fast_writes_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
+int handle_arm7_9_fast_memory_access_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
 int handle_arm7_9_dcc_downloads_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
 
 int arm7_9_reinit_embeddedice(target_t *target)
@@ -184,13 +184,17 @@ int arm7_9_set_breakpoint(struct target_s *target, breakpoint_t *breakpoint)
 	{
 		if (breakpoint->length == 4)
 		{
+			/* keep the original instruction in target endianness */
 			target->type->read_memory(target, breakpoint->address, 4, 1, breakpoint->orig_instr);
-			target->type->write_memory(target, breakpoint->address, 4, 1, (u8*)(&arm7_9->arm_bkpt));
+			/* write the original instruction in target endianness (arm7_9->arm_bkpt is host endian) */
+			target_write_u32(target, breakpoint->address, arm7_9->arm_bkpt);
 		}
 		else
 		{
+			/* keep the original instruction in target endianness */
 			target->type->read_memory(target, breakpoint->address, 2, 1, breakpoint->orig_instr);
-			target->type->write_memory(target, breakpoint->address, 2, 1, (u8*)(&arm7_9->thumb_bkpt));
+			/* write the original instruction in target endianness (arm7_9->arm_bkpt is host endian) */
+			target_write_u32(target, breakpoint->address, arm7_9->thumb_bkpt);
 		}
 		breakpoint->set = 1;
 	}
@@ -234,6 +238,7 @@ int arm7_9_unset_breakpoint(struct target_s *target, breakpoint_t *breakpoint)
 	}
 	else
 	{
+		/* restore original instruction (kept in target endianness) */
 		if (breakpoint->length == 4)
 		{
 			target->type->write_memory(target, breakpoint->address, 4, 1, breakpoint->orig_instr);
@@ -534,7 +539,7 @@ int arm7_9_execute_sys_speed(struct target_s *target)
 	arm7_9_common_t *arm7_9 = armv4_5->arch_info;
 	arm_jtag_t *jtag_info = &arm7_9->jtag_info;
 	reg_t *dbg_stat = &arm7_9->eice_cache->reg_list[EICE_DBG_STAT];
-			
+				
 	/* set RESTART instruction */
 	jtag_add_end_state(TAP_RTI);
 	arm_jtag_set_instr(jtag_info, 0x4);
@@ -567,7 +572,7 @@ int arm7_9_execute_fast_sys_speed(struct target_s *target)
 	arm7_9_common_t *arm7_9 = armv4_5->arch_info;
 	arm_jtag_t *jtag_info = &arm7_9->jtag_info;
 	reg_t *dbg_stat = &arm7_9->eice_cache->reg_list[EICE_DBG_STAT];
-			
+				
 	/* set RESTART instruction */
 	jtag_add_end_state(TAP_RTI);
 	arm_jtag_set_instr(jtag_info, 0x4);
@@ -588,7 +593,6 @@ enum target_state arm7_9_poll(target_t *target)
 	armv4_5_common_t *armv4_5 = target->arch_info;
 	arm7_9_common_t *arm7_9 = armv4_5->arch_info;
 	reg_t *dbg_stat = &arm7_9->eice_cache->reg_list[EICE_DBG_STAT];
-	reg_t *dbg_ctrl = &arm7_9->eice_cache->reg_list[EICE_DBG_CTRL];
 
 	if (arm7_9->reinit_embeddedice)
 	{
@@ -967,6 +971,7 @@ int arm7_9_debug_entry(target_t *target)
 	
 	for (i=0; i<=15; i++)
 	{
+		DEBUG("r%i: 0x%8.8x", i, context[i]);
 		buf_set_u32(ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, i).value, 0, 32, context[i]);
 		ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, i).dirty = 0;
 		ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, i).valid = 1;
@@ -1619,14 +1624,13 @@ int arm7_9_read_memory(struct target_s *target, u32 address, u32 size, u32 count
 	arm7_9_common_t *arm7_9 = armv4_5->arch_info;
 	
 	u32 reg[16];
-	u32 *reg_p[16];
 	int num_accesses = 0;
 	int thisrun_accesses;
 	int i;
 	u32 cpsr;
 	int retval;
 	int last_reg = 0;
-
+	
 	DEBUG("address: 0x%8.8x, size: 0x%8.8x, count: 0x%8.8x", address, size, count);
 
 	if (target->state != TARGET_HALTED)
@@ -1642,11 +1646,6 @@ int arm7_9_read_memory(struct target_s *target, u32 address, u32 size, u32 count
 	if (((size == 4) && (address & 0x3u)) || ((size == 2) && (address & 0x1u)))
 		return ERROR_TARGET_UNALIGNED_ACCESS;
 	
-	for (i = 0; i < 16; i++)
-	{
-		reg_p[i] = &reg[i];
-	}
-	
 	/* load the base register with the address of the first word */
 	reg[0] = address;
 	arm7_9->write_core_regs(target, 0x1, reg);
@@ -1660,19 +1659,23 @@ int arm7_9_read_memory(struct target_s *target, u32 address, u32 size, u32 count
 				thisrun_accesses = ((count - num_accesses) >= 14) ? 14 : (count - num_accesses);
 				reg_list = (0xffff >> (15 - thisrun_accesses)) & 0xfffe;
 				
+				if (last_reg <= thisrun_accesses)
+					last_reg = thisrun_accesses;
+				
 				arm7_9->load_word_regs(target, reg_list);
-				arm7_9_execute_sys_speed(target);
 				
-				arm7_9->read_core_regs(target, reg_list, reg_p);
-				jtag_execute_queue();
+				/* fast memory reads are only safe when the target is running
+				 * from a sufficiently high clock (32 kHz is usually too slow)
+				 */
+				if (arm7_9->fast_memory_access)
+					arm7_9_execute_fast_sys_speed(target);
+				else
+					arm7_9_execute_sys_speed(target);
+									
+				arm7_9->read_core_regs_target_buffer(target, reg_list, buffer, 4);
 				
-				for (i = 1; i <= thisrun_accesses; i++)
-				{
-					if (i > last_reg)
-						last_reg = i;
-					target_buffer_set_u32(target, buffer, reg[i]);
-					buffer += 4;
-				}
+				/* advance buffer, count number of accesses */
+				buffer += thisrun_accesses * 4;
 				num_accesses += thisrun_accesses;
 			}	
 			break;
@@ -1688,17 +1691,19 @@ int arm7_9_read_memory(struct target_s *target, u32 address, u32 size, u32 count
 					if (i > last_reg)
 						last_reg = i;
 					arm7_9->load_hword_reg(target, i);
-					arm7_9_execute_sys_speed(target);
+					/* fast memory reads are only safe when the target is running
+					 * from a sufficiently high clock (32 kHz is usually too slow)
+					 */
+					if (arm7_9->fast_memory_access)
+						arm7_9_execute_fast_sys_speed(target);
+					else
+						arm7_9_execute_sys_speed(target);
 				}
 				
-				arm7_9->read_core_regs(target, reg_list, reg_p);
-				jtag_execute_queue();
+				arm7_9->read_core_regs_target_buffer(target, reg_list, buffer, 2);
 				
-				for (i = 1; i <= thisrun_accesses; i++)
-				{
-					target_buffer_set_u16(target, buffer, reg[i]);
-					buffer += 2;
-				}
+				/* advance buffer, count number of accesses */
+				buffer += thisrun_accesses * 2;
 				num_accesses += thisrun_accesses;
 			}	
 			break;
@@ -1714,16 +1719,19 @@ int arm7_9_read_memory(struct target_s *target, u32 address, u32 size, u32 count
 					if (i > last_reg)
 						last_reg = i;
 					arm7_9->load_byte_reg(target, i);
-					arm7_9_execute_sys_speed(target);
+					/* fast memory reads are only safe when the target is running
+					 * from a sufficiently high clock (32 kHz is usually too slow)
+					 */
+					if (arm7_9->fast_memory_access)
+						arm7_9_execute_fast_sys_speed(target);
+					else
+						arm7_9_execute_sys_speed(target);
 				}
 				
-				arm7_9->read_core_regs(target, reg_list, reg_p);
-				jtag_execute_queue();
+				arm7_9->read_core_regs_target_buffer(target, reg_list, buffer, 1);
 				
-				for (i = 1; i <= thisrun_accesses; i++)
-				{
-					*(buffer++) = reg[i] & 0xff;
-				}
+				/* advance buffer, count number of accesses */
+				buffer += thisrun_accesses * 1;
 				num_accesses += thisrun_accesses;
 			}	
 			break;
@@ -1759,6 +1767,7 @@ int arm7_9_write_memory(struct target_s *target, u32 address, u32 size, u32 coun
 {
 	armv4_5_common_t *armv4_5 = target->arch_info;
 	arm7_9_common_t *arm7_9 = armv4_5->arch_info;
+	reg_t *dbg_ctrl = &arm7_9->eice_cache->reg_list[EICE_DBG_CTRL];
 	
 	u32 reg[16];
 	int num_accesses = 0;
@@ -1787,6 +1796,10 @@ int arm7_9_write_memory(struct target_s *target, u32 address, u32 size, u32 coun
 	reg[0] = address;
 	arm7_9->write_core_regs(target, 0x1, reg);
 	
+	/* Clear DBGACK, to make sure memory fetches work as expected */
+	buf_set_u32(dbg_ctrl->value, EICE_DBG_CONTROL_DBGACK, 1, 0);
+	embeddedice_store_reg(dbg_ctrl);
+	
 	switch (size)
 	{
 		case 4:
@@ -1811,7 +1824,7 @@ int arm7_9_write_memory(struct target_s *target, u32 address, u32 size, u32 coun
 				/* fast memory writes are only safe when the target is running
 				 * from a sufficiently high clock (32 kHz is usually too slow)
 				 */
-				if (arm7_9->fast_memory_writes)
+				if (arm7_9->fast_memory_access)
 					arm7_9_execute_fast_sys_speed(target);
 				else
 					arm7_9_execute_sys_speed(target);
@@ -1843,7 +1856,7 @@ int arm7_9_write_memory(struct target_s *target, u32 address, u32 size, u32 coun
 					/* fast memory writes are only safe when the target is running
 					 * from a sufficiently high clock (32 kHz is usually too slow)
 					 */
-					if (arm7_9->fast_memory_writes)
+					if (arm7_9->fast_memory_access)
 						arm7_9_execute_fast_sys_speed(target);
 					else
 						arm7_9_execute_sys_speed(target);
@@ -1874,7 +1887,7 @@ int arm7_9_write_memory(struct target_s *target, u32 address, u32 size, u32 coun
 					/* fast memory writes are only safe when the target is running
 					 * from a sufficiently high clock (32 kHz is usually too slow)
 					 */
-					if (arm7_9->fast_memory_writes)
+					if (arm7_9->fast_memory_access)
 						arm7_9_execute_fast_sys_speed(target);
 					else
 						arm7_9_execute_sys_speed(target);
@@ -1889,11 +1902,9 @@ int arm7_9_write_memory(struct target_s *target, u32 address, u32 size, u32 coun
 			break;
 	}
 	
-	if ((retval = jtag_execute_queue()) != ERROR_OK)
-	{
-		ERROR("JTAG error while writing target memory");
-		exit(-1);
-	}
+	/* Re-Set DBGACK */
+	buf_set_u32(dbg_ctrl->value, EICE_DBG_CONTROL_DBGACK, 1, 1);
+	embeddedice_store_reg(dbg_ctrl);
 	
 	for (i=0; i<=last_reg; i++)
 		ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, i).dirty = 1;
@@ -1939,6 +1950,8 @@ int arm7_9_bulk_write_memory(target_t *target, u32 address, u32 count, u8 *buffe
 	/* regrab previously allocated working_area, or allocate a new one */
 	if (!arm7_9->dcc_working_area)
 	{
+		u8 dcc_code_buf[6 * 4];
+		
 		/* make sure we have a working area */
 		if (target_alloc_working_area(target, 24, &arm7_9->dcc_working_area) != ERROR_OK)
 		{
@@ -1946,8 +1959,14 @@ int arm7_9_bulk_write_memory(target_t *target, u32 address, u32 count, u8 *buffe
 			return target->type->write_memory(target, address, 4, count, buffer);
 		}
 		
+		/* copy target instructions to target endianness */
+		for (i = 0; i < 6; i++)
+		{
+			target_buffer_set_u32(target, dcc_code_buf + i*4, dcc_code[i]);
+		}
+		
 		/* write DCC code to working area */
-		target->type->write_memory(target, arm7_9->dcc_working_area->address, 4, 6, (u8*)dcc_code);
+		target->type->write_memory(target, arm7_9->dcc_working_area->address, 4, 6, dcc_code_buf);
 	}
 	
 	buf_set_u32(armv4_5->core_cache->reg_list[0].value, 0, 32, address);
@@ -1998,8 +2017,10 @@ int arm7_9_register_commands(struct command_context_s *cmd_ctx)
 	register_command(cmd_ctx, arm7_9_cmd, "force_hw_bkpts", handle_arm7_9_force_hw_bkpts_command, COMMAND_EXEC, "use hardware breakpoints for all breakpoints (disables sw breakpoint support) <enable|disable>");
 	register_command(cmd_ctx, arm7_9_cmd, "dbgrq", handle_arm7_9_dbgrq_command,
 		COMMAND_ANY, "use EmbeddedICE dbgrq instead of breakpoint for target halt requests <enable|disable>");
-	register_command(cmd_ctx, arm7_9_cmd, "fast_writes", handle_arm7_9_fast_writes_command,
-		 COMMAND_ANY, "use fast memory writes instead of slower but potentially unsafe slow writes <enable|disable>");
+	register_command(cmd_ctx, arm7_9_cmd, "fast_writes", handle_arm7_9_fast_memory_access_command,
+		 COMMAND_ANY, "(deprecated, see: arm7_9 fast_memory_access)");
+	register_command(cmd_ctx, arm7_9_cmd, "fast_memory_access", handle_arm7_9_fast_memory_access_command,
+		 COMMAND_ANY, "use fast memory accesses instead of slower but potentially unsafe slow accesses <enable|disable>");
 	register_command(cmd_ctx, arm7_9_cmd, "dcc_downloads", handle_arm7_9_dcc_downloads_command,
 		COMMAND_ANY, "use DCC downloads for larger memory writes <enable|disable>");
 
@@ -2243,7 +2264,7 @@ int handle_arm7_9_dbgrq_command(struct command_context_s *cmd_ctx, char *cmd, ch
 	return ERROR_OK;
 }
 
-int handle_arm7_9_fast_writes_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc)
+int handle_arm7_9_fast_memory_access_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc)
 {
 	target_t *target = get_current_target(cmd_ctx);
 	armv4_5_common_t *armv4_5;
@@ -2259,19 +2280,19 @@ int handle_arm7_9_fast_writes_command(struct command_context_s *cmd_ctx, char *c
 	{
 		if (strcmp("enable", args[0]) == 0)
 		{
-			arm7_9->fast_memory_writes = 1;
+			arm7_9->fast_memory_access = 1;
 		}
 		else if (strcmp("disable", args[0]) == 0)
 		{
-			arm7_9->fast_memory_writes = 0;
+			arm7_9->fast_memory_access = 0;
 		}
 		else
 		{
-			command_print(cmd_ctx, "usage: arm7_9 fast_writes <enable|disable>");
+			command_print(cmd_ctx, "usage: arm7_9 fast_memory_access <enable|disable>");
 		}
 	}
 		
-	command_print(cmd_ctx, "fast memory writes are %s", (arm7_9->fast_memory_writes) ? "enabled" : "disabled");
+	command_print(cmd_ctx, "fast memory access is %s", (arm7_9->fast_memory_access) ? "enabled" : "disabled");
 
 	return ERROR_OK;
 }
@@ -2327,7 +2348,7 @@ int arm7_9_init_arch_info(target_t *target, arm7_9_common_t *arm7_9)
 	
 	arm7_9->dcc_working_area = NULL;
 	
-	arm7_9->fast_memory_writes = 0;
+	arm7_9->fast_memory_access = 0;
 	arm7_9->dcc_downloads = 0;
 
 	jtag_register_event_callback(arm7_9_jtag_callback, target);
diff --git a/src/target/arm7_9_common.h b/src/target/arm7_9_common.h
index f03ae496..08468907 100644
--- a/src/target/arm7_9_common.h
+++ b/src/target/arm7_9_common.h
@@ -51,14 +51,15 @@ typedef struct arm7_9_common_s
 	
 	struct working_area_s *dcc_working_area;
 	
-	int fast_memory_writes;
+	int fast_memory_access;
 	int dcc_downloads;
 
 	int (*examine_debug_reason)(target_t *target);
 	
 	void (*change_to_arm)(target_t *target, u32 *r0, u32 *pc);
 	
-	void (*read_core_regs)(target_t *target, u32 mask, u32* core_regs[16]);
+	void (*read_core_regs)(target_t *target, u32 mask, u32 *core_regs[16]);
+	void (*read_core_regs_target_buffer)(target_t *target, u32 mask, void *buffer, int size);
 	void (*read_xpsr)(target_t *target, u32 *xpsr, int spsr);
 	
 	void (*write_xpsr)(target_t *target, u32 xpsr, int spsr);
diff --git a/src/target/arm7tdmi.c b/src/target/arm7tdmi.c
index 5f16f71f..0a6caeac 100644
--- a/src/target/arm7tdmi.c
+++ b/src/target/arm7tdmi.c
@@ -51,7 +51,6 @@ int arm7tdmi_quit();
 /* target function declarations */
 enum target_state arm7tdmi_poll(struct target_s *target);
 int arm7tdmi_halt(target_t *target);
-int arm7tdmi_read_memory(struct target_s *target, u32 address, u32 size, u32 count, u8 *buffer);
 		
 target_type_t arm7tdmi_target =
 {
@@ -152,8 +151,7 @@ int arm7tdmi_clock_out(arm_jtag_t *jtag_info, u32 out, u32 *in, int breakpoint)
 	u8 out_buf[4];
 	u8 breakpoint_buf;
 	
-	out = flip_u32(out, 32);
-	buf_set_u32(out_buf, 0, 32, out);
+	buf_set_u32(out_buf, 0, 32, flip_u32(out, 32));
 	buf_set_u32(&breakpoint_buf, 0, 1, breakpoint);
 
 	jtag_add_end_state(TAP_PD);
@@ -174,18 +172,17 @@ int arm7tdmi_clock_out(arm_jtag_t *jtag_info, u32 out, u32 *in, int breakpoint)
 	fields[1].num_bits = 32;
 	fields[1].out_value = out_buf;
 	fields[1].out_mask = NULL;
+	fields[1].in_value = NULL;
 	if (in)
 	{
-		fields[1].in_value = (u8*)in;
 		fields[1].in_handler = arm_jtag_buf_to_u32_flip;
 		fields[1].in_handler_priv = in;
-	} else
+	}
+	else
 	{
-		fields[1].in_value = NULL;
 		fields[1].in_handler = NULL;
 		fields[1].in_handler_priv = NULL;
 	}
-
 	fields[1].in_check_value = NULL;
 	fields[1].in_check_mask = NULL;
 
@@ -195,24 +192,21 @@ int arm7tdmi_clock_out(arm_jtag_t *jtag_info, u32 out, u32 *in, int breakpoint)
 	
 #ifdef _DEBUG_INSTRUCTION_EXECUTION_
 {
-		char* in_string;
 		jtag_execute_queue();
 		
 		if (in)
 		{
-			in_string = buf_to_char((u8*)in, 32);
-			DEBUG("out: 0x%8.8x, in: %s", flip_u32(out, 32), in_string);
-			free(in_string);
+			DEBUG("out: 0x%8.8x, in: 0x%8.8x", out, *in);
 		}
 		else
-			DEBUG("out: 0x%8.8x", flip_u32(out, 32));
+			DEBUG("out: 0x%8.8x", out);
 }
 #endif
 
 	return ERROR_OK;
 }
 
-/* put an instruction in the ARM7TDMI pipeline, and optionally read data */
+/* clock the target, reading the databus */
 int arm7tdmi_clock_data_in(arm_jtag_t *jtag_info, u32 *in)
 {
 	scan_field_t fields[2];
@@ -235,7 +229,7 @@ int arm7tdmi_clock_data_in(arm_jtag_t *jtag_info, u32 *in)
 	fields[1].num_bits = 32;
 	fields[1].out_value = NULL;
 	fields[1].out_mask = NULL;
-	fields[1].in_value = (u8*)in;
+	fields[1].in_value = NULL;
 	fields[1].in_handler = arm_jtag_buf_to_u32_flip;
 	fields[1].in_handler_priv = in;
 	fields[1].in_check_value = NULL;
@@ -247,14 +241,80 @@ int arm7tdmi_clock_data_in(arm_jtag_t *jtag_info, u32 *in)
 	
 #ifdef _DEBUG_INSTRUCTION_EXECUTION_
 {
-		char* in_string;
 		jtag_execute_queue();
 			
 		if (in)
 		{
-			in_string = buf_to_char((u8*)in, 32);
-			DEBUG("in: %s", in_string);
-			free(in_string);
+			DEBUG("in: 0x%8.8x", *in);
+		}
+		else
+		{
+			ERROR("BUG: called with in == NULL");
+		}
+}
+#endif
+
+	return ERROR_OK;
+}
+
+/* clock the target, and read the databus
+ * the *in pointer points to a buffer where elements of 'size' bytes
+ * are stored in big (be==1) or little (be==0) endianness
+ */ 
+int arm7tdmi_clock_data_in_endianness(arm_jtag_t *jtag_info, void *in, int size, int be)
+{
+	scan_field_t fields[2];
+
+	jtag_add_end_state(TAP_PD);
+	arm_jtag_scann(jtag_info, 0x1);
+	arm_jtag_set_instr(jtag_info, jtag_info->intest_instr);
+	
+	fields[0].device = jtag_info->chain_pos;
+	fields[0].num_bits = 1;
+	fields[0].out_value = NULL;
+	fields[0].out_mask = NULL;
+	fields[0].in_value = NULL;
+	fields[0].in_check_value = NULL;
+	fields[0].in_check_mask = NULL;
+	fields[0].in_handler = NULL;
+	fields[0].in_handler_priv = NULL;
+		
+	fields[1].device = jtag_info->chain_pos;
+	fields[1].num_bits = 32;
+	fields[1].out_value = NULL;
+	fields[1].out_mask = NULL;
+	fields[1].in_value = NULL;
+	switch (size)
+	{
+		case 4:
+			fields[1].in_handler = (be) ? arm_jtag_buf_to_be32_flip : arm_jtag_buf_to_le32_flip;
+			break;
+		case 2:
+			fields[1].in_handler = (be) ? arm_jtag_buf_to_be16_flip : arm_jtag_buf_to_le16_flip;
+			break;
+		case 1:
+			fields[1].in_handler = arm_jtag_buf_to_8_flip;
+			break;
+	}
+	fields[1].in_handler_priv = in;
+	fields[1].in_check_value = NULL;
+	fields[1].in_check_mask = NULL;
+
+	jtag_add_dr_scan(2, fields, -1);
+
+	jtag_add_runtest(0, -1);
+	
+#ifdef _DEBUG_INSTRUCTION_EXECUTION_
+{
+		jtag_execute_queue();
+			
+		if (in)
+		{
+			DEBUG("in: 0x%8.8x", *in);
+		}
+		else
+		{
+			ERROR("BUG: called with in == NULL");
 		}
 }
 #endif
@@ -334,6 +394,50 @@ void arm7tdmi_read_core_regs(target_t *target, u32 mask, u32* core_regs[16])
 
 }
 
+void arm7tdmi_read_core_regs_target_buffer(target_t *target, u32 mask, void* buffer, int size)
+{
+	int i;
+	/* get pointers to arch-specific information */
+	armv4_5_common_t *armv4_5 = target->arch_info;
+	arm7_9_common_t *arm7_9 = armv4_5->arch_info;
+	arm_jtag_t *jtag_info = &arm7_9->jtag_info;
+	int be = (target->endianness == TARGET_BIG_ENDIAN) ? 1 : 0;
+	u32 *buf_u32 = buffer;
+	u16 *buf_u16 = buffer;
+	u8 *buf_u8 = buffer;
+		
+	/* STMIA r0-15, [r0] at debug speed
+	 * register values will start to appear on 4th DCLK
+	 */
+	arm7tdmi_clock_out(jtag_info, ARMV4_5_STMIA(0, mask & 0xffff, 0, 0), NULL, 0);
+
+	/* fetch NOP, STM in DECODE stage */
+	arm7tdmi_clock_out(jtag_info, ARMV4_5_NOP, NULL, 0);
+	/* fetch NOP, STM in EXECUTE stage (1st cycle) */
+	arm7tdmi_clock_out(jtag_info, ARMV4_5_NOP, NULL, 0);
+
+	for (i = 0; i <= 15; i++)
+	{
+		/* nothing fetched, STM still in EXECUTE (1+i cycle), read databus */
+		if (mask & (1 << i))
+		{
+			switch (size)
+			{
+				case 4:
+					arm7tdmi_clock_data_in_endianness(jtag_info, buf_u32++, 4, be);
+					break;
+				case 2:
+					arm7tdmi_clock_data_in_endianness(jtag_info, buf_u16++, 2, be);
+					break;
+				case 1:
+					arm7tdmi_clock_data_in_endianness(jtag_info, buf_u8++, 1, be);
+					break;
+			}
+		}
+	}
+	
+}
+
 void arm7tdmi_read_xpsr(target_t *target, u32 *xpsr, int spsr)
 {
 	/* get pointers to arch-specific information */
@@ -684,6 +788,7 @@ int arm7tdmi_init_arch_info(target_t *target, arm7tdmi_common_t *arm7tdmi, int c
 	arm7_9->examine_debug_reason = arm7tdmi_examine_debug_reason;
 	arm7_9->change_to_arm = arm7tdmi_change_to_arm;
 	arm7_9->read_core_regs = arm7tdmi_read_core_regs;
+	arm7_9->read_core_regs_target_buffer = arm7tdmi_read_core_regs_target_buffer;
 	arm7_9->read_xpsr = arm7tdmi_read_xpsr;
 	
 	arm7_9->write_xpsr = arm7tdmi_write_xpsr;
diff --git a/src/target/arm920t.c b/src/target/arm920t.c
index 54feaff2..91dbb29b 100644
--- a/src/target/arm920t.c
+++ b/src/target/arm920t.c
@@ -42,6 +42,9 @@ int arm920t_handle_cache_info_command(struct command_context_s *cmd_ctx, char *c
 int arm920t_handle_md_phys_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
 int arm920t_handle_mw_phys_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
 
+int arm920t_handle_read_cache_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
+int arm920t_handle_read_mmu_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc);
+
 /* forward declarations */
 int arm920t_target_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc, struct target_s *target);
 int arm920t_init_target(struct command_context_s *cmd_ctx, struct target_s *target);
@@ -51,6 +54,8 @@ int arm920t_read_memory(struct target_s *target, u32 address, u32 size, u32 coun
 int arm920t_write_memory(struct target_s *target, u32 address, u32 size, u32 count, u8 *buffer);
 int arm920t_soft_reset_halt(struct target_s *target);
 
+#define ARM920T_CP15_PHYS_ADDR(x, y, z) ((x << 5) | (y << 1) << (z))
+
 target_type_t arm920t_target =
 {
 	.name = "arm920t",
@@ -141,10 +146,16 @@ int arm920t_read_cp15_physical(target_t *target, int reg_addr, u32 *value)
 	
 	jtag_add_dr_scan(4, fields, -1);
 
-	fields[1].in_value = (u8*)value;
+	fields[1].in_handler_priv = value;
+	fields[1].in_handler = arm_jtag_buf_to_u32;
 
 	jtag_add_dr_scan(4, fields, -1);
 
+#ifdef _DEBUG_INSTRUCTION_EXECUTION_
+	jtag_execute_queue();
+	DEBUG("addr: 0x%x value: %8.8x", reg_addr, *value);
+#endif
+
 	return ERROR_OK;
 }
 
@@ -157,6 +168,9 @@ int arm920t_write_cp15_physical(target_t *target, int reg_addr, u32 value)
 	u8 access_type_buf = 1;
 	u8 reg_addr_buf = reg_addr & 0x3f;
 	u8 nr_w_buf = 1;
+	u8 value_buf[4];
+	
+	buf_set_u32(value_buf, 0, 32, value);
 	
 	jtag_add_end_state(TAP_RTI);
 	arm_jtag_scann(jtag_info, 0xf);
@@ -174,7 +188,7 @@ int arm920t_write_cp15_physical(target_t *target, int reg_addr, u32 value)
 
 	fields[1].device = jtag_info->chain_pos;
 	fields[1].num_bits = 32;
-	fields[1].out_value = (u8*)&value;
+	fields[1].out_value = value_buf;
 	fields[1].out_mask = NULL;
 	fields[1].in_value = NULL;
 	fields[1].in_check_value = NULL;
@@ -204,31 +218,29 @@ int arm920t_write_cp15_physical(target_t *target, int reg_addr, u32 value)
 	
 	jtag_add_dr_scan(4, fields, -1);
 
+#ifdef _DEBUG_INSTRUCTION_EXECUTION_
+	DEBUG("addr: 0x%x value: %8.8x", reg_addr, value);
+#endif
+
 	return ERROR_OK;
 }
 
-int arm920t_read_cp15_interpreted(target_t *target, u32 opcode, u32 *value)
+int arm920t_execute_cp15(target_t *target, u32 cp15_opcode, u32 arm_opcode)
 {
-	u32 cp15c15 = 0x0;
+	armv4_5_common_t *armv4_5 = target->arch_info;
+	arm7_9_common_t *arm7_9 = armv4_5->arch_info;
+	arm_jtag_t *jtag_info = &arm7_9->jtag_info;
 	scan_field_t fields[4];
 	u8 access_type_buf = 0;		/* interpreted access */
 	u8 reg_addr_buf = 0x0;
 	u8 nr_w_buf = 0;
-	armv4_5_common_t *armv4_5 = target->arch_info;
-	arm7_9_common_t *arm7_9 = armv4_5->arch_info;
-	arm_jtag_t *jtag_info = &arm7_9->jtag_info;
-	u32* context_p[1];	
+	u8 cp15_opcode_buf[4];
 	
-	/* read-modify-write CP15 test state register 
-	* to enable interpreted access mode */
-	arm920t_read_cp15_physical(target, 0x1e, &cp15c15);	
-	jtag_execute_queue();
-	cp15c15 |= 1;	/* set interpret mode */
-	arm920t_write_cp15_physical(target, 0x1e, cp15c15);
-
 	jtag_add_end_state(TAP_RTI);
 	arm_jtag_scann(jtag_info, 0xf);
 	arm_jtag_set_instr(jtag_info, jtag_info->intest_instr);
+	
+	buf_set_u32(cp15_opcode_buf, 0, 32, cp15_opcode);
 
 	fields[0].device = jtag_info->chain_pos;
 	fields[0].num_bits = 1;
@@ -242,7 +254,7 @@ int arm920t_read_cp15_interpreted(target_t *target, u32 opcode, u32 *value)
 
 	fields[1].device = jtag_info->chain_pos;
 	fields[1].num_bits = 32;
-	fields[1].out_value = (u8*)&opcode;
+	fields[1].out_value = cp15_opcode_buf;
 	fields[1].out_mask = NULL;
 	fields[1].in_value = NULL;
 	fields[1].in_check_value = NULL;
@@ -272,45 +284,68 @@ int arm920t_read_cp15_interpreted(target_t *target, u32 opcode, u32 *value)
 
 	jtag_add_dr_scan(4, fields, -1);
 
-	arm9tdmi_clock_out(jtag_info, ARMV4_5_LDR(0, 15), 0, NULL, 0);
+	arm9tdmi_clock_out(jtag_info, arm_opcode, 0, NULL, 0);
 	arm9tdmi_clock_out(jtag_info, ARMV4_5_NOP, 0, NULL, 1);
 	arm7_9_execute_sys_speed(target);
-	jtag_execute_queue();
+	
+	if (jtag_execute_queue() != ERROR_OK)
+	{
+		ERROR("failed executing JTAG queue, exiting");
+		exit(-1);
+	}
+	
+	return ERROR_OK;
+}
 
+int arm920t_read_cp15_interpreted(target_t *target, u32 cp15_opcode, u32 address, u32 *value)
+{
+	armv4_5_common_t *armv4_5 = target->arch_info;
+	u32* regs_p[1];
+	u32 regs[2];
+	u32 cp15c15 = 0x0;
+
+	/* load address into R1 */
+	regs[1] = address;
+	arm9tdmi_write_core_regs(target, 0x2, regs); 
+	
 	/* read-modify-write CP15 test state register 
-	* to disable interpreted access mode */
+	* to enable interpreted access mode */
 	arm920t_read_cp15_physical(target, 0x1e, &cp15c15);	
 	jtag_execute_queue();
+	cp15c15 |= 1;	/* set interpret mode */
+	arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+
+	/* execute CP15 instruction and ARM load (reading from coprocessor) */
+	arm920t_execute_cp15(target, cp15_opcode, ARMV4_5_LDR(0, 1));
+	
+	/* disable interpreted access mode */
 	cp15c15 &= ~1U;	/* clear interpret mode */
 	arm920t_write_cp15_physical(target, 0x1e, cp15c15);
 
-	context_p[0] = value;
-	arm9tdmi_read_core_regs(target, 0x1, context_p);
+	/* retrieve value from R0 */
+	regs_p[0] = value;
+	arm9tdmi_read_core_regs(target, 0x1, regs_p);
 	jtag_execute_queue();
 	
-	DEBUG("opcode: %8.8x, value: %8.8x", opcode, *value);
+#ifdef _DEBUG_INSTRUCTION_EXECUTION_
+	DEBUG("cp15_opcode: %8.8x, address: %8.8x, value: %8.8x", cp15_opcode, address, *value);
+#endif
 
 	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 0).dirty = 1;
-	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 15).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 1).dirty = 1;
 
 	return ERROR_OK;
 }
 
-int arm920t_write_cp15_interpreted(target_t *target, u32 opcode, u32 value, u32 address)
+int arm920t_write_cp15_interpreted(target_t *target, u32 cp15_opcode, u32 value, u32 address)
 {
 	u32 cp15c15 = 0x0;
-	scan_field_t fields[4];
-	u8 access_type_buf = 0;		/* interpreted access */
-	u8 reg_addr_buf = 0x0;
-	u8 nr_w_buf = 0;
 	armv4_5_common_t *armv4_5 = target->arch_info;
-	arm7_9_common_t *arm7_9 = armv4_5->arch_info;
-	arm_jtag_t *jtag_info = &arm7_9->jtag_info;
 	u32 regs[2];
 
+	/* load value, address into R0, R1 */
 	regs[0] = value;
 	regs[1] = address;
-	
 	arm9tdmi_write_core_regs(target, 0x3, regs);
 
 	/* read-modify-write CP15 test state register 
@@ -320,65 +355,16 @@ int arm920t_write_cp15_interpreted(target_t *target, u32 opcode, u32 value, u32
 	cp15c15 |= 1;	/* set interpret mode */
 	arm920t_write_cp15_physical(target, 0x1e, cp15c15);
 
-	jtag_add_end_state(TAP_RTI);
-	arm_jtag_scann(jtag_info, 0xf);
-	arm_jtag_set_instr(jtag_info, jtag_info->intest_instr);
-
-	fields[0].device = jtag_info->chain_pos;
-	fields[0].num_bits = 1;
-	fields[0].out_value = &access_type_buf;
-	fields[0].out_mask = NULL;
-	fields[0].in_value = NULL;
-	fields[0].in_check_value = NULL;
-	fields[0].in_check_mask = NULL;
-	fields[0].in_handler = NULL;
-	fields[0].in_handler_priv = NULL;
-
-	fields[1].device = jtag_info->chain_pos;
-	fields[1].num_bits = 32;
-	fields[1].out_value = (u8*)&opcode;
-	fields[1].out_mask = NULL;
-	fields[1].in_value = NULL;
-	fields[1].in_check_value = NULL;
-	fields[1].in_check_mask = NULL;
-	fields[1].in_handler = NULL;
-	fields[1].in_handler_priv = NULL;
-
-	fields[2].device = jtag_info->chain_pos;
-	fields[2].num_bits = 6;
-	fields[2].out_value = &reg_addr_buf;
-	fields[2].out_mask = NULL;
-	fields[2].in_value = NULL;
-	fields[2].in_check_value = NULL;
-	fields[2].in_check_mask = NULL;
-	fields[2].in_handler = NULL;
-	fields[2].in_handler_priv = NULL;
-
-	fields[3].device = jtag_info->chain_pos;
-	fields[3].num_bits = 1;
-	fields[3].out_value = &nr_w_buf;
-	fields[3].out_mask = NULL;
-	fields[3].in_value = NULL;
-	fields[3].in_check_value = NULL;
-	fields[3].in_check_mask = NULL;
-	fields[3].in_handler = NULL;
-	fields[3].in_handler_priv = NULL;
-
-	jtag_add_dr_scan(4, fields, -1);
-
-	arm9tdmi_clock_out(jtag_info, ARMV4_5_STR(0, 1), 0, NULL, 0);
-	arm9tdmi_clock_out(jtag_info, ARMV4_5_NOP, 0, NULL, 1);
-	arm7_9_execute_sys_speed(target);
-	jtag_execute_queue();
+	/* execute CP15 instruction and ARM store (writing to coprocessor) */
+	arm920t_execute_cp15(target, cp15_opcode, ARMV4_5_STR(0, 1));
 
-	/* read-modify-write CP15 test state register 
-	* to disable interpreted access mode */
-	arm920t_read_cp15_physical(target, 0x1e, &cp15c15);	
-	jtag_execute_queue();
+	/* disable interpreted access mode */
 	cp15c15 &= ~1U;	/* set interpret mode */
 	arm920t_write_cp15_physical(target, 0x1e, cp15c15);
 
-	DEBUG("opcode: %8.8x, value: %8.8x, address: %8.8x", opcode, value, address);
+#ifdef _DEBUG_INSTRUCTION_EXECUTION_
+	DEBUG("cp15_opcode: %8.8x, value: %8.8x, address: %8.8x", cp15_opcode, value, address);
+#endif
 
 	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 0).dirty = 1;
 	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 1).dirty = 1;
@@ -391,7 +377,7 @@ u32 arm920t_get_ttb(target_t *target)
 	int retval;
 	u32 ttb = 0x0;
 
-	if ((retval = arm920t_read_cp15_interpreted(target, 0xeebf0f51, &ttb)) != ERROR_OK)
+	if ((retval = arm920t_read_cp15_interpreted(target, 0xeebf0f51, 0x0, &ttb)) != ERROR_OK)
 		return retval;
 
 	return ttb;
@@ -464,18 +450,20 @@ void arm920t_post_debug_entry(target_t *target)
 	arm920t->armv4_5_mmu.armv4_5_cache.i_cache_enabled = (arm920t->cp15_control_reg & 0x1000U) ? 1 : 0;
 
 	/* save i/d fault status and address register */
-	arm920t_read_cp15_interpreted(target, 0xee150f10, &arm920t->d_fsr);
-	arm920t_read_cp15_interpreted(target, 0xee150f30, &arm920t->i_fsr);
-	arm920t_read_cp15_interpreted(target, 0xee160f10, &arm920t->d_far);
-	arm920t_read_cp15_interpreted(target, 0xee160f30, &arm920t->i_far);
-
-	/* read-modify-write CP15 test state register 
-	* to disable I/D-cache linefills */
-	arm920t_read_cp15_physical(target, 0x1e, &cp15c15);	
-	jtag_execute_queue();
-	cp15c15 |= 0x600;
-	arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+	arm920t_read_cp15_interpreted(target, 0xee150f10, 0x0, &arm920t->d_fsr);
+	arm920t_read_cp15_interpreted(target, 0xee150f30, 0x0, &arm920t->i_fsr);
+	arm920t_read_cp15_interpreted(target, 0xee160f10, 0x0, &arm920t->d_far);
+	arm920t_read_cp15_interpreted(target, 0xee160f30, 0x0, &arm920t->i_far);
 
+	if (arm920t->preserve_cache)
+	{
+		/* read-modify-write CP15 test state register 
+		 * to disable I/D-cache linefills */
+		arm920t_read_cp15_physical(target, 0x1e, &cp15c15);
+		jtag_execute_queue();
+		cp15c15 |= 0x600;
+		arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+	}
 }
 
 void arm920t_pre_restore_context(target_t *target)
@@ -494,11 +482,13 @@ void arm920t_pre_restore_context(target_t *target)
 	
 	/* read-modify-write CP15 test state register 
 	* to reenable I/D-cache linefills */
-	arm920t_read_cp15_physical(target, 0x1e, &cp15c15);
-	jtag_execute_queue();
-	cp15c15 &= ~0x600U;
-	arm920t_write_cp15_physical(target, 0x1e, cp15c15);
-
+	if (arm920t->preserve_cache)
+	{
+		arm920t_read_cp15_physical(target, 0x1e, &cp15c15);
+		jtag_execute_queue();
+		cp15c15 &= ~0x600U;
+		arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+	}
 }
 
 int arm920t_get_arch_pointers(target_t *target, armv4_5_common_t **armv4_5_p, arm7_9_common_t **arm7_9_p, arm9tdmi_common_t **arm9tdmi_p, arm920t_common_t **arm920t_p)
@@ -682,6 +672,8 @@ int arm920t_init_arch_info(target_t *target, arm920t_common_t *arm920t, int chai
 	arm9tdmi_common_t *arm9tdmi = &arm920t->arm9tdmi_common;
 	arm7_9_common_t *arm7_9 = &arm9tdmi->arm7_9_common;
 	
+	/* initialize arm9tdmi specific info (including arm7_9 and armv4_5)
+	 */
 	arm9tdmi_init_arch_info(target, arm9tdmi, chain_pos, variant);
 
 	arm9tdmi->arch_info = arm920t;
@@ -699,6 +691,13 @@ int arm920t_init_arch_info(target_t *target, arm920t_common_t *arm920t, int chai
 	arm920t->armv4_5_mmu.has_tiny_pages = 1;
 	arm920t->armv4_5_mmu.mmu_enabled = 0;
 	
+	/* disabling linefills leads to lockups, so keep them enabled for now
+	 * this doesn't affect correctness, but might affect timing issues, if
+	 * important data is evicted from the cache during the debug session
+	 * */
+	arm920t->preserve_cache = 0;
+	
+	/* override hw single-step capability from ARM9TDMI */
 	arm9tdmi->has_single_step = 1;
 	
 	return ERROR_OK;
@@ -751,9 +750,513 @@ int arm920t_register_commands(struct command_context_s *cmd_ctx)
 	register_command(cmd_ctx, arm920t_cmd, "mwh_phys", arm920t_handle_mw_phys_command, COMMAND_EXEC, "write memory half-word <physical addr> <value>");
 	register_command(cmd_ctx, arm920t_cmd, "mwb_phys", arm920t_handle_mw_phys_command, COMMAND_EXEC, "write memory byte <physical addr> <value>");
 
+	register_command(cmd_ctx, arm920t_cmd, "read_cache", arm920t_handle_read_cache_command, COMMAND_EXEC, "display I/D cache content");
+	register_command(cmd_ctx, arm920t_cmd, "read_mmu", arm920t_handle_read_mmu_command, COMMAND_EXEC, "display I/D mmu content");
+
 	return ERROR_OK;
 }
 
+int arm920t_handle_read_cache_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc)
+{
+	target_t *target = get_current_target(cmd_ctx);
+	armv4_5_common_t *armv4_5;
+	arm7_9_common_t *arm7_9;
+	arm9tdmi_common_t *arm9tdmi;
+	arm920t_common_t *arm920t;
+	arm_jtag_t *jtag_info;
+	u32 cp15c15;
+	u32 cp15_ctrl, cp15_ctrl_saved;
+	u32 regs[16];
+	u32 *regs_p[16];
+	u32 C15_C_D_Ind, C15_C_I_Ind;
+	int i;
+	FILE *output;
+	arm920t_cache_line_t d_cache[8][64], i_cache[8][64];
+	int segment, index;
+	
+	if (argc != 1)
+	{
+		command_print(cmd_ctx, "usage: arm920t read_cache <filename>");
+		return ERROR_OK;
+	}
+	
+	if ((output = fopen(args[0], "w")) == NULL)
+	{
+		DEBUG("error opening cache content file");
+		return ERROR_OK;
+	}
+	
+	for (i = 0; i < 16; i++)
+		regs_p[i] = &regs[i];
+		
+	if (arm920t_get_arch_pointers(target, &armv4_5, &arm7_9, &arm9tdmi, &arm920t) != ERROR_OK)
+	{
+		command_print(cmd_ctx, "current target isn't an ARM920t target");
+		return ERROR_OK;
+	}
+	
+	jtag_info = &arm7_9->jtag_info;
+	
+	/* disable MMU and Caches */
+	arm920t_read_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0x1, 0), &cp15_ctrl);
+	jtag_execute_queue();
+	cp15_ctrl_saved = cp15_ctrl;
+	cp15_ctrl &= ~(ARMV4_5_MMU_ENABLED | ARMV4_5_D_U_CACHE_ENABLED | ARMV4_5_I_CACHE_ENABLED);
+	arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0x1, 0), cp15_ctrl);
+
+	/* read CP15 test state register */ 
+	arm920t_read_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), &cp15c15);
+	jtag_execute_queue();
+	
+	/* read DCache content */
+	fprintf(output, "DCache:\n");
+	
+	/* go through segments 0 to nsets (8 on ARM920T, 4 on ARM922T) */ 
+	for (segment = 0; segment < arm920t->armv4_5_mmu.armv4_5_cache.d_u_size.nsets; segment++)
+	{
+		fprintf(output, "\nsegment: %i\n----------", segment);
+		
+		/* Ra: r0 = SBZ(31:8):segment(7:5):SBZ(4:0) */
+		regs[0] = 0x0 | (segment << 5);
+		arm9tdmi_write_core_regs(target, 0x1, regs);
+		
+		/* set interpret mode */
+		cp15c15 |= 0x1;
+		arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+		
+		/* D CAM Read, loads current victim into C15.C.D.Ind */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,2,0,15,6,2), ARMV4_5_LDR(1, 0));
+	
+		/* read current victim */
+		arm920t_read_cp15_physical(target, 0x3d, &C15_C_D_Ind);
+
+		/* clear interpret mode */
+		cp15c15 &= ~0x1;
+		arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+
+		for (index = 0; index < 64; index++)
+		{
+			/* Ra: r0 = index(31:26):SBZ(25:8):segment(7:5):SBZ(4:0) */
+			regs[0] = 0x0 | (segment << 5) | (index << 26);
+			arm9tdmi_write_core_regs(target, 0x1, regs);
+
+			/* set interpret mode */
+			cp15c15 |= 0x1;
+			arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+	
+			/* Write DCache victim */
+			arm920t_execute_cp15(target, ARMV4_5_MCR(15,0,0,9,1,0), ARMV4_5_LDR(1, 0));
+	
+			/* Read D RAM */
+			arm920t_execute_cp15(target, ARMV4_5_MCR(15,2,0,15,10,2), ARMV4_5_LDMIA(0, 0x1fe, 0, 0));
+			
+			/* Read D CAM */
+			arm920t_execute_cp15(target, ARMV4_5_MCR(15,2,0,15,6,2), ARMV4_5_LDR(9, 0));
+			
+			/* clear interpret mode */
+			cp15c15 &= ~0x1;
+			arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+
+			/* read D RAM and CAM content */
+			arm9tdmi_read_core_regs(target, 0x3fe, regs_p);
+			jtag_execute_queue();
+
+			d_cache[segment][index].cam = regs[9];
+			
+			/* mask LFSR[6] */
+			regs[9] &= 0xfffffffe;
+			fprintf(output, "\nsegment: %i, index: %i, CAM: 0x%8.8x, content (%s):\n", segment, index, regs[9], (regs[9] & 0x10) ? "valid" : "invalid");
+			
+			for (i = 1; i < 9; i++)
+			{
+				 d_cache[segment][index].data[i] = regs[i];
+				 fprintf(output, "%i: 0x%8.8x\n", i-1, regs[i]);
+			}
+	
+		}
+		
+		/* Ra: r0 = index(31:26):SBZ(25:8):segment(7:5):SBZ(4:0) */
+		regs[0] = 0x0 | (segment << 5) | (C15_C_D_Ind << 26);
+		arm9tdmi_write_core_regs(target, 0x1, regs);
+
+		/* set interpret mode */
+		cp15c15 |= 0x1;
+		arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+	
+		/* Write DCache victim */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,0,0,9,1,0), ARMV4_5_LDR(1, 0));
+	
+		/* clear interpret mode */
+		cp15c15 &= ~0x1;
+		arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+	}
+
+	/* read ICache content */
+	fprintf(output, "ICache:\n");
+	
+	/* go through segments 0 to nsets (8 on ARM920T, 4 on ARM922T) */ 
+	for (segment = 0; segment < arm920t->armv4_5_mmu.armv4_5_cache.d_u_size.nsets; segment++)
+	{
+		fprintf(output, "segment: %i\n----------", segment);
+		
+		/* Ra: r0 = SBZ(31:8):segment(7:5):SBZ(4:0) */
+		regs[0] = 0x0 | (segment << 5);
+		arm9tdmi_write_core_regs(target, 0x1, regs);
+		
+		/* set interpret mode */
+		cp15c15 |= 0x1;
+		arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+		
+		/* I CAM Read, loads current victim into C15.C.I.Ind */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,2,0,15,5,2), ARMV4_5_LDR(1, 0));
+	
+		/* read current victim */
+		arm920t_read_cp15_physical(target, 0x3b, &C15_C_I_Ind);
+
+		/* clear interpret mode */
+		cp15c15 &= ~0x1;
+		arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+
+		for (index = 0; index < 64; index++)
+		{
+			/* Ra: r0 = index(31:26):SBZ(25:8):segment(7:5):SBZ(4:0) */
+			regs[0] = 0x0 | (segment << 5) | (index << 26);
+			arm9tdmi_write_core_regs(target, 0x1, regs);
+
+			/* set interpret mode */
+			cp15c15 |= 0x1;
+			arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+	
+			/* Write ICache victim */
+			arm920t_execute_cp15(target, ARMV4_5_MCR(15,0,0,9,1,1), ARMV4_5_LDR(1, 0));
+	
+			/* Read I RAM */
+			arm920t_execute_cp15(target, ARMV4_5_MCR(15,2,0,15,9,2), ARMV4_5_LDMIA(0, 0x1fe, 0, 0));
+			
+			/* Read I CAM */
+			arm920t_execute_cp15(target, ARMV4_5_MCR(15,2,0,15,5,2), ARMV4_5_LDR(9, 0));
+			
+			/* clear interpret mode */
+			cp15c15 &= ~0x1;
+			arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+
+			/* read I RAM and CAM content */
+			arm9tdmi_read_core_regs(target, 0x3fe, regs_p);
+			jtag_execute_queue();
+
+			i_cache[segment][index].cam = regs[9];
+			
+			/* mask LFSR[6] */
+			regs[9] &= 0xfffffffe;
+			fprintf(output, "\nsegment: %i, index: %i, CAM: 0x%8.8x, content (%s):\n", segment, index, regs[9], (regs[9] & 0x10) ? "valid" : "invalid");
+			
+			for (i = 1; i < 9; i++)
+			{
+				 i_cache[segment][index].data[i] = regs[i];
+				 fprintf(output, "%i: 0x%8.8x\n", i-1, regs[i]);
+			}
+	
+		}
+		
+	
+		/* Ra: r0 = index(31:26):SBZ(25:8):segment(7:5):SBZ(4:0) */
+		regs[0] = 0x0 | (segment << 5) | (C15_C_D_Ind << 26);
+		arm9tdmi_write_core_regs(target, 0x1, regs);
+
+		/* set interpret mode */
+		cp15c15 |= 0x1;
+		arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+	
+		/* Write ICache victim */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,0,0,9,1,1), ARMV4_5_LDR(1, 0));
+	
+		/* clear interpret mode */
+		cp15c15 &= ~0x1;
+		arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+	}
+	
+	/* restore CP15 MMU and Cache settings */
+	arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0x1, 0), cp15_ctrl_saved);
+	
+	command_print(cmd_ctx, "cache content successfully output to %s", args[0]);
+	
+	fclose(output);
+	
+	/* mark registers dirty */
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 0).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 1).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 2).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 3).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 4).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 5).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 6).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 7).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 8).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 9).dirty = 1;
+	
+	return ERROR_OK;
+}
+
+int arm920t_handle_read_mmu_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc)
+{
+	target_t *target = get_current_target(cmd_ctx);
+	armv4_5_common_t *armv4_5;
+	arm7_9_common_t *arm7_9;
+	arm9tdmi_common_t *arm9tdmi;
+	arm920t_common_t *arm920t;
+	arm_jtag_t *jtag_info;
+	u32 cp15c15;
+	u32 cp15_ctrl, cp15_ctrl_saved;
+	u32 regs[16];
+	u32 *regs_p[16];
+	int i;
+	FILE *output;
+	u32 Dlockdown, Ilockdown;
+	arm920t_tlb_entry_t d_tlb[64], i_tlb[64];
+	int victim;
+	
+	if (argc != 1)
+	{
+		command_print(cmd_ctx, "usage: arm920t read_mmu <filename>");
+		return ERROR_OK;
+	}
+	
+	if ((output = fopen(args[0], "w")) == NULL)
+	{
+		DEBUG("error opening mmu content file");
+		return ERROR_OK;
+	}
+	
+	for (i = 0; i < 16; i++)
+		regs_p[i] = &regs[i];
+		
+	if (arm920t_get_arch_pointers(target, &armv4_5, &arm7_9, &arm9tdmi, &arm920t) != ERROR_OK)
+	{
+		command_print(cmd_ctx, "current target isn't an ARM920t target");
+		return ERROR_OK;
+	}
+	
+	jtag_info = &arm7_9->jtag_info;
+	
+	/* disable MMU and Caches */
+	arm920t_read_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0x1, 0), &cp15_ctrl);
+	jtag_execute_queue();
+	cp15_ctrl_saved = cp15_ctrl;
+	cp15_ctrl &= ~(ARMV4_5_MMU_ENABLED | ARMV4_5_D_U_CACHE_ENABLED | ARMV4_5_I_CACHE_ENABLED);
+	arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0x1, 0), cp15_ctrl);
+
+	/* read CP15 test state register */ 
+	arm920t_read_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), &cp15c15);
+	jtag_execute_queue();
+
+	/* prepare reading D TLB content 
+	 * */
+	
+	/* set interpret mode */
+	cp15c15 |= 0x1;
+	arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+	
+	/* Read D TLB lockdown */
+	arm920t_execute_cp15(target, ARMV4_5_MRC(15,0,0,10,0,0), ARMV4_5_LDR(1, 0));
+	
+	/* clear interpret mode */
+	cp15c15 &= ~0x1;
+	arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+	
+	/* read D TLB lockdown stored to r1 */
+	arm9tdmi_read_core_regs(target, 0x2, regs_p);
+	jtag_execute_queue();
+	Dlockdown = regs[1];
+	
+	for (victim = 0; victim < 64; victim += 8)
+	{
+		/* new lockdown value: base[31:26]:victim[25:20]:SBZ[19:1]:p[0] 
+		 * base remains unchanged, victim goes through entries 0 to 63 */
+		regs[1] = (Dlockdown & 0xfc000000) | (victim << 20);
+		arm9tdmi_write_core_regs(target, 0x2, regs);
+		
+		/* set interpret mode */
+		cp15c15 |= 0x1;
+		arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+		
+		/* Write D TLB lockdown */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,0,0,10,0,0), ARMV4_5_STR(1, 0));
+	
+		/* Read D TLB CAM */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,4,0,15,6,4), ARMV4_5_LDMIA(0, 0x3fc, 0, 0));
+		
+		/* clear interpret mode */
+		cp15c15 &= ~0x1;
+		arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+		
+		/* read D TLB CAM content stored to r2-r9 */
+		arm9tdmi_read_core_regs(target, 0x3fc, regs_p);
+		jtag_execute_queue();
+		
+		for (i = 0; i < 8; i++)
+			d_tlb[victim + i].cam = regs[i + 2]; 
+	}
+
+	for (victim = 0; victim < 64; victim++)
+	{
+		/* new lockdown value: base[31:26]:victim[25:20]:SBZ[19:1]:p[0] 
+		 * base remains unchanged, victim goes through entries 0 to 63 */
+		regs[1] = (Dlockdown & 0xfc000000) | (victim << 20);
+		arm9tdmi_write_core_regs(target, 0x2, regs);
+		
+		/* set interpret mode */
+		cp15c15 |= 0x1;
+		arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+		
+		/* Write D TLB lockdown */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,0,0,10,0,0), ARMV4_5_STR(1, 0));
+	
+		/* Read D TLB RAM1 */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,4,0,15,10,4), ARMV4_5_LDR(2,0));
+
+		/* Read D TLB RAM2 */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,4,0,15,2,5), ARMV4_5_LDR(3,0));
+		
+		/* clear interpret mode */
+		cp15c15 &= ~0x1;
+		arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+		
+		/* read D TLB RAM content stored to r2 and r3 */
+		arm9tdmi_read_core_regs(target, 0xc, regs_p);
+		jtag_execute_queue();
+
+		d_tlb[victim].ram1 = regs[2]; 
+		d_tlb[victim].ram2 = regs[3]; 
+	}
+		
+	/* restore D TLB lockdown */
+	regs[1] = Dlockdown;
+	arm9tdmi_write_core_regs(target, 0x2, regs);
+	
+	/* Write D TLB lockdown */
+	arm920t_execute_cp15(target, ARMV4_5_MCR(15,0,0,10,0,0), ARMV4_5_STR(1, 0));
+
+	/* prepare reading I TLB content 
+	 * */
+	
+	/* set interpret mode */
+	cp15c15 |= 0x1;
+	arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+	
+	/* Read I TLB lockdown */
+	arm920t_execute_cp15(target, ARMV4_5_MRC(15,0,0,10,0,1), ARMV4_5_LDR(1, 0));
+	
+	/* clear interpret mode */
+	cp15c15 &= ~0x1;
+	arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+	
+	/* read I TLB lockdown stored to r1 */
+	arm9tdmi_read_core_regs(target, 0x2, regs_p);
+	jtag_execute_queue();
+	Ilockdown = regs[1];
+	
+	for (victim = 0; victim < 64; victim += 8)
+	{
+		/* new lockdown value: base[31:26]:victim[25:20]:SBZ[19:1]:p[0] 
+		 * base remains unchanged, victim goes through entries 0 to 63 */
+		regs[1] = (Ilockdown & 0xfc000000) | (victim << 20);
+		arm9tdmi_write_core_regs(target, 0x2, regs);
+		
+		/* set interpret mode */
+		cp15c15 |= 0x1;
+		arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+		
+		/* Write I TLB lockdown */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,0,0,10,0,1), ARMV4_5_STR(1, 0));
+	
+		/* Read I TLB CAM */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,4,0,15,5,4), ARMV4_5_LDMIA(0, 0x3fc, 0, 0));
+		
+		/* clear interpret mode */
+		cp15c15 &= ~0x1;
+		arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+		
+		/* read I TLB CAM content stored to r2-r9 */
+		arm9tdmi_read_core_regs(target, 0x3fc, regs_p);
+		jtag_execute_queue();
+		
+		for (i = 0; i < 8; i++)
+			i_tlb[i + victim].cam = regs[i + 2]; 
+	}
+
+	for (victim = 0; victim < 64; victim++)
+	{
+		/* new lockdown value: base[31:26]:victim[25:20]:SBZ[19:1]:p[0] 
+		 * base remains unchanged, victim goes through entries 0 to 63 */
+		regs[1] = (Dlockdown & 0xfc000000) | (victim << 20);
+		arm9tdmi_write_core_regs(target, 0x2, regs);
+		
+		/* set interpret mode */
+		cp15c15 |= 0x1;
+		arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0xf, 0), cp15c15);
+		
+		/* Write I TLB lockdown */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,0,0,10,0,1), ARMV4_5_STR(1, 0));
+	
+		/* Read I TLB RAM1 */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,4,0,15,9,4), ARMV4_5_LDR(2,0));
+
+		/* Read I TLB RAM2 */
+		arm920t_execute_cp15(target, ARMV4_5_MCR(15,4,0,15,1,5), ARMV4_5_LDR(3,0));
+		
+		/* clear interpret mode */
+		cp15c15 &= ~0x1;
+		arm920t_write_cp15_physical(target, 0x1e, cp15c15);
+		
+		/* read I TLB RAM content stored to r2 and r3 */
+		arm9tdmi_read_core_regs(target, 0xc, regs_p);
+		jtag_execute_queue();
+
+		i_tlb[victim].ram1 = regs[2]; 
+		i_tlb[victim].ram2 = regs[3]; 
+	}
+		
+	/* restore I TLB lockdown */
+	regs[1] = Ilockdown;
+	arm9tdmi_write_core_regs(target, 0x2, regs);
+	
+	/* Write I TLB lockdown */
+	arm920t_execute_cp15(target, ARMV4_5_MCR(15,0,0,10,0,1), ARMV4_5_STR(1, 0));
+	
+	/* restore CP15 MMU and Cache settings */
+	arm920t_write_cp15_physical(target, ARM920T_CP15_PHYS_ADDR(0, 0x1, 0), cp15_ctrl_saved);
+
+	/* output data to file */	
+	fprintf(output, "D TLB content:\n");
+	for (i = 0; i < 64; i++)
+	{
+		fprintf(output, "%i: 0x%8.8x 0x%8.8x 0x%8.8x %s\n", i, d_tlb[i].cam, d_tlb[i].ram1, d_tlb[i].ram2, (d_tlb[i].cam & 0x20) ? "(valid)" : "(invalid)");
+	}
+
+	fprintf(output, "\n\nI TLB content:\n");
+	for (i = 0; i < 64; i++)
+	{
+		fprintf(output, "%i: 0x%8.8x 0x%8.8x 0x%8.8x %s\n", i, i_tlb[i].cam, i_tlb[i].ram1, i_tlb[i].ram2, (i_tlb[i].cam & 0x20) ? "(valid)" : "(invalid)");
+	}
+	
+	command_print(cmd_ctx, "mmu content successfully output to %s", args[0]);
+	
+	fclose(output);
+	
+	/* mark registers dirty */
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 0).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 1).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 2).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 3).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 4).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 5).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 6).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 7).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 8).dirty = 1;
+	ARMV4_5_CORE_REG_MODE(armv4_5->core_cache, armv4_5->core_mode, 9).dirty = 1;
+	
+	return ERROR_OK;
+}
 int arm920t_handle_cp15_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc)
 {
 	int retval;
@@ -842,7 +1345,7 @@ int arm920t_handle_cp15i_command(struct command_context_s *cmd_ctx, char *cmd, c
 		if (argc == 1)
 		{
 			u32 value;
-			if ((retval = arm920t_read_cp15_interpreted(target, opcode, &value)) != ERROR_OK)
+			if ((retval = arm920t_read_cp15_interpreted(target, opcode, 0x0, &value)) != ERROR_OK)
 			{
 				command_print(cmd_ctx, "couldn't execute %8.8x", opcode);
 				return ERROR_OK;
@@ -872,6 +1375,10 @@ int arm920t_handle_cp15i_command(struct command_context_s *cmd_ctx, char *cmd, c
 			command_print(cmd_ctx, "%8.8x: %8.8x %8.8x", opcode, value, address);
 		}
 	}
+	else
+	{
+		command_print(cmd_ctx, "usage: arm920t cp15i <opcode> [value] [address]");
+	}
 
 	return ERROR_OK;
 }
diff --git a/src/target/arm920t.h b/src/target/arm920t.h
index bedf31f2..e51e651d 100644
--- a/src/target/arm920t.h
+++ b/src/target/arm920t.h
@@ -40,6 +40,20 @@ typedef struct arm920t_common_s
 	u32 i_fsr;
 	u32 d_far;
 	u32 i_far;
+	int preserve_cache;
 } arm920t_common_t;
 
+typedef struct arm920t_cache_line_s
+{
+	u32 cam;
+	u32 data[8];
+} arm920t_cache_line_t;
+
+typedef struct arm920t_tlb_entry_s
+{
+	u32 cam;
+	u32 ram1;
+	u32 ram2;
+} arm920t_tlb_entry_t;
+
 #endif /* ARM920T_H */
diff --git a/src/target/arm9tdmi.c b/src/target/arm9tdmi.c
index be01dd60..c99fcb2a 100644
--- a/src/target/arm9tdmi.c
+++ b/src/target/arm9tdmi.c
@@ -46,11 +46,6 @@ int arm9tdmi_register_commands(struct command_context_s *cmd_ctx);
 int arm9tdmi_target_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc, struct target_s *target);
 int arm9tdmi_init_target(struct command_context_s *cmd_ctx, struct target_s *target);
 int arm9tdmi_quit();
-
-/* target function declarations */
-enum target_state arm9tdmi_poll(struct target_s *target);
-int arm9tdmi_halt(target_t *target);
-int arm9tdmi_read_memory(struct target_s *target, u32 address, u32 size, u32 count, u8 *buffer);
 		
 target_type_t arm9tdmi_target =
 {
@@ -169,8 +164,7 @@ int arm9tdmi_clock_out(arm_jtag_t *jtag_info, u32 instr, u32 out, u32 *in, int s
 	/* prepare buffer */
 	buf_set_u32(out_buf, 0, 32, out);
 	
-	instr = flip_u32(instr, 32);
-	buf_set_u32(instr_buf, 0, 32, instr);
+	buf_set_u32(instr_buf, 0, 32, flip_u32(instr, 32));
 	
 	if (sysspeed)
 		buf_set_u32(&sysspeed_buf, 2, 1, 1);
@@ -183,17 +177,19 @@ int arm9tdmi_clock_out(arm_jtag_t *jtag_info, u32 instr, u32 out, u32 *in, int s
 	fields[0].num_bits = 32;
 	fields[0].out_value = out_buf;
 	fields[0].out_mask = NULL;
+	fields[0].in_value = NULL;
 	if (in)
 	{
-		fields[0].in_value = (u8*)in;
-	} else
+		fields[0].in_handler = arm_jtag_buf_to_u32;
+		fields[0].in_handler_priv = in;
+	}
+	else
 	{
-		fields[0].in_value = NULL;
+		fields[0].in_handler = NULL;
+		fields[0].in_handler_priv = NULL;
 	}
 	fields[0].in_check_value = NULL;
 	fields[0].in_check_mask = NULL;
-	fields[0].in_handler = NULL;
-	fields[0].in_handler_priv = NULL;
 	
 	fields[1].device = jtag_info->chain_pos;
 	fields[1].num_bits = 3;
@@ -221,17 +217,14 @@ int arm9tdmi_clock_out(arm_jtag_t *jtag_info, u32 instr, u32 out, u32 *in, int s
 	
 #ifdef _DEBUG_INSTRUCTION_EXECUTION_
 	{
-		char* in_string;
 		jtag_execute_queue();
 		
 		if (in)
 		{
-			in_string = buf_to_char((u8*)in, 32);
-			DEBUG("instr: 0x%8.8x, out: 0x%8.8x, in: %s", flip_u32(instr, 32), out, in_string);
-			free(in_string);
+			DEBUG("instr: 0x%8.8x, out: 0x%8.8x, in: 0x%8.8x", instr, out, *in);
 		}
 		else
-			DEBUG("instr: 0x%8.8x, out: 0x%8.8x", flip_u32(instr, 32), out);
+			DEBUG("instr: 0x%8.8x, out: 0x%8.8x", instr, out);
 	}
 #endif
 
@@ -251,9 +244,84 @@ int arm9tdmi_clock_data_in(arm_jtag_t *jtag_info, u32 *in)
 	fields[0].num_bits = 32;
 	fields[0].out_value = NULL;
 	fields[0].out_mask = NULL;
-	fields[0].in_value = (u8*)in;
-	fields[0].in_handler = NULL;
-	fields[0].in_handler_priv = NULL;
+	fields[0].in_value = NULL;
+	fields[0].in_handler = arm_jtag_buf_to_u32;
+	fields[0].in_handler_priv = in;
+	fields[0].in_check_value = NULL;
+	fields[0].in_check_mask = NULL;
+	
+	fields[1].device = jtag_info->chain_pos;
+	fields[1].num_bits = 3;
+	fields[1].out_value = NULL;
+	fields[1].out_mask = NULL;
+	fields[1].in_value = NULL;
+	fields[1].in_handler = NULL;
+	fields[1].in_handler_priv = NULL;
+	fields[1].in_check_value = NULL;
+	fields[1].in_check_mask = NULL;
+
+	fields[2].device = jtag_info->chain_pos;
+	fields[2].num_bits = 32;
+	fields[2].out_value = NULL;
+	fields[2].out_mask = NULL;
+	fields[2].in_value = NULL;
+	fields[2].in_check_value = NULL;
+	fields[2].in_check_mask = NULL;
+	fields[2].in_handler = NULL;
+	fields[2].in_handler_priv = NULL;
+	
+	jtag_add_dr_scan(3, fields, -1);
+
+	jtag_add_runtest(0, -1);
+	
+#ifdef _DEBUG_INSTRUCTION_EXECUTION_
+	{
+		jtag_execute_queue();
+			
+		if (in)
+		{
+			DEBUG("in: 0x%8.8x", *in);
+		}
+		else
+		{
+			ERROR("BUG: called with in == NULL");
+		}
+	}
+#endif
+
+	return ERROR_OK;
+}
+
+/* clock the target, and read the databus
+ * the *in pointer points to a buffer where elements of 'size' bytes
+ * are stored in big (be==1) or little (be==0) endianness
+ */
+int arm9tdmi_clock_data_in_endianness(arm_jtag_t *jtag_info, void *in, int size, int be)
+{
+	scan_field_t fields[3];
+
+	jtag_add_end_state(TAP_PD);
+	arm_jtag_scann(jtag_info, 0x1);
+	arm_jtag_set_instr(jtag_info, jtag_info->intest_instr);
+		
+	fields[0].device = jtag_info->chain_pos;
+	fields[0].num_bits = 32;
+	fields[0].out_value = NULL;
+	fields[0].out_mask = NULL;
+	fields[0].in_value = NULL;
+	switch (size)
+	{
+		case 4:
+			fields[0].in_handler = (be) ? arm_jtag_buf_to_be32 : arm_jtag_buf_to_le32;
+			break;
+		case 2:
+			fields[0].in_handler = (be) ? arm_jtag_buf_to_be16 : arm_jtag_buf_to_le16;
+			break;
+		case 1:
+			fields[0].in_handler = arm_jtag_buf_to_8;
+			break;
+	}
+	fields[0].in_handler_priv = in;
 	fields[0].in_check_value = NULL;
 	fields[0].in_check_mask = NULL;
 	
@@ -283,14 +351,15 @@ int arm9tdmi_clock_data_in(arm_jtag_t *jtag_info, u32 *in)
 	
 #ifdef _DEBUG_INSTRUCTION_EXECUTION_
 	{
-		char* in_string;
 		jtag_execute_queue();
 			
 		if (in)
 		{
-			in_string = buf_to_char((u8*)in, 32);
-			DEBUG("in: %s", in_string);
-			free(in_string);
+			DEBUG("in: 0x%8.8x", *in);
+		}
+		else
+		{
+			ERROR("BUG: called with in == NULL");
 		}
 	}
 #endif
@@ -372,6 +441,48 @@ void arm9tdmi_read_core_regs(target_t *target, u32 mask, u32* core_regs[16])
 
 }
 
+void arm9tdmi_read_core_regs_target_buffer(target_t *target, u32 mask, void* buffer, int size)
+{
+	int i;
+	/* get pointers to arch-specific information */
+	armv4_5_common_t *armv4_5 = target->arch_info;
+	arm7_9_common_t *arm7_9 = armv4_5->arch_info;
+	arm_jtag_t *jtag_info = &arm7_9->jtag_info;
+	int be = (target->endianness == TARGET_BIG_ENDIAN) ? 1 : 0;
+	u32 *buf_u32 = buffer;
+	u16 *buf_u16 = buffer;
+	u8 *buf_u8 = buffer;
+	
+	/* STMIA r0-15, [r0] at debug speed
+	 * register values will start to appear on 4th DCLK
+	 */
+	arm9tdmi_clock_out(jtag_info, ARMV4_5_STMIA(0, mask & 0xffff, 0, 0), 0, NULL, 0);
+
+	/* fetch NOP, STM in DECODE stage */
+	arm9tdmi_clock_out(jtag_info, ARMV4_5_NOP, 0, NULL, 0);
+	/* fetch NOP, STM in EXECUTE stage (1st cycle) */
+	arm9tdmi_clock_out(jtag_info, ARMV4_5_NOP, 0, NULL, 0);
+
+	for (i = 0; i <= 15; i++)
+	{
+		if (mask & (1 << i))
+			/* nothing fetched, STM in MEMORY (i'th cycle) */
+			switch (size)
+			{
+				case 4:
+					arm9tdmi_clock_data_in_endianness(jtag_info, buf_u32++, 4, be);
+					break;
+				case 2:
+					arm9tdmi_clock_data_in_endianness(jtag_info, buf_u16++, 2, be);
+					break;
+				case 1:
+					arm9tdmi_clock_data_in_endianness(jtag_info, buf_u8++, 1, be);
+					break;
+			}
+	}
+
+}
+
 void arm9tdmi_read_xpsr(target_t *target, u32 *xpsr, int spsr)
 {
 	/* get pointers to arch-specific information */
@@ -711,11 +822,13 @@ void arm9tdmi_build_reg_cache(target_t *target)
 	arm_jtag_t *jtag_info = &arm7_9->jtag_info;
 	arm9tdmi_common_t *arm9tdmi = arm7_9->arch_info;
 
+	embeddedice_reg_t *vec_catch_arch_info;
 
 	(*cache_p) = armv4_5_build_reg_cache(target, armv4_5);
 	armv4_5->core_cache = (*cache_p);
 	
-	(*cache_p)->next = embeddedice_build_reg_cache(target, jtag_info, 0);
+	/* one extra register (vector catch) */
+	(*cache_p)->next = embeddedice_build_reg_cache(target, jtag_info, 1);
 	arm7_9->eice_cache = (*cache_p)->next;
 	
 	if (arm9tdmi->has_monitor_mode)
@@ -725,6 +838,16 @@ void arm9tdmi_build_reg_cache(target_t *target)
 	
 	(*cache_p)->next->reg_list[EICE_DBG_STAT].size = 5;
 
+	(*cache_p)->next->reg_list[EICE_VEC_CATCH].name = "vector catch";
+	(*cache_p)->next->reg_list[EICE_VEC_CATCH].dirty = 0;
+	(*cache_p)->next->reg_list[EICE_VEC_CATCH].valid = 0;
+	(*cache_p)->next->reg_list[EICE_VEC_CATCH].bitfield_desc = NULL;
+	(*cache_p)->next->reg_list[EICE_VEC_CATCH].num_bitfields = 0;
+	(*cache_p)->next->reg_list[EICE_VEC_CATCH].size = 8;
+	(*cache_p)->next->reg_list[EICE_VEC_CATCH].value = calloc(1, 4);
+	vec_catch_arch_info = (*cache_p)->next->reg_list[EICE_VEC_CATCH].arch_info;
+	vec_catch_arch_info->addr = 0x2;
+	
 }
 
 int arm9tdmi_init_target(struct command_context_s *cmd_ctx, struct target_s *target)
@@ -758,6 +881,7 @@ int arm9tdmi_init_arch_info(target_t *target, arm9tdmi_common_t *arm9tdmi, int c
 	arm7_9->examine_debug_reason = arm9tdmi_examine_debug_reason;
 	arm7_9->change_to_arm = arm9tdmi_change_to_arm;
 	arm7_9->read_core_regs = arm9tdmi_read_core_regs;
+	arm7_9->read_core_regs_target_buffer = arm9tdmi_read_core_regs_target_buffer;
 	arm7_9->read_xpsr = arm9tdmi_read_xpsr;
 	
 	arm7_9->write_xpsr = arm9tdmi_write_xpsr;
@@ -793,7 +917,6 @@ int arm9tdmi_init_arch_info(target_t *target, arm9tdmi_common_t *arm9tdmi, int c
 	arm7_9->sw_bkpts_enabled = 0;
 	arm7_9->dbgreq_adjust_pc = 3;
 	arm7_9->arch_info = arm9tdmi;
-	arm7_9->use_dbgrq = 1;
 	
 	arm9tdmi->common_magic = ARM9TDMI_COMMON_MAGIC;
 	arm9tdmi->has_monitor_mode = 0;
@@ -814,6 +937,9 @@ int arm9tdmi_init_arch_info(target_t *target, arm9tdmi_common_t *arm9tdmi, int c
 		arm9tdmi->variant = strdup("");
 	
 	arm7_9_init_arch_info(target, arm7_9);
+
+	/* override use of DBGRQ, this is safe on ARM9TDMI */
+	arm7_9->use_dbgrq = 1;
 	
 	return ERROR_OK;
 }
diff --git a/src/target/arm_jtag.c b/src/target/arm_jtag.c
index ccc1adf4..3bb8a32f 100644
--- a/src/target/arm_jtag.c
+++ b/src/target/arm_jtag.c
@@ -108,11 +108,91 @@ int arm_jtag_setup_connection(arm_jtag_t *jtag_info)
 	return ERROR_OK;
 }
 
+/* read JTAG buffer into host-endian u32, flipping bit-order */
 int arm_jtag_buf_to_u32_flip(u8 *in_buf, void *priv)
 {
 	u32 *dest = priv;
-	
-	*dest = flip_u32(buf_get_u32(in_buf, 0, 32), 32);
-	
+	*dest = flip_u32(le_to_h_u32(in_buf), 32);
+	return ERROR_OK;
+}
+
+/* read JTAG buffer into little-endian u32, flipping bit-order */
+int arm_jtag_buf_to_le32_flip(u8 *in_buf, void *priv)
+{
+	h_u32_to_le(((u8*)priv), flip_u32(le_to_h_u32(in_buf), 32));
+	return ERROR_OK;
+}
+
+/* read JTAG buffer into little-endian u16, flipping bit-order */
+int arm_jtag_buf_to_le16_flip(u8 *in_buf, void *priv)
+{
+	h_u16_to_le(((u8*)priv), flip_u32(le_to_h_u32(in_buf), 32) & 0xffff);
+	return ERROR_OK;
+}
+
+/* read JTAG buffer into big-endian u32, flipping bit-order */
+int arm_jtag_buf_to_be32_flip(u8 *in_buf, void *priv)
+{
+	h_u32_to_be(((u8*)priv), flip_u32(le_to_h_u32(in_buf), 32));
+	return ERROR_OK;
+}
+
+/* read JTAG buffer into big-endian u16, flipping bit-order */
+int arm_jtag_buf_to_be16_flip(u8 *in_buf, void *priv)
+{
+	h_u16_to_be(((u8*)priv), flip_u32(le_to_h_u32(in_buf), 32) & 0xffff);
+	return ERROR_OK;
+}
+
+/* read JTAG buffer into u8, flipping bit-order */
+int arm_jtag_buf_to_8_flip(u8 *in_buf, void *priv)
+{
+	u8 *dest = priv;
+	*dest = flip_u32(le_to_h_u32(in_buf), 32) & 0xff;
+	return ERROR_OK;
+}
+
+/* not-flipping variants */
+/* read JTAG buffer into host-endian u32 */
+int arm_jtag_buf_to_u32(u8 *in_buf, void *priv)
+{
+	u32 *dest = priv;
+	*dest = le_to_h_u32(in_buf);
+	return ERROR_OK;
+}
+
+/* read JTAG buffer into little-endian u32 */
+int arm_jtag_buf_to_le32(u8 *in_buf, void *priv)
+{
+	h_u32_to_le(((u8*)priv), le_to_h_u32(in_buf));
+	return ERROR_OK;
+}
+
+/* read JTAG buffer into little-endian u16 */
+int arm_jtag_buf_to_le16(u8 *in_buf, void *priv)
+{
+	h_u16_to_le(((u8*)priv), le_to_h_u32(in_buf) & 0xffff);
+	return ERROR_OK;
+}
+
+/* read JTAG buffer into big-endian u32 */
+int arm_jtag_buf_to_be32(u8 *in_buf, void *priv)
+{
+	h_u32_to_be(((u8*)priv), le_to_h_u32(in_buf));
+	return ERROR_OK;
+}
+
+/* read JTAG buffer into big-endian u16 */
+int arm_jtag_buf_to_be16(u8 *in_buf, void *priv)
+{
+	h_u16_to_be(((u8*)priv), le_to_h_u32(in_buf) & 0xffff);
+	return ERROR_OK;
+}
+
+/* read JTAG buffer into u8 */
+int arm_jtag_buf_to_8(u8 *in_buf, void *priv)
+{
+	u8 *dest = priv;
+	*dest = le_to_h_u32(in_buf) & 0xff;
 	return ERROR_OK;
 }
diff --git a/src/target/arm_jtag.h b/src/target/arm_jtag.h
index a9f90846..6e523fba 100644
--- a/src/target/arm_jtag.h
+++ b/src/target/arm_jtag.h
@@ -35,8 +35,23 @@ typedef struct arm_jtag_s
 
 extern int arm_jtag_set_instr(arm_jtag_t *jtag_info, u32 new_instr);
 extern int arm_jtag_scann(arm_jtag_t *jtag_info, u32 new_scan_chain);
-extern int arm_jtag_buf_to_u32_flip(u8 *in_buf, void *priv);
 extern int arm_jtag_setup_connection(arm_jtag_t *jtag_info);
 
+/* JTAG buffers to host, be and le buffers, flipping variants */
+int arm_jtag_buf_to_u32_flip(u8 *in_buf, void *priv);
+int arm_jtag_buf_to_le32_flip(u8 *in_buf, void *priv);
+int arm_jtag_buf_to_le16_flip(u8 *in_buf, void *priv);
+int arm_jtag_buf_to_be32_flip(u8 *in_buf, void *priv);
+int arm_jtag_buf_to_be16_flip(u8 *in_buf, void *priv);
+int arm_jtag_buf_to_8_flip(u8 *in_buf, void *priv);
+
+/* JTAG buffers to host, be and le buffers */
+int arm_jtag_buf_to_u32(u8 *in_buf, void *priv);
+int arm_jtag_buf_to_le32(u8 *in_buf, void *priv);
+int arm_jtag_buf_to_le16(u8 *in_buf, void *priv);
+int arm_jtag_buf_to_be32(u8 *in_buf, void *priv);
+int arm_jtag_buf_to_be16(u8 *in_buf, void *priv);
+int arm_jtag_buf_to_8(u8 *in_buf, void *priv);
+
 #endif /* ARM_JTAG */
 
diff --git a/src/target/armv4_5.c b/src/target/armv4_5.c
index 9ee0577d..86d5dc89 100644
--- a/src/target/armv4_5.c
+++ b/src/target/armv4_5.c
@@ -392,7 +392,7 @@ int handle_armv4_5_disassemble_command(struct command_context_s *cmd_ctx, char *
 	int i;
 	arm_instruction_t cur_instruction;
 	u32 opcode;
-	int thumb;
+	int thumb = 0;
 	
 	if (armv4_5->common_magic != ARMV4_5_COMMON_MAGIC)
 	{
@@ -415,7 +415,7 @@ int handle_armv4_5_disassemble_command(struct command_context_s *cmd_ctx, char *
 	
 	for (i = 0; i < count; i++)
 	{
-		target->type->read_memory(target, address, 4, 1, (u8*)&opcode);
+		target_read_u32(target, address, &opcode);
 		evaluate_opcode(opcode, address, &cur_instruction);
 		command_print(cmd_ctx, "%s", cur_instruction.text);
 		address += (thumb) ? 2 : 4;
diff --git a/src/target/armv4_5.h b/src/target/armv4_5.h
index a28bfa12..ee37723e 100644
--- a/src/target/armv4_5.h
+++ b/src/target/armv4_5.h
@@ -193,6 +193,27 @@ extern int armv4_5_invalidate_core_regs(target_t *target);
  */
 #define ARMV4_5_BX(Rm) (0xe12fff10 | Rm)
 
+/* Move to ARM register from coprocessor
+ * CP: Coprocessor number
+ * op1: Coprocessor opcode
+ * Rd: destination register
+ * CRn: first coprocessor operand
+ * CRm: second coprocessor operand
+ * op2: Second coprocessor opcode
+ */
+#define ARMV4_5_MRC(CP, op1, Rd, CRn, CRm, op2) (0xee100010 | CRm | (op2 << 5) | (CP << 8) | (Rd << 12) | (CRn << 16) | (op1 << 21)) 
+
+/* Move to coprocessor from ARM register
+ * CP: Coprocessor number
+ * op1: Coprocessor opcode
+ * Rd: destination register
+ * CRn: first coprocessor operand
+ * CRm: second coprocessor operand
+ * op2: Second coprocessor opcode
+ */
+#define ARMV4_5_MCR(CP, op1, Rd, CRn, CRm, op2) (0xee000010 | CRm | (op2 << 5) | (CP << 8) | (Rd << 12) | (CRn << 16) | (op1 << 21)) 
+
+
 /* Thumb mode instructions
  */
  
diff --git a/src/target/armv4_5_cache.h b/src/target/armv4_5_cache.h
index 766718b6..03b95935 100644
--- a/src/target/armv4_5_cache.h
+++ b/src/target/armv4_5_cache.h
@@ -46,4 +46,12 @@ extern int armv4_5_cache_state(u32 cp15_control_reg, armv4_5_cache_common_t *cac
 
 extern int armv4_5_handle_cache_info_command(struct command_context_s *cmd_ctx, armv4_5_cache_common_t *armv4_5_cache);
 
+enum
+{
+	ARMV4_5_D_U_CACHE_ENABLED = 0x4,
+	ARMV4_5_I_CACHE_ENABLED = 0x1000,
+	ARMV4_5_WRITE_BUFFER_ENABLED = 0x8,
+	ARMV4_5_CACHE_RR_BIT = 0x5000,
+};
+
 #endif /* ARMV4_5_CACHE_H */
diff --git a/src/target/armv4_5_mmu.c b/src/target/armv4_5_mmu.c
index 269f9d57..b888933c 100644
--- a/src/target/armv4_5_mmu.c
+++ b/src/target/armv4_5_mmu.c
@@ -25,6 +25,7 @@
 #include "log.h"
 #include "command.h"
 #include "armv4_5_mmu.h"
+#include "target.h"
 
 #include <stdlib.h>
 
@@ -46,6 +47,7 @@ u32 armv4_5_mmu_translate_va(target_t *target, armv4_5_mmu_common_t *armv4_5_mmu
 	armv4_5_mmu_read_physical(target, armv4_5_mmu,
 		(ttb & 0xffffc000) | ((va & 0xfff00000) >> 18),
 		4, 1, (u8*)&first_lvl_descriptor);
+	first_lvl_descriptor = target_buffer_get_u32(target, (u8*)&first_lvl_descriptor);
 
 	DEBUG("1st lvl desc: %8.8x", first_lvl_descriptor);
 
@@ -80,16 +82,17 @@ u32 armv4_5_mmu_translate_va(target_t *target, armv4_5_mmu_common_t *armv4_5_mmu
 			(first_lvl_descriptor & 0xfffffc00) | ((va & 0x000ff000) >> 10),
 			4, 1, (u8*)&second_lvl_descriptor);
 	}
-
-	if ((first_lvl_descriptor & 0x3) == 3)
+	else if ((first_lvl_descriptor & 0x3) == 3)
 	{
 		/* fine page table */
 		armv4_5_mmu_read_physical(target, armv4_5_mmu,
 			(first_lvl_descriptor & 0xfffff000) | ((va & 0x000ffc00) >> 8),
 			4, 1, (u8*)&second_lvl_descriptor);
 	}
-
-	DEBUG("2nd lvl desc: %8.8x", first_lvl_descriptor);
+	
+	second_lvl_descriptor = target_buffer_get_u32(target, (u8*)&second_lvl_descriptor);
+	
+	DEBUG("2nd lvl desc: %8.8x", second_lvl_descriptor);
 
 	if ((second_lvl_descriptor & 0x3) == 0)
 	{
@@ -286,17 +289,17 @@ int armv4_5_mmu_handle_md_phys_command(command_context_t *cmd_ctx, char *cmd, ch
 		switch (size)
 		{
 			case 4:
-				output_len += snprintf(output + output_len, 128 - output_len, "%8.8x ", ((u32*)buffer)[i]);
+				output_len += snprintf(output + output_len, 128 - output_len, "%8.8x ", target_buffer_get_u32(target, &buffer[i*4]));
 				break;
 			case 2:
-				output_len += snprintf(output + output_len, 128 - output_len, "%4.4x ", ((u16*)buffer)[i]);
+				output_len += snprintf(output + output_len, 128 - output_len, "%4.4x ", target_buffer_get_u16(target, &buffer[i*2]));
 				break;
 			case 1:
-				output_len += snprintf(output + output_len, 128 - output_len, "%2.2x ", ((u8*)buffer)[i]);
+				output_len += snprintf(output + output_len, 128 - output_len, "%2.2x ", buffer[i*1]);
 				break;
 		}
 
-		if ((i%8 == 7) || (i == count - 1))
+		if ((i % 8 == 7) || (i == count - 1))
 		{
 			command_print(cmd_ctx, output);
 			output_len = 0;
@@ -313,6 +316,7 @@ int armv4_5_mmu_handle_mw_phys_command(command_context_t *cmd_ctx, char *cmd, ch
 	u32 address = 0;
 	u32 value = 0;
 	int retval;
+	u8 value_buf[4];
 
 	if (target->state != TARGET_HALTED)
 	{
@@ -329,13 +333,16 @@ int armv4_5_mmu_handle_mw_phys_command(command_context_t *cmd_ctx, char *cmd, ch
 	switch (cmd[2])
 	{
 		case 'w':
-			retval = armv4_5_mmu_write_physical(target, armv4_5_mmu, address, 4, 1, (u8*)&value);
+			target_buffer_set_u32(target, value_buf, value);
+			retval = armv4_5_mmu_write_physical(target, armv4_5_mmu, address, 4, 1, value_buf);
 			break;
 		case 'h':
-			retval = armv4_5_mmu_write_physical(target, armv4_5_mmu, address, 2, 1, (u8*)&value);
+			target_buffer_set_u16(target, value_buf, value);
+			retval = armv4_5_mmu_write_physical(target, armv4_5_mmu, address, 2, 1, value_buf);
 			break;
 		case 'b':
-			retval = armv4_5_mmu_write_physical(target, armv4_5_mmu, address, 1, 1, (u8*)&value);
+			value_buf[0] = value;
+			retval = armv4_5_mmu_write_physical(target, armv4_5_mmu, address, 1, 1, value_buf);
 			break;
 		default:
 			return ERROR_OK;
diff --git a/src/target/armv4_5_mmu.h b/src/target/armv4_5_mmu.h
index 3adb30e2..b0a87ebb 100644
--- a/src/target/armv4_5_mmu.h
+++ b/src/target/armv4_5_mmu.h
@@ -49,4 +49,12 @@ extern int armv4_5_mmu_handle_virt2phys_command(struct command_context_s *cmd_ct
 extern int armv4_5_mmu_handle_md_phys_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc, target_t *target, armv4_5_mmu_common_t *armv4_5_mmu);
 extern int armv4_5_mmu_handle_mw_phys_command(struct command_context_s *cmd_ctx, char *cmd, char **args, int argc, target_t *target, armv4_5_mmu_common_t *armv4_5_mmu);
 
+enum
+{
+	ARMV4_5_MMU_ENABLED = 0x1,
+	ARMV4_5_ALIGNMENT_CHECK = 0x2,
+	ARMV4_5_MMU_S_BIT = 0x100,
+	ARMV4_5_MMU_R_BIT = 0x200
+};
+
 #endif /* ARMV4_5_MMU_H */
diff --git a/src/target/embeddedice.c b/src/target/embeddedice.c
index 891f9e52..0cb4e017 100644
--- a/src/target/embeddedice.c
+++ b/src/target/embeddedice.c
@@ -124,6 +124,7 @@ reg_cache_t* embeddedice_build_reg_cache(target_t *target, arm_jtag_t *jtag_info
 	if (extra_reg)
 	{
 		reg_list[num_regs - 1].arch_info = &arch_info[num_regs - 1];
+		reg_list[num_regs - 1].arch_type = embeddedice_reg_arch_type;
 		arch_info[num_regs - 1].jtag_info = jtag_info;
 	}
 	
diff --git a/src/target/embeddedice.h b/src/target/embeddedice.h
index e038f92d..8631e7b9 100644
--- a/src/target/embeddedice.h
+++ b/src/target/embeddedice.h
@@ -1,5 +1,5 @@
 /***************************************************************************
- *   Copyright (C) 2005 by Dominic Rath                                    *
+ *   Copyright (C) 2005, 2006 by Dominic Rath                                    *
  *   Dominic.Rath@gmx.de                                                   *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
@@ -41,7 +41,9 @@ enum
 	EICE_W1_DATA_VALUE = 12,
 	EICE_W1_DATA_MASK = 13,
 	EICE_W1_CONTROL_VALUE = 14,
-	EICE_W1_CONTROL_MASK = 15
+	EICE_W1_CONTROL_MASK = 15,
+	EICE_ABT_STATUS = 16,
+	EICE_VEC_CATCH = 16
 };
 
 enum
diff --git a/src/target/target.c b/src/target/target.c
index 2f5126df..f1229d86 100644
--- a/src/target/target.c
+++ b/src/target/target.c
@@ -723,6 +723,50 @@ int target_read_buffer(struct target_s *target, u32 address, u32 size, u8 *buffe
 	return ERROR_OK;
 }
 
+void target_read_u32(struct target_s *target, u32 address, u32 *value)
+{
+	u8 value_buf[4];
+	
+	target->type->read_memory(target, address, 4, 1, value_buf);
+	
+	*value = target_buffer_get_u32(target, value_buf);
+}
+
+void target_read_u16(struct target_s *target, u32 address, u16 *value)
+{
+	u8 value_buf[2];
+	
+	target->type->read_memory(target, address, 2, 1, value_buf);
+	
+	*value = target_buffer_get_u16(target, value_buf);
+}
+
+void target_read_u8(struct target_s *target, u32 address, u8 *value)
+{
+	target->type->read_memory(target, address, 1, 1, value);
+}
+
+void target_write_u32(struct target_s *target, u32 address, u32 value)
+{
+	u8 value_buf[4];
+
+	target_buffer_set_u32(target, value_buf, value);	
+	target->type->write_memory(target, address, 4, 1, value_buf);
+}
+
+void target_write_u16(struct target_s *target, u32 address, u16 value)
+{
+	u8 value_buf[2];
+	
+	target_buffer_set_u16(target, value_buf, value);	
+	target->type->write_memory(target, address, 2, 1, value_buf);
+}
+
+void target_write_u8(struct target_s *target, u32 address, u8 value)
+{
+	target->type->read_memory(target, address, 1, 1, &value);
+}
+
 int target_register_user_commands(struct command_context_s *cmd_ctx)
 {
 	register_command(cmd_ctx,  NULL, "reg", handle_reg_command, COMMAND_EXEC, NULL);
@@ -1421,6 +1465,7 @@ int handle_md_command(struct command_context_s *cmd_ctx, char *cmd, char **args,
 				command_print(cmd_ctx, "error: unknown error");
 				break;
 		}
+		return ERROR_OK;
 	}
 
 	output_len = 0;
@@ -1433,13 +1478,13 @@ int handle_md_command(struct command_context_s *cmd_ctx, char *cmd, char **args,
 		switch (size)
 		{
 			case 4:
-				output_len += snprintf(output + output_len, 128 - output_len, "%8.8x ", ((u32*)buffer)[i]);
+				output_len += snprintf(output + output_len, 128 - output_len, "%8.8x ", target_buffer_get_u32(target, &buffer[i*4]));
 				break;
 			case 2:
-				output_len += snprintf(output + output_len, 128 - output_len, "%4.4x ", ((u16*)buffer)[i]);
+				output_len += snprintf(output + output_len, 128 - output_len, "%4.4x ", target_buffer_get_u16(target, &buffer[i*2]));
 				break;
 			case 1:
-				output_len += snprintf(output + output_len, 128 - output_len, "%2.2x ", ((u8*)buffer)[i]);
+				output_len += snprintf(output + output_len, 128 - output_len, "%2.2x ", buffer[i*1]);
 				break;
 		}
 
@@ -1461,6 +1506,7 @@ int handle_mw_command(struct command_context_s *cmd_ctx, char *cmd, char **args,
 	u32 value = 0;
 	int retval;
 	target_t *target = get_current_target(cmd_ctx);
+	u8 value_buf[4];
 
 	if (argc < 2)
 		return ERROR_OK;
@@ -1471,13 +1517,16 @@ int handle_mw_command(struct command_context_s *cmd_ctx, char *cmd, char **args,
 	switch (cmd[2])
 	{
 		case 'w':
-			retval = target->type->write_memory(target, address, 4, 1, (u8*)&value);
+			target_buffer_set_u32(target, value_buf, value);
+			retval = target->type->write_memory(target, address, 4, 1, value_buf);
 			break;
 		case 'h':
-			retval = target->type->write_memory(target, address, 2, 1, (u8*)&value);
+			target_buffer_set_u16(target, value_buf, value);
+			retval = target->type->write_memory(target, address, 2, 1, value_buf);
 			break;
 		case 'b':
-			retval = target->type->write_memory(target, address, 1, 1, (u8*)&value);
+			value_buf[0] = value;
+			retval = target->type->write_memory(target, address, 1, 1, value_buf);
 			break;
 		default:
 			return ERROR_OK;
@@ -1574,6 +1623,8 @@ int handle_dump_binary_command(struct command_context_s *cmd_ctx, char *cmd, cha
 	u32 size;
 	u8 buffer[560];
 	
+	struct timeval start, end, duration;
+	
 	target_t *target = get_current_target(cmd_ctx);
 
 	if (argc != 3)
@@ -1598,6 +1649,8 @@ int handle_dump_binary_command(struct command_context_s *cmd_ctx, char *cmd, cha
 		return ERROR_OK;
 	}
 
+	gettimeofday(&start, NULL);	
+	
 	while (size > 0)
 	{
 		u32 this_run_size = (size > 560) ? 560 : size;
@@ -1609,6 +1662,11 @@ int handle_dump_binary_command(struct command_context_s *cmd_ctx, char *cmd, cha
 
 	fclose(binary);
 
+	gettimeofday(&end, NULL);	
+
+	timeval_subtract(&duration, &end, &start);
+	command_print(cmd_ctx, "dumped %i byte in %is %ius", strtoul(args[2], NULL, 0), duration.tv_sec, duration.tv_usec);
+	
 	return ERROR_OK;
 
 }
diff --git a/src/target/target.h b/src/target/target.h
index dc6d8ce1..72018671 100644
--- a/src/target/target.h
+++ b/src/target/target.h
@@ -222,6 +222,13 @@ extern u16 target_buffer_get_u16(target_t *target, u8 *buffer);
 extern void target_buffer_set_u32(target_t *target, u8 *buffer, u32 value);
 extern void target_buffer_set_u16(target_t *target, u8 *buffer, u16 value);
 
+void target_read_u32(struct target_s *target, u32 address, u32 *value);
+void target_read_u16(struct target_s *target, u32 address, u16 *value);
+void target_read_u8(struct target_s *target, u32 address, u8 *value);
+void target_write_u32(struct target_s *target, u32 address, u32 value);
+void target_write_u16(struct target_s *target, u32 address, u16 value);
+void target_write_u8(struct target_s *target, u32 address, u8 value);
+
 #define ERROR_TARGET_INVALID	(-300)
 #define ERROR_TARGET_INIT_FAILED (-301)
 #define ERROR_TARGET_TIMEOUT	(-302)
-- 
cgit v1.2.3