summaryrefslogtreecommitdiff
path: root/src/target
diff options
context:
space:
mode:
authorDavid Brownell <dbrownell@users.sourceforge.net>2009-11-24 21:24:44 -0800
committerDavid Brownell <dbrownell@users.sourceforge.net>2009-11-24 21:24:44 -0800
commitb6210907ea584095cdede663f695eb8afeecef14 (patch)
tree13844914f68dc154959a27c914e9160ceb658817 /src/target
parente109bb6af211d3916e4006127945d128b138529b (diff)
downloadopenocd+libswd-b6210907ea584095cdede663f695eb8afeecef14.tar.gz
openocd+libswd-b6210907ea584095cdede663f695eb8afeecef14.tar.bz2
openocd+libswd-b6210907ea584095cdede663f695eb8afeecef14.tar.xz
openocd+libswd-b6210907ea584095cdede663f695eb8afeecef14.zip
Cortex-A8: avoid DSCR reads
There was a lot of needless handshaking overhead in the current Cortex-A8 DCC/ITR operations, since the status read by each step was discarded rather than letting the next step know it. This shrinks the handshaking by: (a) passing status along from previous steps, avoiding re-fetching; which enables the big win (b) relying on a useful invariant: that the DSCR_INSTR_COMP bit is set after every call to a DPM method. A "reg sp_usr" call previously took 17 flushes; now it takes just 9. This visibly speeds common operations like entry to debug state and stepping, as well as "arm reg" and so on. Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Diffstat (limited to 'src/target')
-rw-r--r--src/target/cortex_a8.c151
1 files changed, 103 insertions, 48 deletions
diff --git a/src/target/cortex_a8.c b/src/target/cortex_a8.c
index 299eb3c1..08e54601 100644
--- a/src/target/cortex_a8.c
+++ b/src/target/cortex_a8.c
@@ -89,20 +89,25 @@ static int cortex_a8_init_debug_access(struct target *target)
return retval;
}
-/* FIXME we waste a *LOT* of round-trips with needless DSCR reads, which
- * slows down operations considerably. One good way to start reducing
- * them would pass current values into and out of this routine. That
- * should also help synch DCC read/write.
+/* To reduce needless round-trips, pass in a pointer to the current
+ * DSCR value. Initialize it to zero if you just need to know the
+ * value on return from this function; or (1 << DSCR_INSTR_COMP) if
+ * you happen to know that no instruction is pending.
*/
-static int cortex_a8_exec_opcode(struct target *target, uint32_t opcode)
+static int cortex_a8_exec_opcode(struct target *target,
+ uint32_t opcode, uint32_t *dscr_p)
{
uint32_t dscr;
int retval;
struct armv7a_common *armv7a = target_to_armv7a(target);
struct swjdp_common *swjdp = &armv7a->swjdp_info;
+ dscr = dscr_p ? *dscr_p : 0;
+
LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode);
- do
+
+ /* Wait for InstrCompl bit to be set */
+ while ((dscr & (1 << DSCR_INSTR_COMP)) == 0)
{
retval = mem_ap_read_atomic_u32(swjdp,
armv7a->debug_base + CPUDBG_DSCR, &dscr);
@@ -112,7 +117,6 @@ static int cortex_a8_exec_opcode(struct target *target, uint32_t opcode)
return retval;
}
}
- while ((dscr & (1 << DSCR_INSTR_COMP)) == 0); /* Wait for InstrCompl bit to be set */
mem_ap_write_u32(swjdp, armv7a->debug_base + CPUDBG_ITR, opcode);
@@ -128,6 +132,9 @@ static int cortex_a8_exec_opcode(struct target *target, uint32_t opcode)
}
while ((dscr & (1 << DSCR_INSTR_COMP)) == 0); /* Wait for InstrCompl bit to be set */
+ if (dscr_p)
+ *dscr_p = dscr;
+
return retval;
}
@@ -144,7 +151,7 @@ static int cortex_a8_read_regs_through_mem(struct target *target, uint32_t addre
cortex_a8_dap_read_coreregister_u32(target, regfile, 0);
cortex_a8_dap_write_coreregister_u32(target, address, 0);
- cortex_a8_exec_opcode(target, ARMV4_5_STMIA(0, 0xFFFE, 0, 0));
+ cortex_a8_exec_opcode(target, ARMV4_5_STMIA(0, 0xFFFE, 0, 0), NULL);
dap_ap_select(swjdp, swjdp_memoryap);
mem_ap_read_buf_u32(swjdp, (uint8_t *)(&regfile[1]), 4*15, address);
dap_ap_select(swjdp, swjdp_debugap);
@@ -158,10 +165,15 @@ static int cortex_a8_read_cp(struct target *target, uint32_t *value, uint8_t CP,
int retval;
struct armv7a_common *armv7a = target_to_armv7a(target);
struct swjdp_common *swjdp = &armv7a->swjdp_info;
+ uint32_t dscr = 0;
+
+ /* MRC(...) to read coprocessor register into r0 */
+ cortex_a8_exec_opcode(target, ARMV4_5_MRC(CP, op1, 0, CRn, CRm, op2),
+ &dscr);
- cortex_a8_exec_opcode(target, ARMV4_5_MRC(CP, op1, 0, CRn, CRm, op2));
/* Move R0 to DTRTX */
- cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+ cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
+ &dscr);
/* Read DCCTX */
retval = mem_ap_read_atomic_u32(swjdp,
@@ -187,16 +199,21 @@ static int cortex_a8_write_cp(struct target *target, uint32_t value,
{
LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
/* Clear DCCRX with MCR(p14, 0, Rd, c0, c5, 0), opcode 0xEE000E15 */
- cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+ cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+ &dscr);
}
+ /* Write DTRRX ... sets DSCR.DTRRXfull but exec_opcode() won't care */
retval = mem_ap_write_u32(swjdp,
armv7a->debug_base + CPUDBG_DTRRX, value);
+
/* Move DTRRX to r0 */
- cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+ cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), &dscr);
- cortex_a8_exec_opcode(target, ARMV4_5_MCR(CP, op1, 0, CRn, CRm, op2));
- return retval;
+ /* MCR(...) to write r0 to coprocessor */
+ return cortex_a8_exec_opcode(target,
+ ARMV4_5_MCR(CP, op1, 0, CRn, CRm, op2),
+ &dscr);
}
static int cortex_a8_read_cp15(struct target *target, uint32_t op1, uint32_t op2,
@@ -238,7 +255,7 @@ static int cortex_a8_dap_read_coreregister_u32(struct target *target,
{
int retval = ERROR_OK;
uint8_t reg = regnum&0xFF;
- uint32_t dscr;
+ uint32_t dscr = 0;
struct armv7a_common *armv7a = target_to_armv7a(target);
struct swjdp_common *swjdp = &armv7a->swjdp_info;
@@ -248,30 +265,35 @@ static int cortex_a8_dap_read_coreregister_u32(struct target *target,
if (reg < 15)
{
/* Rn to DCCTX, "MCR p14, 0, Rn, c0, c5, 0" 0xEE00nE15 */
- cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, reg, 0, 5, 0));
+ cortex_a8_exec_opcode(target,
+ ARMV4_5_MCR(14, 0, reg, 0, 5, 0),
+ &dscr);
}
else if (reg == 15)
{
/* "MOV r0, r15"; then move r0 to DCCTX */
- cortex_a8_exec_opcode(target, 0xE1A0000F);
- cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+ cortex_a8_exec_opcode(target, 0xE1A0000F, &dscr);
+ cortex_a8_exec_opcode(target,
+ ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
+ &dscr);
}
else
{
/* "MRS r0, CPSR" or "MRS r0, SPSR"
* then move r0 to DCCTX
*/
- cortex_a8_exec_opcode(target, ARMV4_5_MRS(0, reg & 1));
- cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+ cortex_a8_exec_opcode(target, ARMV4_5_MRS(0, reg & 1), &dscr);
+ cortex_a8_exec_opcode(target,
+ ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
+ &dscr);
}
- /* Read DTRRTX */
- do
+ /* Wait for DTRRXfull then read DTRRTX */
+ while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0)
{
retval = mem_ap_read_atomic_u32(swjdp,
armv7a->debug_base + CPUDBG_DSCR, &dscr);
}
- while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0); /* Wait for DTRRXfull */
retval = mem_ap_read_atomic_u32(swjdp,
armv7a->debug_base + CPUDBG_DTRTX, value);
@@ -298,13 +320,14 @@ static int cortex_a8_dap_write_coreregister_u32(struct target *target,
{
LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
/* Clear DCCRX with MCR(p14, 0, Rd, c0, c5, 0), opcode 0xEE000E15 */
- cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+ cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+ &dscr);
}
if (Rd > 17)
return retval;
- /* Write to DCCRX */
+ /* Write DTRRX ... sets DSCR.DTRRXfull but exec_opcode() won't care */
LOG_DEBUG("write DCC 0x%08" PRIx32, value);
retval = mem_ap_write_u32(swjdp,
armv7a->debug_base + CPUDBG_DTRRX, value);
@@ -312,28 +335,33 @@ static int cortex_a8_dap_write_coreregister_u32(struct target *target,
if (Rd < 15)
{
/* DCCRX to Rn, "MCR p14, 0, Rn, c0, c5, 0", 0xEE00nE15 */
- cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, Rd, 0, 5, 0));
+ cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, Rd, 0, 5, 0),
+ &dscr);
}
else if (Rd == 15)
{
/* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15
* then "mov r15, r0"
*/
- cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
- cortex_a8_exec_opcode(target, 0xE1A0F000);
+ cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+ &dscr);
+ cortex_a8_exec_opcode(target, 0xE1A0F000, &dscr);
}
else
{
/* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15
* then "MSR CPSR_cxsf, r0" or "MSR SPSR_cxsf, r0" (all fields)
*/
- cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
- cortex_a8_exec_opcode(target, ARMV4_5_MSR_GP(0, 0xF, Rd & 1));
+ cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+ &dscr);
+ cortex_a8_exec_opcode(target, ARMV4_5_MSR_GP(0, 0xF, Rd & 1),
+ &dscr);
/* "Prefetch flush" after modifying execution status in CPSR */
if (Rd == 16)
cortex_a8_exec_opcode(target,
- ARMV4_5_MCR(15, 0, 0, 7, 5, 4));
+ ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
+ &dscr);
}
return retval;
@@ -354,6 +382,9 @@ static int cortex_a8_dap_write_memap_register_u32(struct target *target, uint32_
/*
* Cortex-A8 implementation of Debug Programmer's Model
*
+ * NOTE the invariant: these routines return with DSCR_INSTR_COMP set,
+ * so there's no need to poll for it before executing an instruction.
+ *
* NOTE that in several of these cases the "stall" mode might be useful.
* It'd let us queue a few operations together... prepare/finish might
* be the places to enable/disable that mode.
@@ -371,23 +402,30 @@ static int cortex_a8_write_dcc(struct cortex_a8_common *a8, uint32_t data)
a8->armv7a_common.debug_base + CPUDBG_DTRRX, data);
}
-static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data)
+static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data,
+ uint32_t *dscr_p)
{
struct swjdp_common *swjdp = &a8->armv7a_common.swjdp_info;
- uint32_t dscr;
+ uint32_t dscr = 1 << DSCR_INSTR_COMP;
int retval;
+ if (dscr_p)
+ dscr = *dscr_p;
+
/* Wait for DTRRXfull */
- do {
+ while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0) {
retval = mem_ap_read_atomic_u32(swjdp,
a8->armv7a_common.debug_base + CPUDBG_DSCR,
&dscr);
- } while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0);
+ }
retval = mem_ap_read_atomic_u32(swjdp,
a8->armv7a_common.debug_base + CPUDBG_DTRTX, data);
LOG_DEBUG("read DCC 0x%08" PRIx32, *data);
+ if (dscr_p)
+ *dscr_p = dscr;
+
return retval;
}
@@ -398,9 +436,12 @@ static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
uint32_t dscr;
int retval;
- retval = mem_ap_read_atomic_u32(swjdp,
- a8->armv7a_common.debug_base + CPUDBG_DSCR,
- &dscr);
+ /* set up invariant: INSTR_COMP is set after ever DPM operation */
+ do {
+ retval = mem_ap_read_atomic_u32(swjdp,
+ a8->armv7a_common.debug_base + CPUDBG_DSCR,
+ &dscr);
+ } while ((dscr & (1 << DSCR_INSTR_COMP)) == 0);
/* this "should never happen" ... */
if (dscr & (1 << DSCR_DTR_RX_FULL)) {
@@ -408,7 +449,8 @@ static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
/* Clear DCCRX */
retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target,
- ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+ ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+ &dscr);
}
return retval;
@@ -425,18 +467,21 @@ static int cortex_a8_instr_write_data_dcc(struct arm_dpm *dpm,
{
struct cortex_a8_common *a8 = dpm_to_a8(dpm);
int retval;
+ uint32_t dscr = 1 << DSCR_INSTR_COMP;
retval = cortex_a8_write_dcc(a8, data);
return cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target,
- opcode);
+ opcode,
+ &dscr);
}
static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
uint32_t opcode, uint32_t data)
{
struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+ uint32_t dscr = 1 << DSCR_INSTR_COMP;
int retval;
retval = cortex_a8_write_dcc(a8, data);
@@ -444,12 +489,14 @@ static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
/* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15 */
retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target,
- ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+ ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
+ &dscr);
/* then the opcode, taking data from R0 */
retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target,
- opcode);
+ opcode,
+ &dscr);
return retval;
}
@@ -457,9 +504,12 @@ static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
static int cortex_a8_instr_cpsr_sync(struct arm_dpm *dpm)
{
struct target *target = dpm->arm->target;
+ uint32_t dscr = 1 << DSCR_INSTR_COMP;
/* "Prefetch flush" after modifying execution status in CPSR */
- return cortex_a8_exec_opcode(target, ARMV4_5_MCR(15, 0, 0, 7, 5, 4));
+ return cortex_a8_exec_opcode(target,
+ ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
+ &dscr);
}
static int cortex_a8_instr_read_data_dcc(struct arm_dpm *dpm,
@@ -467,13 +517,15 @@ static int cortex_a8_instr_read_data_dcc(struct arm_dpm *dpm,
{
struct cortex_a8_common *a8 = dpm_to_a8(dpm);
int retval;
+ uint32_t dscr = 1 << DSCR_INSTR_COMP;
/* the opcode, writing data to DCC */
retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target,
- opcode);
+ opcode,
+ &dscr);
- return cortex_a8_read_dcc(a8, data);
+ return cortex_a8_read_dcc(a8, data, &dscr);
}
@@ -481,19 +533,22 @@ static int cortex_a8_instr_read_data_r0(struct arm_dpm *dpm,
uint32_t opcode, uint32_t *data)
{
struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+ uint32_t dscr = 1 << DSCR_INSTR_COMP;
int retval;
/* the opcode, writing data to R0 */
retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target,
- opcode);
+ opcode,
+ &dscr);
/* write R0 to DCC */
retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target,
- ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+ ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
+ &dscr);
- return cortex_a8_read_dcc(a8, data);
+ return cortex_a8_read_dcc(a8, data, &dscr);
}
static int cortex_a8_dpm_setup(struct cortex_a8_common *a8, uint32_t didr)