diff mbox series

[v2,5/9] ALSA: emu10k1: fix PCM playback cache and interrupt handling

Message ID 20230518092224.3696958-5-oswald.buddenhagen@gmx.de (mailing list archive)
State New, archived
Headers show
Series [v2,1/9] ALSA: emu10k1: pass frame instead of byte addresses | expand

Commit Message

Oswald Buddenhagen May 18, 2023, 9:22 a.m. UTC
The cache causes a fixed delay regardless of stream parameters.
Consequently, all that "cache invalidate size" calculation stuff was
garbage (which can be traced right back to Creative's OSS driver).

This also removes the definitions of registers CD1..CDF, because they
are accessed only relative to CD0 anyway.

Signed-off-by: Oswald Buddenhagen <oswald.buddenhagen@gmx.de>
---
 include/sound/emu10k1.h    | 38 +++++++++++----------
 sound/pci/emu10k1/emupcm.c | 67 +++++++++++++-------------------------
 2 files changed, 43 insertions(+), 62 deletions(-)
diff mbox series

Patch

diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h
index 2d64f07e3883..ee662a1b0dc7 100644
--- a/include/sound/emu10k1.h
+++ b/include/sound/emu10k1.h
@@ -116,6 +116,10 @@ 
 #define IPR_MIDITRANSBUFEMPTY	0x00000100	/* MIDI UART transmit buffer empty		*/
 #define IPR_MIDIRECVBUFEMPTY	0x00000080	/* MIDI UART receive buffer empty		*/
 #define IPR_CHANNELLOOP		0x00000040	/* Channel (half) loop interrupt(s) pending	*/
+						/* The interrupt is triggered shortly after	*/
+						/* CCR_READADDRESS has crossed the boundary;	*/
+						/* due to the cache, this runs ahead of the	*/
+						/* actual playback position.			*/
 #define IPR_CHANNELNUMBERMASK	0x0000003f	/* When IPR_CHANNELLOOP is set, indicates the	*/
 						/* highest set channel in CLIPL, CLIPH, HLIPL,  */
 						/* or HLIPH.  When IPR is written with CL set,	*/
@@ -586,24 +590,22 @@  SUB_REG(PEFE, FILTERAMOUNT,	0x000000ff)	/* Filter envlope amount				*/
 
 /* 0x1f: not used */
 
-#define CD0			0x20		/* Cache data 0 register				*/
-#define CD1			0x21		/* Cache data 1 register				*/
-#define CD2			0x22		/* Cache data 2 register				*/
-#define CD3			0x23		/* Cache data 3 register				*/
-#define CD4			0x24		/* Cache data 4 register				*/
-#define CD5			0x25		/* Cache data 5 register				*/
-#define CD6			0x26		/* Cache data 6 register				*/
-#define CD7			0x27		/* Cache data 7 register				*/
-#define CD8			0x28		/* Cache data 8 register				*/
-#define CD9			0x29		/* Cache data 9 register				*/
-#define CDA			0x2a		/* Cache data A register				*/
-#define CDB			0x2b		/* Cache data B register				*/
-#define CDC			0x2c		/* Cache data C register				*/
-#define CDD			0x2d		/* Cache data D register				*/
-#define CDE			0x2e		/* Cache data E register				*/
-#define CDF			0x2f		/* Cache data F register				*/
-
-/* 0x30-3f seem to be the same as 0x20-2f */
+// 32 cache registers (== 128 bytes) per channel follow.
+// In stereo mode, the two channels' caches are concatenated into one,
+// and hold the interleaved frames.
+// The cache holds 64 frames, so the upper half is not used in 8-bit mode.
+// All registers mentioned below count in frames.
+// The cache is a ring buffer; CCR_READADDRESS operates modulo 64.
+// The cache is filled from (CCCA_CURRADDR - CCR_CACHEINVALIDSIZE)
+// into (CCR_READADDRESS - CCR_CACHEINVALIDSIZE).
+// The engine has a fetch threshold of 32 bytes, so it tries to keep
+// CCR_CACHEINVALIDSIZE below 8 (16-bit stereo), 16 (16-bit mono,
+// 8-bit stereo), or 32 (8-bit mono). The actual transfers are pretty
+// unpredictable, especially if several voices are running.
+// Frames are consumed at CCR_READADDRESS, which is incremented afterwards,
+// along with CCCA_CURRADDR and CCR_CACHEINVALIDSIZE. This implies that the
+// actual playback position always lags CCCA_CURRADDR by exactly 64 frames.
+#define CD0			0x20		/* Cache data registers 0 .. 0x1f			*/
 
 #define PTB			0x40		/* Page table base register				*/
 #define PTB_MASK		0xfffff000	/* Physical address of the page table in host memory	*/
diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c
index a6c4f1895a08..feb575922738 100644
--- a/sound/pci/emu10k1/emupcm.c
+++ b/sound/pci/emu10k1/emupcm.c
@@ -112,6 +112,10 @@  static int snd_emu10k1_pcm_channel_alloc(struct snd_emu10k1_pcm * epcm, int voic
 		}
 	}
 	if (epcm->extra == NULL) {
+		// The hardware supports only (half-)loop interrupts, so to support an
+		// arbitrary number of periods per buffer, we use an extra voice with a
+		// period-sized loop as the interrupt source. Additionally, the interrupt
+		// timing of the hardware is "suboptimal" and needs some compensation.
 		err = snd_emu10k1_voice_alloc(epcm->emu,
 					      epcm->type == PLAYBACK_EMUVOICE ? EMU10K1_PCM : EMU10K1_EFX,
 					      1,
@@ -232,23 +236,6 @@  static unsigned int emu10k1_select_interprom(unsigned int pitch_target)
 		return CCCA_INTERPROM_2;
 }
 
-/*
- * calculate cache invalidate size 
- *
- * stereo: channel is stereo
- * w_16: using 16bit samples
- *
- * returns: cache invalidate size in samples
- */
-static inline int emu10k1_ccis(int stereo, int w_16)
-{
-	if (w_16) {
-		return stereo ? 24 : 26;
-	} else {
-		return stereo ? 24*2 : 26*2;
-	}
-}
-
 static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
 				       int master, int extra,
 				       struct snd_emu10k1_voice *evoice,
@@ -264,7 +251,6 @@  static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
 	unsigned char send_routing[8];
 	unsigned long flags;
 	unsigned int pitch_target;
-	unsigned int ccis;
 
 	voice = evoice->number;
 	stereo = runtime->channels == 2;
@@ -284,10 +270,8 @@  static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
 		memcpy(send_amount, &mix->send_volume[tmp][0], 8);
 	}
 
-	ccis = emu10k1_ccis(stereo, w_16);
-	
 	if (master) {
-		evoice->epcm->ccca_start_addr = start_addr + ccis;
+		evoice->epcm->ccca_start_addr = start_addr + 64 - 3;
 	}
 	if (stereo && !extra) {
 		// Not really necessary for the slave, but it doesn't hurt
@@ -317,11 +301,11 @@  static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
 	else 
 		pitch_target = emu10k1_calc_pitch_target(runtime->rate);
 	if (extra)
-		snd_emu10k1_ptr_write(emu, CCCA, voice, start_addr |
+		snd_emu10k1_ptr_write(emu, CCCA, voice, (end_addr - 3) |
 			      emu10k1_select_interprom(pitch_target) |
 			      (w_16 ? 0 : CCCA_8BITSELECT));
 	else
-		snd_emu10k1_ptr_write(emu, CCCA, voice, (start_addr + ccis) |
+		snd_emu10k1_ptr_write(emu, CCCA, voice, (start_addr + 64 - 3) |
 			      emu10k1_select_interprom(pitch_target) |
 			      (w_16 ? 0 : CCCA_8BITSELECT));
 	/* Clear filter delay memory */
@@ -532,35 +516,30 @@  static void snd_emu10k1_playback_invalidate_cache(struct snd_emu10k1 *emu,
 						  struct snd_emu10k1_voice *evoice)
 {
 	struct snd_pcm_runtime *runtime;
-	unsigned int voice, stereo, i, ccis, cra = 64, cs, sample;
+	unsigned voice, stereo, sample;
+	u32 ccr;
 
 	runtime = evoice->epcm->substream->runtime;
 	voice = evoice->number;
 	stereo = (runtime->channels == 2);
 	sample = snd_pcm_format_width(runtime->format) == 16 ? 0 : 0x80808080;
-	ccis = emu10k1_ccis(stereo, sample == 0);
-	/* set cs to 2 * number of cache registers beside the invalidated */
-	cs = (sample == 0) ? (32-ccis) : (64-ccis+1) >> 1;
-	if (cs > 16) cs = 16;
-	for (i = 0; i < cs; i++) {
+
+	// We assume that the cache is resting at this point (i.e.,
+	// CCR_CACHEINVALIDSIZE is very small).
+
+	// Clear leading frames. For simplicitly, this does too much,
+	// except for 16-bit stereo. And the interpolator will actually
+	// access them at all only when we're pitch-shifting.
+	for (int i = 0; i < 3; i++)
 		snd_emu10k1_ptr_write(emu, CD0 + i, voice, sample);
-		if (stereo) {
-			snd_emu10k1_ptr_write(emu, CD0 + i, voice + 1, sample);
-		}
-	}
-	/* reset cache */
-	snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice, 0);
-	snd_emu10k1_ptr_write(emu, CCR_READADDRESS, voice, cra);
+
+	// Fill cache
+	ccr = (64 - 3) << REG_SHIFT(CCR_CACHEINVALIDSIZE);
 	if (stereo) {
-		snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice + 1, 0);
-		// The engine goes haywire if this one is out of sync
-		snd_emu10k1_ptr_write(emu, CCR_READADDRESS, voice + 1, cra);
-	}
-	/* fill cache */
-	snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice, ccis);
-	if (stereo) {
-		snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice+1, ccis);
+		// The engine goes haywire if CCR_READADDRESS is out of sync
+		snd_emu10k1_ptr_write(emu, CCR, voice + 1, ccr);
 	}
+	snd_emu10k1_ptr_write(emu, CCR, voice, ccr);
 }
 
 static void snd_emu10k1_playback_commit_volume(struct snd_emu10k1 *emu,