@@ -10,7 +10,7 @@ pub(crate) struct NovaCore {
pub(crate) gpu: Gpu,
}
-const BAR0_SIZE: usize = 8;
+// Size used as the const parameter of `pci::Bar` in the `Bar0` alias below.
+// 0x9500 lies above the PTIMER registers declared in regs.rs (0x9400 and 0x9410).
+const BAR0_SIZE: usize = 0x9500;
pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
kernel::pci_device_table!(
@@ -42,6 +42,8 @@ fn probe(pdev: &mut pci::Device, _info: &Self::IdInfo) -> Result<Pin<KBox<Self>>
GFP_KERNEL,
)?;
+ let _ = this.gpu.test_timer();
+
Ok(this)
}
}
@@ -1,12 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
+use kernel::device::Device;
+use kernel::types::ARef;
use kernel::{
device, devres::Devres, error::code::*, firmware, fmt, pci, prelude::*, str::BStr, str::CString,
};
use crate::driver::Bar0;
use crate::regs;
+use crate::timer::Timer;
use core::fmt;
+use core::time::Duration;
const fn to_lowercase_bytes<const N: usize>(s: &str) -> [u8; N] {
let src = s.as_bytes();
@@ -201,10 +205,12 @@ fn new(dev: &device::Device, spec: &Spec, ver: &str) -> Result<Firmware> {
/// Structure holding the resources required to operate the GPU.
#[pin_data]
pub(crate) struct Gpu {
+ /// `ARef` handle to the device, used as the target of the `dev_info!` messages.
+ dev: ARef<Device>,
spec: Spec,
/// MMIO mapping of PCI BAR 0
bar: Devres<Bar0>,
fw: Firmware,
+ /// Timer subdevice; its registers are accessed through `bar`.
+ timer: Timer,
}
impl Gpu {
@@ -220,6 +226,56 @@ pub(crate) fn new(pdev: &pci::Device, bar: Devres<Bar0>) -> Result<impl PinInit<
spec.revision
);
- Ok(pin_init!(Self { spec, bar, fw }))
+ let dev = pdev.as_ref().into();
+ let timer = Timer::new();
+
+ Ok(pin_init!(Self {
+ dev,
+ spec,
+ bar,
+ fw,
+ timer,
+ }))
+ }
+
+ /// Exercise the timer subdevice and log the observed timestamps.
+ ///
+ /// Checks that `Timer::wait_on` returns `Ok` right away when the condition is already
+ /// satisfied, and that it returns `Err(ETIMEDOUT)` after at least 10ms of GPU time when the
+ /// condition never becomes true.
+ ///
+ /// Returns `ENXIO` if BAR 0 cannot be accessed.
+ ///
+ /// NOTE(review): the checks use `assert!`/`assert_eq!`, so a failed check panics instead of
+ /// returning an error - confirm this is acceptable on the probe path.
+ pub(crate) fn test_timer(&self) -> Result<()> {
+ let bar = self.bar.try_access().ok_or(ENXIO)?;
+ dev_info!(&self.dev, "testing timer subdev\n");
+ dev_info!(&self.dev, "current timestamp: {}\n", self.timer.read(&bar));
+ // Release the guard before calling `wait_on`, which acquires BAR access on its own.
+ drop(bar);
+
+ // Condition satisfied on the first evaluation: expect an immediate `Ok(())`.
+ assert!(matches!(
+ self.timer
+ .wait_on(&self.bar, Duration::from_millis(10), || Some(())),
+ Ok(())
+ ));
+
+ let bar = self.bar.try_access().ok_or(ENXIO)?;
+ dev_info!(
+ &self.dev,
+ "timestamp after immediate exit: {}\n",
+ self.timer.read(&bar)
+ );
+ // `t1` comes from a separate register read, so it is not the value logged just above.
+ let t1 = self.timer.read(&bar);
+ drop(bar);
+
+ // Condition never satisfied: expect the wait to give up with `ETIMEDOUT`.
+ assert_eq!(
+ self.timer
+ .wait_on(&self.bar, Duration::from_millis(10), || Option::<()>::None),
+ Err(ETIMEDOUT)
+ );
+
+ let bar = self.bar.try_access().ok_or(ENXIO)?;
+ let t2 = self.timer.read(&bar);
+ // NOTE(review): `wait_on` also returns `ETIMEDOUT` when it considers the timer frozen; in
+ // that case less than 10ms may have elapsed and this assertion would panic.
+ assert!(t2 - t1 >= Duration::from_millis(10));
+ dev_info!(
+ &self.dev,
+ "timestamp after timeout: {} ({:?})\n",
+ self.timer.read(&bar),
+ t2 - t1
+ );
+ drop(bar);
+
+ Ok(())
}
}
@@ -6,6 +6,7 @@
mod firmware;
mod gpu;
mod regs;
+mod timer;
kernel::module_pci_driver! {
type: driver::NovaCore,
@@ -172,3 +172,11 @@ impl Builder<$name> {
7:4 major_rev as (u8), "major revision of the chip";
25:20 chipset try_into (Chipset), "chipset model"
);
+
+// PTIMER time registers. `Timer::read` combines the `hi` and `lo` fields into one 64-bit
+// timestamp.
+nv_reg!(PtimerTime0@0x00009400;
+ 31:0 lo as (u32), "low 32-bits of the timer"
+);
+
+nv_reg!(PtimerTime1@0x00009410;
+ 31:0 hi as (u32), "high 32 bits of the timer"
+);
new file mode 100644
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Nova Core Timer subdevice
+
+use core::fmt::Display;
+use core::ops::{Add, Sub};
+use core::time::Duration;
+
+use kernel::devres::Devres;
+use kernel::num::U64Ext;
+use kernel::prelude::*;
+
+use crate::driver::Bar0;
+use crate::regs;
+
+/// A timestamp with nanosecond granularity obtained from the GPU timer.
+///
+/// Subtracting one timestamp from another yields a [`Duration`]; adding a `u64` offsets the
+/// timestamp by that raw amount. Both operations wrap around on `u64` overflow.
+///
+/// TODO: add Kunit tests!
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub(crate) struct Timestamp(u64);
+
+impl Display for Timestamp {
+    /// Formats the timestamp as its raw 64-bit value in decimal.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let Timestamp(raw) = self;
+        f.write_fmt(format_args!("{}", raw))
+    }
+}
+
+impl Add<u64> for Timestamp {
+    type Output = Self;
+
+    /// Offsets the timestamp by `rhs`, wrapping around on `u64` overflow.
+    fn add(self, rhs: u64) -> Self::Output {
+        let Self(raw) = self;
+        Self(raw.wrapping_add(rhs))
+    }
+}
+
+impl Sub for Timestamp {
+    type Output = Duration;
+
+    /// Returns the time elapsed from `rhs` to `self`.
+    ///
+    /// The difference is computed with wrapping arithmetic and interpreted as nanoseconds.
+    fn sub(self, rhs: Self) -> Self::Output {
+        let elapsed_ns = self.0.wrapping_sub(rhs.0);
+        Duration::from_nanos(elapsed_ns)
+    }
+}
+
+/// Handle to the GPU timer subdevice.
+///
+/// Holds no state of its own; register access goes through the BAR passed to each method.
+pub(crate) struct Timer {}
+
+impl Timer {
+    /// Creates a new timer handle.
+    pub(crate) fn new() -> Self {
+        Self {}
+    }
+
+    /// Read the current timer timestamp.
+    ///
+    /// The 64-bit value is split across two 32-bit registers, so the high word is sampled
+    /// before and after the low word, and the read is retried until both high samples match.
+    pub(crate) fn read(&self, bar: &Bar0) -> Timestamp {
+        loop {
+            let hi = regs::PtimerTime1::read(bar);
+            let lo = regs::PtimerTime0::read(bar);
+
+            if hi.hi() == regs::PtimerTime1::read(bar).hi() {
+                return Timestamp(u64::from_u32s(hi.hi(), lo.lo()));
+            }
+        }
+    }
+
+    /// Write `time` to the timer registers, high word first, then low word.
+    #[allow(dead_code)]
+    pub(crate) fn time(bar: &Bar0, time: u64) {
+        regs::PtimerTime1::new().hi(time.upper_32_bits()).write(bar);
+        regs::PtimerTime0::new().lo(time.lower_32_bits()).write(bar);
+    }
+
+    /// Wait until `cond` is true or `timeout` elapsed, based on GPU time.
+    ///
+    /// When `cond` evaluates to `Some`, its return value is returned.
+    ///
+    /// `Err(ETIMEDOUT)` is returned if `timeout` has been reached without `cond` evaluating to
+    /// `Some`, or if the timer device is stuck for some reason.
+    ///
+    /// `Err(ENXIO)` is returned if `dev_bar` cannot be accessed.
+    pub(crate) fn wait_on<R, F: Fn() -> Option<R>>(
+        &self,
+        dev_bar: &Devres<Bar0>,
+        timeout: Duration,
+        cond: F,
+    ) -> Result<R> {
+        // Number of consecutive time reads after which we consider the timer frozen if it hasn't
+        // moved forward.
+        const MAX_STALLED_READS: usize = 16;
+
+        let start_time = {
+            let bar = dev_bar.try_access().ok_or(ENXIO)?;
+            self.read(&bar)
+        };
+        let mut prev_time = start_time;
+        let mut num_reads: usize = 0;
+
+        loop {
+            if let Some(ret) = cond() {
+                return Ok(ret);
+            }
+
+            // Hold the BAR access guard only for the register read itself, so that it is
+            // released before `cond` runs again.
+            let cur_time = {
+                let bar = dev_bar.try_access().ok_or(ENXIO)?;
+                self.read(&bar)
+            };
+
+            if cur_time == prev_time {
+                // Check if the timer is frozen for some reason.
+                if num_reads >= MAX_STALLED_READS {
+                    return Err(ETIMEDOUT);
+                }
+                num_reads += 1;
+            } else {
+                // Compare the elapsed time against `timeout` rather than comparing against an
+                // absolute deadline: `start_time + timeout` wraps around for large timeouts,
+                // which would make the wait expire on the first timer tick.
+                if cur_time - start_time >= timeout {
+                    return Err(ETIMEDOUT);
+                }
+
+                num_reads = 0;
+                prev_time = cur_time;
+            }
+        }
+    }
+}
Add a basic timer device and exercise it during device probing. This first draft is probably very questionable. One point in particular should IMHO receive attention: the generic wait_on() method aims at providing similar functionality to Nouveau's nvkm_[num]sec() macros. Since this method will be heavily used with different conditions to test, I'd like to avoid monomorphizing it entirely with each instance; that's something that is achieved in nvkm_xsec() using functions that the macros invoke. I have tried achieving the same result in Rust using closures (kept as-is in the current code), but they seem to be monomorphized as well. Calling extra functions could work better, but also looks less elegant to me, so I am really open to suggestions here. Signed-off-by: Alexandre Courbot <acourbot@nvidia.com> --- drivers/gpu/nova-core/driver.rs | 4 +- drivers/gpu/nova-core/gpu.rs | 58 ++++++++++++++++- drivers/gpu/nova-core/nova_core.rs | 1 + drivers/gpu/nova-core/regs.rs | 8 +++ drivers/gpu/nova-core/timer.rs | 124 +++++++++++++++++++++++++++++++++++++ 5 files changed, 193 insertions(+), 2 deletions(-)