diff mbox series

[OSSTEST,24/60] history reporting: Cache data limit now in History module

Message ID 20200814172205.9624-25-ian.jackson@eu.citrix.com (mailing list archive)
State New, archived
Headers show
Series Speed up sg-report-job-history | expand

Commit Message

Ian Jackson Aug. 14, 2020, 5:21 p.m. UTC
Replace the ad-hoc, query-specific limit strategy in
sg-report-host-history with a new, more principled arrangement in
HistoryReport.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 Osstest/HistoryReport.pm | 38 ++++++++++++++++++++++++++++++++------
 sg-report-host-history   | 14 +++++---------
 2 files changed, 37 insertions(+), 15 deletions(-)
diff mbox series

Patch

diff --git a/Osstest/HistoryReport.pm b/Osstest/HistoryReport.pm
index 6e5ed6ec..b576c02a 100644
--- a/Osstest/HistoryReport.pm
+++ b/Osstest/HistoryReport.pm
@@ -31,7 +31,7 @@  BEGIN {
 			 cacheable_query
 			 cacheable_fn
 			 cache_write_entry
-			 cache_report_stats
+			 cache_finish
 		    );
     %EXPORT_TAGS = ();
 
@@ -43,6 +43,7 @@  use POSIX;
 our @key_cols;
 
 our %cache;
+our @previous;
 
 our %q_count;
 our %q_misses;
@@ -50,6 +51,7 @@  our %q_misses;
 our $rows_previous = 0;
 our $rows_today = 0;
 our $rows_hit = 0;
+our $rows_extra = 0;
 
 
 # History report query and processing cache
@@ -73,6 +75,12 @@  our $rows_hit = 0;
 # It also contains one entry for each cached subcomputation.
 # The keys for these entries are "\%$cachekey".
 
+# Limit strategy
+#
+# It is up to the caller to make a primary query that returns the rows
+# that it is interested in.  They should be in (perhaps approximate)
+# reverse time order.
+
 # These routines all expect the file handle ::DEBUG
 # They are not reentrant and can only be used for one HTML
 # output file in one program run.
@@ -118,7 +126,6 @@  sub cache_read_previous ($) {
     }
     %cache = ();
     for (;;) {
-	$rows_previous++;
         $_ = <H> // last;
         next unless m{^\<\!-- osstest-report-reuseable (.*)--\>$};
 	my $jr = {};
@@ -134,6 +141,7 @@  sub cache_read_previous ($) {
 	    s{\%([0-9a-f]{2})}{ chr hex $1 }ge;
 	    $ch->{$k} = $_;
 	}
+	push @previous, $jr;
 	$cache{key($jr)} = $jr;
     }
     close H;
@@ -169,18 +177,36 @@  sub cache_write_entry ($$) {
 	print $fh " $'";
 	$whash->($jr->{$hk});
     }
+    $jr->{'% written'} = 1;
     print $fh " -->\n";
 }
 
-sub cache_report_stats ($) {
-    my ($what) = @_;
-    print ::DEBUG "CACHE $what read=$rows_previous hits $rows_hit/$rows_today";
+sub cache_finish ($$) {
+    my ($fh, $what) = @_;
+
+    my $nprevious = @previous;
+
+    # Write as many rows again.  This saves re-querying in case of
+    # flapping etc.
+    for (;;) {
+	last if
+	  $rows_extra >= $rows_today &&
+	  ($rows_today + $rows_extra >= @previous * 0.9);
+	my $jr = shift @previous;
+	last unless $jr;
+	next if $jr->{'% written'};
+	cache_write_entry($fh, $jr);
+	$rows_extra++;
+    }
+
+    print ::DEBUG
+	"CACHE $what read=$nprevious hits $rows_hit/$rows_today";
     for my $cachekey (sort keys %q_count) {
 	my $total = $q_count{$cachekey};
 	my $hits = $total - ($q_misses{$cachekey} // 0);
 	print ::DEBUG " $cachekey=$hits/$total";
     }
-    print ::DEBUG "\n";
+    print ::DEBUG " extra=$rows_extra\n";
 }
 
 1;
diff --git a/sg-report-host-history b/sg-report-host-history
index e281986d..2ad2134d 100755
--- a/sg-report-host-history
+++ b/sg-report-host-history
@@ -119,7 +119,7 @@  our %hosts;
 sub mainquery ($) {
     my ($host) = @_;
 
-    our $runvarq //= db_prepare(<<END);
+    our $runvarq = db_prepare(<<END.($limit ? <<END_LIMIT : ''));
 	SELECT flight, job, name, status
 	  FROM runvars
           JOIN jobs USING (flight, job)
@@ -128,8 +128,9 @@  sub mainquery ($) {
 	   AND $flightcond
            AND $restrictflight_cond
 	 ORDER BY flight DESC
-         LIMIT $limit * 2
 END
+         LIMIT $limit
+END_LIMIT
 
     print DEBUG "MAINQUERY $host...\n";
     $runvarq->execute($host);
@@ -233,11 +234,6 @@  END
     my $runvarq_misses = 0;
 
     foreach my $jr (@rows) {
-	if ($wrote++ >= $limit) {
-	    cache_write_entry(\*H, $jr);
-	    next;
-	}
-
         #print DEBUG "JR $jr->{flight}.$jr->{job}\n";
 	my $ir = cacheable_query($infoq, $jr, 'i');
 	my $ar = cacheable_query($allocdq, $jr, 'a');
@@ -338,11 +334,11 @@  END
 
     print H "</table></body></html>\n";
 
+    cache_finish(\*H, "$hostname");
+
     close H or die $!;
     rename "$html_file.new", "$html_file" or die "$html_file $!"
         if $doinstall;
-
-    cache_report_stats("$hostname");
 }
 
 foreach my $host (@ARGV) {