You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
663 lines
16 KiB
663 lines
16 KiB
/*
|
|
* Copyright © 2015 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*
|
|
*/
|
|
|
|
#include <math.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "igt_core.h"
|
|
#include "igt_stats.h"
|
|
|
|
/* Largest value representable in a uint64_t (all bits set). */
#define U64_MAX ((uint64_t)~0ULL)

/*
 * Fetch sample @i from the sorted / insertion-ordered array of @stats,
 * picking the floating point or the integer view depending on
 * @stats->is_float. The result always has type double (usual arithmetic
 * conversions of the conditional operator).
 *
 * Both parameters are parenthesized so that arbitrary expressions (e.g.
 * "&local" or "a + b") can be passed safely.
 */
#define sorted_value(stats, i) \
	((stats)->is_float ? (stats)->sorted_f[(i)] : (stats)->sorted_u64[(i)])
#define unsorted_value(stats, i) \
	((stats)->is_float ? (stats)->values_f[(i)] : (stats)->values_u64[(i)])
|
|
|
|
/**
|
|
* SECTION:igt_stats
|
|
* @short_description: Tools for statistical analysis
|
|
* @title: Stats
|
|
* @include: igt.h
|
|
*
|
|
* Various tools to make sense of data.
|
|
*
|
|
* #igt_stats_t is a container of data samples. igt_stats_push() is used to add
|
|
* new samples and various results (mean, variance, standard deviation, ...)
|
|
* can then be retrieved.
|
|
*
|
|
* |[
|
|
* igt_stats_t stats;
|
|
*
|
|
 * igt_stats_init_with_size(&stats, 8);
|
|
*
|
|
* igt_stats_push(&stats, 2);
|
|
* igt_stats_push(&stats, 4);
|
|
* igt_stats_push(&stats, 4);
|
|
* igt_stats_push(&stats, 4);
|
|
* igt_stats_push(&stats, 5);
|
|
* igt_stats_push(&stats, 5);
|
|
* igt_stats_push(&stats, 7);
|
|
* igt_stats_push(&stats, 9);
|
|
*
|
|
* printf("Mean: %lf\n", igt_stats_get_mean(&stats));
|
|
*
|
|
* igt_stats_fini(&stats);
|
|
* ]|
|
|
*/
|
|
|
|
/*
 * Returns the number of elements to allocate when room for @need elements is
 * required: @need itself, plus @need / 64, plus a constant head-room of 3
 * (when @need is below 9) or 6 (otherwise).
 */
static unsigned int get_new_capacity(int need)
{
	/* growth pattern taken from Python's list */
	const int headroom = (need < 9) ? 3 : 6;

	return (unsigned int)((need >> 6) + headroom) + (unsigned int)need;
}
|
|
|
|
/*
 * Makes sure the value array of @stats has room for @n_additional_values more
 * samples on top of the ones already stored, growing it with realloc() when
 * it is too small. Allocation failure trips igt_assert().
 *
 * Whenever the array is grown, the sorted copy is released and reset to NULL;
 * igt_stats_ensure_sorted_values() allocates it again on demand.
 */
static void igt_stats_ensure_capacity(igt_stats_t *stats,
				      unsigned int n_additional_values)
{
	const unsigned int required = stats->n_values + n_additional_values;

	if (required > stats->capacity) {
		const unsigned int grown = get_new_capacity(required);

		stats->values_u64 = realloc(stats->values_u64,
					    sizeof(*stats->values_u64) * grown);
		igt_assert(stats->values_u64);
		stats->capacity = grown;

		/* the sorted copy no longer matches the new capacity */
		free(stats->sorted_u64);
		stats->sorted_u64 = NULL;
	}
}
|
|
|
|
/**
|
|
* igt_stats_init:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Initializes an #igt_stats_t instance. igt_stats_fini() must be called once
|
|
* finished with @stats.
|
|
*/
|
|
void igt_stats_init(igt_stats_t *stats)
|
|
{
|
|
memset(stats, 0, sizeof(*stats));
|
|
|
|
igt_stats_ensure_capacity(stats, 128);
|
|
|
|
stats->min = U64_MAX;
|
|
stats->max = 0;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_init_with_size:
|
|
* @stats: An #igt_stats_t instance
|
|
* @capacity: Number of data samples @stats can contain
|
|
*
|
|
* Like igt_stats_init() but with a size to avoid reallocating the underlying
|
|
* array(s) when pushing new values. Useful if we have a good idea of the
|
|
* number of data points we want @stats to hold.
|
|
*
|
|
* igt_stats_fini() must be called once finished with @stats.
|
|
*/
|
|
void igt_stats_init_with_size(igt_stats_t *stats, unsigned int capacity)
|
|
{
|
|
memset(stats, 0, sizeof(*stats));
|
|
|
|
igt_stats_ensure_capacity(stats, capacity);
|
|
|
|
stats->min = U64_MAX;
|
|
stats->max = 0;
|
|
stats->range[0] = HUGE_VAL;
|
|
stats->range[1] = -HUGE_VAL;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_fini:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Frees resources allocated in igt_stats_init().
|
|
*/
|
|
void igt_stats_fini(igt_stats_t *stats)
|
|
{
|
|
free(stats->values_u64);
|
|
free(stats->sorted_u64);
|
|
}
|
|
|
|
|
|
/**
|
|
* igt_stats_is_population:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Returns: #true if @stats represents a population, #false if only a sample.
|
|
*
|
|
* See igt_stats_set_population() for more details.
|
|
*/
|
|
bool igt_stats_is_population(igt_stats_t *stats)
|
|
{
|
|
return stats->is_population;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_set_population:
|
|
* @stats: An #igt_stats_t instance
|
|
* @full_population: Whether we're dealing with sample data or a full
|
|
* population
|
|
*
|
|
* In statistics, we usually deal with a subset of the full data (which may be
|
|
* a continuous or infinite set). Data analysis is then done on a sample of
|
|
* this population.
|
|
*
|
|
* This has some importance as only having a sample of the data leads to
|
|
* [biased estimators](https://en.wikipedia.org/wiki/Bias_of_an_estimator). We
|
|
* currently used the information given by this method to apply
|
|
* [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
|
|
* to the variance.
|
|
*
|
|
* Note that even if we manage to have an unbiased variance by multiplying
|
|
* a sample variance by the Bessel's correction, n/(n - 1), the standard
|
|
* deviation derived from the unbiased variance isn't itself unbiased.
|
|
* Statisticians talk about a "corrected" standard deviation.
|
|
*
|
|
* When giving #true to this function, the data set in @stats is considered a
|
|
* full population. It's considered a sample of a bigger population otherwise.
|
|
*
|
|
* When newly created, @stats defaults to holding sample data.
|
|
*/
|
|
void igt_stats_set_population(igt_stats_t *stats, bool full_population)
|
|
{
|
|
if (full_population == stats->is_population)
|
|
return;
|
|
|
|
stats->is_population = full_population;
|
|
stats->mean_variance_valid = false;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_push:
|
|
* @stats: An #igt_stats_t instance
|
|
* @value: An integer value
|
|
*
|
|
* Adds a new value to the @stats dataset.
|
|
*/
|
|
void igt_stats_push(igt_stats_t *stats, uint64_t value)
|
|
{
|
|
if (stats->is_float) {
|
|
igt_stats_push_float(stats, value);
|
|
return;
|
|
}
|
|
|
|
igt_stats_ensure_capacity(stats, 1);
|
|
|
|
stats->values_u64[stats->n_values++] = value;
|
|
|
|
stats->mean_variance_valid = false;
|
|
stats->sorted_array_valid = false;
|
|
|
|
if (value < stats->min)
|
|
stats->min = value;
|
|
if (value > stats->max)
|
|
stats->max = value;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_push:
|
|
* @stats: An #igt_stats_t instance
|
|
* @value: An floating point
|
|
*
|
|
* Adds a new value to the @stats dataset and converts the igt_stats from
|
|
* an integer collection to a floating point one.
|
|
*/
|
|
void igt_stats_push_float(igt_stats_t *stats, double value)
|
|
{
|
|
igt_stats_ensure_capacity(stats, 1);
|
|
|
|
if (!stats->is_float) {
|
|
int n;
|
|
|
|
for (n = 0; n < stats->n_values; n++)
|
|
stats->values_f[n] = stats->values_u64[n];
|
|
|
|
stats->is_float = true;
|
|
}
|
|
|
|
stats->values_f[stats->n_values++] = value;
|
|
|
|
stats->mean_variance_valid = false;
|
|
stats->sorted_array_valid = false;
|
|
|
|
if (value < stats->range[0])
|
|
stats->range[0] = value;
|
|
if (value > stats->range[1])
|
|
stats->range[1] = value;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_push_array:
|
|
* @stats: An #igt_stats_t instance
|
|
* @values: (array length=n_values): A pointer to an array of data points
|
|
* @n_values: The number of data points to add
|
|
*
|
|
* Adds an array of values to the @stats dataset.
|
|
*/
|
|
void igt_stats_push_array(igt_stats_t *stats,
|
|
const uint64_t *values, unsigned int n_values)
|
|
{
|
|
unsigned int i;
|
|
|
|
igt_stats_ensure_capacity(stats, n_values);
|
|
|
|
for (i = 0; i < n_values; i++)
|
|
igt_stats_push(stats, values[i]);
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_min:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Retrieves the minimal value in @stats
|
|
*/
|
|
uint64_t igt_stats_get_min(igt_stats_t *stats)
|
|
{
|
|
igt_assert(!stats->is_float);
|
|
return stats->min;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_max:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Retrieves the maximum value in @stats
|
|
*/
|
|
uint64_t igt_stats_get_max(igt_stats_t *stats)
|
|
{
|
|
igt_assert(!stats->is_float);
|
|
return stats->max;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_range:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Retrieves the range of the values in @stats. The range is the difference
|
|
* between the highest and the lowest value.
|
|
*
|
|
* The range can be a deceiving characterization of the values, because there
|
|
* can be extreme minimal and maximum values that are just anomalies. Prefer
|
|
* the interquatile range (see igt_stats_get_iqr()) or an histogram.
|
|
*/
|
|
uint64_t igt_stats_get_range(igt_stats_t *stats)
|
|
{
|
|
return igt_stats_get_max(stats) - igt_stats_get_min(stats);
|
|
}
|
|
|
|
/*
 * qsort() comparator ordering uint64_t values ascending.
 * Returns -1, 0 or 1 when *pa is below, equal to or above *pb.
 */
static int cmp_u64(const void *pa, const void *pb)
{
	const uint64_t lhs = *(const uint64_t *)pa;
	const uint64_t rhs = *(const uint64_t *)pb;

	return (lhs > rhs) - (lhs < rhs);
}
|
|
|
|
/*
 * qsort() comparator ordering double values ascending.
 * Returns -1 when *pa < *pb, 1 when *pa > *pb and 0 otherwise (which
 * includes the case where neither comparison holds).
 */
static int cmp_f(const void *pa, const void *pb)
{
	const double lhs = *(const double *)pa;
	const double rhs = *(const double *)pb;

	return (lhs > rhs) - (lhs < rhs);
}
|
|
|
|
/*
 * Brings the sorted copy of the samples up to date. Does nothing when the
 * copy is already flagged valid; otherwise (re)allocates it if needed,
 * copies the current samples into it and sorts them ascending with the
 * comparator matching the storage type.
 */
static void igt_stats_ensure_sorted_values(igt_stats_t *stats)
{
	const size_t elem_size = sizeof(*stats->values_u64);

	if (stats->sorted_array_valid)
		return;

	/*
	 * igt_stats_ensure_capacity() frees the sorted array whenever the
	 * capacity grows (which invalidates it anyway), so it may be missing
	 * here: allocate it lazily, sized like the value array.
	 */
	if (stats->sorted_u64 == NULL) {
		stats->sorted_u64 = calloc(stats->capacity, elem_size);
		igt_assert(stats->sorted_u64);
	}

	memcpy(stats->sorted_u64, stats->values_u64,
	       elem_size * stats->n_values);

	if (stats->is_float)
		qsort(stats->sorted_u64, stats->n_values, elem_size, cmp_f);
	else
		qsort(stats->sorted_u64, stats->n_values, elem_size, cmp_u64);

	stats->sorted_array_valid = true;
}
|
|
|
|
/*
|
|
* We use Tukey's hinge for our quartiles determination.
|
|
* ends (end, lower_end) are exclusive.
|
|
*/
|
|
static double
|
|
igt_stats_get_median_internal(igt_stats_t *stats,
|
|
unsigned int start, unsigned int end,
|
|
unsigned int *lower_end /* out */,
|
|
unsigned int *upper_start /* out */)
|
|
{
|
|
unsigned int mid, n_values = end - start;
|
|
double median;
|
|
|
|
igt_stats_ensure_sorted_values(stats);
|
|
|
|
/* odd number of data points */
|
|
if (n_values % 2 == 1) {
|
|
/* median is the value in the middle (actual datum) */
|
|
mid = start + n_values / 2;
|
|
median = sorted_value(stats, mid);
|
|
|
|
/* the two halves contain the median value */
|
|
if (lower_end)
|
|
*lower_end = mid + 1;
|
|
if (upper_start)
|
|
*upper_start = mid;
|
|
|
|
/* even number of data points */
|
|
} else {
|
|
/*
|
|
* The middle is in between two indexes, 'mid' points at the
|
|
* lower one. The median is then the average between those two
|
|
* values.
|
|
*/
|
|
mid = start + n_values / 2 - 1;
|
|
median = (sorted_value(stats, mid) + sorted_value(stats, mid+1))/2.;
|
|
|
|
if (lower_end)
|
|
*lower_end = mid + 1;
|
|
if (upper_start)
|
|
*upper_start = mid + 1;
|
|
}
|
|
|
|
return median;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_quartiles:
|
|
* @stats: An #igt_stats_t instance
|
|
* @q1: (out): lower or 25th quartile
|
|
* @q2: (out): median or 50th quartile
|
|
* @q3: (out): upper or 75th quartile
|
|
*
|
|
* Retrieves the [quartiles](https://en.wikipedia.org/wiki/Quartile) of the
|
|
* @stats dataset.
|
|
*/
|
|
void igt_stats_get_quartiles(igt_stats_t *stats,
|
|
double *q1, double *q2, double *q3)
|
|
{
|
|
unsigned int lower_end, upper_start;
|
|
double ret;
|
|
|
|
if (stats->n_values < 3) {
|
|
if (q1)
|
|
*q1 = 0.;
|
|
if (q2)
|
|
*q2 = 0.;
|
|
if (q3)
|
|
*q3 = 0.;
|
|
return;
|
|
}
|
|
|
|
ret = igt_stats_get_median_internal(stats, 0, stats->n_values,
|
|
&lower_end, &upper_start);
|
|
if (q2)
|
|
*q2 = ret;
|
|
|
|
ret = igt_stats_get_median_internal(stats, 0, lower_end, NULL, NULL);
|
|
if (q1)
|
|
*q1 = ret;
|
|
|
|
ret = igt_stats_get_median_internal(stats, upper_start, stats->n_values,
|
|
NULL, NULL);
|
|
if (q3)
|
|
*q3 = ret;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_iqr:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Retrieves the
|
|
* [interquartile range](https://en.wikipedia.org/wiki/Interquartile_range)
|
|
* (IQR) of the @stats dataset.
|
|
*/
|
|
double igt_stats_get_iqr(igt_stats_t *stats)
|
|
{
|
|
double q1, q3;
|
|
|
|
igt_stats_get_quartiles(stats, &q1, NULL, &q3);
|
|
return (q3 - q1);
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_median:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Retrieves the median of the @stats dataset.
|
|
*/
|
|
double igt_stats_get_median(igt_stats_t *stats)
|
|
{
|
|
return igt_stats_get_median_internal(stats, 0, stats->n_values,
|
|
NULL, NULL);
|
|
}
|
|
|
|
/*
|
|
* Algorithm popularised by Knuth in:
|
|
*
|
|
* The Art of Computer Programming, volume 2: Seminumerical Algorithms,
|
|
* 3rd edn., p. 232. Boston: Addison-Wesley
|
|
*
|
|
* Source: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
|
*/
|
|
static void igt_stats_knuth_mean_variance(igt_stats_t *stats)
|
|
{
|
|
double mean = 0., m2 = 0.;
|
|
unsigned int i;
|
|
|
|
if (stats->mean_variance_valid)
|
|
return;
|
|
|
|
for (i = 0; i < stats->n_values; i++) {
|
|
double delta = unsorted_value(stats, i) - mean;
|
|
|
|
mean += delta / (i + 1);
|
|
m2 += delta * (unsorted_value(stats, i) - mean);
|
|
}
|
|
|
|
stats->mean = mean;
|
|
if (stats->n_values > 1 && !stats->is_population)
|
|
stats->variance = m2 / (stats->n_values - 1);
|
|
else
|
|
stats->variance = m2 / stats->n_values;
|
|
stats->mean_variance_valid = true;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_mean:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Retrieves the mean of the @stats dataset.
|
|
*/
|
|
double igt_stats_get_mean(igt_stats_t *stats)
|
|
{
|
|
igt_stats_knuth_mean_variance(stats);
|
|
|
|
return stats->mean;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_variance:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Retrieves the variance of the @stats dataset.
|
|
*/
|
|
double igt_stats_get_variance(igt_stats_t *stats)
|
|
{
|
|
igt_stats_knuth_mean_variance(stats);
|
|
|
|
return stats->variance;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_std_deviation:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Retrieves the standard deviation of the @stats dataset.
|
|
*/
|
|
double igt_stats_get_std_deviation(igt_stats_t *stats)
|
|
{
|
|
igt_stats_knuth_mean_variance(stats);
|
|
|
|
return sqrt(stats->variance);
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_iqm:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Retrieves the
|
|
* [interquartile mean](https://en.wikipedia.org/wiki/Interquartile_mean) (IQM)
|
|
* of the @stats dataset.
|
|
*
|
|
* The interquartile mean is a "statistical measure of central tendency".
|
|
* It is a truncated mean that discards the lowest and highest 25% of values,
|
|
* and calculates the mean value of the remaining central values.
|
|
*
|
|
* It's useful to hide outliers in measurements (due to cold cache etc).
|
|
*/
|
|
double igt_stats_get_iqm(igt_stats_t *stats)
|
|
{
|
|
unsigned int q1, q3, i;
|
|
double mean;
|
|
|
|
igt_stats_ensure_sorted_values(stats);
|
|
|
|
q1 = (stats->n_values + 3) / 4;
|
|
q3 = 3 * stats->n_values / 4;
|
|
|
|
mean = 0;
|
|
for (i = 0; i <= q3 - q1; i++)
|
|
mean += (sorted_value(stats, q1 + i) - mean) / (i + 1);
|
|
|
|
if (stats->n_values % 4) {
|
|
double rem = .5 * (stats->n_values % 4) / 4;
|
|
|
|
q1 = (stats->n_values) / 4;
|
|
q3 = (3 * stats->n_values + 3) / 4;
|
|
|
|
mean += rem * (sorted_value(stats, q1) - mean) / i++;
|
|
mean += rem * (sorted_value(stats, q3) - mean) / i++;
|
|
}
|
|
|
|
return mean;
|
|
}
|
|
|
|
/**
|
|
* igt_stats_get_trimean:
|
|
* @stats: An #igt_stats_t instance
|
|
*
|
|
* Retrieves the [trimean](https://en.wikipedia.org/wiki/Trimean) of the @stats
|
|
* dataset.
|
|
*
|
|
* The trimean is a the most efficient 3-point L-estimator, even more
|
|
* robust than the median at estimating the average of a sample population.
|
|
*/
|
|
double igt_stats_get_trimean(igt_stats_t *stats)
|
|
{
|
|
double q1, q2, q3;
|
|
igt_stats_get_quartiles(stats, &q1, &q2, &q3);
|
|
return (q1 + 2*q2 + q3) / 4;
|
|
}
|
|
|
|
/**
|
|
* igt_mean_init:
|
|
* @m: tracking structure
|
|
*
|
|
* Initializes or resets @m.
|
|
*/
|
|
void igt_mean_init(struct igt_mean *m)
|
|
{
|
|
memset(m, 0, sizeof(*m));
|
|
m->max = -HUGE_VAL;
|
|
m->min = HUGE_VAL;
|
|
}
|
|
|
|
/**
|
|
* igt_mean_add:
|
|
* @m: tracking structure
|
|
* @v: value
|
|
*
|
|
* Adds a new value @v to @m.
|
|
*/
|
|
void igt_mean_add(struct igt_mean *m, double v)
|
|
{
|
|
double delta = v - m->mean;
|
|
m->mean += delta / ++m->count;
|
|
m->sq += delta * (v - m->mean);
|
|
if (v < m->min)
|
|
m->min = v;
|
|
if (v > m->max)
|
|
m->max = v;
|
|
}
|
|
|
|
/**
|
|
* igt_mean_get:
|
|
* @m: tracking structure
|
|
*
|
|
* Computes the current mean of the samples tracked in @m.
|
|
*/
|
|
double igt_mean_get(struct igt_mean *m)
|
|
{
|
|
return m->mean;
|
|
}
|
|
|
|
/**
|
|
* igt_mean_get_variance:
|
|
* @m: tracking structure
|
|
*
|
|
* Computes the current variance of the samples tracked in @m.
|
|
*/
|
|
double igt_mean_get_variance(struct igt_mean *m)
|
|
{
|
|
return m->sq / m->count;
|
|
}
|
|
|