#!/usr/local/bin/perl
# ********************************************************************
# * Copyright (C) 2016 and later: Unicode, Inc. and others.
# * License & terms of use: http://www.unicode.org/copyright.html
# ********************************************************************
# ********************************************************************
# * COPYRIGHT:
# * Copyright (c) 2002, International Business Machines Corporation and
# * others. All Rights Reserved.
# ********************************************************************

package Dataset;
|
||
|
use Statistics::Descriptive;
|
||
|
use Statistics::Distributions;
|
||
|
use strict;
|
||
|
|
||
|
# Create a new Dataset with the given data.
#
# Usage:   Dataset->new(@samples)
# Args:    zero or more numeric samples.
# Returns: a blessed Dataset with _scale 1.0.  _mean is the sample mean
#          and _error is the t-based error described at getError().
#          With no samples both stay 0.0; with exactly one sample the
#          mean is set but the error stays 0.0, because a standard
#          deviation cannot be estimated from a single point.
sub new {
    my $class = shift;

    # Take a private copy of the samples.  A reference to @_ itself
    # would keep aliases to the caller's variables, so a later change on
    # the caller's side would silently alter the stored data.
    my @samples = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @samples;
    return $self if $n < 1;

    my $stats = Statistics::Descriptive::Full->new();
    $stats->add_data(@samples);
    $self->{_mean} = $stats->mean();

    if ($n >= 2) {
        # Use a t distribution rather than Gaussian because (a) we
        # assume an underlying normal dist, (b) we do not know the
        # standard deviation -- we estimate it from the data, and (c)
        # we MAY have a small sample size (also works for large n).
        # An upper-tail probability of 0.005 corresponds to a two-sided
        # 99% interval.
        my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
        $self->{_error} = $t * $stats->standard_deviation();
    }

    return $self;
}
|
||
|
|
||
|
# Replace the scaling factor applied to all reported values.
# A factor of 1.0 means no scaling.  The factor must be > 0; this is
# not checked here.
# Returns the factor that was stored.
sub setScale {
    my $self   = shift;
    my $factor = shift;
    return $self->{_scale} = $factor;
}
|
||
|
|
||
|
# Multiply the current scaling factor by the given value.
# Returns the updated scaling factor.
sub scaleBy {
    my $self       = shift;
    my $multiplier = shift;
    return $self->{_scale} *= $multiplier;
}
|
||
|
|
||
|
# Return the mean, with the scaling factor applied.
sub getMean {
    my ($self) = @_;
    my ($mean, $scale) = @{$self}{qw(_mean _scale)};
    return $mean * $scale;
}
|
||
|
|
||
|
# Return a 99% error based on the t distribution, with the scaling
# factor applied.  The dataset is described as
# getMean() +/- getError().
sub getError {
    my ($self) = @_;
    my $raw_error = $self->{_error};
    my $scale     = $self->{_scale};
    return $raw_error * $scale;
}
|
||
|
|
||
|
# Divide this Dataset by another and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# The smallest ratio is (mean - error) of self over (mean + error) of
# rhs, the largest is (mean + error) of self over (mean - error) of
# rhs.  The result's mean is the midpoint of those two ratios and its
# error is the distance from the midpoint to the smallest ratio.  The
# result's scale is the quotient of the two scales.
#
# NOTE(review): a zero denominator bound makes Perl die with a
# division-by-zero error, and the bounds are presumably only meaningful
# when rhs's mean exceeds its error -- confirm against callers.
sub divide {
    my ($self, $rhs) = @_;

    my ($num_mean, $num_err) = @{$self}{qw(_mean _error)};
    my ($den_mean, $den_err) = @{$rhs}{qw(_mean _error)};

    my $lowest  = ($num_mean - $num_err) / ($den_mean + $den_err);
    my $highest = ($num_mean + $num_err) / ($den_mean - $den_err);

    my $quotient = Dataset->new();
    my $midpoint = ($lowest + $highest) / 2;
    $quotient->{_mean}  = $midpoint;
    $quotient->{_error} = $midpoint - $lowest;
    $quotient->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $quotient;
}
|
||
|
|
||
|
# Subtract another Dataset from this one and return a new one,
# maintaining the mean+/-error.  The new Dataset has no data points.
#
# Args:    $rhs - the Dataset to subtract.
# Returns: a Dataset whose mean is the difference of the means and
#          whose error is the sum of the errors.  The result keeps this
#          Dataset's scale.
#
# The stored mean and error are unscaled, so when the two operands have
# different scales rhs is first converted into this Dataset's units.
# When the scales match the conversion factor is exactly 1 and the
# stored values are just the plain difference/sum.  Scales are expected
# to be > 0 (see setScale).
sub subtract {
    my ($self, $rhs) = @_;

    my $to_self_units =
        ($rhs->{_scale} == $self->{_scale})
        ? 1
        : $rhs->{_scale} / $self->{_scale};

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  - $rhs->{_mean}  * $to_self_units;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $to_self_units;
    $result->{_scale} = $self->{_scale};
    return $result;
}
|
||
|
|
||
|
# Add another Dataset to this one and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# Args:    $rhs - the Dataset to add.
# Returns: a Dataset whose mean is the sum of the means and whose error
#          is the sum of the errors.  The result keeps this Dataset's
#          scale.
#
# The stored mean and error are unscaled, so when the two operands have
# different scales rhs is first converted into this Dataset's units.
# When the scales match the conversion factor is exactly 1 and the
# stored values are just the plain sums.  Scales are expected to be > 0
# (see setScale).
sub add {
    my ($self, $rhs) = @_;

    my $to_self_units =
        ($rhs->{_scale} == $self->{_scale})
        ? 1
        : $rhs->{_scale} / $self->{_scale};

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  + $rhs->{_mean}  * $to_self_units;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $to_self_units;
    $result->{_scale} = $self->{_scale};
    return $result;
}
|
||
|
|
||
|
# Divide a Dataset by a scalar and return a new one.
# The new Dataset has no data points.
#
# Args:    $s - the non-zero number to divide by.
# Returns: a Dataset whose mean is this mean divided by $s and whose
#          error is this error divided by the magnitude of $s.  The
#          result keeps this Dataset's scale.
# Dies:    with Perl's division-by-zero error when $s is 0.
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} / $s;

    # The error is a magnitude: a negative divisor flips the sign of the
    # mean but must not produce a negative error.
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
|
||
|
|
||
|
# Multiply a Dataset by a scalar and return a new one.
# The new Dataset has no data points.
#
# Args:    $s - the number to multiply by.
# Returns: a Dataset whose mean is this mean times $s and whose error
#          is this error times the magnitude of $s.  The result keeps
#          this Dataset's scale.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} * $s;

    # The error is a magnitude: a negative factor flips the sign of the
    # mean but must not produce a negative error.
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
|
||
|
|
||
|
1;
|