You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
313 lines
11 KiB
313 lines
11 KiB
// Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
#ifndef GEMMLOWP_META_STREAMS_H_
|
|
#define GEMMLOWP_META_STREAMS_H_
|
|
|
|
#include <iostream>
|
|
#include <typeinfo>
|
|
#include "base.h"
|
|
|
|
namespace gemmlowp {
|
|
namespace meta {
|
|
|
|
// Stream parameters tag/struct for row-major sources.
//
// Plain aggregate: the code in this header only reads these fields.
struct RowMajor {
  // Printed as "count" by Debug(). NOTE(review): presumably the number of
  // elements per row -- confirm against the callers that fill this in.
  int count;
  // Used by StreamUtil<InType, RowMajor>::Offset as a distance in bytes
  // (it is added to a std::uint8_t pointer).
  int stride;
};
|
|
|
|
// Stream parameters tag/struct for row-major sources that also carry two
// sum-offset values.
//
// Plain aggregate: the code in this header only reads these fields.
struct RowMajorWithSum {
  // Rounded up to a multiple of pack_size when sizing packed storage (see
  // Stream<..., RowMajorWithSum>::PackedStride).
  int count;
  // Used by StreamUtil<InType, RowMajorWithSum>::Offset as a distance in
  // bytes (it is added to a std::uint8_t pointer).
  int stride;
  // Only printed by Debug() in this header. NOTE(review): presumably consumed
  // by the platform-specific Pack implementations -- confirm there.
  int multiplicative_sum_offset;
  // Only printed by Debug() in this header; see the note above.
  int additive_sum_offset;
};
|
|
|
|
// Stream parameters tag/struct for column-major sources that also carry two
// sum-offset values.
//
// Plain aggregate: the code in this header only reads these fields.
struct ColumnMajorWithSum {
  // Rounded up to a multiple of pack_size when sizing packed storage (see
  // Stream<..., ColumnMajorWithSum>::PackedStride).
  int count;
  // Used by StreamUtil<InType, ColumnMajorWithSum>::Offset as a distance in
  // bytes, multiplied by the "advance" index (it is added to a std::uint8_t
  // pointer).
  int stride;
  // Only printed by Debug() in this header. NOTE(review): presumably consumed
  // by the platform-specific Pack implementations -- confirm there.
  int multiplicative_sum_offset;
  // Only printed by Debug() in this header; see the note above.
  int additive_sum_offset;
};
|
|
|
|
template <typename InType>
|
|
class StreamUtil<InType, RowMajor> {
|
|
public:
|
|
static const InType* Offset(const RowMajor& params, const InType* source,
|
|
int offset_stride, int offset_advance) {
|
|
return reinterpret_cast<const InType*>(
|
|
reinterpret_cast<const std::uint8_t*>(source) +
|
|
offset_stride * params.stride + offset_advance * sizeof(InType));
|
|
}
|
|
|
|
static InType* Offset(const RowMajor& params, InType* source,
|
|
int offset_stride, int offset_advance) {
|
|
return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) +
|
|
offset_stride * params.stride +
|
|
offset_advance * sizeof(InType));
|
|
}
|
|
|
|
static int Scratch(const RowMajor& params, int lanes_count, int pack_size) {
|
|
return AlignTo<64>(lanes_count * AlignTo(pack_size, params.stride));
|
|
}
|
|
};
|
|
|
|
template <typename InType>
|
|
class StreamUtil<InType, RowMajorWithSum> {
|
|
public:
|
|
static const InType* Offset(const RowMajorWithSum& params,
|
|
const InType* source, int offset_stride,
|
|
int offset_advance) {
|
|
return reinterpret_cast<const InType*>(
|
|
reinterpret_cast<const std::uint8_t*>(source) +
|
|
offset_stride * params.stride + offset_advance * sizeof(InType));
|
|
}
|
|
|
|
static InType* Offset(const RowMajorWithSum& params, InType* source,
|
|
int offset_stride, int offset_advance) {
|
|
return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) +
|
|
offset_stride * params.stride +
|
|
offset_advance * sizeof(InType));
|
|
}
|
|
|
|
static int Scratch(const RowMajorWithSum& params, int lanes_count,
|
|
int pack_size) {
|
|
return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
|
|
AlignTo(pack_size, params.count));
|
|
}
|
|
};
|
|
|
|
template <typename InType>
|
|
class StreamUtil<InType, ColumnMajorWithSum> {
|
|
public:
|
|
static const InType* Offset(const ColumnMajorWithSum& params,
|
|
const InType* source, int offset_stride,
|
|
int offset_advance) {
|
|
return reinterpret_cast<const InType*>(
|
|
reinterpret_cast<const std::uint8_t*>(source) +
|
|
params.stride * offset_advance + offset_stride * sizeof(InType));
|
|
}
|
|
|
|
static const InType* Offset(const ColumnMajorWithSum& params, InType* source,
|
|
int offset_stride, int offset_advance) {
|
|
return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) +
|
|
params.stride * offset_advance +
|
|
offset_stride * sizeof(InType));
|
|
}
|
|
|
|
static int Scratch(const ColumnMajorWithSum& params, int lanes_count,
|
|
int pack_size) {
|
|
return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
|
|
AlignTo(pack_size, params.count));
|
|
}
|
|
};
|
|
|
|
template <typename InType, int lanes_count, int pack_size, int leftovers>
|
|
class Stream<InType, lanes_count, pack_size, leftovers, RowMajor> {
|
|
public:
|
|
static void Pack(const InType* in, const RowMajor& params, InType* out) {
|
|
#ifdef DEBUG
|
|
#ifdef DEBUG_METAGEMM_VERBOSE
|
|
std::cout << "RowMajor(" << std::string(typeid(InType).name())
|
|
<< ")::Pack() -- " << lanes_count << "x" << pack_size << " + "
|
|
<< leftovers << std::endl;
|
|
#endif
|
|
#else
|
|
if (lanes_count != 0) {
|
|
std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl;
|
|
std::exit(1);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static int UnpackedAdvance(const RowMajor& params) {
|
|
return sizeof(InType) * pack_size;
|
|
}
|
|
|
|
static int PackedAdvance(const RowMajor& params) {
|
|
return sizeof(InType) * pack_size * lanes_count;
|
|
}
|
|
|
|
static int UnpackedStride(const RowMajor& params) {
|
|
return lanes_count * params.stride;
|
|
}
|
|
|
|
static int PackedStride(const RowMajor& params) {
|
|
return AlignTo<32>(lanes_count * AlignTo<pack_size>(params.stride));
|
|
}
|
|
|
|
static int Scratch(const RowMajor& params) { return PackedStride(params); }
|
|
|
|
#ifdef DEBUG
|
|
#ifdef DEBUG_METAGEMM_VERBOSE
|
|
static void Debug(const RowMajor& params) {
|
|
std::cout << "RowMajor(" << typeid(InType).name() << ")" << std::endl;
|
|
std::cout << " dims: " << lanes_count << "x" << pack_size << " + "
|
|
<< leftovers << std::endl;
|
|
std::cout << " scratch: " << Scratch(params) << std::endl;
|
|
std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl;
|
|
std::cout << " packed advance: " << PackedAdvance(params) << std::endl;
|
|
std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl;
|
|
std::cout << " packed stride: " << PackedStride(params) << std::endl;
|
|
std::cout << " params:" << std::endl;
|
|
std::cout << " count: " << params.count << std::endl;
|
|
std::cout << " stride: " << params.stride << std::endl;
|
|
}
|
|
#endif
|
|
#endif
|
|
};
|
|
|
|
template <typename InType, int lanes_count, int pack_size, int leftovers>
|
|
class Stream<InType, lanes_count, pack_size, leftovers, RowMajorWithSum> {
|
|
public:
|
|
static void Pack(const InType* in, const RowMajorWithSum& params,
|
|
InType* out) {
|
|
#ifdef DEBUG
|
|
#ifdef DEBUG_METAGEMM_VERBOSE
|
|
std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")::Pack() -- "
|
|
<< lanes_count << "x" << pack_size << " + " << leftovers
|
|
<< std::endl;
|
|
#endif
|
|
#else
|
|
if (lanes_count != 0) {
|
|
std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl;
|
|
std::exit(1);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static int UnpackedAdvance(const RowMajorWithSum& params) {
|
|
return sizeof(InType) * pack_size;
|
|
}
|
|
|
|
static int PackedAdvance(const RowMajorWithSum& params) {
|
|
return sizeof(InType) * pack_size * lanes_count;
|
|
}
|
|
|
|
static int UnpackedStride(const RowMajorWithSum& params) {
|
|
return sizeof(InType) * lanes_count * params.stride;
|
|
}
|
|
|
|
static int PackedStride(const RowMajorWithSum& params) {
|
|
return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
|
|
AlignTo<pack_size>(params.count));
|
|
}
|
|
|
|
static int Scratch(const RowMajorWithSum& params) {
|
|
return PackedStride(params);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
#ifdef DEBUG_METAGEMM_VERBOSE
|
|
static void Debug(const RowMajorWithSum& params) {
|
|
std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")"
|
|
<< std::endl;
|
|
std::cout << " dims: " << lanes_count << "x" << pack_size << " + "
|
|
<< leftovers << std::endl;
|
|
std::cout << " scratch: " << Scratch(params) << std::endl;
|
|
std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl;
|
|
std::cout << " packed advance: " << PackedAdvance(params) << std::endl;
|
|
std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl;
|
|
std::cout << " packed stride: " << PackedStride(params) << std::endl;
|
|
std::cout << " params:" << std::endl;
|
|
std::cout << " count: " << params.count << std::endl;
|
|
std::cout << " stride: " << params.stride << std::endl;
|
|
std::cout << " multiplicative_sum_offset: "
|
|
<< params.multiplicative_sum_offset << std::endl;
|
|
std::cout << " additive_sum_offset: " << params.additive_sum_offset
|
|
<< std::endl;
|
|
}
|
|
#endif
|
|
#endif
|
|
};
|
|
|
|
template <typename InType, int lanes_count, int pack_size, int leftovers>
|
|
class Stream<InType, lanes_count, pack_size, leftovers, ColumnMajorWithSum> {
|
|
public:
|
|
static void Pack(const InType* in, const ColumnMajorWithSum& params,
|
|
InType* out) {
|
|
#ifdef DEBUG
|
|
#ifdef DEBUG_METAGEMM_VERBOSE
|
|
std::cout << "ColumnMajorWithSum(" << typeid(InType).name()
|
|
<< ")::Pack() -- " << lanes_count << "x" << pack_size << " + "
|
|
<< leftovers << std::endl;
|
|
#endif
|
|
#else
|
|
if (lanes_count != 0) {
|
|
std::cerr << "FATAL: ColumnMajorWithSum::Pack not implemented."
|
|
<< std::endl;
|
|
std::exit(1);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static int UnpackedAdvance(const ColumnMajorWithSum& params) {
|
|
return sizeof(InType) * pack_size * params.stride;
|
|
}
|
|
|
|
static int PackedAdvance(const ColumnMajorWithSum& params) {
|
|
return sizeof(InType) * pack_size * lanes_count;
|
|
}
|
|
|
|
static int UnpackedStride(const ColumnMajorWithSum& params) {
|
|
return sizeof(InType) * lanes_count;
|
|
}
|
|
|
|
static int PackedStride(const ColumnMajorWithSum& params) {
|
|
return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
|
|
AlignTo<pack_size>(params.count));
|
|
}
|
|
|
|
static int Scratch(const ColumnMajorWithSum& params) {
|
|
return PackedStride(params);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
#ifdef DEBUG_METAGEMM_VERBOSE
|
|
static void Debug(const ColumnMajorWithSum& params) {
|
|
std::cout << "ColumnMajorWithSum(" << typeid(InType).name() << ")"
|
|
<< std::endl;
|
|
std::cout << " dims: " << lanes_count << "x" << pack_size << " + "
|
|
<< leftovers << std::endl;
|
|
std::cout << " scratch: " << Scratch(params) << std::endl;
|
|
std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl;
|
|
std::cout << " packed advance: " << PackedAdvance(params) << std::endl;
|
|
std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl;
|
|
std::cout << " packed stride: " << PackedStride(params) << std::endl;
|
|
std::cout << " params:" << std::endl;
|
|
std::cout << " count: " << params.count << std::endl;
|
|
std::cout << " stride: " << params.stride << std::endl;
|
|
std::cout << " multiplicative_sum_offset: "
|
|
<< params.multiplicative_sum_offset << std::endl;
|
|
std::cout << " additive_sum_offset: " << params.additive_sum_offset
|
|
<< std::endl;
|
|
}
|
|
#endif
|
|
#endif
|
|
};
|
|
|
|
} // namespace meta
|
|
} // namespace gemmlowp
|
|
|
|
#ifdef GEMMLOWP_NEON_32
|
|
#include "streams_arm_32.h"
|
|
#elif defined(GEMMLOWP_NEON_64)
|
|
#include "streams_arm_64.h"
|
|
#endif
|
|
|
|
#endif // GEMMLOWP_META_STREAMS_H_
|