You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
587 lines
22 KiB
587 lines
22 KiB
#include "strings/escaping.h"
|
|
|
|
#include <cassert>
|
|
|
|
#include "android-base/logging.h"
|
|
#include "strings/ascii_ctype.h"
|
|
|
|
namespace dynamic_depth {
|
|
|
|
// ----------------------------------------------------------------------
|
|
// ptrdiff_t Base64Unescape() - base64 decoder
|
|
// ptrdiff_t Base64Escape() - base64 encoder
|
|
// ptrdiff_t WebSafeBase64Unescape() - Google's variation of base64 decoder
|
|
// ptrdiff_t WebSafeBase64Escape() - Google's variation of base64 encoder
|
|
//
|
|
// Check out
|
|
// http://tools.ietf.org/html/rfc2045 for formal description, but what we
|
|
// care about is that...
|
|
// Take the encoded stuff in groups of 4 characters and turn each
|
|
// character into a code 0 to 63 thus:
|
|
// A-Z map to 0 to 25
|
|
// a-z map to 26 to 51
|
|
// 0-9 map to 52 to 61
|
|
// +(- for WebSafe) maps to 62
|
|
// /(_ for WebSafe) maps to 63
|
|
// There will be four numbers, all less than 64 which can be represented
|
|
// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
|
|
// Arrange the 6 digit binary numbers into three bytes as such:
|
|
// aaaaaabb bbbbcccc ccdddddd
|
|
// Equals signs (one or two) are used at the end of the encoded block to
|
|
// indicate that the text was not an integer multiple of three bytes long.
|
|
// ----------------------------------------------------------------------
|
|
|
|
bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
|
|
size_t szdest, const signed char* unbase64,
|
|
size_t* len) {
|
|
static const char kPad64Equals = '=';
|
|
static const char kPad64Dot = '.';
|
|
|
|
size_t destidx = 0;
|
|
int decode = 0;
|
|
int state = 0;
|
|
unsigned int ch = 0;
|
|
unsigned int temp = 0;
|
|
|
|
// If "char" is signed by default, using *src as an array index results in
|
|
// accessing negative array elements. Treat the input as a pointer to
|
|
// unsigned char to avoid this.
|
|
const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
|
|
|
|
// The GET_INPUT macro gets the next input character, skipping
|
|
// over any whitespace, and stopping when we reach the end of the
|
|
// string or when we read any non-data character. The arguments are
|
|
// an arbitrary identifier (used as a label for goto) and the number
|
|
// of data bytes that must remain in the input to avoid aborting the
|
|
// loop.
|
|
#define GET_INPUT(label, remain) \
|
|
label: \
|
|
--szsrc; \
|
|
ch = *src++; \
|
|
decode = unbase64[ch]; \
|
|
if (decode < 0) { \
|
|
if (ascii_isspace(ch) && szsrc >= remain) goto label; \
|
|
state = 4 - remain; \
|
|
break; \
|
|
}
|
|
|
|
// if dest is null, we're just checking to see if it's legal input
|
|
// rather than producing output. (I suspect this could just be done
|
|
// with a regexp...). We duplicate the loop so this test can be
|
|
// outside it instead of in every iteration.
|
|
|
|
if (dest) {
|
|
// This loop consumes 4 input bytes and produces 3 output bytes
|
|
// per iteration. We can't know at the start that there is enough
|
|
// data left in the string for a full iteration, so the loop may
|
|
// break out in the middle; if so 'state' will be set to the
|
|
// number of input bytes read.
|
|
|
|
while (szsrc >= 4) {
|
|
// We'll start by optimistically assuming that the next four
|
|
// bytes of the string (src[0..3]) are four good data bytes
|
|
// (that is, no nulls, whitespace, padding chars, or illegal
|
|
// chars). We need to test src[0..2] for nulls individually
|
|
// before constructing temp to preserve the property that we
|
|
// never read past a null in the string (no matter how long
|
|
// szsrc claims the string is).
|
|
|
|
if (!src[0] || !src[1] || !src[2] ||
|
|
((temp = ((unsigned(unbase64[src[0]]) << 18) |
|
|
(unsigned(unbase64[src[1]]) << 12) |
|
|
(unsigned(unbase64[src[2]]) << 6) |
|
|
(unsigned(unbase64[src[3]])))) &
|
|
0x80000000)) {
|
|
// Iff any of those four characters was bad (null, illegal,
|
|
// whitespace, padding), then temp's high bit will be set
|
|
// (because unbase64[] is -1 for all bad characters).
|
|
//
|
|
// We'll back up and resort to the slower decoder, which knows
|
|
// how to handle those cases.
|
|
|
|
GET_INPUT(first, 4);
|
|
temp = decode;
|
|
GET_INPUT(second, 3);
|
|
temp = (temp << 6) | decode;
|
|
GET_INPUT(third, 2);
|
|
temp = (temp << 6) | decode;
|
|
GET_INPUT(fourth, 1);
|
|
temp = (temp << 6) | decode;
|
|
} else {
|
|
// We really did have four good data bytes, so advance four
|
|
// characters in the string.
|
|
|
|
szsrc -= 4;
|
|
src += 4;
|
|
decode = -1;
|
|
ch = '\0';
|
|
}
|
|
|
|
// temp has 24 bits of input, so write that out as three bytes.
|
|
|
|
if (destidx + 3 > szdest) return false;
|
|
dest[destidx + 2] = temp;
|
|
temp >>= 8;
|
|
dest[destidx + 1] = temp;
|
|
temp >>= 8;
|
|
dest[destidx] = temp;
|
|
destidx += 3;
|
|
}
|
|
} else {
|
|
while (szsrc >= 4) {
|
|
if (!src[0] || !src[1] || !src[2] ||
|
|
((temp = ((unsigned(unbase64[src[0]]) << 18) |
|
|
(unsigned(unbase64[src[1]]) << 12) |
|
|
(unsigned(unbase64[src[2]]) << 6) |
|
|
(unsigned(unbase64[src[3]])))) &
|
|
0x80000000)) {
|
|
GET_INPUT(first_no_dest, 4);
|
|
GET_INPUT(second_no_dest, 3);
|
|
GET_INPUT(third_no_dest, 2);
|
|
GET_INPUT(fourth_no_dest, 1);
|
|
} else {
|
|
szsrc -= 4;
|
|
src += 4;
|
|
decode = -1;
|
|
ch = '\0';
|
|
}
|
|
destidx += 3;
|
|
}
|
|
}
|
|
|
|
#undef GET_INPUT
|
|
|
|
// if the loop terminated because we read a bad character, return
|
|
// now.
|
|
if (decode < 0 && ch != '\0' && ch != kPad64Equals && ch != kPad64Dot &&
|
|
!ascii_isspace(ch))
|
|
return false;
|
|
|
|
if (ch == kPad64Equals || ch == kPad64Dot) {
|
|
// if we stopped by hitting an '=' or '.', un-read that character -- we'll
|
|
// look at it again when we count to check for the proper number of
|
|
// equals signs at the end.
|
|
++szsrc;
|
|
--src;
|
|
} else {
|
|
// This loop consumes 1 input byte per iteration. It's used to
|
|
// clean up the 0-3 input bytes remaining when the first, faster
|
|
// loop finishes. 'temp' contains the data from 'state' input
|
|
// characters read by the first loop.
|
|
while (szsrc > 0) {
|
|
--szsrc;
|
|
ch = *src++;
|
|
decode = unbase64[ch];
|
|
if (decode < 0) {
|
|
if (ascii_isspace(ch)) {
|
|
continue;
|
|
} else if (ch == '\0') {
|
|
break;
|
|
} else if (ch == kPad64Equals || ch == kPad64Dot) {
|
|
// back up one character; we'll read it again when we check
|
|
// for the correct number of pad characters at the end.
|
|
++szsrc;
|
|
--src;
|
|
break;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Each input character gives us six bits of output.
|
|
temp = (temp << 6) | decode;
|
|
++state;
|
|
if (state == 4) {
|
|
// If we've accumulated 24 bits of output, write that out as
|
|
// three bytes.
|
|
if (dest) {
|
|
if (destidx + 3 > szdest) return false;
|
|
dest[destidx + 2] = temp;
|
|
temp >>= 8;
|
|
dest[destidx + 1] = temp;
|
|
temp >>= 8;
|
|
dest[destidx] = temp;
|
|
}
|
|
destidx += 3;
|
|
state = 0;
|
|
temp = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process the leftover data contained in 'temp' at the end of the input.
|
|
int expected_equals = 0;
|
|
switch (state) {
|
|
case 0:
|
|
// Nothing left over; output is a multiple of 3 bytes.
|
|
break;
|
|
|
|
case 1:
|
|
// Bad input; we have 6 bits left over.
|
|
return false;
|
|
|
|
case 2:
|
|
// Produce one more output byte from the 12 input bits we have left.
|
|
if (dest) {
|
|
if (destidx + 1 > szdest) return false;
|
|
temp >>= 4;
|
|
dest[destidx] = temp;
|
|
}
|
|
++destidx;
|
|
expected_equals = 2;
|
|
break;
|
|
|
|
case 3:
|
|
// Produce two more output bytes from the 18 input bits we have left.
|
|
if (dest) {
|
|
if (destidx + 2 > szdest) return false;
|
|
temp >>= 2;
|
|
dest[destidx + 1] = temp;
|
|
temp >>= 8;
|
|
dest[destidx] = temp;
|
|
}
|
|
destidx += 2;
|
|
expected_equals = 1;
|
|
break;
|
|
|
|
default:
|
|
// state should have no other values at this point.
|
|
LOG(FATAL) << "This can't happen; base64 decoder state = " << state;
|
|
}
|
|
|
|
// The remainder of the string should be all whitespace, mixed with
|
|
// exactly 0 equals signs, or exactly 'expected_equals' equals
|
|
// signs. (Always accepting 0 equals signs is a google extension
|
|
// not covered in the RFC, as is accepting dot as the pad character.)
|
|
|
|
int equals = 0;
|
|
while (szsrc > 0 && *src) {
|
|
if (*src == kPad64Equals || *src == kPad64Dot)
|
|
++equals;
|
|
else if (!ascii_isspace(*src))
|
|
return false;
|
|
--szsrc;
|
|
++src;
|
|
}
|
|
|
|
const bool ok = (equals == 0 || equals == expected_equals);
|
|
if (ok) *len = destidx;
|
|
return ok;
|
|
}
|
|
|
|
// The arrays below were generated by the following code
|
|
// #include <sys/time.h>
|
|
// #include <stdlib.h>
|
|
// #include <string.h>
|
|
// main()
|
|
// {
|
|
// static const char Base64[] =
|
|
// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
|
// char* pos;
|
|
// int idx, i, j;
|
|
// printf(" ");
|
|
// for (i = 0; i < 255; i += 8) {
|
|
// for (j = i; j < i + 8; j++) {
|
|
// pos = strchr(Base64, j);
|
|
// if ((pos == NULL) || (j == 0))
|
|
// idx = -1;
|
|
// else
|
|
// idx = pos - Base64;
|
|
// if (idx == -1)
|
|
// printf(" %2d, ", idx);
|
|
// else
|
|
// printf(" %2d/*%c*/,", idx, j);
|
|
// }
|
|
// printf("\n ");
|
|
// }
|
|
// }
|
|
//
|
|
// where the value of "Base64[]" was replaced by one of the base-64 conversion
|
|
// tables from the functions below.
|
|
static const signed char kUnBase64[] = {
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, 62 /*+*/, -1, -1, -1, 63 /*/ */, 52 /*0*/,
|
|
53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/, 59 /*7*/,
|
|
60 /*8*/, 61 /*9*/, -1, -1, -1, -1, -1,
|
|
-1, -1, 0 /*A*/, 1 /*B*/, 2 /*C*/, 3 /*D*/, 4 /*E*/,
|
|
5 /*F*/, 6 /*G*/, 07 /*H*/, 8 /*I*/, 9 /*J*/, 10 /*K*/, 11 /*L*/,
|
|
12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, 16 /*Q*/, 17 /*R*/, 18 /*S*/,
|
|
19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/, 25 /*Z*/,
|
|
-1, -1, -1, -1, -1, -1, 26 /*a*/,
|
|
27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/, 33 /*h*/,
|
|
34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/, 40 /*o*/,
|
|
41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/, 47 /*v*/,
|
|
48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1};
|
|
static const signed char kUnWebSafeBase64[] = {
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, 62 /*-*/, -1, -1, 52 /*0*/,
|
|
53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/, 59 /*7*/,
|
|
60 /*8*/, 61 /*9*/, -1, -1, -1, -1, -1,
|
|
-1, -1, 0 /*A*/, 1 /*B*/, 2 /*C*/, 3 /*D*/, 4 /*E*/,
|
|
5 /*F*/, 6 /*G*/, 07 /*H*/, 8 /*I*/, 9 /*J*/, 10 /*K*/, 11 /*L*/,
|
|
12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, 16 /*Q*/, 17 /*R*/, 18 /*S*/,
|
|
19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/, 25 /*Z*/,
|
|
-1, -1, -1, -1, 63 /*_*/, -1, 26 /*a*/,
|
|
27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/, 33 /*h*/,
|
|
34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/, 40 /*o*/,
|
|
41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/, 47 /*v*/,
|
|
48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1};
|
|
|
|
static bool Base64UnescapeInternal(const char* src, size_t slen, string* dest,
|
|
const signed char* unbase64) {
|
|
// Determine the size of the output string. Base64 encodes every 3 bytes into
|
|
// 4 characters. any leftover chars are added directly for good measure.
|
|
// This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548
|
|
const size_t dest_len = 3 * (slen / 4) + (slen % 4);
|
|
|
|
dest->resize(dest_len);
|
|
|
|
// We are getting the destination buffer by getting the beginning of the
|
|
// string and converting it into a char *.
|
|
size_t len;
|
|
const bool ok =
|
|
Base64UnescapeInternal(src, slen, dest->empty() ? NULL : &*dest->begin(),
|
|
dest_len, unbase64, &len);
|
|
if (!ok) {
|
|
dest->clear();
|
|
return false;
|
|
}
|
|
|
|
// could be shorter if there was padding
|
|
DCHECK_LE(len, dest_len);
|
|
dest->erase(len);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Base64Unescape(const string& src, string* dest) {
|
|
return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
|
|
}
|
|
|
|
bool WebSafeBase64Unescape(const string& src, string* dest) {
|
|
return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
|
|
}
|
|
|
|
// Base64Escape
|
|
//
|
|
// NOTE: We have to use an unsigned type for src because code built
|
|
// in the the /google tree treats characters as signed unless
|
|
// otherwised specified.
|
|
int Base64EscapeInternal(const unsigned char* src, int szsrc, char* dest,
|
|
int szdest, const char* base64, bool do_padding) {
|
|
static const char kPad64 = '=';
|
|
|
|
if (szsrc <= 0) return 0;
|
|
|
|
char* cur_dest = dest;
|
|
const unsigned char* cur_src = src;
|
|
|
|
// Three bytes of data encodes to four characters of cyphertext.
|
|
// So we can pump through three-byte chunks atomically.
|
|
while (szsrc > 2) { /* keep going until we have less than 24 bits */
|
|
if ((szdest -= 4) < 0) return 0;
|
|
cur_dest[0] = base64[cur_src[0] >> 2];
|
|
cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)];
|
|
cur_dest[2] = base64[((cur_src[1] & 0x0f) << 2) + (cur_src[2] >> 6)];
|
|
cur_dest[3] = base64[cur_src[2] & 0x3f];
|
|
|
|
cur_dest += 4;
|
|
cur_src += 3;
|
|
szsrc -= 3;
|
|
}
|
|
|
|
/* now deal with the tail (<=2 bytes) */
|
|
switch (szsrc) {
|
|
case 0:
|
|
// Nothing left; nothing more to do.
|
|
break;
|
|
case 1:
|
|
// One byte left: this encodes to two characters, and (optionally)
|
|
// two pad characters to round out the four-character cypherblock.
|
|
if ((szdest -= 2) < 0) return 0;
|
|
cur_dest[0] = base64[cur_src[0] >> 2];
|
|
cur_dest[1] = base64[(cur_src[0] & 0x03) << 4];
|
|
cur_dest += 2;
|
|
if (do_padding) {
|
|
if ((szdest -= 2) < 0) return 0;
|
|
cur_dest[0] = kPad64;
|
|
cur_dest[1] = kPad64;
|
|
cur_dest += 2;
|
|
}
|
|
break;
|
|
case 2:
|
|
// Two bytes left: this encodes to three characters, and (optionally)
|
|
// one pad character to round out the four-character cypherblock.
|
|
if ((szdest -= 3) < 0) return 0;
|
|
cur_dest[0] = base64[cur_src[0] >> 2];
|
|
cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)];
|
|
cur_dest[2] = base64[(cur_src[1] & 0x0f) << 2];
|
|
cur_dest += 3;
|
|
if (do_padding) {
|
|
if ((szdest -= 1) < 0) return 0;
|
|
cur_dest[0] = kPad64;
|
|
cur_dest += 1;
|
|
}
|
|
break;
|
|
default:
|
|
// Should not be reached: blocks of 3 bytes are handled
|
|
// in the while loop before this switch statement.
|
|
CHECK(false) << "Logic problem? szsrc = " << szsrc;
|
|
break;
|
|
}
|
|
return (cur_dest - dest);
|
|
}
|
|
|
|
static const char kBase64Chars[] =
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
|
|
|
// Digit conversion.
|
|
static const char kHexTable[513] =
|
|
"000102030405060708090a0b0c0d0e0f"
|
|
"101112131415161718191a1b1c1d1e1f"
|
|
"202122232425262728292a2b2c2d2e2f"
|
|
"303132333435363738393a3b3c3d3e3f"
|
|
"404142434445464748494a4b4c4d4e4f"
|
|
"505152535455565758595a5b5c5d5e5f"
|
|
"606162636465666768696a6b6c6d6e6f"
|
|
"707172737475767778797a7b7c7d7e7f"
|
|
"808182838485868788898a8b8c8d8e8f"
|
|
"909192939495969798999a9b9c9d9e9f"
|
|
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
|
|
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
|
|
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
|
|
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
|
|
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
|
|
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
|
|
|
|
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
|
|
// Base64 encodes three bytes of input at a time. If the input is not
|
|
// divisible by three, we pad as appropriate.
|
|
//
|
|
// (from http://tools.ietf.org/html/rfc3548)
|
|
// Special processing is performed if fewer than 24 bits are available
|
|
// at the end of the data being encoded. A full encoding quantum is
|
|
// always completed at the end of a quantity. When fewer than 24 input
|
|
// bits are available in an input group, zero bits are added (on the
|
|
// right) to form an integral number of 6-bit groups. Padding at the
|
|
// end of the data is performed using the '=' character. Since all base
|
|
// 64 input is an integral number of octets, only the following cases
|
|
// can arise:
|
|
|
|
// Base64 encodes each three bytes of input into four bytes of output.
|
|
size_t len = (input_len / 3) * 4;
|
|
|
|
if (input_len % 3 == 0) {
|
|
// (from http://tools.ietf.org/html/rfc3548)
|
|
// (1) the final quantum of encoding input is an integral multiple of 24
|
|
// bits; here, the final unit of encoded output will be an integral
|
|
// multiple of 4 characters with no "=" padding,
|
|
} else if (input_len % 3 == 1) {
|
|
// (from http://tools.ietf.org/html/rfc3548)
|
|
// (2) the final quantum of encoding input is exactly 8 bits; here, the
|
|
// final unit of encoded output will be two characters followed by two
|
|
// "=" padding characters, or
|
|
len += 2;
|
|
if (do_padding) {
|
|
len += 2;
|
|
}
|
|
} else { // (input_len % 3 == 2)
|
|
// (from http://tools.ietf.org/html/rfc3548)
|
|
// (3) the final quantum of encoding input is exactly 16 bits; here, the
|
|
// final unit of encoded output will be three characters followed by one
|
|
// "=" padding character.
|
|
len += 3;
|
|
if (do_padding) {
|
|
len += 1;
|
|
}
|
|
}
|
|
|
|
assert(len >= input_len); // make sure we didn't overflow
|
|
return len;
|
|
}
|
|
|
|
void Base64EscapeInternal(const unsigned char* src, size_t szsrc, string* dest,
|
|
bool do_padding, const char* base64_chars) {
|
|
const size_t calc_escaped_size =
|
|
CalculateBase64EscapedLenInternal(szsrc, do_padding);
|
|
dest->resize(calc_escaped_size);
|
|
const int escaped_len = Base64EscapeInternal(
|
|
src, static_cast<int>(szsrc), dest->empty() ? NULL : &*dest->begin(),
|
|
static_cast<int>(dest->size()), base64_chars, do_padding);
|
|
DCHECK_EQ(calc_escaped_size, escaped_len);
|
|
dest->erase(escaped_len);
|
|
}
|
|
|
|
void Base64Escape(const unsigned char* src, ptrdiff_t szsrc, string* dest,
|
|
bool do_padding) {
|
|
if (szsrc < 0) return;
|
|
Base64EscapeInternal(src, szsrc, dest, do_padding, kBase64Chars);
|
|
}
|
|
|
|
// This is a templated function so that T can be either a char* or a string.
|
|
template <typename T>
|
|
static void b2a_hex_t(const unsigned char* src, T dest, ptrdiff_t num) {
|
|
auto dest_ptr = &dest[0];
|
|
for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
|
|
const char* hex_p = &kHexTable[*src_ptr * 2];
|
|
std::copy(hex_p, hex_p + 2, dest_ptr);
|
|
}
|
|
}
|
|
|
|
string b2a_hex(const char* b, ptrdiff_t len) {
|
|
string result;
|
|
result.resize(len << 1);
|
|
b2a_hex_t<string&>(reinterpret_cast<const unsigned char*>(b), result, len);
|
|
return result;
|
|
}
|
|
|
|
} // namespace dynamic_depth
|