# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
#!/usr/bin/python2
"""Starts project build on Google Cloud Builder.

Usage: build_project.py <project_dir>
"""

from __future__ import print_function

import datetime
import json
import logging
import os
import re
import sys

import six
import yaml

from oauth2client.client import GoogleCredentials
from googleapiclient.discovery import build

import build_lib

FUZZING_BUILD_TAG = 'fuzzing'

GCB_LOGS_BUCKET = 'oss-fuzz-gcb-logs'

CONFIGURATIONS = {
    'sanitizer-address': ['SANITIZER=address'],
    'sanitizer-dataflow': ['SANITIZER=dataflow'],
    'sanitizer-memory': ['SANITIZER=memory'],
    'sanitizer-undefined': ['SANITIZER=undefined'],
    'engine-libfuzzer': ['FUZZING_ENGINE=libfuzzer'],
    'engine-afl': ['FUZZING_ENGINE=afl'],
    'engine-honggfuzz': ['FUZZING_ENGINE=honggfuzz'],
    'engine-dataflow': ['FUZZING_ENGINE=dataflow'],
    'engine-none': ['FUZZING_ENGINE=none'],
}

DEFAULT_ARCHITECTURES = ['x86_64']
DEFAULT_ENGINES = ['libfuzzer', 'afl', 'honggfuzz']
DEFAULT_SANITIZERS = ['address', 'undefined']

LATEST_VERSION_FILENAME = 'latest.version'
LATEST_VERSION_CONTENT_TYPE = 'text/plain'

QUEUE_TTL_SECONDS = 60 * 60 * 24  # 24 hours.


def usage():
  """Exit with code 1 and display syntax to use this file."""
  sys.stderr.write('Usage: ' + sys.argv[0] + ' <project_dir>\n')
  sys.exit(1)


def set_yaml_defaults(project_name, project_yaml, image_project):
  """Set project.yaml's default parameters."""
  project_yaml.setdefault('disabled', False)
  project_yaml.setdefault('name', project_name)
  project_yaml.setdefault('image',
                          'gcr.io/{0}/{1}'.format(image_project, project_name))
  project_yaml.setdefault('architectures', DEFAULT_ARCHITECTURES)
  project_yaml.setdefault('sanitizers', DEFAULT_SANITIZERS)
  project_yaml.setdefault('fuzzing_engines', DEFAULT_ENGINES)
  project_yaml.setdefault('run_tests', True)
  project_yaml.setdefault('coverage_extra_args', '')
  project_yaml.setdefault('labels', {})


def is_supported_configuration(fuzzing_engine, sanitizer, architecture):
  """Check if the given configuration is supported."""
  fuzzing_engine_info = build_lib.ENGINE_INFO[fuzzing_engine]
  if architecture == 'i386' and sanitizer != 'address':
    return False
  return (sanitizer in fuzzing_engine_info.supported_sanitizers and
          architecture in fuzzing_engine_info.supported_architectures)


def get_sanitizers(project_yaml):
  """Retrieve sanitizers from project.yaml."""
  sanitizers = project_yaml['sanitizers']
  assert isinstance(sanitizers, list)

  processed_sanitizers = []
  for sanitizer in sanitizers:
    if isinstance(sanitizer, six.string_types):
      processed_sanitizers.append(sanitizer)
    elif isinstance(sanitizer, dict):
      for key in sanitizer.keys():
        processed_sanitizers.append(key)

  return processed_sanitizers


def workdir_from_dockerfile(dockerfile_lines):
  """Parse WORKDIR from the Dockerfile."""
  workdir_regex = re.compile(r'\s*WORKDIR\s*([^\s]+)')
  for line in dockerfile_lines:
    match = re.match(workdir_regex, line)
    if match:
      # We need to escape '$' since they're used for subsitutions in Container
      # Builer builds.
      return match.group(1).replace('$', '$$')

  return None


def load_project_yaml(project_name, project_yaml_file, image_project):
  """Loads project yaml and sets default values."""
  project_yaml = yaml.safe_load(project_yaml_file)
  set_yaml_defaults(project_name, project_yaml, image_project)
  return project_yaml


# pylint: disable=too-many-locals, too-many-statements, too-many-branches
def get_build_steps(project_name, project_yaml_file, dockerfile_lines,
                    image_project, base_images_project):
  """Returns build steps for project."""
  project_yaml = load_project_yaml(project_name, project_yaml_file,
                                   image_project)

  if project_yaml['disabled']:
    logging.info('Project "%s" is disabled.', project_name)
    return []

  name = project_yaml['name']
  image = project_yaml['image']
  language = project_yaml['language']
  run_tests = project_yaml['run_tests']
  time_stamp = datetime.datetime.now().strftime('%Y%m%d%H%M')

  build_steps = build_lib.project_image_steps(name, image, language)
  # Copy over MSan instrumented libraries.
  build_steps.append({
      'name': 'gcr.io/{0}/msan-libs-builder'.format(base_images_project),
      'args': [
          'bash',
          '-c',
          'cp -r /msan /workspace',
      ],
  })

  for fuzzing_engine in project_yaml['fuzzing_engines']:
    for sanitizer in get_sanitizers(project_yaml):
      for architecture in project_yaml['architectures']:
        if not is_supported_configuration(fuzzing_engine, sanitizer,
                                          architecture):
          continue

        env = CONFIGURATIONS['engine-' + fuzzing_engine][:]
        env.extend(CONFIGURATIONS['sanitizer-' + sanitizer])
        out = '/workspace/out/' + sanitizer
        stamped_name = '-'.join([name, sanitizer, time_stamp])
        latest_version_file = '-'.join(
            [name, sanitizer, LATEST_VERSION_FILENAME])
        zip_file = stamped_name + '.zip'
        stamped_srcmap_file = stamped_name + '.srcmap.json'
        bucket = build_lib.ENGINE_INFO[fuzzing_engine].upload_bucket
        if architecture != 'x86_64':
          bucket += '-' + architecture

        upload_url = build_lib.get_signed_url(
            build_lib.GCS_UPLOAD_URL_FORMAT.format(bucket, name, zip_file))
        srcmap_url = build_lib.get_signed_url(
            build_lib.GCS_UPLOAD_URL_FORMAT.format(bucket, name,
                                                   stamped_srcmap_file))
        latest_version_url = build_lib.GCS_UPLOAD_URL_FORMAT.format(
            bucket, name, latest_version_file)
        latest_version_url = build_lib.get_signed_url(
            latest_version_url, content_type=LATEST_VERSION_CONTENT_TYPE)

        targets_list_filename = build_lib.get_targets_list_filename(sanitizer)
        targets_list_url = build_lib.get_signed_url(
            build_lib.get_targets_list_url(bucket, name, sanitizer))

        env.append('OUT=' + out)
        env.append('MSAN_LIBS_PATH=/workspace/msan')
        env.append('ARCHITECTURE=' + architecture)
        env.append('FUZZING_LANGUAGE=' + language)

        workdir = workdir_from_dockerfile(dockerfile_lines)
        if not workdir:
          workdir = '/src'

        failure_msg = ('*' * 80 + '\nFailed to build.\nTo reproduce, run:\n'
                       'python infra/helper.py build_image {name}\n'
                       'python infra/helper.py build_fuzzers --sanitizer '
                       '{sanitizer} --engine {engine} --architecture '
                       '{architecture} {name}\n' + '*' * 80).format(
                           name=name,
                           sanitizer=sanitizer,
                           engine=fuzzing_engine,
                           architecture=architecture)

        build_steps.append(
            # compile
            {
                'name':
                    image,
                'env':
                    env,
                'args': [
                    'bash',
                    '-c',
                    # Remove /out to break loudly when a build script
                    # incorrectly uses /out instead of $OUT.
                    # `cd /src && cd {workdir}` (where {workdir} is parsed from
                    # the Dockerfile). Container Builder overrides our workdir
                    # so we need to add this step to set it back.
                    ('rm -r /out && cd /src && cd {workdir} && mkdir -p {out} '
                     '&& compile || (echo "{failure_msg}" && false)'
                    ).format(workdir=workdir, out=out, failure_msg=failure_msg),
                ],
            })

        if sanitizer == 'memory':
          # Patch dynamic libraries to use instrumented ones.
          build_steps.append({
              'name':
                  'gcr.io/{0}/msan-libs-builder'.format(base_images_project),
              'args': [
                  'bash',
                  '-c',
                  # TODO(ochang): Replace with just patch_build.py once
                  # permission in image is fixed.
                  'python /usr/local/bin/patch_build.py {0}'.format(out),
              ],
          })

        if run_tests:
          failure_msg = ('*' * 80 + '\nBuild checks failed.\n'
                         'To reproduce, run:\n'
                         'python infra/helper.py build_image {name}\n'
                         'python infra/helper.py build_fuzzers --sanitizer '
                         '{sanitizer} --engine {engine} --architecture '
                         '{architecture} {name}\n'
                         'python infra/helper.py check_build --sanitizer '
                         '{sanitizer} --engine {engine} --architecture '
                         '{architecture} {name}\n' + '*' * 80).format(
                             name=name,
                             sanitizer=sanitizer,
                             engine=fuzzing_engine,
                             architecture=architecture)

          build_steps.append(
              # test binaries
              {
                  'name':
                      'gcr.io/{0}/base-runner'.format(base_images_project),
                  'env':
                      env,
                  'args': [
                      'bash', '-c',
                      'test_all.py || (echo "{0}" && false)'.format(failure_msg)
                  ],
              })

        if project_yaml['labels']:
          # write target labels
          build_steps.append({
              'name':
                  image,
              'env':
                  env,
              'args': [
                  '/usr/local/bin/write_labels.py',
                  json.dumps(project_yaml['labels']),
                  out,
              ],
          })

        if sanitizer == 'dataflow' and fuzzing_engine == 'dataflow':
          dataflow_steps = dataflow_post_build_steps(name, env,
                                                     base_images_project)
          if dataflow_steps:
            build_steps.extend(dataflow_steps)
          else:
            sys.stderr.write('Skipping dataflow post build steps.\n')

        build_steps.extend([
            # generate targets list
            {
                'name':
                    'gcr.io/{0}/base-runner'.format(base_images_project),
                'env':
                    env,
                'args': [
                    'bash',
                    '-c',
                    'targets_list > /workspace/{0}'.format(
                        targets_list_filename),
                ],
            },
            # zip binaries
            {
                'name':
                    image,
                'args': [
                    'bash', '-c',
                    'cd {out} && zip -r {zip_file} *'.format(out=out,
                                                             zip_file=zip_file)
                ],
            },
            # upload srcmap
            {
                'name': 'gcr.io/{0}/uploader'.format(base_images_project),
                'args': [
                    '/workspace/srcmap.json',
                    srcmap_url,
                ],
            },
            # upload binaries
            {
                'name': 'gcr.io/{0}/uploader'.format(base_images_project),
                'args': [
                    os.path.join(out, zip_file),
                    upload_url,
                ],
            },
            # upload targets list
            {
                'name':
                    'gcr.io/{0}/uploader'.format(base_images_project),
                'args': [
                    '/workspace/{0}'.format(targets_list_filename),
                    targets_list_url,
                ],
            },
            # upload the latest.version file
            build_lib.http_upload_step(zip_file, latest_version_url,
                                       LATEST_VERSION_CONTENT_TYPE),
            # cleanup
            {
                'name': image,
                'args': [
                    'bash',
                    '-c',
                    'rm -r ' + out,
                ],
            },
        ])

  return build_steps


def dataflow_post_build_steps(project_name, env, base_images_project):
  """Appends dataflow post build steps."""
  steps = build_lib.download_corpora_steps(project_name)
  if not steps:
    return None

  steps.append({
      'name':
          'gcr.io/{0}/base-runner'.format(base_images_project),
      'env':
          env + [
              'COLLECT_DFT_TIMEOUT=2h',
              'DFT_FILE_SIZE_LIMIT=65535',
              'DFT_MIN_TIMEOUT=2.0',
              'DFT_TIMEOUT_RANGE=6.0',
          ],
      'args': [
          'bash', '-c',
          ('for f in /corpus/*.zip; do unzip -q $f -d ${f%%.*}; done && '
           'collect_dft || (echo "DFT collection failed." && false)')
      ],
      'volumes': [{
          'name': 'corpus',
          'path': '/corpus'
      }],
  })
  return steps


def get_logs_url(build_id, image_project='oss-fuzz'):
  """Returns url where logs are displayed for the build."""
  url_format = ('https://console.developers.google.com/logs/viewer?'
                'resource=build%2Fbuild_id%2F{0}&project={1}')
  return url_format.format(build_id, image_project)


# pylint: disable=no-member
def run_build(build_steps, project_name, tag):
  """Run the build for given steps on cloud build."""
  options = {}
  if 'GCB_OPTIONS' in os.environ:
    options = yaml.safe_load(os.environ['GCB_OPTIONS'])

  build_body = {
      'steps': build_steps,
      'timeout': str(build_lib.BUILD_TIMEOUT) + 's',
      'options': options,
      'logsBucket': GCB_LOGS_BUCKET,
      'tags': [project_name + '-' + tag,],
      'queueTtl': str(QUEUE_TTL_SECONDS) + 's',
  }

  credentials = GoogleCredentials.get_application_default()
  cloudbuild = build('cloudbuild',
                     'v1',
                     credentials=credentials,
                     cache_discovery=False)
  build_info = cloudbuild.projects().builds().create(projectId='oss-fuzz',
                                                     body=build_body).execute()
  build_id = build_info['metadata']['build']['id']

  print('Logs:', get_logs_url(build_id), file=sys.stderr)
  print(build_id)


def main():
  """Build and run projects."""
  if len(sys.argv) != 2:
    usage()

  image_project = 'oss-fuzz'
  base_images_project = 'oss-fuzz-base'
  project_dir = sys.argv[1].rstrip(os.path.sep)
  dockerfile_path = os.path.join(project_dir, 'Dockerfile')
  project_yaml_path = os.path.join(project_dir, 'project.yaml')
  project_name = os.path.basename(project_dir)

  with open(dockerfile_path) as dockerfile:
    dockerfile_lines = dockerfile.readlines()

  with open(project_yaml_path) as project_yaml_file:
    steps = get_build_steps(project_name, project_yaml_file, dockerfile_lines,
                            image_project, base_images_project)

  run_build(steps, project_name, FUZZING_BUILD_TAG)


if __name__ == '__main__':
  main()