#!/usr/bin/env python3

# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# See LICENSE.txt included in this distribution for the specific
# language governing permissions and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at LICENSE.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END

#
# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
#

import os
import logging
import multiprocessing
import signal
import shutil
import subprocess
import sys
import tempfile
import threading
import time
from pathlib import Path
from requests import get, ConnectionError
from flask import Flask
from flask_httpauth import HTTPTokenAuth
from waitress import serve

from opengrok_tools.utils.log import get_console_logger, \
    get_log_level, get_class_basename
from opengrok_tools.deploy import deploy_war
from opengrok_tools.utils.indexer import Indexer
from opengrok_tools.sync import do_sync
from opengrok_tools.config_merge import merge_config_files
from opengrok_tools.utils.opengrok import list_projects, \
    add_project, delete_project, get_configuration
from opengrok_tools.utils.readconfig import read_config
from opengrok_tools.utils.exitvals import SUCCESS_EXITVAL
from opengrok_tools.utils.mirror import check_configuration
from opengrok_tools.mirror import OPENGROK_NO_MIRROR_ENV


fs_root = os.path.abspath('.').split(os.path.sep)[0] + os.path.sep
if os.environ.get('OPENGROK_TOMCAT_ROOT'):  # debug only
    tomcat_root = os.environ.get('OPENGROK_TOMCAT_ROOT')
else:
    tomcat_root = os.path.join(fs_root, "usr", "local", "tomcat")

if os.environ.get('OPENGROK_ROOT'):  # debug only
    OPENGROK_BASE_DIR = os.environ.get('OPENGROK_ROOT')
else:
    OPENGROK_BASE_DIR = os.path.join(fs_root, "opengrok")

OPENGROK_LIB_DIR = os.path.join(OPENGROK_BASE_DIR, "lib")
OPENGROK_DATA_ROOT = os.path.join(OPENGROK_BASE_DIR, "data")
OPENGROK_SRC_ROOT = os.path.join(OPENGROK_BASE_DIR, "src")
BODY_INCLUDE_FILE = os.path.join(OPENGROK_DATA_ROOT, "body_include")
OPENGROK_CONFIG_DIR = os.path.join(OPENGROK_BASE_DIR, "etc")
OPENGROK_CONFIG_FILE = os.path.join(OPENGROK_CONFIG_DIR,
                                    "configuration.xml")
OPENGROK_WEBAPPS_DIR = os.path.join(tomcat_root, "webapps")
OPENGROK_JAR = os.path.join(OPENGROK_LIB_DIR, 'opengrok.jar')

NOMIRROR_ENV_NAME = 'NOMIRROR'

# Token required by the REST endpoint; None means no token is required.
expected_token = None

# Handle of the Tomcat process; set in main() once Tomcat is started.
tomcat_popen = None

sleep_event = threading.Event()
app = Flask(__name__)
auth = HTTPTokenAuth(scheme='Bearer')
REINDEX_POINT = '/reindex'


def trigger_reindex():
    """
    Wake up the threads currently blocked in sleep_event.wait().
    """
    # Signal the sync/indexer thread.
    sleep_event.set()
    # Reset the flag right away so that the next wait() call blocks again.
    sleep_event.clear()


@auth.verify_token
def verify_token(token):
    """
    Token verification callback for the REST endpoint.
    :param token: token supplied with the request
    :return: "yes" if no token is expected or the supplied token matches
    the expected one, None otherwise
    """
    if expected_token is None:
        return "yes"

    if token is not None and token == expected_token:
        return "yes"


@app.route(REINDEX_POINT)
@auth.login_required
def index():
    """
    REST handler: trigger reindex and return short confirmation text.
    """
    trigger_reindex()

    return "Reindex triggered"


def rest_function(logger, rest_port):
    """
    Serve the REST app on all interfaces. Meant to be run in a thread.
    :param logger: logger instance
    :param rest_port: TCP port to listen on
    """
    logger.info("Starting REST app on port {}".format(rest_port))
    serve(app, host="0.0.0.0", port=rest_port)


def set_url_root(logger, url_root):
    """
    Set URL root and URI based on input
    :param logger: logger instance
    :param url_root: input
    :return: URI and URL root (the latter without the leading
    and trailing slash)
    """
    if not url_root:
        url_root = '/'

    if ' ' in url_root:
        # Logger.warn() is a deprecated alias, hence warning().
        logger.warning('Deployment path contains spaces. Deploying to root')
        url_root = '/'

    # Remove leading and trailing slashes
    if url_root.startswith('/'):
        url_root = url_root[1:]
    if url_root.endswith('/'):
        url_root = url_root[:-1]

    uri = "http://localhost:8080/" + url_root
    #
    # Make sure URI ends with slash. This is important for the various API
    # calls, notably for those that check the HTTP error code.
    # Normally accessing the URI without the terminating slash results in
    # HTTP redirect (code 302) instead of success (200).
    #
    if not uri.endswith('/'):
        uri = uri + '/'

    return uri, url_root


def get_war_name(url_root):
    """
    :param url_root: web app URL root
    :return: filename of the WAR file
    """
    if not url_root:
        return "ROOT.war"

    return url_root + ".war"


def deploy(logger, url_root):
    """
    Deploy the web application
    :param logger: logger instance
    :param url_root: web app URL root
    """

    logger.info('Deploying web application')
    webapps_dir = OPENGROK_WEBAPPS_DIR
    if not os.path.isdir(webapps_dir):
        raise Exception("{} is not a directory".format(webapps_dir))

    # Remove all the directories under webapps before deploying the WAR.
    for item in os.listdir(webapps_dir):
        subdir = os.path.join(webapps_dir, item)
        if os.path.isdir(subdir):
            logger.debug("Removing '{}' directory recursively".format(subdir))
            shutil.rmtree(subdir)

    deploy_war(logger, os.path.join(OPENGROK_LIB_DIR, "source.war"),
               os.path.join(OPENGROK_WEBAPPS_DIR, get_war_name(url_root)),
               OPENGROK_CONFIG_FILE, None)


def setup_redirect_source(logger, url_root):
    """
    Set up redirect from /source
    :param logger: logger instance
    :param url_root: web app URL root (target of the redirect)
    """
    logger.debug("Setting up redirect from /source to '{}'".format(url_root))
    source_dir = os.path.join(OPENGROK_WEBAPPS_DIR, "source")
    os.makedirs(source_dir, exist_ok=True)

    with open(os.path.join(source_dir, "index.jsp"), "w+") as index:
        index.write("<% response.sendRedirect(\"/{}\"); %>".format(url_root))


def wait_for_tomcat(logger, uri):
    """
    Active/busy waiting for Tomcat to come up.
    Currently there is no upper time bound.
    :param logger: logger instance
    :param uri: web app URI to poll until it returns HTTP status 200
    """
    logger.info("Waiting for Tomcat to start")

    while True:
        try:
            ret = get(uri)
            status = ret.status_code
        except ConnectionError:
            # Not listening yet; treat as a non-200 status.
            status = 0

        if status != 200:
            logger.debug("Got status {} for {}, sleeping for 1 second".
                         format(status, uri))
            time.sleep(1)
        else:
            break

    logger.info("Tomcat is ready")


def refresh_projects(logger, uri):
    """
    Ensure each immediate source root subdirectory is a project.
    :param logger: logger instance
    :param uri: web app URI
    """
    webapp_projects = list_projects(logger, uri)
    #
    # Bail out only if the list could not be retrieved. Empty list has to
    # be processed too, otherwise the first projects would never be added.
    # NOTE(review): assumes list_projects() returns None on failure
    # and empty list if there are no projects - confirm.
    #
    if webapp_projects is None:
        return

    logger.debug('Projects from the web app: {}'.format(webapp_projects))
    src_root = OPENGROK_SRC_ROOT

    # Add projects.
    for item in os.listdir(src_root):
        logger.debug('Got item {}'.format(item))
        if os.path.isdir(os.path.join(src_root, item)):
            if item not in webapp_projects:
                logger.info("Adding project {}".format(item))
                add_project(logger, item, uri)

    # Remove projects
    for item in webapp_projects:
        if not os.path.isdir(os.path.join(src_root, item)):
            logger.info("Deleting project {}".format(item))
            delete_project(logger, item, uri)


def save_config(logger, uri, config_path):
    """
    Retrieve configuration from the web app and write it to file.
    :param logger: logger instance
    :param uri: web app URI
    :param config_path: file path
    """

    config = get_configuration(logger, uri)
    if config is None:
        return

    logger.info('Saving configuration to {}'.format(config_path))
    with open(config_path, "w+") as config_file:
        config_file.write(config)


def merge_commands_env(commands, env):
    """
    Merge environment into command structure. If any of the commands has
    an environment already set, the env is merged in.
    :param commands: commands structure
    :param env: environment dictionary
    :return: updated commands structure
    """
    for cmd in commands:
        cmd_env = cmd.get('env')
        if cmd_env:
            # The command is a dictionary so update the value of its
            # 'env' key (there is no 'env' attribute).
            cmd_env.update(env)
        else:
            # Use a copy so that the commands do not share one dictionary.
            cmd['env'] = dict(env)

    return commands


def indexer_no_projects(logger, uri, config_path, extra_indexer_options):
    """
    Project less indexer
    :param logger: logger instance
    :param uri: web app URI
    :param config_path: path of the configuration file to write
    :param extra_indexer_options: string with whitespace separated
    indexer options, can be empty
    """

    wait_for_tomcat(logger, uri)

    while True:
        indexer_options = ['-s', OPENGROK_SRC_ROOT,
                           '-d', OPENGROK_DATA_ROOT,
                           '-c', '/usr/local/bin/ctags',
                           '--remote', 'on',
                           '-H',
                           '-W', config_path,
                           '-U', uri]
        if extra_indexer_options:
            logger.debug("Adding extra indexer options: {}".
                         format(extra_indexer_options))
            indexer_options.extend(extra_indexer_options.split())
        indexer = Indexer(indexer_options, logger=logger,
                          jar=OPENGROK_JAR, doprint=True)
        indexer.execute()

        logger.info("Waiting for reindex to be triggered")
        sleep_event.wait()


def timeout_loop(logger, sync_period):
    """
    Trigger reindex periodically. Never returns.
    :param logger: logger instance
    :param sync_period: period in minutes
    """
    while True:
        sleep_seconds = sync_period * 60
        logger.info("Sleeping for {} seconds".format(sleep_seconds))
        time.sleep(sleep_seconds)

        trigger_reindex()


def project_syncer(logger, loglevel, uri, config_path, numworkers, env):
    """
    Wrapper for running opengrok-sync.
    To be run in a thread/process in the background.
    :param logger: logger instance
    :param loglevel: log level passed to the sync
    :param uri: web app URI
    :param config_path: file path to save the web app configuration to
    :param numworkers: number of workers passed to the sync
    :param env: environment dictionary to merge into the sync commands
    """

    wait_for_tomcat(logger, uri)

    while True:
        refresh_projects(logger, uri)

        if os.environ.get('OPENGROK_SYNC_YML'):  # debug only
            config_file = os.environ.get('OPENGROK_SYNC_YML')
        else:
            config_file = os.path.join(fs_root, 'scripts', 'sync.yml')
        config = read_config(logger, config_file)
        if config is None:
            logger.error("Cannot read config file from {}".format(config_file))
            raise Exception("no sync config")

        projects = list_projects(logger, uri)
        if projects:
            #
            # The driveon=True is needed for the initial indexing of newly
            # added project, otherwise the incoming check in the
            # opengrok-mirror program would short circuit it.
            #
            if env:
                logger.info('Merging commands with environment')
                commands = merge_commands_env(config["commands"], env)
                logger.debug(config['commands'])
            else:
                commands = config["commands"]

            logger.info("Sync starting")
            do_sync(loglevel, commands, config.get('cleanup'),
                    projects, config.get("ignore_errors"), uri,
                    numworkers, driveon=True, logger=logger, print_output=True)
            logger.info("Sync done")

            # Workaround for https://github.com/oracle/opengrok/issues/1670
            Path(os.path.join(OPENGROK_DATA_ROOT, 'timestamp')).touch()

            save_config(logger, uri, config_path)

        logger.info("Waiting for reindex to be triggered")
        sleep_event.wait()


def create_bare_config(logger, use_projects, extra_indexer_options=None):
    """
    Create bare configuration file with a few basic settings.
    :param logger: logger instance
    :param use_projects: whether to run the indexer with the -P option
    :param extra_indexer_options: optional list of extra indexer options
    """

    logger.info('Creating bare configuration in {}'.
                format(OPENGROK_CONFIG_FILE))
    indexer_options = ['-s', OPENGROK_SRC_ROOT,
                       '-d', OPENGROK_DATA_ROOT,
                       '-c', '/usr/local/bin/ctags',
                       '--remote', 'on',
                       '-H',
                       '-S',
                       '-W', OPENGROK_CONFIG_FILE,
                       '--noIndex']

    if extra_indexer_options:
        if not isinstance(extra_indexer_options, list):
            raise Exception("extra_indexer_options has to be a list")
        indexer_options.extend(extra_indexer_options)
    if use_projects:
        indexer_options.append('-P')
    indexer = Indexer(indexer_options,
                      jar=OPENGROK_JAR,
                      logger=logger, doprint=True)
    indexer.execute()
    ret = indexer.getretcode()
    if ret != SUCCESS_EXITVAL:
        logger.error('Command returned {}'.format(ret))
        logger.error(indexer.geterroutput())
        raise Exception("Failed to create bare configuration")


def get_num_from_env(logger, env_name, default_value):
    """
    Get non-negative integer from environment variable.
    :param logger: logger instance
    :param env_name: name of the environment variable
    :param default_value: value to return if the variable is not set,
    is empty, is not a number or is negative
    :return: the number or the default value
    """
    value = default_value
    env_str = os.environ.get(env_name)
    if env_str:
        try:
            n = int(env_str)
            if n >= 0:
                value = n
        except ValueError:
            logger.error("{} is not a number: {}".
                         format(env_name, env_str))

    return value


def check_index_and_wipe_out(logger):
    """
    Check index by running the indexer. If the index does not match
    currently running version and the CHECK_INDEX environment variable
    is non empty, wipe out the directories under data root.
    :param logger: logger instance
    """
    check_index = os.environ.get('CHECK_INDEX')
    if check_index and os.path.exists(OPENGROK_CONFIG_FILE):
        logger.info('Checking if index matches current version')
        indexer_options = ['-R', OPENGROK_CONFIG_FILE, '--checkIndex']
        indexer = Indexer(indexer_options, logger=logger,
                          jar=OPENGROK_JAR, doprint=True)
        indexer.execute()
        if indexer.getretcode() == 1:
            logger.info('Wiping out data root')
            root = OPENGROK_DATA_ROOT
            for entry in os.listdir(root):
                path = os.path.join(root, entry)
                if os.path.isdir(path):
                    # Best effort: log the failure and carry on.
                    try:
                        logger.info("Removing '{}'".format(path))
                        shutil.rmtree(path)
                    except Exception as e:
                        logger.error("cannot delete '{}': {}".format(path, e))


def start_rest_thread(logger):
    """
    Start daemon thread serving the REST app. The port is taken from
    the REST_PORT environment variable (default 5000) and the expected
    token from the REST_TOKEN environment variable.
    :param logger: logger instance
    """
    rest_port = get_num_from_env(logger, 'REST_PORT', 5000)
    token = os.environ.get('REST_TOKEN')
    global expected_token
    if token:
        logger.debug("Setting expected token for REST endpoint "
                     "on port {}".format(rest_port))
        expected_token = token
    logger.debug("Starting REST thread to listen for requests "
                 "on port {} on the {} endpoint".
                 format(rest_port, REINDEX_POINT))
    rest_thread = threading.Thread(target=rest_function,
                                   name="REST thread",
                                   args=(logger, rest_port), daemon=True)
    rest_thread.start()


def start_timeout_thread(logger, sync_period):
    """
    Start daemon thread that triggers reindex periodically.
    :param logger: logger instance
    :param sync_period: period in minutes
    """
    logger.debug("Starting timeout thread")
    thread = threading.Thread(target=timeout_loop,
                              name="Timeout thread",
                              args=(logger, sync_period), daemon=True)
    thread.start()


def main():
    """
    Deploy the web app, prepare the configuration, start the background
    threads (sync/indexer, REST, timeout) and run Tomcat in the foreground.
    """
    log_level = os.environ.get('OPENGROK_LOG_LEVEL')
    if log_level:
        log_level = get_log_level(log_level)
    else:
        log_level = logging.INFO

    logger = get_console_logger(get_class_basename(), log_level)

    # Reporting the version is best effort only.
    try:
        with open(os.path.join(OPENGROK_BASE_DIR, "VERSION"), "r") as f:
            version = f.read()
            logger.info("Running version {}".format(version))
    except Exception as e:
        logger.debug("Cannot determine version: {}".format(e))

    uri, url_root = set_url_root(logger, os.environ.get('URL_ROOT'))
    logger.debug("URL_ROOT = {}".format(url_root))
    logger.debug("URI = {}".format(uri))

    sync_period = get_num_from_env(logger, 'SYNC_PERIOD_MINUTES', 10)
    if sync_period == 0:
        logger.info("periodic synchronization disabled")
    else:
        logger.info("synchronization period = {} minutes".format(sync_period))

    # Note that deploy is done before Tomcat is started.
    deploy(logger, url_root)

    #
    # set_url_root() strips the leading slash so the comparison has to be
    # done without it. No redirect is needed if the web app itself is
    # deployed under /source.
    #
    if url_root != 'source':
        setup_redirect_source(logger, url_root)

    env = {}
    extra_indexer_options = os.environ.get('INDEXER_OPT', '')
    if extra_indexer_options:
        logger.info("extra indexer options: {}".format(extra_indexer_options))
        env['OPENGROK_INDEXER_OPTIONAL_ARGS'] = extra_indexer_options
    if os.environ.get(NOMIRROR_ENV_NAME):
        env[OPENGROK_NO_MIRROR_ENV] = os.environ.get(NOMIRROR_ENV_NAME)
    logger.debug('Extra environment: {}'.format(env))

    use_projects = True
    if os.environ.get('AVOID_PROJECTS'):
        use_projects = False

    #
    # Create empty configuration to avoid the non existent file exception
    # in the web app during the first web app startup.
    #
    if not os.path.exists(OPENGROK_CONFIG_FILE) or \
            os.path.getsize(OPENGROK_CONFIG_FILE) == 0:
        create_bare_config(logger, use_projects, extra_indexer_options.split())

    #
    # Index check needs read-only configuration so it is placed
    # right after create_bare_config().
    #
    check_index_and_wipe_out(logger)

    #
    # If there is read-only configuration file, merge it with current
    # configuration.
    #
    read_only_config_file = os.environ.get('READONLY_CONFIG_FILE')
    if read_only_config_file and os.path.exists(read_only_config_file):
        logger.info('Merging read-only configuration from \'{}\' with current '
                    'configuration in \'{}\''.format(read_only_config_file,
                                                     OPENGROK_CONFIG_FILE))
        out_file = None
        with tempfile.NamedTemporaryFile(mode='w+', delete=False,
                                         prefix='merged_config') as tmp_out:
            out_file = tmp_out.name
            merge_config_files(read_only_config_file, OPENGROK_CONFIG_FILE,
                               tmp_out, jar=OPENGROK_JAR, loglevel=log_level)

        if out_file and os.path.getsize(out_file) > 0:
            shutil.move(out_file, OPENGROK_CONFIG_FILE)
        else:
            logger.warning('Failed to merge read-only configuration, '
                           'leaving the original in place')
            if out_file:
                os.remove(out_file)

    sync_enabled = True
    if use_projects:
        mirror_config = os.path.join(OPENGROK_CONFIG_DIR, "mirror.yml")
        if not os.path.exists(mirror_config):
            with open(mirror_config, 'w') as fp:
                fp.write("# Empty config file for opengrok-mirror\n")

        num_workers = get_num_from_env(logger, 'WORKERS',
                                       multiprocessing.cpu_count())
        logger.info('Number of sync workers: {}'.format(num_workers))

        if not os.environ.get(NOMIRROR_ENV_NAME):
            conf = read_config(logger, mirror_config)
            logger.info("Checking mirror configuration in '{}'".
                        format(mirror_config))
            if not check_configuration(conf):
                logger.error("Mirror configuration in '{}' is invalid, "
                             "disabling sync".format(mirror_config))
                sync_enabled = False

        worker_function = project_syncer
        syncer_args = (logger, log_level, uri,
                       OPENGROK_CONFIG_FILE,
                       num_workers, env)
    else:
        worker_function = indexer_no_projects
        syncer_args = (logger, uri, OPENGROK_CONFIG_FILE,
                       extra_indexer_options)

    if sync_enabled:
        logger.debug("Starting sync thread")
        sync_thread = threading.Thread(target=worker_function,
                                       name="Sync thread",
                                       args=syncer_args, daemon=True)
        sync_thread.start()

        start_rest_thread(logger)
        if sync_period > 0:
            start_timeout_thread(logger, sync_period)

    # Start Tomcat last. It will be the foreground process.
    logger.info("Starting Tomcat")
    global tomcat_popen
    tomcat_popen = subprocess.Popen([os.path.join(tomcat_root, 'bin',
                                                  'catalina.sh'),
                                     'run'])
    tomcat_popen.wait()


def signal_handler(signum, frame):
    """
    Terminate Tomcat (if it was started already) and exit.
    :param signum: signal number
    :param frame: current stack frame (unused)
    """
    print("Received signal {}".format(signum))

    # The signal can arrive before Tomcat is started in main().
    if tomcat_popen is not None:
        print("Terminating Tomcat {}".format(tomcat_popen))
        tomcat_popen.terminate()

    sys.exit(0)


if __name__ == "__main__":
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    main()