xref: /OpenGrok/docker/start.py (revision b7ca2541c62176e5ead0ed2ade6d0edc17db61ea)
1#!/usr/bin/env python3
2
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# See LICENSE.txt included in this distribution for the specific
10# language governing permissions and limitations under the License.
11#
12# When distributing Covered Code, include this CDDL HEADER in each
13# file and include the License file at LICENSE.txt.
14# If applicable, add the following below this CDDL HEADER, with the
15# fields enclosed by brackets "[]" replaced with your own identifying
16# information: Portions Copyright [yyyy] [name of copyright owner]
17#
18# CDDL HEADER END
19
20#
21# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
22#
23
24import os
25import logging
26import multiprocessing
27import signal
28import shutil
29import subprocess
30import sys
31import tempfile
32import threading
33import time
34from pathlib import Path
35from requests import get, ConnectionError
36from flask import Flask
37from flask_httpauth import HTTPTokenAuth
38from waitress import serve
39
40from opengrok_tools.utils.log import get_console_logger, \
41    get_log_level, get_class_basename
42from opengrok_tools.deploy import deploy_war
43from opengrok_tools.utils.indexer import Indexer
44from opengrok_tools.sync import do_sync
45from opengrok_tools.config_merge import merge_config_files
46from opengrok_tools.utils.opengrok import list_projects, \
47    add_project, delete_project, get_configuration
48from opengrok_tools.utils.readconfig import read_config
49from opengrok_tools.utils.exitvals import SUCCESS_EXITVAL
50from opengrok_tools.utils.mirror import check_configuration
51from opengrok_tools.mirror import OPENGROK_NO_MIRROR_ENV
52
53
# Root of the file system the current working directory lives on.
fs_root = os.path.abspath('.').split(os.path.sep)[0] + os.path.sep

# The OPENGROK_TOMCAT_ROOT and OPENGROK_ROOT environment variables are meant
# for debugging only; when unset (or empty) the defaults below are used.
tomcat_root = os.environ.get('OPENGROK_TOMCAT_ROOT') or \
    os.path.join(fs_root, "usr", "local", "tomcat")
OPENGROK_BASE_DIR = os.environ.get('OPENGROK_ROOT') or \
    os.path.join(fs_root, "opengrok")

# Directories and files located under the OpenGrok base directory.
OPENGROK_LIB_DIR = os.path.join(OPENGROK_BASE_DIR, "lib")
OPENGROK_JAR = os.path.join(OPENGROK_LIB_DIR, 'opengrok.jar')
OPENGROK_SRC_ROOT = os.path.join(OPENGROK_BASE_DIR, "src")
OPENGROK_DATA_ROOT = os.path.join(OPENGROK_BASE_DIR, "data")
BODY_INCLUDE_FILE = os.path.join(OPENGROK_DATA_ROOT, "body_include")
OPENGROK_CONFIG_DIR = os.path.join(OPENGROK_BASE_DIR, "etc")
OPENGROK_CONFIG_FILE = os.path.join(OPENGROK_CONFIG_DIR, "configuration.xml")

# Directory where Tomcat web applications are deployed.
OPENGROK_WEBAPPS_DIR = os.path.join(tomcat_root, "webapps")

# Name of the environment variable used to turn off mirroring.
NOMIRROR_ENV_NAME = 'NOMIRROR'
76
# URL path of the REST endpoint that triggers reindex.
REINDEX_POINT = '/reindex'

# Token the REST endpoint requires. None means any request is accepted.
expected_token = None

# Event the sync/indexer thread waits on between runs.
sleep_event = threading.Event()

# REST application together with its Bearer token authentication.
app = Flask(__name__)
auth = HTTPTokenAuth(scheme='Bearer')
83
84
def trigger_reindex():
    """
    Wake up the sync/indexer thread waiting on the sleep event.
    """
    # Set the event to release the waiter ...
    sleep_event.set()
    # ... and reset it right away so that the next wait blocks again.
    sleep_event.clear()
89
90
@auth.verify_token
def verify_token(token):
    """
    Verify the Bearer token supplied with a REST request.
    :param token: token extracted from the request (may be None)
    :return: "yes" if the request is authorized, None otherwise
    """
    # Imported locally so that this function does not depend on
    # the module level imports being changed.
    import hmac

    # No token configured means the endpoint is open to everyone.
    if expected_token is None:
        return "yes"

    if token is None:
        return None

    #
    # Use constant time comparison so that the response time does not leak
    # how many leading characters of the supplied token were correct.
    # The values are encoded to bytes because compare_digest() refuses
    # strings with non-ASCII characters.
    #
    if hmac.compare_digest(token.encode('utf-8'),
                           expected_token.encode('utf-8')):
        return "yes"

    return None
98
99
@app.route(REINDEX_POINT)
@auth.login_required
def index():
    """
    Handler of the reindex REST endpoint. Requires successful
    token verification.
    :return: text of the response
    """
    trigger_reindex()
    return "Reindex triggered"
106
107
def rest_function(logger, rest_port):
    """
    Serve the REST application on all interfaces.
    This is meant to be run in a separate thread.
    :param logger: logger instance
    :param rest_port: port number to listen on
    """
    message = "Starting REST app on port {}".format(rest_port)
    logger.info(message)
    serve(app, port=rest_port, host="0.0.0.0")
111
112
def set_url_root(logger, url_root):
    """
    Set URL root and URI based on input
    :param logger: logger instance
    :param url_root: input, e.g. value of an environment variable.
    Empty value or None means the root.
    :return: tuple of URI (always terminated with slash) and URL root
    (with one leading and one trailing slash removed)
    """
    if not url_root:
        url_root = '/'

    if ' ' in url_root:
        # Logger.warn() is deprecated, use warning() instead.
        logger.warning('Deployment path contains spaces. Deploying to root')
        url_root = '/'

    # Remove leading and trailing slashes
    if url_root.startswith('/'):
        url_root = url_root[1:]
    if url_root.endswith('/'):
        url_root = url_root[:-1]

    uri = "http://localhost:8080/" + url_root
    #
    # Make sure URI ends with slash. This is important for the various API
    # calls, notably for those that check the HTTP error code.
    # Normally accessing the URI without the terminating slash results in
    # HTTP redirect (code 302) instead of success (200).
    #
    if not uri.endswith('/'):
        uri = uri + '/'

    return uri, url_root
144
145
def get_war_name(url_root):
    """
    Map URL root to the name of the WAR file to deploy.
    :param url_root: web app URL root; empty value stands for the root
    :return: filename of the WAR file
    """
    return "ROOT.war" if len(url_root) == 0 else url_root + ".war"
155
156
def deploy(logger, url_root):
    """
    Deploy the web application into the Tomcat webapps directory.
    All directories found directly under the webapps directory
    are removed first.
    :param logger: logger instance
    :param url_root: web app URL root
    """

    logger.info('Deploying web application')
    webapps_dir = os.path.join(tomcat_root, 'webapps')
    if not os.path.isdir(webapps_dir):
        raise Exception("{} is not a directory".format(webapps_dir))

    # Plain files are left alone, only directories are removed.
    for name in os.listdir(webapps_dir):
        path = os.path.join(webapps_dir, name)
        if not os.path.isdir(path):
            continue

        logger.debug("Removing '{}' directory recursively".format(path))
        shutil.rmtree(path)

    source_war = os.path.join(OPENGROK_LIB_DIR, "source.war")
    target_war = os.path.join(OPENGROK_WEBAPPS_DIR, get_war_name(url_root))
    deploy_war(logger, source_war, target_war, OPENGROK_CONFIG_FILE, None)
178
179
def setup_redirect_source(logger, url_root):
    """
    Set up redirect from /source by creating index.jsp in the 'source'
    directory under the webapps directory.
    :param logger: logger instance
    :param url_root: web app URL root to redirect to
    """
    logger.debug("Setting up redirect from /source to '{}'".format(url_root))

    redirect_dir = os.path.join(OPENGROK_WEBAPPS_DIR, "source")
    os.makedirs(redirect_dir, exist_ok=True)

    jsp_path = os.path.join(redirect_dir, "index.jsp")
    jsp_content = "<% response.sendRedirect(\"/{}\"); %>".format(url_root)
    with open(jsp_path, "w+") as jsp_file:
        jsp_file.write(jsp_content)
191
192
def wait_for_tomcat(logger, uri, request_timeout=30):
    """
    Active/busy waiting for Tomcat to come up.
    Currently there is no upper time bound for the overall wait,
    however each individual request is bounded by the timeout.
    :param logger: logger instance
    :param uri: web app URI to probe
    :param request_timeout: timeout of single request in seconds
    """
    # requests is already used by this module; import locally so that
    # this function does not depend on the module level imports being changed.
    from requests.exceptions import Timeout

    logger.info("Waiting for Tomcat to start")

    while True:
        try:
            #
            # Without the timeout a connection that is accepted but never
            # answered would block this loop forever.
            #
            status = get(uri, timeout=request_timeout).status_code
        except (ConnectionError, Timeout):
            # Not reachable (yet) or too slow to respond. Try again.
            status = 0

        if status == 200:
            break

        logger.debug("Got status {} for {}, sleeping for 1 second".
                     format(status, uri))
        time.sleep(1)

    logger.info("Tomcat is ready")
215
216
def refresh_projects(logger, uri):
    """
    Ensure each immediate source root subdirectory is a project
    and that projects without directory under source root are deleted.
    :param logger: logger instance
    :param uri: web app URI
    """
    webapp_projects = list_projects(logger, uri)
    #
    # Bail out only if the list could not be retrieved (presumably signalled
    # by None - verify against list_projects()). Empty list has to be
    # processed, otherwise no project would ever be added to web app
    # that has no projects yet.
    #
    if webapp_projects is None:
        return

    logger.debug('Projects from the web app: {}'.format(webapp_projects))
    src_root = OPENGROK_SRC_ROOT

    # Add projects.
    for item in os.listdir(src_root):
        logger.debug('Got item {}'.format(item))
        if os.path.isdir(os.path.join(src_root, item)):
            if item not in webapp_projects:
                logger.info("Adding project {}".format(item))
                add_project(logger, item, uri)

    # Remove projects
    for item in webapp_projects:
        if not os.path.isdir(os.path.join(src_root, item)):
            logger.info("Deleting project {}".format(item))
            delete_project(logger, item, uri)
241
242
def save_config(logger, uri, config_path):
    """
    Fetch configuration from the web app and store it in a file.
    Nothing is written if the configuration is None.
    :param logger: logger instance
    :param uri: web app URI
    :param config_path: file path
    """

    config = get_configuration(logger, uri)
    if config is not None:
        logger.info('Saving configuration to {}'.format(config_path))
        with open(config_path, "w+") as out:
            out.write(config)
258
259
def merge_commands_env(commands, env):
    """
    Merge environment into command structure. If any of the commands has
    an environment already set, the env is merged in (values from env
    take precedence).
    :param commands: commands structure (list of dictionaries),
    modified in place
    :param env: environment dictionary
    :return: updated commands structure
    """
    for cmd in commands:
        cmd_env = cmd.get('env')
        if cmd_env:
            # The command is a dictionary so it has no 'env' attribute,
            # update the dictionary retrieved above.
            cmd_env.update(env)
        else:
            # Use a copy so that the commands do not share (and possibly
            # modify) single environment dictionary.
            cmd['env'] = dict(env)

    return commands
276
277
def indexer_no_projects(logger, uri, config_path, extra_indexer_options):
    """
    Project less indexer. Once Tomcat is up, runs the indexer and then
    waits for reindex to be triggered, in endless loop.
    :param logger: logger instance
    :param uri: web app URI
    :param config_path: path of the configuration file to write
    :param extra_indexer_options: string with white space separated
    indexer options (can be empty)
    """

    wait_for_tomcat(logger, uri)

    # Options common to all indexer runs.
    base_options = ('-s', OPENGROK_SRC_ROOT,
                    '-d', OPENGROK_DATA_ROOT,
                    '-c', '/usr/local/bin/ctags',
                    '--remote', 'on',
                    '-H',
                    '-W', config_path,
                    '-U', uri)

    while True:
        options = list(base_options)
        if extra_indexer_options:
            logger.debug("Adding extra indexer options: {}".
                         format(extra_indexer_options))
            options += extra_indexer_options.split()

        Indexer(options, logger=logger,
                jar=OPENGROK_JAR, doprint=True).execute()

        logger.info("Waiting for reindex to be triggered")
        sleep_event.wait()
303
304
def timeout_loop(logger, sync_period):
    """
    Trigger reindex periodically, in endless loop.
    :param logger: logger instance
    :param sync_period: period in minutes
    """
    period_seconds = sync_period * 60
    while True:
        logger.info("Sleeping for {} seconds".format(period_seconds))
        time.sleep(period_seconds)
        trigger_reindex()
312
313
def project_syncer(logger, loglevel, uri, config_path, numworkers, env):
    """
    Wrapper for running opengrok-sync.
    To be run in a thread/process in the background.
    :param logger: logger instance
    :param loglevel: log level passed to the sync
    :param uri: web app URI
    :param config_path: path to save the web app configuration to
    :param numworkers: number of workers passed to the sync
    :param env: dictionary with environment to merge into the sync commands
    """

    wait_for_tomcat(logger, uri)

    while True:
        refresh_projects(logger, uri)

        # The OPENGROK_SYNC_YML environment variable is for debugging only.
        config_file = os.environ.get('OPENGROK_SYNC_YML') or \
            os.path.join(fs_root, 'scripts', 'sync.yml')
        config = read_config(logger, config_file)
        if config is None:
            logger.error("Cannot read config file from {}".format(config_file))
            raise Exception("no sync config")

        projects = list_projects(logger, uri)
        if projects:
            commands = config["commands"]
            if env:
                logger.info('Merging commands with environment')
                commands = merge_commands_env(commands, env)
                logger.debug(config['commands'])

            #
            # The driveon=True is needed for the initial indexing of newly
            # added project, otherwise the incoming check in the
            # opengrok-mirror program would short circuit it.
            #
            logger.info("Sync starting")
            do_sync(loglevel, commands, config.get('cleanup'),
                    projects, config.get("ignore_errors"), uri,
                    numworkers, driveon=True, logger=logger, print_output=True)
            logger.info("Sync done")

            # Workaround for https://github.com/oracle/opengrok/issues/1670
            timestamp_file = os.path.join(OPENGROK_DATA_ROOT, 'timestamp')
            Path(timestamp_file).touch()

            save_config(logger, uri, config_path)

        logger.info("Waiting for reindex to be triggered")
        sleep_event.wait()
361
362
def create_bare_config(logger, use_projects, extra_indexer_options=None):
    """
    Create bare configuration file with a few basic settings
    by running the indexer with the --noIndex option.
    :param logger: logger instance
    :param use_projects: whether to add the -P indexer option
    :param extra_indexer_options: optional list of extra indexer options
    :raises Exception: if extra_indexer_options is not a list or
    if the indexer did not exit with success
    """

    logger.info('Creating bare configuration in {}'.
                format(OPENGROK_CONFIG_FILE))
    indexer_options = ['-s', OPENGROK_SRC_ROOT,
                       '-d', OPENGROK_DATA_ROOT,
                       '-c', '/usr/local/bin/ctags',
                       '--remote', 'on',
                       '-H',
                       '-S',
                       '-W', OPENGROK_CONFIG_FILE,
                       '--noIndex']

    if extra_indexer_options:
        # isinstance() is the idiomatic check, it accepts list subclasses too.
        if not isinstance(extra_indexer_options, list):
            raise Exception("extra_indexer_options has to be a list")
        indexer_options.extend(extra_indexer_options)
    if use_projects:
        indexer_options.append('-P')
    indexer = Indexer(indexer_options,
                      jar=OPENGROK_JAR,
                      logger=logger, doprint=True)
    indexer.execute()
    ret = indexer.getretcode()
    if ret != SUCCESS_EXITVAL:
        logger.error('Command returned {}'.format(ret))
        logger.error(indexer.geterroutput())
        raise Exception("Failed to create bare configuration")
394
395
def get_num_from_env(logger, env_name, default_value):
    """
    Get non-negative integer from environment variable.
    :param logger: logger instance
    :param env_name: name of the environment variable
    :param default_value: value to return if the variable is not set,
    is empty, is not a number or is negative
    :return: the number or the default value
    """
    raw = os.environ.get(env_name)
    if not raw:
        return default_value

    try:
        number = int(raw)
    except ValueError:
        logger.error("{} is not a number: {}".
                     format(env_name, raw))
        return default_value

    return number if number >= 0 else default_value
409
410
def check_index_and_wipe_out(logger):
    """
    Run the indexer in index check mode, provided the CHECK_INDEX
    environment variable is non empty and the configuration file exists.
    If the indexer returns 1, remove all directories found directly
    under data root.
    :param logger: logger instance
    """
    if not os.environ.get('CHECK_INDEX'):
        return
    if not os.path.exists(OPENGROK_CONFIG_FILE):
        return

    logger.info('Checking if index matches current version')
    checker = Indexer(['-R', OPENGROK_CONFIG_FILE, '--checkIndex'],
                      logger=logger, jar=OPENGROK_JAR, doprint=True)
    checker.execute()
    if checker.getretcode() != 1:
        return

    logger.info('Wiping out data root')
    for name in os.listdir(OPENGROK_DATA_ROOT):
        path = os.path.join(OPENGROK_DATA_ROOT, name)
        if not os.path.isdir(path):
            continue

        # Failure to remove one directory should not prevent
        # the removal of the others.
        try:
            logger.info("Removing '{}'".format(path))
            shutil.rmtree(path)
        except Exception as e:
            logger.error("cannot delete '{}': {}".format(path, e))
435
436
def start_rest_thread(logger):
    """
    Start daemon thread serving the REST application.
    The port is taken from the REST_PORT environment variable
    (5000 by default). If the REST_TOKEN environment variable is non empty,
    its value becomes the token expected by the REST endpoint.
    :param logger: logger instance
    """
    global expected_token

    rest_port = get_num_from_env(logger, 'REST_PORT', 5000)
    token = os.environ.get('REST_TOKEN')
    if token:
        # Mind the space at the end of the first part of the message.
        logger.debug("Setting expected token for REST endpoint "
                     "on port {}".format(rest_port))
        expected_token = token
    logger.debug("Starting REST thread to listen for requests "
                 "on port {} on the {} endpoint".
                 format(rest_port, REINDEX_POINT))
    rest_thread = threading.Thread(target=rest_function,
                                   name="REST thread",
                                   args=(logger, rest_port), daemon=True)
    rest_thread.start()
452
453
def start_timeout_thread(logger, sync_period):
    """
    Start daemon thread that runs the timeout loop.
    :param logger: logger instance
    :param sync_period: period in minutes, passed to the timeout loop
    """
    logger.debug("Starting timeout thread")
    timeout_thread = threading.Thread(name="Timeout thread",
                                      target=timeout_loop,
                                      daemon=True,
                                      args=(logger, sync_period))
    timeout_thread.start()
460
461
def main():
    """
    Deploy the web application, prepare the configuration, start
    the background threads (sync/indexer, REST, periodic timer) and finally
    run Tomcat and wait for it to exit.

    The behavior is driven by environment variables: OPENGROK_LOG_LEVEL,
    URL_ROOT, SYNC_PERIOD_MINUTES, INDEXER_OPT, NOMIRROR, AVOID_PROJECTS,
    READONLY_CONFIG_FILE and WORKERS.
    """
    # The Tomcat process object is shared with the signal handler.
    global tomcat_popen

    log_level = os.environ.get('OPENGROK_LOG_LEVEL')
    if log_level:
        log_level = get_log_level(log_level)
    else:
        log_level = logging.INFO

    logger = get_console_logger(get_class_basename(), log_level)

    # Reporting the version is best effort only.
    try:
        with open(os.path.join(OPENGROK_BASE_DIR, "VERSION"), "r") as f:
            version = f.read()
            logger.info("Running version {}".format(version))
    except Exception as e:
        logger.debug("Cannot determine version: {}".format(e))

    uri, url_root = set_url_root(logger, os.environ.get('URL_ROOT'))
    logger.debug("URL_ROOT = {}".format(url_root))
    logger.debug("URI = {}".format(uri))

    sync_period = get_num_from_env(logger, 'SYNC_PERIOD_MINUTES', 10)
    if sync_period == 0:
        logger.info("periodic synchronization disabled")
    else:
        logger.info("synchronization period = {} minutes".format(sync_period))

    # Note that deploy is done before Tomcat is started.
    deploy(logger, url_root)

    #
    # The URL root returned by set_url_root() has the leading slash
    # stripped so it has to be compared to 'source', not '/source'.
    # The redirect must not be set up if the web app itself
    # is deployed under /source.
    #
    if url_root != 'source':
        setup_redirect_source(logger, url_root)

    env = {}
    extra_indexer_options = os.environ.get('INDEXER_OPT', '')
    if extra_indexer_options:
        logger.info("extra indexer options: {}".format(extra_indexer_options))
        env['OPENGROK_INDEXER_OPTIONAL_ARGS'] = extra_indexer_options
    if os.environ.get(NOMIRROR_ENV_NAME):
        env[OPENGROK_NO_MIRROR_ENV] = os.environ.get(NOMIRROR_ENV_NAME)
    logger.debug('Extra environment: {}'.format(env))

    use_projects = True
    if os.environ.get('AVOID_PROJECTS'):
        use_projects = False

    #
    # Create empty configuration to avoid the non existent file exception
    # in the web app during the first web app startup.
    #
    if not os.path.exists(OPENGROK_CONFIG_FILE) or \
            os.path.getsize(OPENGROK_CONFIG_FILE) == 0:
        create_bare_config(logger, use_projects, extra_indexer_options.split())

    #
    # Index check needs read-only configuration so it is placed
    # right after create_bare_config().
    #
    check_index_and_wipe_out(logger)

    #
    # If there is read-only configuration file, merge it with current
    # configuration.
    #
    read_only_config_file = os.environ.get('READONLY_CONFIG_FILE')
    if read_only_config_file and os.path.exists(read_only_config_file):
        logger.info('Merging read-only configuration from \'{}\' with current '
                    'configuration in \'{}\''.format(read_only_config_file,
                                                     OPENGROK_CONFIG_FILE))
        out_file = None
        with tempfile.NamedTemporaryFile(mode='w+', delete=False,
                                         prefix='merged_config') as tmp_out:
            out_file = tmp_out.name
            merge_config_files(read_only_config_file, OPENGROK_CONFIG_FILE,
                               tmp_out, jar=OPENGROK_JAR, loglevel=log_level)

        # Replace the configuration only if the merge produced some output.
        if out_file and os.path.getsize(out_file) > 0:
            shutil.move(out_file, OPENGROK_CONFIG_FILE)
        else:
            logger.warning('Failed to merge read-only configuration, '
                           'leaving the original in place')
            if out_file:
                os.remove(out_file)

    sync_enabled = True
    if use_projects:
        mirror_config = os.path.join(OPENGROK_CONFIG_DIR, "mirror.yml")
        if not os.path.exists(mirror_config):
            with open(mirror_config, 'w') as fp:
                fp.write("# Empty config file for opengrok-mirror\n")

        num_workers = get_num_from_env(logger, 'WORKERS',
                                       multiprocessing.cpu_count())
        logger.info('Number of sync workers: {}'.format(num_workers))

        if not os.environ.get(NOMIRROR_ENV_NAME):
            conf = read_config(logger, mirror_config)
            logger.info("Checking mirror configuration in '{}'".
                        format(mirror_config))
            if not check_configuration(conf):
                logger.error("Mirror configuration in '{}' is invalid, "
                             "disabling sync".format(mirror_config))
                sync_enabled = False

        worker_function = project_syncer
        syncer_args = (logger, log_level, uri,
                       OPENGROK_CONFIG_FILE,
                       num_workers, env)
    else:
        worker_function = indexer_no_projects
        syncer_args = (logger, uri, OPENGROK_CONFIG_FILE,
                       extra_indexer_options)

    if sync_enabled:
        logger.debug("Starting sync thread")
        sync_thread = threading.Thread(target=worker_function,
                                       name="Sync thread",
                                       args=syncer_args, daemon=True)
        sync_thread.start()

        start_rest_thread(logger)
        if sync_period > 0:
            start_timeout_thread(logger, sync_period)

    # Start Tomcat last. It will be the foreground process.
    logger.info("Starting Tomcat")
    tomcat_popen = subprocess.Popen([os.path.join(tomcat_root, 'bin',
                                                  'catalina.sh'),
                                    'run'])
    tomcat_popen.wait()
592
593
def signal_handler(signum, frame):
    """
    Signal handler: terminate Tomcat (if already started) and exit.
    :param signum: signal number
    :param frame: current stack frame (not used)
    """
    print("Received signal {}".format(signum))

    #
    # The tomcat_popen global is assigned only once Tomcat is started.
    # The signal can arrive sooner than that so look the variable up
    # in a way that does not raise NameError if it is not there yet.
    #
    popen = globals().get('tomcat_popen')
    if popen is not None:
        print("Terminating Tomcat {}".format(popen))
        popen.terminate()

    sys.exit(0)
602
603
if __name__ == "__main__":
    # Make sure Tomcat is terminated when the program is asked to stop.
    for handled_signal in (signal.SIGTERM, signal.SIGINT):
        signal.signal(handled_signal, signal_handler)

    main()
609