test_blockade_scm_isolation.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #!/usr/bin/python
  2. # Licensed to the Apache Software Foundation (ASF) under one or more
  3. # contributor license agreements. See the NOTICE file distributed with
  4. # this work for additional information regarding copyright ownership.
  5. # The ASF licenses this file to You under the Apache License, Version 2.0
  6. # (the "License"); you may not use this file except in compliance with
  7. # the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. import os
  17. import time
  18. import logging
  19. from blockadeUtils.blockade import Blockade
  20. from clusterUtils.cluster_utils import ClusterUtils
  logger = logging.getLogger(__name__)

  # The compose file is located relative to this module:
  # <directory above this file's directory>/compose/ozoneblockade/.
  _module_dir = os.path.dirname(os.path.realpath(__file__))
  parent_dir = os.path.dirname(_module_dir)
  FILE = os.path.join(
      parent_dir,
      "compose",
      "ozoneblockade",
      "docker-compose.yaml",
  )

  # Export the compose file path through the environment.
  # NOTE(review): presumably read by the blockade/cluster helpers - confirm.
  os.environ["DOCKER_COMPOSE_FILE"] = FILE

  # Scale value handed to the ClusterUtils helpers together with FILE.
  SCALE = 3

  # Container name lists; they start empty and are filled in by setup().
  CONTAINER_LIST, OM, SCM, DATANODES = [], [], [], []
  def setup():
      """Start a fresh cluster and sort its containers into role lists.

      Destroys any existing blockade, brings the cluster up from FILE with
      SCALE as the scale argument, and checks that ``blockade status``
      succeeds. The module-level CONTAINER_LIST, OM, SCM and DATANODES lists
      are then filled by substring-matching the container names. Finally
      freon is run once and must succeed.

      Raises:
          AssertionError: if the blockade status command or the freon run
              returns a non-zero exit code.
      """
      global CONTAINER_LIST, OM, SCM, DATANODES
      Blockade.blockade_destroy()
      CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
      exit_code, output = Blockade.blockade_status()
      assert exit_code == 0, \
          "blockade status command failed with output=[%s]" % output
      # Build real lists: the tests index these (OM[0], SCM[0]), and the lazy
      # object returned by filter() on Python 3 does not support indexing.
      OM = [name for name in CONTAINER_LIST if 'ozoneManager' in name]
      SCM = [name for name in CONTAINER_LIST if 'scm' in name]
      DATANODES = sorted(
          name for name in CONTAINER_LIST if 'datanode' in name)
      exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240,
                                                 "RATIS", "THREE")
      assert exit_code == 0, "freon run failed with output=[%s]" % output
  def teardown():
      """Log that teardown is running, then destroy the blockade."""
      logger.info("Inside teardown")
      Blockade.blockade_destroy()
  def teardown_module():
      """Destroy the cluster described by the compose file FILE."""
      ClusterUtils.cluster_destroy(FILE)
  def test_scm_isolation_one_node():
      """
      In this test, one of the datanodes cannot communicate with SCM.
      Other datanodes can communicate with SCM.
      Expectation : The container should eventually have at least two closed
      replicas.

      The wait before checking container status is read from the
      CONTAINER_STATUS_SLEEP environment variable (in seconds); a KeyError
      is raised if it is not set.
      """
      # DATANODES[0] is only in the first set and SCM[0] only in the second;
      # per the description above, this cuts that datanode off from SCM.
      first_set = [OM[0], DATANODES[0], DATANODES[1], DATANODES[2]]
      second_set = [OM[0], SCM[0], DATANODES[1], DATANODES[2]]
      Blockade.blockade_create_partition(first_set, second_set)
      Blockade.blockade_status()
      # The freon result is deliberately not asserted here; only the
      # container states are checked below.
      ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
      sleep_seconds = os.environ["CONTAINER_STATUS_SLEEP"]
      logger.info("Waiting for %s seconds before checking container status",
                  sleep_seconds)
      time.sleep(int(sleep_seconds))
      all_datanodes_container_status = \
          ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
      # A list is used instead of filter(): len() of a filter object fails
      # on Python 3.
      closed_container_datanodes = [
          status for status in all_datanodes_container_status
          if status == 'CLOSED'
      ]
      assert len(closed_container_datanodes) >= 2, \
          "The container should have at least two closed replicas."
  def test_scm_isolation_two_node():
      """
      In this test, two datanodes cannot communicate with SCM.
      Expectation : The container should eventually have three closed
      replicas or, two open replicas and one quasi-closed replica.

      The wait before checking container status is read from the
      CONTAINER_STATUS_SLEEP environment variable (in seconds); a KeyError
      is raised if it is not set.
      """
      # DATANODES[0] and DATANODES[2] are only in the first set and SCM[0]
      # only in the second; per the description above, this cuts those two
      # datanodes off from SCM.
      first_set = [OM[0], DATANODES[0], DATANODES[1], DATANODES[2]]
      second_set = [OM[0], SCM[0], DATANODES[1]]
      Blockade.blockade_create_partition(first_set, second_set)
      Blockade.blockade_status()
      # The freon result is deliberately not asserted here; only the
      # container states are checked below.
      ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
      sleep_seconds = os.environ["CONTAINER_STATUS_SLEEP"]
      logger.info("Waiting for %s seconds before checking container status",
                  sleep_seconds)
      time.sleep(int(sleep_seconds))
      all_datanodes_container_status = \
          ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
      # Lists are used instead of filter(): len() of a filter object fails
      # on Python 3.
      closed_container_datanodes = [
          status for status in all_datanodes_container_status
          if status == 'CLOSED'
      ]
      quasi_closed_container_datanodes = [
          status for status in all_datanodes_container_status
          if status == 'QUASI_CLOSED'
      ]
      open_container_datanodes = [
          status for status in all_datanodes_container_status
          if status == 'OPEN'
      ]
      assert len(closed_container_datanodes) == 3 or \
          (len(open_container_datanodes) == 2 and
           len(quasi_closed_container_datanodes) == 1), \
          "The container should have three closed replicas or two open " \
          "replicas and one quasi_closed replica."