test_blockade_datanode_isolation.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  16. import os
  17. import time
  18. import logging
  19. from blockadeUtils.blockade import Blockade
  20. from clusterUtils.cluster_utils import ClusterUtils
  21. logger = logging.getLogger(__name__)
  22. parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
  23. FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
  24. "docker-compose.yaml")
  25. os.environ["DOCKER_COMPOSE_FILE"] = FILE
  26. SCALE = 3
  27. CONTAINER_LIST = []
  28. OM = []
  29. SCM = []
  30. DATANODES = []
  31. def setup():
  32. global CONTAINER_LIST, OM, SCM, DATANODES
  33. Blockade.blockade_destroy()
  34. CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
  35. exit_code, output = Blockade.blockade_status()
  36. assert exit_code == 0, "blockade status command failed with output=[%s]" % \
  37. output
  38. OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
  39. SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
  40. DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
  41. exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
  42. "THREE")
  43. assert exit_code == 0, "freon run failed with output=[%s]" % output
  44. def teardown():
  45. logger.info("Inside teardown")
  46. Blockade.blockade_destroy()
  47. def teardown_module():
  48. ClusterUtils.cluster_destroy(FILE)
  49. def test_datanode_isolation_one_node():
  50. """
  51. In this test, one of the datanodes (first datanode) cannot communicate
  52. with other two datanodes.
  53. All datanodes can communicate with SCM.
  54. Expectation :
  55. The container replica state in first datanode should be quasi-closed.
  56. The container replica state in other datanodes should be closed.
  57. """
  58. first_set = [OM[0], SCM[0], DATANODES[0]]
  59. second_set = [OM[0], SCM[0], DATANODES[1], DATANODES[2]]
  60. Blockade.blockade_create_partition(first_set, second_set)
  61. Blockade.blockade_status()
  62. ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
  63. logger.info("Waiting for %s seconds before checking container status",
  64. os.environ["CONTAINER_STATUS_SLEEP"])
  65. time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
  66. all_datanodes_container_status = \
  67. ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
  68. first_datanode_status = all_datanodes_container_status[0]
  69. count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
  70. all_datanodes_container_status)
  71. assert first_datanode_status == 'QUASI_CLOSED'
  72. assert len(count_closed_container_datanodes) == 2, \
  73. "The container should have three closed replicas."
  74. def test_datanode_isolation_all():
  75. """
  76. In this test, none of the datanodes can communicate with other two
  77. datanodes.
  78. All datanodes can communicate with SCM.
  79. Expectation : The container should eventually have at least two closed
  80. replicas.
  81. """
  82. first_set = [OM[0], SCM[0], DATANODES[0]]
  83. second_set = [OM[0], SCM[0], DATANODES[1]]
  84. third_set = [OM[0], SCM[0], DATANODES[2]]
  85. Blockade.blockade_create_partition(first_set, second_set, third_set)
  86. Blockade.blockade_status()
  87. ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
  88. logger.info("Waiting for %s seconds before checking container status",
  89. os.environ["CONTAINER_STATUS_SLEEP"])
  90. time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
  91. all_datanodes_container_status = \
  92. ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
  93. count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
  94. all_datanodes_container_status)
  95. assert len(count_closed_container_datanodes) >= 2, \
  96. "The container should have at least two closed replicas."