- <?xml version="1.0"?>
- <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
- <!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
- <!-- Do not modify this file directly. Instead, copy entries that you -->
- <!-- wish to modify from this file into hdfs-site.xml and change them -->
- <!-- there. If hdfs-site.xml does not already exist, create it. -->
- <configuration>
- <property>
- <name>hadoop.hdfs.configuration.version</name>
- <value>1</value>
- <description>version of this configuration file</description>
- </property>
- <property>
- <name>dfs.namenode.logging.level</name>
- <value>info</value>
- <description>
- The logging level for dfs namenode. Other values are "dir" (trace
- namespace mutations), "block" (trace block under/over replications
- and block creations/deletions), or "all".
- </description>
- </property>
- <property>
- <name>dfs.namenode.rpc-address</name>
- <value></value>
- <description>
- RPC address that handles all client requests. In the case of HA/Federation
- where multiple namenodes exist, the name service id is added to the name,
- e.g. dfs.namenode.rpc-address.ns1 or
- dfs.namenode.rpc-address.EXAMPLENAMESERVICE.
- The value of this property will take the form of nn-host1:rpc-port.
- </description>
- </property>
- <property>
- <name>dfs.namenode.servicerpc-address</name>
- <value></value>
- <description>
- RPC address for HDFS Services communication. BackupNode, Datanodes and all other services should
- connect to this address if it is configured. In the case of HA/Federation where multiple namenodes exist,
- the name service id is added to the name, e.g. dfs.namenode.servicerpc-address.ns1 or
- dfs.namenode.servicerpc-address.EXAMPLENAMESERVICE.
- The value of this property will take the form of nn-host1:rpc-port.
- If the value of this property is unset, the value of dfs.namenode.rpc-address will be used as the default.
- </description>
- </property>
- <property>
- <name>dfs.namenode.secondary.http-address</name>
- <value>0.0.0.0:50090</value>
- <description>
- The secondary namenode http server address and port.
- </description>
- </property>
- <property>
- <name>dfs.datanode.address</name>
- <value>0.0.0.0:50010</value>
- <description>
- The datanode server address and port for data transfer.
- </description>
- </property>
- <property>
- <name>dfs.datanode.http.address</name>
- <value>0.0.0.0:50075</value>
- <description>
- The datanode http server address and port.
- </description>
- </property>
- <property>
- <name>dfs.datanode.ipc.address</name>
- <value>0.0.0.0:50020</value>
- <description>
- The datanode ipc server address and port.
- </description>
- </property>
- <property>
- <name>dfs.datanode.handler.count</name>
- <value>10</value>
- <description>The number of server threads for the datanode.</description>
- </property>
- <property>
- <name>dfs.namenode.http-address</name>
- <value>0.0.0.0:50070</value>
- <description>
- The address and the base port on which the dfs namenode web ui will listen.
- </description>
- </property>
- <property>
- <name>dfs.https.enable</name>
- <value>false</value>
- <description>Decide if HTTPS(SSL) is supported on HDFS
- </description>
- </property>
- <property>
- <name>dfs.client.https.need-auth</name>
- <value>false</value>
- <description>Whether SSL client certificate authentication is required
- </description>
- </property>
- <property>
- <name>dfs.https.server.keystore.resource</name>
- <value>ssl-server.xml</value>
- <description>Resource file from which ssl server keystore
- information will be extracted
- </description>
- </property>
- <property>
- <name>dfs.client.https.keystore.resource</name>
- <value>ssl-client.xml</value>
- <description>Resource file from which ssl client keystore
- information will be extracted
- </description>
- </property>
- <property>
- <name>dfs.datanode.https.address</name>
- <value>0.0.0.0:50475</value>
- <description>The datanode secure http server address and port.</description>
- </property>
- <property>
- <name>dfs.namenode.https-address</name>
- <value>0.0.0.0:50470</value>
- <description>The namenode secure http server address and port.</description>
- </property>
- <property>
- <name>dfs.datanode.dns.interface</name>
- <value>default</value>
- <description>The name of the Network Interface from which a data node should
- report its IP address.
- </description>
- </property>
-
- <property>
- <name>dfs.datanode.dns.nameserver</name>
- <value>default</value>
- <description>The host name or IP address of the name server (DNS)
- which a DataNode should use to determine the host name used by the
- NameNode for communication and display purposes.
- </description>
- </property>
-
- <property>
- <name>dfs.namenode.backup.address</name>
- <value>0.0.0.0:50100</value>
- <description>
- The backup node server address and port.
- If the port is 0 then the server will start on a free port.
- </description>
- </property>
-
- <property>
- <name>dfs.namenode.backup.http-address</name>
- <value>0.0.0.0:50105</value>
- <description>
- The backup node http server address and port.
- If the port is 0 then the server will start on a free port.
- </description>
- </property>
- <property>
- <name>dfs.namenode.replication.considerLoad</name>
- <value>true</value>
- <description>Decide if chooseTarget considers the target's load or not
- </description>
- </property>
- <property>
- <name>dfs.default.chunk.view.size</name>
- <value>32768</value>
- <description>The number of bytes to view for a file on the browser.
- </description>
- </property>
- <property>
- <name>dfs.datanode.du.reserved</name>
- <value>0</value>
- <description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
- </description>
- </property>
- <property>
- <name>dfs.namenode.name.dir</name>
- <value>file://${hadoop.tmp.dir}/dfs/name</value>
- <description>Determines where on the local filesystem the DFS name node
- should store the name table(fsimage). If this is a comma-delimited list
- of directories then the name table is replicated in all of the
- directories, for redundancy. </description>
- </property>
- <property>
- <name>dfs.namenode.name.dir.restore</name>
- <value>false</value>
- <description>Set to true to enable NameNode to attempt recovering a
- previously failed dfs.namenode.name.dir. When enabled, a recovery of any
- failed directory is attempted during checkpoint.</description>
- </property>
- <property>
- <name>dfs.namenode.fs-limits.max-component-length</name>
- <value>0</value>
- <description>Defines the maximum number of characters in each component
- of a path. A value of 0 will disable the check.</description>
- </property>
- <property>
- <name>dfs.namenode.fs-limits.max-directory-items</name>
- <value>0</value>
- <description>Defines the maximum number of items that a directory may
- contain. A value of 0 will disable the check.</description>
- </property>
- <property>
- <name>dfs.namenode.edits.dir</name>
- <value>${dfs.namenode.name.dir}</value>
- <description>Determines where on the local filesystem the DFS name node
- should store the transaction (edits) file. If this is a comma-delimited list
- of directories then the transaction file is replicated in all of the
- directories, for redundancy. Default value is same as dfs.namenode.name.dir
- </description>
- </property>
- <property>
- <name>dfs.namenode.shared.edits.dir</name>
- <value></value>
- <description>A directory on shared storage between the multiple namenodes
- in an HA cluster. This directory will be written by the active and read
- by the standby in order to keep the namespaces synchronized. This directory
- does not need to be listed in dfs.namenode.edits.dir above. It should be
- left empty in a non-HA cluster.
- </description>
- </property>
- <property>
- <name>dfs.namenode.edits.journal-plugin.qjournal</name>
- <value>org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager</value>
- </property>
- <property>
- <name>dfs.permissions.enabled</name>
- <value>true</value>
- <description>
- If "true", enable permission checking in HDFS.
- If "false", permission checking is turned off,
- but all other behavior is unchanged.
- Switching from one parameter value to the other does not change the mode,
- owner or group of files or directories.
- </description>
- </property>
- <property>
- <name>dfs.permissions.superusergroup</name>
- <value>supergroup</value>
- <description>The name of the group of super-users.</description>
- </property>
- <!--
- <property>
- <name>dfs.cluster.administrators</name>
- <value>ACL for the admins</value>
- <description>This configuration is used to control who can access the
- default servlets in the namenode, etc.
- </description>
- </property>
- -->
- <property>
- <name>dfs.block.access.token.enable</name>
- <value>false</value>
- <description>
- If "true", access tokens are used as capabilities for accessing datanodes.
- If "false", no access tokens are checked on accessing datanodes.
- </description>
- </property>
- <property>
- <name>dfs.block.access.key.update.interval</name>
- <value>600</value>
- <description>
- Interval in minutes at which namenode updates its access keys.
- </description>
- </property>
- <property>
- <name>dfs.block.access.token.lifetime</name>
- <value>600</value>
- <description>The lifetime of access tokens in minutes.</description>
- </property>
- <property>
- <name>dfs.datanode.data.dir</name>
- <value>file://${hadoop.tmp.dir}/dfs/data</value>
- <description>Determines where on the local filesystem an DFS data node
- should store its blocks. If this is a comma-delimited
- list of directories, then data will be stored in all named
- directories, typically on different devices.
- Directories that do not exist are ignored.
- </description>
- </property>
- <property>
- <name>dfs.datanode.data.dir.perm</name>
- <value>700</value>
- <description>Permissions for the directories on on the local filesystem where
- the DFS data node store its blocks. The permissions can either be octal or
- symbolic.</description>
- </property>
- <property>
- <name>dfs.replication</name>
- <value>3</value>
- <description>Default block replication.
- The actual number of replications can be specified when the file is created.
- The default is used if replication is not specified at create time.
- </description>
- </property>
- <property>
- <name>dfs.replication.max</name>
- <value>512</value>
- <description>Maximal block replication.
- </description>
- </property>
- <property>
- <name>dfs.namenode.replication.min</name>
- <value>1</value>
- <description>Minimal block replication.
- </description>
- </property>
- <property>
- <name>dfs.blocksize</name>
- <value>134217728</value>
- <description>
- The default block size for new files, in bytes.
- You can use the following suffixes (case insensitive):
- k (kilo), m (mega), g (giga), t (tera), p (peta), e (exa) to specify the size (such as 128k, 512m, 1g, etc.),
- or provide the complete size in bytes (such as 134217728 for 128 MB).
- </description>
- </property>
- <property>
- <name>dfs.client.block.write.retries</name>
- <value>3</value>
- <description>The number of retries for writing blocks to the data nodes,
- before we signal failure to the application.
- </description>
- </property>
- <property>
- <name>dfs.client.block.write.replace-datanode-on-failure.enable</name>
- <value>true</value>
- <description>
- If there is a datanode/network failure in the write pipeline,
- DFSClient will try to remove the failed datanode from the pipeline
- and then continue writing with the remaining datanodes. As a result,
- the number of datanodes in the pipeline is decreased. The feature is
- to add new datanodes to the pipeline.
- This is a site-wide property to enable/disable the feature.
- When the cluster size is extremely small, e.g. 3 nodes or less, cluster
- administrators may want to set the policy to NEVER in the default
- configuration file or disable this feature. Otherwise, users may
- experience an unusually high rate of pipeline failures since it is
- impossible to find new datanodes for replacement.
- See also dfs.client.block.write.replace-datanode-on-failure.policy
- </description>
- </property>
- <property>
- <name>dfs.client.block.write.replace-datanode-on-failure.policy</name>
- <value>DEFAULT</value>
- <description>
- This property is used only if the value of
- dfs.client.block.write.replace-datanode-on-failure.enable is true.
- ALWAYS: always add a new datanode when an existing datanode is removed.
- NEVER: never add a new datanode.
- DEFAULT:
- Let r be the replication number.
- Let n be the number of existing datanodes.
- Add a new datanode only if r is greater than or equal to 3 and either
- (1) floor(r/2) is greater than or equal to n; or
- (2) r is greater than n and the block is hflushed/appended.
- </description>
- </property>
- <property>
- <name>dfs.blockreport.intervalMsec</name>
- <value>21600000</value>
- <description>Determines block reporting interval in milliseconds.</description>
- </property>
- <property>
- <name>dfs.blockreport.initialDelay</name> <value>0</value>
- <description>Delay for first block report in seconds.</description>
- </property>
- <property>
- <name>dfs.datanode.directoryscan.interval</name>
- <value>21600</value>
- <description>Interval in seconds for Datanode to scan data directories and
- reconcile the difference between blocks in memory and on the disk.
- </description>
- </property>
- <property>
- <name>dfs.datanode.directoryscan.threads</name>
- <value>1</value>
- <description>How many threads should the threadpool used to compile reports
- for volumes in parallel have.
- </description>
- </property>
- <property>
- <name>dfs.heartbeat.interval</name>
- <value>3</value>
- <description>Determines datanode heartbeat interval in seconds.</description>
- </property>
- <property>
- <name>dfs.namenode.handler.count</name>
- <value>10</value>
- <description>The number of server threads for the namenode.</description>
- </property>
- <property>
- <name>dfs.namenode.safemode.threshold-pct</name>
- <value>0.999f</value>
- <description>
- Specifies the percentage of blocks that should satisfy
- the minimal replication requirement defined by dfs.namenode.replication.min.
- Values less than or equal to 0 mean not to wait for any particular
- percentage of blocks before exiting safemode.
- Values greater than 1 will make safe mode permanent.
- </description>
- </property>
- <property>
- <name>dfs.namenode.safemode.min.datanodes</name>
- <value>0</value>
- <description>
- Specifies the number of datanodes that must be considered alive
- before the name node exits safemode.
- Values less than or equal to 0 mean not to take the number of live
- datanodes into account when deciding whether to remain in safe mode
- during startup.
- Values greater than the number of datanodes in the cluster
- will make safe mode permanent.
- </description>
- </property>
- <property>
- <name>dfs.namenode.safemode.extension</name>
- <value>30000</value>
- <description>
- Determines extension of safe mode in milliseconds
- after the threshold level is reached.
- </description>
- </property>
- <property>
- <name>dfs.datanode.balance.bandwidthPerSec</name>
- <value>1048576</value>
- <description>
- Specifies the maximum amount of bandwidth that each datanode
- can utilize for balancing purposes, in terms of
- the number of bytes per second.
- </description>
- </property>
- <property>
- <name>dfs.hosts</name>
- <value></value>
- <description>Names a file that contains a list of hosts that are
- permitted to connect to the namenode. The full pathname of the file
- must be specified. If the value is empty, all hosts are
- permitted.</description>
- </property>
- <property>
- <name>dfs.hosts.exclude</name>
- <value></value>
- <description>Names a file that contains a list of hosts that are
- not permitted to connect to the namenode. The full pathname of the
- file must be specified. If the value is empty, no hosts are
- excluded.</description>
- </property>
- <property>
- <name>dfs.namenode.max.objects</name>
- <value>0</value>
- <description>The maximum number of files, directories and blocks
- dfs supports. A value of zero indicates no limit to the number
- of objects that dfs supports.
- </description>
- </property>
- <property>
- <name>dfs.namenode.decommission.interval</name>
- <value>30</value>
- <description>Namenode periodicity in seconds to check if decommission is
- complete.</description>
- </property>
- <property>
- <name>dfs.namenode.decommission.nodes.per.interval</name>
- <value>5</value>
- <description>The number of nodes namenode checks if decommission is complete
- in each dfs.namenode.decommission.interval.</description>
- </property>
- <property>
- <name>dfs.namenode.replication.interval</name>
- <value>3</value>
- <description>The periodicity in seconds with which the namenode computes
- repliaction work for datanodes. </description>
- </property>
- <property>
- <name>dfs.namenode.accesstime.precision</name>
- <value>3600000</value>
- <description>The access time for HDFS file is precise upto this value.
- The default value is 1 hour. Setting a value of 0 disables
- access times for HDFS.
- </description>
- </property>
- <property>
- <name>dfs.datanode.plugins</name>
- <value></value>
- <description>Comma-separated list of datanode plug-ins to be activated.
- </description>
- </property>
- <property>
- <name>dfs.namenode.plugins</name>
- <value></value>
- <description>Comma-separated list of namenode plug-ins to be activated.
- </description>
- </property>
- <property>
- <name>dfs.stream-buffer-size</name>
- <value>4096</value>
- <description>The size of buffer to stream files.
- The size of this buffer should probably be a multiple of hardware
- page size (4096 on Intel x86), and it determines how much data is
- buffered during read and write operations.</description>
- </property>
- <property>
- <name>dfs.bytes-per-checksum</name>
- <value>512</value>
- <description>The number of bytes per checksum. Must not be larger than
- dfs.stream-buffer-size</description>
- </property>
- <property>
- <name>dfs.client-write-packet-size</name>
- <value>65536</value>
- <description>Packet size for clients to write</description>
- </property>
- <property>
- <name>dfs.client.write.exclude.nodes.cache.expiry.interval.millis</name>
- <value>600000</value>
- <description>The maximum period to keep a DN in the excluded nodes list
- at a client. After this period, in milliseconds, the previously excluded node(s) will
- be removed automatically from the cache and will be considered good for block allocations
- again. Useful to lower or raise in situations where you keep a file open for very long
- periods (such as a Write-Ahead-Log (WAL) file) to make the writer tolerant to cluster maintenance
- restarts. Defaults to 10 minutes.</description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.dir</name>
- <value>file://${hadoop.tmp.dir}/dfs/namesecondary</value>
- <description>Determines where on the local filesystem the DFS secondary
- name node should store the temporary images to merge.
- If this is a comma-delimited list of directories then the image is
- replicated in all of the directories for redundancy.
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.edits.dir</name>
- <value>${dfs.namenode.checkpoint.dir}</value>
- <description>Determines where on the local filesystem the DFS secondary
- name node should store the temporary edits to merge.
- If this is a comma-delimited list of directories then the edits are
- replicated in all of the directories for redundancy.
- Default value is same as dfs.namenode.checkpoint.dir
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.period</name>
- <value>3600</value>
- <description>The number of seconds between two periodic checkpoints.
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.txns</name>
- <value>1000000</value>
- <description>The Secondary NameNode or CheckpointNode will create a checkpoint
- of the namespace every 'dfs.namenode.checkpoint.txns' transactions, regardless
- of whether 'dfs.namenode.checkpoint.period' has expired.
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.check.period</name>
- <value>60</value>
- <description>The SecondaryNameNode and CheckpointNode will poll the NameNode
- every 'dfs.namenode.checkpoint.check.period' seconds to query the number
- of uncheckpointed transactions.
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.max-retries</name>
- <value>3</value>
- <description>The SecondaryNameNode retries failed checkpointing. If the
- failure occurs while loading fsimage or replaying edits, the number of
- retries is limited by this variable.
- </description>
- </property>
- <property>
- <name>dfs.namenode.num.checkpoints.retained</name>
- <value>2</value>
- <description>The number of image checkpoint files that will be retained by
- the NameNode and Secondary NameNode in their storage directories. All edit
- logs necessary to recover an up-to-date namespace from the oldest retained
- checkpoint will also be retained.
- </description>
- </property>
- <property>
- <name>dfs.namenode.num.extra.edits.retained</name>
- <value>1000000</value>
- <description>The number of extra transactions which should be retained
- beyond what is minimally necessary for a NN restart. This can be useful for
- audit purposes or for an HA setup where a remote Standby Node may have
- been offline for some time and need to have a longer backlog of retained
- edits in order to start again.
- Typically each edit is on the order of a few hundred bytes, so the default
- of 1 million edits should be on the order of hundreds of MBs or low GBs.
- NOTE: Fewer extra edits may be retained than the value specified for this
- setting if doing so would mean that more segments would be retained than the
- number configured by dfs.namenode.max.extra.edits.segments.retained.
- </description>
- </property>
- <property>
- <name>dfs.namenode.max.extra.edits.segments.retained</name>
- <value>10000</value>
- <description>The maximum number of extra edit log segments which should be retained
- beyond what is minimally necessary for a NN restart. When used in conjunction with
- dfs.namenode.num.extra.edits.retained, this configuration property serves to cap
- the number of extra edits files to a reasonable value.
- </description>
- </property>
- <property>
- <name>dfs.namenode.delegation.key.update-interval</name>
- <value>86400000</value>
- <description>The update interval for master key for delegation tokens
- in the namenode in milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.namenode.delegation.token.max-lifetime</name>
- <value>604800000</value>
- <description>The maximum lifetime in milliseconds for which a delegation
- token is valid.
- </description>
- </property>
- <property>
- <name>dfs.namenode.delegation.token.renew-interval</name>
- <value>86400000</value>
- <description>The renewal interval for delegation token in milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.datanode.failed.volumes.tolerated</name>
- <value>0</value>
- <description>The number of volumes that are allowed to
- fail before a datanode stops offering service. By default
- any volume failure will cause a datanode to shut down.
- </description>
- </property>
- <property>
- <name>dfs.image.compress</name>
- <value>false</value>
- <description>Should the dfs image be compressed?
- </description>
- </property>
- <property>
- <name>dfs.image.compression.codec</name>
- <value>org.apache.hadoop.io.compress.DefaultCodec</value>
- <description>If the dfs image is compressed, how should they be compressed?
- This has to be a codec defined in io.compression.codecs.
- </description>
- </property>
- <property>
- <name>dfs.image.transfer.timeout</name>
- <value>600000</value>
- <description>
- Timeout for image transfer in milliseconds. This timeout and the related
- dfs.image.transfer.bandwidthPerSec parameter should be configured such
- that normal image transfer can complete within the timeout.
- This timeout prevents client hangs when the sender fails during
- image transfer, which is particularly important during checkpointing.
- Note that this timeout applies to the entirety of image transfer, and
- is not a socket timeout.
- </description>
- </property>
- <property>
- <name>dfs.image.transfer.bandwidthPerSec</name>
- <value>0</value>
- <description>
- Maximum bandwidth used for image transfer in bytes per second.
- This can help keep normal namenode operations responsive during
- checkpointing. The maximum bandwidth and timeout in
- dfs.image.transfer.timeout should be set such that normal image
- transfers can complete successfully.
- A default value of 0 indicates that throttling is disabled.
- </description>
- </property>
- <property>
- <name>dfs.namenode.support.allow.format</name>
- <value>true</value>
- <description>Does HDFS namenode allow itself to be formatted?
- You may consider setting this to false for any production
- cluster, to avoid any possibility of formatting a running DFS.
- </description>
- </property>
- <property>
- <name>dfs.datanode.max.transfer.threads</name>
- <value>4096</value>
- <description>
- Specifies the maximum number of threads to use for transferring data
- in and out of the DN.
- </description>
- </property>
- <property>
- <name>dfs.datanode.readahead.bytes</name>
- <value>4193404</value>
- <description>
- While reading block files, if the Hadoop native libraries are available,
- the datanode can use the posix_fadvise system call to explicitly
- page data into the operating system buffer cache ahead of the current
- reader's position. This can improve performance especially when
- disks are highly contended.
- This configuration specifies the number of bytes ahead of the current
- read position which the datanode will attempt to read ahead. This
- feature may be disabled by configuring this property to 0.
- If the native libraries are not available, this configuration has no
- effect.
- </description>
- </property>
- <property>
- <name>dfs.datanode.drop.cache.behind.reads</name>
- <value>false</value>
- <description>
- In some workloads, the data read from HDFS is known to be significantly
- large enough that it is unlikely to be useful to cache it in the
- operating system buffer cache. In this case, the DataNode may be
- configured to automatically purge all data from the buffer cache
- after it is delivered to the client. This behavior is automatically
- disabled for workloads which read only short sections of a block
- (e.g. HBase random-IO workloads).
- This may improve performance for some workloads by freeing buffer
- cache space for more cacheable data.
- If the Hadoop native libraries are not available, this configuration
- has no effect.
- </description>
- </property>
- <property>
- <name>dfs.datanode.drop.cache.behind.writes</name>
- <value>false</value>
- <description>
- In some workloads, the data written to HDFS is known to be significantly
- large enough that it is unlikely to be useful to cache it in the
- operating system buffer cache. In this case, the DataNode may be
- configured to automatically purge all data from the buffer cache
- after it is written to disk.
- This may improve performance for some workloads by freeing buffer
- cache space for more cacheable data.
- If the Hadoop native libraries are not available, this configuration
- has no effect.
- </description>
- </property>
- <property>
- <name>dfs.datanode.sync.behind.writes</name>
- <value>false</value>
- <description>
- If this configuration is enabled, the datanode will instruct the
- operating system to enqueue all written data to the disk immediately
- after it is written. This differs from the usual OS policy which
- may wait for up to 30 seconds before triggering writeback.
- This may improve performance for some workloads by smoothing the
- IO profile for data written to disk.
- If the Hadoop native libraries are not available, this configuration
- has no effect.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.max.attempts</name>
- <value>15</value>
- <description>
- Expert only. The number of client failover attempts that should be
- made before the failover is considered failed.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.sleep.base.millis</name>
- <value>500</value>
- <description>
- Expert only. The time to wait, in milliseconds, between failover
- attempts increases exponentially as a function of the number of
- attempts made so far, with a random factor of +/- 50%. This option
- specifies the base value used in the failover calculation. The
- first failover will retry immediately. The 2nd failover attempt
- will delay at least dfs.client.failover.sleep.base.millis
- milliseconds. And so on.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.sleep.max.millis</name>
- <value>15000</value>
- <description>
- Expert only. The time to wait, in milliseconds, between failover
- attempts increases exponentially as a function of the number of
- attempts made so far, with a random factor of +/- 50%. This option
- specifies the maximum value to wait between failovers.
- Specifically, the time between two failover attempts will not
- exceed +/- 50% of dfs.client.failover.sleep.max.millis
- milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.connection.retries</name>
- <value>0</value>
- <description>
- Expert only. Indicates the number of retries a failover IPC client
- will make to establish a server connection.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.connection.retries.on.timeouts</name>
- <value>0</value>
- <description>
- Expert only. The number of retry attempts a failover IPC client
- will make on socket timeout when establishing a server connection.
- </description>
- </property>
- <property>
- <name>dfs.nameservices</name>
- <value></value>
- <description>
- Comma-separated list of nameservices.
- </description>
- </property>
- <property>
- <name>dfs.nameservice.id</name>
- <value></value>
- <description>
- The ID of this nameservice. If the nameservice ID is not
- configured, or more than one nameservice is configured for
- dfs.nameservices, it is determined automatically by
- matching the local node's address with the configured address.
- </description>
- </property>
- <property>
- <name>dfs.ha.namenodes.EXAMPLENAMESERVICE</name>
- <value></value>
- <description>
- The prefix for a given nameservice; it contains a comma-separated
- list of namenodes for that nameservice (e.g. EXAMPLENAMESERVICE).
- </description>
- </property>
- <property>
- <name>dfs.ha.namenode.id</name>
- <value></value>
- <description>
- The ID of this namenode. If the namenode ID is not configured it
- is determined automatically by matching the local node's address
- with the configured address.
- </description>
- </property>
- <property>
- <name>dfs.ha.log-roll.period</name>
- <value>120</value>
- <description>
- How often, in seconds, the StandbyNode should ask the active to
- roll edit logs. Since the StandbyNode only reads from finalized
- log segments, the StandbyNode will only be as up-to-date as how
- often the logs are rolled. Note that failover triggers a log roll
- so the StandbyNode will be up to date before it becomes active.
- </description>
- </property>
- <property>
- <name>dfs.ha.tail-edits.period</name>
- <value>60</value>
- <description>
- How often, in seconds, the StandbyNode should check for new
- finalized log segments in the shared edits log.
- </description>
- </property>
- <property>
- <name>dfs.ha.automatic-failover.enabled</name>
- <value>false</value>
- <description>
- Whether automatic failover is enabled. See the HDFS High
- Availability documentation for details on automatic HA
- configuration.
- </description>
- </property>
- <property>
- <name>dfs.support.append</name>
- <value>true</value>
- <description>
- Does HDFS allow appends to files?
- </description>
- </property>
- <property>
- <name>dfs.client.use.datanode.hostname</name>
- <value>false</value>
- <description>Whether clients should use datanode hostnames when
- connecting to datanodes.
- </description>
- </property>
- <property>
- <name>dfs.datanode.use.datanode.hostname</name>
- <value>false</value>
- <description>Whether datanodes should use datanode hostnames when
- connecting to other datanodes for data transfer.
- </description>
- </property>
- <property>
- <name>dfs.client.local.interfaces</name>
- <value></value>
- <description>A comma separated list of network interface names to use
- for data transfer between the client and datanodes. When creating
- a connection to read from or write to a datanode, the client
- chooses one of the specified interfaces at random and binds its
- socket to the IP of that interface. Individual names may be
- specified as either an interface name (e.g. "eth0"), a subinterface
- name (e.g. "eth0:0"), or an IP address (which may be specified using
- CIDR notation to match a range of IPs).
- </description>
- </property>
- <property>
- <name>dfs.namenode.kerberos.internal.spnego.principal</name>
- <value>${dfs.web.authentication.kerberos.principal}</value>
- </property>
- <property>
- <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
- <value>${dfs.web.authentication.kerberos.principal}</value>
- </property>
- <property>
- <name>dfs.namenode.avoid.read.stale.datanode</name>
- <value>false</value>
- <description>
- Indicate whether or not to avoid reading from "stale" datanodes whose
- heartbeat messages have not been received by the namenode
- for more than a specified time interval. Stale datanodes will be
- moved to the end of the node list returned for reading. See
- dfs.namenode.avoid.write.stale.datanode for a similar setting for writes.
- </description>
- </property>
- <property>
- <name>dfs.namenode.avoid.write.stale.datanode</name>
- <value>false</value>
- <description>
- Indicate whether or not to avoid writing to "stale" datanodes whose
- heartbeat messages have not been received by the namenode
- for more than a specified time interval. Writes will avoid using
- stale datanodes unless more than a configured ratio
- (dfs.namenode.write.stale.datanode.ratio) of datanodes are marked as
- stale. See dfs.namenode.avoid.read.stale.datanode for a similar setting
- for reads.
- </description>
- </property>
- <property>
- <name>dfs.namenode.stale.datanode.interval</name>
- <value>30000</value>
- <description>
- Default time interval for marking a datanode as "stale", i.e., if
- the namenode has not received a heartbeat message from a datanode for
- more than this time interval, the datanode will be marked and treated
- as "stale" by default. The stale interval should not be too small,
- since otherwise stale states may change too frequently.
- We thus set a minimum stale interval value (the default value is 3 times
- the heartbeat interval) and guarantee that the stale interval cannot be less
- than the minimum value.
- </description>
- </property>
- <property>
- <name>dfs.namenode.write.stale.datanode.ratio</name>
- <value>0.5f</value>
- <description>
- When the ratio of stale datanodes to total datanodes
- is greater than this ratio, stop avoiding writing to stale nodes so
- as to prevent causing hotspots.
- </description>
- </property>
- <property>
- <name>dfs.namenode.invalidate.work.pct.per.iteration</name>
- <value>0.32f</value>
- <description>
- *Note*: Advanced property. Change with caution.
- This determines what percentage of block
- invalidations (deletes) to do over a single DN heartbeat
- deletion command. The final deletion count is determined by applying this
- percentage to the number of live nodes in the system.
- The resultant number is the number of blocks from the deletion list
- chosen for proper invalidation over a single heartbeat of a single DN.
- Value should be a positive, non-zero percentage in float notation (X.Yf),
- with 1.0f meaning 100%.
- </description>
- </property>
- <property>
- <name>dfs.namenode.replication.work.multiplier.per.iteration</name>
- <value>2</value>
- <description>
- *Note*: Advanced property. Change with caution.
- This determines the total amount of block transfers to begin in
- parallel at a DN, for replication, when such a command list is being
- sent over a DN heartbeat by the NN. The actual number is obtained by
- multiplying this multiplier with the total number of live nodes in the
- cluster. The resulting number is the number of blocks to begin transferring
- immediately, per DN heartbeat. This number can be any positive,
- non-zero integer.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.enabled</name>
- <value>false</value>
- <description>
- Enable WebHDFS (REST API) in Namenodes and Datanodes.
- </description>
- </property>
- <property>
- <name>hadoop.fuse.connection.timeout</name>
- <value>300</value>
- <description>
- The minimum number of seconds that we'll cache libhdfs connection objects
- in fuse_dfs. Lower values will result in lower memory consumption; higher
- values may speed up access by avoiding the overhead of creating new
- connection objects.
- </description>
- </property>
- <property>
- <name>hadoop.fuse.timer.period</name>
- <value>5</value>
- <description>
- The number of seconds between cache expiry checks in fuse_dfs. Lower values
- will result in fuse_dfs noticing changes to Kerberos ticket caches more
- quickly.
- </description>
- </property>
- <property>
- <name>dfs.metrics.percentiles.intervals</name>
- <value></value>
- <description>
- Comma-delimited set of integers denoting the desired rollover intervals
- (in seconds) for percentile latency metrics on the Namenode and Datanode.
- By default, percentile latency metrics are disabled.
- </description>
- </property>
- <property>
- <name>dfs.encrypt.data.transfer</name>
- <value>false</value>
- <description>
- Whether or not actual block data that is read/written from/to HDFS should
- be encrypted on the wire. This only needs to be set on the NN and DNs,
- clients will deduce this automatically.
- </description>
- </property>
- <property>
- <name>dfs.encrypt.data.transfer.algorithm</name>
- <value></value>
- <description>
- This value may be set to either "3des" or "rc4". If nothing is set, then
- the configured JCE default on the system is used (usually 3DES). It is
- widely believed that 3DES is more cryptographically secure, but RC4 is
- substantially faster.
- </description>
- </property>
- <property>
- <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
- <value>false</value>
- <description>
- Boolean which enables backend datanode-side support for the experimental DistributedFileSystem#getFileBlockStorageLocations API.
- </description>
- </property>
- <property>
- <name>dfs.client.file-block-storage-locations.num-threads</name>
- <value>10</value>
- <description>
- Number of threads used for making parallel RPCs in DistributedFileSystem#getFileBlockStorageLocations().
- </description>
- </property>
- <property>
- <name>dfs.client.file-block-storage-locations.timeout</name>
- <value>60</value>
- <description>
- Timeout (in seconds) for the parallel RPCs made in DistributedFileSystem#getFileBlockStorageLocations().
- </description>
- </property>
- <property>
- <name>dfs.journalnode.rpc-address</name>
- <value>0.0.0.0:8485</value>
- <description>
- The JournalNode RPC server address and port.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.http-address</name>
- <value>0.0.0.0:8480</value>
- <description>
- The address and port the JournalNode web UI listens on.
- If the port is 0 then the server will start on a free port.
- </description>
- </property>
- <property>
- <name>dfs.namenode.audit.loggers</name>
- <value>default</value>
- <description>
- List of classes implementing audit loggers that will receive audit events.
- These should be implementations of org.apache.hadoop.hdfs.server.namenode.AuditLogger.
- The special value "default" can be used to reference the default audit
- logger, which uses the configured log system. Installing custom audit loggers
- may affect the performance and stability of the NameNode. Refer to the custom
- logger's documentation for more details.
- </description>
- </property>
- <property>
- <name>dfs.domain.socket.path</name>
- <value></value>
- <description>
- Optional. This is a path to a UNIX domain socket that will be used for
- communication between the DataNode and local HDFS clients.
- If the string "_PORT" is present in this path, it will be replaced by the
- TCP port of the DataNode.
- </description>
- </property>
- <property>
- <name>dfs.datanode.fsdataset.volume.choosing.balanced-space-threshold</name>
- <value>10737418240</value> <!-- 10 GB -->
- <description>
- Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to
- org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy.
- This setting controls how much DN volumes are allowed to differ in terms of
- bytes of free disk space before they are considered imbalanced. If the free
- space of all the volumes is within this range of each other, the volumes
- will be considered balanced and block assignments will be done on a pure
- round robin basis.
- </description>
- </property>
- <property>
- <name>dfs.datanode.fsdataset.volume.choosing.balanced-space-preference-percent</name>
- <value>0.75f</value>
- <description>
- Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to
- org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy.
- This setting controls what percentage of new block allocations will be sent
- to volumes with more available disk space than others. This setting should
- be in the range 0.0 - 1.0, though in practice 0.5 - 1.0, since there should
- be no reason to prefer that volumes with less available disk space receive
- more block allocations.
- </description>
- </property>
- </configuration>