- <?xml version="1.0"?>
- <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
- <!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
- <!-- Do not modify this file directly. Instead, copy entries that you -->
- <!-- wish to modify from this file into hdfs-site.xml and change them -->
- <!-- there. If hdfs-site.xml does not already exist, create it. -->
- <configuration>
- <property>
- <name>hadoop.hdfs.configuration.version</name>
- <value>1</value>
- <description>version of this configuration file</description>
- </property>
- <property>
- <name>dfs.namenode.rpc-address</name>
- <value></value>
- <description>
- RPC address that handles all client requests. In the case of HA/Federation where multiple namenodes exist,
- the name service id is added to the name e.g. dfs.namenode.rpc-address.ns1
- dfs.namenode.rpc-address.EXAMPLENAMESERVICE
- The value of this property will take the form of nn-host1:rpc-port. The NameNode's default RPC port is 9820.
- </description>
- </property>
- <property>
- <name>dfs.namenode.rpc-bind-host</name>
- <value></value>
- <description>
- The actual address the RPC server will bind to. If this optional address is
- set, it overrides only the hostname portion of dfs.namenode.rpc-address.
- It can also be specified per name node or name service for HA/Federation.
- This is useful for making the name node listen on all interfaces by
- setting it to 0.0.0.0.
- </description>
- </property>
- <property>
- <name>dfs.namenode.servicerpc-address</name>
- <value></value>
- <description>
- RPC address for HDFS Services communication. BackupNode, Datanodes and all other services should
- connect to this address if it is configured. In the case of HA/Federation where multiple namenodes exist,
- the name service id is added to the name e.g. dfs.namenode.servicerpc-address.ns1
- dfs.namenode.servicerpc-address.EXAMPLENAMESERVICE
- The value of this property will take the form of nn-host1:rpc-port.
- If the value of this property is unset the value of dfs.namenode.rpc-address will be used as the default.
- </description>
- </property>
- <property>
- <name>dfs.namenode.servicerpc-bind-host</name>
- <value></value>
- <description>
- The actual address the service RPC server will bind to. If this optional address is
- set, it overrides only the hostname portion of dfs.namenode.servicerpc-address.
- It can also be specified per name node or name service for HA/Federation.
- This is useful for making the name node listen on all interfaces by
- setting it to 0.0.0.0.
- </description>
- </property>
- <property>
- <name>dfs.namenode.lifeline.rpc-address</name>
- <value></value>
- <description>
- NameNode RPC lifeline address. This is an optional separate RPC address
- that can be used to isolate health checks and liveness to protect against
- resource exhaustion in the main RPC handler pool. In the case of
- HA/Federation where multiple NameNodes exist, the name service ID is added
- to the name e.g. dfs.namenode.lifeline.rpc-address.ns1. The value of this
- property will take the form of nn-host1:rpc-port. If this property is not
- defined, then the NameNode will not start a lifeline RPC server. By
- default, the property is not defined.
- </description>
- </property>
- <property>
- <name>dfs.namenode.lifeline.rpc-bind-host</name>
- <value></value>
- <description>
- The actual address the lifeline RPC server will bind to. If this optional
- address is set, it overrides only the hostname portion of
- dfs.namenode.lifeline.rpc-address. It can also be specified per name node
- or name service for HA/Federation. This is useful for making the name node
- listen on all interfaces by setting it to 0.0.0.0.
- </description>
- </property>
- <property>
- <name>dfs.namenode.secondary.http-address</name>
- <value>0.0.0.0:9868</value>
- <description>
- The secondary namenode http server address and port.
- </description>
- </property>
- <property>
- <name>dfs.namenode.secondary.https-address</name>
- <value>0.0.0.0:9869</value>
- <description>
- The secondary namenode HTTPS server address and port.
- </description>
- </property>
- <property>
- <name>dfs.datanode.address</name>
- <value>0.0.0.0:9866</value>
- <description>
- The datanode server address and port for data transfer.
- </description>
- </property>
- <property>
- <name>dfs.datanode.http.address</name>
- <value>0.0.0.0:9864</value>
- <description>
- The datanode http server address and port.
- </description>
- </property>
- <property>
- <name>dfs.datanode.ipc.address</name>
- <value>0.0.0.0:9867</value>
- <description>
- The datanode ipc server address and port.
- </description>
- </property>
- <property>
- <name>dfs.datanode.handler.count</name>
- <value>10</value>
- <description>The number of server threads for the datanode.</description>
- </property>
- <property>
- <name>dfs.namenode.http-address</name>
- <value>0.0.0.0:9870</value>
- <description>
- The address and the base port where the dfs namenode web ui will listen on.
- </description>
- </property>
- <property>
- <name>dfs.namenode.http-bind-host</name>
- <value></value>
- <description>
- The actual address the HTTP server will bind to. If this optional address
- is set, it overrides only the hostname portion of dfs.namenode.http-address.
- It can also be specified per name node or name service for HA/Federation.
- This is useful for making the name node HTTP server listen on all
- interfaces by setting it to 0.0.0.0.
- </description>
- </property>
- <property>
- <name>dfs.namenode.heartbeat.recheck-interval</name>
- <value>300000</value>
- <description>
- This time decides the interval to check for expired datanodes.
- With this value and dfs.heartbeat.interval, the interval of
- deciding the datanode is stale or not is also calculated.
- The unit of this configuration is millisecond.
- </description>
- </property>
- <property>
- <name>dfs.http.policy</name>
- <value>HTTP_ONLY</value>
- <description>Decides whether HTTPS (SSL) is supported on HDFS.
- This configures the HTTP endpoint for HDFS daemons.
- The following values are supported:
- - HTTP_ONLY : Service is provided only on http
- - HTTPS_ONLY : Service is provided only on https
- - HTTP_AND_HTTPS : Service is provided both on http and https
- </description>
- </property>
- <property>
- <name>dfs.client.https.need-auth</name>
- <value>false</value>
- <description>Whether SSL client certificate authentication is required
- </description>
- </property>
- <property>
- <name>dfs.client.cached.conn.retry</name>
- <value>3</value>
- <description>The number of times the HDFS client will pull a socket from the
- cache. Once this number is exceeded, the client will try to create a new
- socket.
- </description>
- </property>
- <property>
- <name>dfs.https.server.keystore.resource</name>
- <value>ssl-server.xml</value>
- <description>Resource file from which ssl server keystore
- information will be extracted
- </description>
- </property>
- <property>
- <name>dfs.client.https.keystore.resource</name>
- <value>ssl-client.xml</value>
- <description>Resource file from which ssl client keystore
- information will be extracted
- </description>
- </property>
- <property>
- <name>dfs.datanode.https.address</name>
- <value>0.0.0.0:9865</value>
- <description>The datanode secure http server address and port.</description>
- </property>
- <property>
- <name>dfs.namenode.https-address</name>
- <value>0.0.0.0:9871</value>
- <description>The namenode secure http server address and port.</description>
- </property>
- <property>
- <name>dfs.namenode.https-bind-host</name>
- <value></value>
- <description>
- The actual address the HTTPS server will bind to. If this optional address
- is set, it overrides only the hostname portion of dfs.namenode.https-address.
- It can also be specified per name node or name service for HA/Federation.
- This is useful for making the name node HTTPS server listen on all
- interfaces by setting it to 0.0.0.0.
- </description>
- </property>
- <property>
- <name>dfs.datanode.dns.interface</name>
- <value>default</value>
- <description>
- The name of the Network Interface from which a data node should
- report its IP address. e.g. eth2. This setting may be required for some
- multi-homed nodes where the DataNodes are assigned multiple hostnames
- and it is desirable for the DataNodes to use a non-default hostname.
- Prefer using hadoop.security.dns.interface over
- dfs.datanode.dns.interface.
- </description>
- </property>
- <property>
- <name>dfs.datanode.dns.nameserver</name>
- <value>default</value>
- <description>
- The host name or IP address of the name server (DNS) which a DataNode
- should use to determine its own host name.
- Prefer using hadoop.security.dns.nameserver over
- dfs.datanode.dns.nameserver.
- </description>
- </property>
- <property>
- <name>dfs.namenode.backup.address</name>
- <value>0.0.0.0:50100</value>
- <description>
- The backup node server address and port.
- If the port is 0 then the server will start on a free port.
- </description>
- </property>
- <property>
- <name>dfs.namenode.backup.http-address</name>
- <value>0.0.0.0:50105</value>
- <description>
- The backup node http server address and port.
- If the port is 0 then the server will start on a free port.
- </description>
- </property>
- <property>
- <name>dfs.namenode.redundancy.considerLoad</name>
- <value>true</value>
- <description>Decide if chooseTarget considers the target's load or not
- </description>
- </property>
- <property>
- <name>dfs.namenode.redundancy.considerLoad.factor</name>
- <value>2.0</value>
- <description>The factor by which a node's load can exceed the average
- before being rejected for writes, only if considerLoad is true.
- </description>
- </property>
- <property>
- <name>dfs.default.chunk.view.size</name>
- <value>32768</value>
- <description>The number of bytes to view for a file on the browser.
- </description>
- </property>
- <property>
- <name>dfs.datanode.du.reserved</name>
- <value>0</value>
- <description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
- Specific storage type based reservation is also supported. The property can be followed with
- corresponding storage types ([ssd]/[disk]/[archive]/[ram_disk]) for cluster with heterogeneous storage.
- For example, reserved space for RAM_DISK storage can be configured using property
- 'dfs.datanode.du.reserved.ram_disk'. If specific storage type reservation is not configured
- then dfs.datanode.du.reserved will be used.
- </description>
- </property>
- <property>
- <name>dfs.namenode.name.dir</name>
- <value>file://${hadoop.tmp.dir}/dfs/name</value>
- <description>Determines where on the local filesystem the DFS name node
- should store the name table(fsimage). If this is a comma-delimited list
- of directories then the name table is replicated in all of the
- directories, for redundancy. </description>
- </property>
- <property>
- <name>dfs.namenode.name.dir.restore</name>
- <value>false</value>
- <description>Set to true to enable NameNode to attempt recovering a
- previously failed dfs.namenode.name.dir. When enabled, a recovery of any
- failed directory is attempted during checkpoint.</description>
- </property>
- <property>
- <name>dfs.namenode.fs-limits.max-component-length</name>
- <value>255</value>
- <description>Defines the maximum number of bytes in UTF-8 encoding in each
- component of a path. A value of 0 will disable the check.</description>
- </property>
- <property>
- <name>dfs.namenode.fs-limits.max-directory-items</name>
- <value>1048576</value>
- <description>Defines the maximum number of items that a directory may
- contain. Cannot set the property to a value less than 1 or more than
- 6400000.</description>
- </property>
- <property>
- <name>dfs.namenode.fs-limits.min-block-size</name>
- <value>1048576</value>
- <description>Minimum block size in bytes, enforced by the Namenode at create
- time. This prevents the accidental creation of files with tiny block
- sizes (and thus many blocks), which can degrade
- performance.</description>
- </property>
- <property>
- <name>dfs.namenode.fs-limits.max-blocks-per-file</name>
- <value>10000</value>
- <description>Maximum number of blocks per file, enforced by the Namenode on
- write. This prevents the creation of extremely large files which can
- degrade performance.</description>
- </property>
- <property>
- <name>dfs.namenode.edits.dir</name>
- <value>${dfs.namenode.name.dir}</value>
- <description>Determines where on the local filesystem the DFS name node
- should store the transaction (edits) file. If this is a comma-delimited list
- of directories then the transaction file is replicated in all of the
- directories, for redundancy. Default value is same as dfs.namenode.name.dir
- </description>
- </property>
- <property>
- <name>dfs.namenode.edits.dir.required</name>
- <value></value>
- <description>This should be a subset of dfs.namenode.edits.dir,
- to ensure that the transaction (edits) file
- in these places is always up-to-date.
- </description>
- </property>
- <property>
- <name>dfs.namenode.shared.edits.dir</name>
- <value></value>
- <description>A directory on shared storage between the multiple namenodes
- in an HA cluster. This directory will be written by the active and read
- by the standby in order to keep the namespaces synchronized. This directory
- does not need to be listed in dfs.namenode.edits.dir above. It should be
- left empty in a non-HA cluster.
- </description>
- </property>
- <property>
- <name>dfs.namenode.edits.journal-plugin.qjournal</name>
- <value>org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager</value>
- </property>
- <property>
- <name>dfs.permissions.enabled</name>
- <value>true</value>
- <description>
- If "true", enable permission checking in HDFS.
- If "false", permission checking is turned off,
- but all other behavior is unchanged.
- Switching from one parameter value to the other does not change the mode,
- owner or group of files or directories.
- </description>
- </property>
- <property>
- <name>dfs.permissions.superusergroup</name>
- <value>supergroup</value>
- <description>The name of the group of super-users.
- The value should be a single group name.
- </description>
- </property>
- <property>
- <name>dfs.cluster.administrators</name>
- <value></value>
- <description>ACL for the admins, this configuration is used to control
- who can access the default servlets in the namenode, etc. The value
- should be a comma separated list of users and groups. The user list
- comes first and is separated by a space followed by the group list,
- e.g. "user1,user2 group1,group2". Both users and groups are optional,
- so "user1", " group1", "", "user1 group1", "user1,user2 group1,group2"
- are all valid (note the leading space in " group1"). '*' grants access
- to all users and groups, e.g. '*', '* ' and ' *' are all valid.
- </description>
- </property>
- <property>
- <name>dfs.namenode.acls.enabled</name>
- <value>false</value>
- <description>
- Set to true to enable support for HDFS ACLs (Access Control Lists). By
- default, ACLs are disabled. When ACLs are disabled, the NameNode rejects
- all RPCs related to setting or getting ACLs.
- </description>
- </property>
- <property>
- <name>dfs.namenode.posix.acl.inheritance.enabled</name>
- <value>true</value>
- <description>
- Set to true to enable POSIX style ACL inheritance. When it is enabled
- and the create request comes from a compatible client, the NameNode
- will apply default ACLs from the parent directory to the create mode
- and ignore the client umask. If no default ACL found, it will apply the
- client umask.
- </description>
- </property>
- <property>
- <name>dfs.namenode.lazypersist.file.scrub.interval.sec</name>
- <value>300</value>
- <description>
- The NameNode periodically scans the namespace for LazyPersist files with
- missing blocks and unlinks them from the namespace. This configuration key
- controls the interval between successive scans. Set it to a negative value
- to disable this behavior.
- </description>
- </property>
- <property>
- <name>dfs.block.access.token.enable</name>
- <value>false</value>
- <description>
- If "true", access tokens are used as capabilities for accessing datanodes.
- If "false", no access tokens are checked on accessing datanodes.
- </description>
- </property>
- <property>
- <name>dfs.block.access.key.update.interval</name>
- <value>600</value>
- <description>
- Interval in minutes at which namenode updates its access keys.
- </description>
- </property>
- <property>
- <name>dfs.block.access.token.lifetime</name>
- <value>600</value>
- <description>The lifetime of access tokens in minutes.</description>
- </property>
- <property>
- <name>dfs.block.access.token.protobuf.enable</name>
- <value>false</value>
- <description>
- If "true", block tokens are written using Protocol Buffers.
- If "false", block tokens are written using Legacy format.
- </description>
- </property>
- <property>
- <name>dfs.datanode.data.dir</name>
- <value>file://${hadoop.tmp.dir}/dfs/data</value>
- <description>Determines where on the local filesystem a DFS data node
- should store its blocks. If this is a comma-delimited
- list of directories, then data will be stored in all named
- directories, typically on different devices. The directories should be tagged
- with corresponding storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for HDFS
- storage policies. The default storage type will be DISK if the directory does
- not have a storage type tagged explicitly. Directories that do not exist will
- be created if local filesystem permission allows.
- </description>
- </property>
- <property>
- <name>dfs.datanode.data.dir.perm</name>
- <value>700</value>
- <description>Permissions for the directories on the local filesystem where
- the DFS data node stores its blocks. The permissions can either be octal or
- symbolic.</description>
- </property>
- <property>
- <name>dfs.replication</name>
- <value>3</value>
- <description>Default block replication.
- The actual number of replications can be specified when the file is created.
- The default is used if replication is not specified in create time.
- </description>
- </property>
- <property>
- <name>dfs.replication.max</name>
- <value>512</value>
- <description>Maximal block replication.
- </description>
- </property>
- <property>
- <name>dfs.namenode.replication.min</name>
- <value>1</value>
- <description>Minimal block replication.
- </description>
- </property>
- <property>
- <name>dfs.namenode.maintenance.replication.min</name>
- <value>1</value>
- <description>Minimal live block replication in existence of maintenance mode.
- </description>
- </property>
- <property>
- <name>dfs.namenode.safemode.replication.min</name>
- <value></value>
- <description>
- A separate minimum replication factor for calculating the safe block count.
- This is an expert level setting.
- Setting this lower than dfs.namenode.replication.min
- is not recommended and/or is dangerous for production setups.
- When it is not set, it takes its value from dfs.namenode.replication.min.
- </description>
- </property>
- <property>
- <name>dfs.blocksize</name>
- <value>134217728</value>
- <description>
- The default block size for new files, in bytes.
- You can use the following suffix (case insensitive):
- k(kilo), m(mega), g(giga), t(tera), p(peta), e(exa) to specify the size (such as 128k, 512m, 1g, etc.),
- Or provide complete size in bytes (such as 134217728 for 128 MB).
- </description>
- </property>
- <property>
- <name>dfs.client.block.write.retries</name>
- <value>3</value>
- <description>The number of retries for writing blocks to the data nodes,
- before we signal failure to the application.
- </description>
- </property>
- <property>
- <name>dfs.client.block.write.replace-datanode-on-failure.enable</name>
- <value>true</value>
- <description>
- If there is a datanode/network failure in the write pipeline,
- DFSClient will try to remove the failed datanode from the pipeline
- and then continue writing with the remaining datanodes. As a result,
- the number of datanodes in the pipeline is decreased. The feature is
- to add new datanodes to the pipeline.
- This is a site-wide property to enable/disable the feature.
- When the cluster size is extremely small, e.g. 3 nodes or less, cluster
- administrators may want to set the policy to NEVER in the default
- configuration file or disable this feature. Otherwise, users may
- experience an unusually high rate of pipeline failures since it is
- impossible to find new datanodes for replacement.
- See also dfs.client.block.write.replace-datanode-on-failure.policy
- </description>
- </property>
- <property>
- <name>dfs.client.block.write.replace-datanode-on-failure.policy</name>
- <value>DEFAULT</value>
- <description>
- This property is used only if the value of
- dfs.client.block.write.replace-datanode-on-failure.enable is true.
- ALWAYS: always add a new datanode when an existing datanode is removed.
-
- NEVER: never add a new datanode.
- DEFAULT:
- Let r be the replication number.
- Let n be the number of existing datanodes.
- Add a new datanode only if r is greater than or equal to 3 and either
- (1) floor(r/2) is greater than or equal to n; or
- (2) r is greater than n and the block is hflushed/appended.
- </description>
- </property>
- <property>
- <name>dfs.client.block.write.replace-datanode-on-failure.best-effort</name>
- <value>false</value>
- <description>
- This property is used only if the value of
- dfs.client.block.write.replace-datanode-on-failure.enable is true.
- Best effort means that the client will try to replace a failed datanode
- in write pipeline (provided that the policy is satisfied), however, it
- continues the write operation in case that the datanode replacement also
- fails.
- Suppose the datanode replacement fails.
- false: An exception should be thrown so that the write will fail.
- true : The write should be resumed with the remaining datanodes.
-
- Note that setting this property to true allows writing to a pipeline
- with a smaller number of datanodes. As a result, it increases the
- probability of data loss.
- </description>
- </property>
- <property>
- <name>dfs.client.block.write.replace-datanode-on-failure.min-replication</name>
- <value>0</value>
- <description>
- The minimum number of replications that are needed in order not to fail
- the write pipeline if new datanodes can not be found to replace
- failed datanodes (could be due to network failure) in the write pipeline.
- If the number of the remaining datanodes in the write pipeline is greater
- than or equal to this property value, continue writing to the remaining nodes.
- Otherwise throw exception.
- If this is set to 0, an exception will be thrown, when a replacement
- can not be found.
- See also dfs.client.block.write.replace-datanode-on-failure.policy
- </description>
- </property>
- <property>
- <name>dfs.blockreport.intervalMsec</name>
- <value>21600000</value>
- <description>Determines block reporting interval in milliseconds.</description>
- </property>
- <property>
- <name>dfs.blockreport.initialDelay</name>
- <value>0s</value>
- <description>
- Delay for first block report in seconds. Support multiple time unit
- suffix(case insensitive), as described in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.blockreport.split.threshold</name>
- <value>1000000</value>
- <description>If the number of blocks on the DataNode is below this
- threshold then it will send block reports for all Storage Directories
- in a single message.
- If the number of blocks exceeds this threshold then the DataNode will
- send block reports for each Storage Directory in separate messages.
- Set to zero to always split.
- </description>
- </property>
- <property>
- <name>dfs.namenode.max.full.block.report.leases</name>
- <value>6</value>
- <description>The maximum number of leases for full block reports that the
- NameNode will issue at any given time. This prevents the NameNode from
- being flooded with full block reports that use up all the RPC handler
- threads. This number should never be more than the number of RPC handler
- threads or less than 1.
- </description>
- </property>
- <property>
- <name>dfs.namenode.full.block.report.lease.length.ms</name>
- <value>300000</value>
- <description>
- The number of milliseconds that the NameNode will wait before invalidating
- a full block report lease. This prevents a crashed DataNode from
- permanently using up a full block report lease.
- </description>
- </property>
- <property>
- <name>dfs.datanode.directoryscan.interval</name>
- <value>21600s</value>
- <description>Interval in seconds for Datanode to scan data directories and
- reconcile the difference between blocks in memory and on the disk.
- Support multiple time unit suffix(case insensitive), as described
- in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.datanode.directoryscan.threads</name>
- <value>1</value>
- <description>The number of threads in the threadpool that is used to
- compile reports for volumes in parallel.
- </description>
- </property>
- <property>
- <name>dfs.datanode.directoryscan.throttle.limit.ms.per.sec</name>
- <value>1000</value>
- <description>The report compilation threads are limited to only running for
- a given number of milliseconds per second, as configured by the
- property. The limit is taken per thread, not in aggregate, e.g. setting
- a limit of 100ms for 4 compiler threads will result in each thread being
- limited to 100ms, not 25ms.
- Note that the throttle does not interrupt the report compiler threads, so the
- actual running time of the threads per second will typically be somewhat
- higher than the throttle limit, usually by no more than 20%.
- Setting this limit to 1000 disables compiler thread throttling. Only
- values between 1 and 1000 are valid. Setting an invalid value will result
- in the throttle being disabled and an error message being logged. 1000 is
- the default setting.
- </description>
- </property>
- <property>
- <name>dfs.heartbeat.interval</name>
- <value>3s</value>
- <description>
- Determines datanode heartbeat interval in seconds.
- Can use the following suffix (case insensitive):
- ms(millis), s(sec), m(min), h(hour), d(day)
- to specify the time (such as 2s, 2m, 1h, etc.).
- Or provide complete number in seconds (such as 30 for 30 seconds).
- </description>
- </property>
- <property>
- <name>dfs.datanode.lifeline.interval.seconds</name>
- <value></value>
- <description>
- Sets the interval in seconds between sending DataNode Lifeline Protocol
- messages from the DataNode to the NameNode. The value must be greater than
- the value of dfs.heartbeat.interval. If this property is not defined, then
- the default behavior is to calculate the interval as 3x the value of
- dfs.heartbeat.interval. Note that normal heartbeat processing may cause the
- DataNode to postpone sending lifeline messages if they are not required.
- Under normal operations with speedy heartbeat processing, it is possible
- that no lifeline messages will need to be sent at all. This property has no
- effect if dfs.namenode.lifeline.rpc-address is not defined.
- </description>
- </property>
- <property>
- <name>dfs.namenode.handler.count</name>
- <value>10</value>
- <description>The number of Namenode RPC server threads that listen to
- requests from clients.
- If dfs.namenode.servicerpc-address is not configured then
- Namenode RPC server threads listen to requests from all nodes.
- </description>
- </property>
- <property>
- <name>dfs.namenode.service.handler.count</name>
- <value>10</value>
- <description>The number of Namenode RPC server threads that listen to
- requests from DataNodes and from all other non-client nodes.
- dfs.namenode.service.handler.count will be valid only if
- dfs.namenode.servicerpc-address is configured.
- </description>
- </property>
- <property>
- <name>dfs.namenode.lifeline.handler.ratio</name>
- <value>0.10</value>
- <description>
- A ratio applied to the value of dfs.namenode.handler.count, which then
- provides the number of RPC server threads the NameNode runs for handling the
- lifeline RPC server. For example, if dfs.namenode.handler.count is 100, and
- dfs.namenode.lifeline.handler.ratio is 0.10, then the NameNode starts
- 100 * 0.10 = 10 threads for handling the lifeline RPC server. It is common
- to tune the value of dfs.namenode.handler.count as a function of the number
- of DataNodes in a cluster. Using this property allows for the lifeline RPC
- server handler threads to be tuned automatically without needing to touch a
- separate property. Lifeline message processing is lightweight, so it is
- expected to require many fewer threads than the main NameNode RPC server.
- This property is not used if dfs.namenode.lifeline.handler.count is defined,
- which sets an absolute thread count. This property has no effect if
- dfs.namenode.lifeline.rpc-address is not defined.
- </description>
- </property>
- <property>
- <name>dfs.namenode.lifeline.handler.count</name>
- <value></value>
- <description>
- Sets an absolute number of RPC server threads the NameNode runs for handling
- the DataNode Lifeline Protocol and HA health check requests from ZKFC. If
- this property is defined, then it overrides the behavior of
- dfs.namenode.lifeline.handler.ratio. By default, it is not defined. This
- property has no effect if dfs.namenode.lifeline.rpc-address is not defined.
- </description>
- </property>
- <property>
- <name>dfs.namenode.safemode.threshold-pct</name>
- <value>0.999f</value>
- <description>
- Specifies the percentage of blocks that should satisfy
- the minimal replication requirement defined by dfs.namenode.replication.min.
- Values less than or equal to 0 mean not to wait for any particular
- percentage of blocks before exiting safemode.
- Values greater than 1 will make safe mode permanent.
- </description>
- </property>
- <property>
- <name>dfs.namenode.safemode.min.datanodes</name>
- <value>0</value>
- <description>
- Specifies the number of datanodes that must be considered alive
- before the name node exits safemode.
- Values less than or equal to 0 mean not to take the number of live
- datanodes into account when deciding whether to remain in safe mode
- during startup.
- Values greater than the number of datanodes in the cluster
- will make safe mode permanent.
- </description>
- </property>
- <property>
- <name>dfs.namenode.safemode.extension</name>
- <value>30000</value>
- <description>
- Determines extension of safe mode in milliseconds after the threshold level
- is reached. Support multiple time unit suffix (case insensitive), as
- described in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.namenode.resource.check.interval</name>
- <value>5000</value>
- <description>
- The interval in milliseconds at which the NameNode resource checker runs.
- The checker calculates the number of the NameNode storage volumes whose
- available spaces are more than dfs.namenode.resource.du.reserved, and
- enters safemode if the number becomes lower than the minimum value
- specified by dfs.namenode.resource.checked.volumes.minimum.
- </description>
- </property>
- <property>
- <name>dfs.namenode.resource.du.reserved</name>
- <value>104857600</value>
- <description>
- The amount of space to reserve/require for a NameNode storage directory
- in bytes. The default is 100MB.
- </description>
- </property>
- <property>
- <name>dfs.namenode.resource.checked.volumes</name>
- <value></value>
- <description>
- A list of local directories for the NameNode resource checker to check in
- addition to the local edits directories.
- </description>
- </property>
- <property>
- <name>dfs.namenode.resource.checked.volumes.minimum</name>
- <value>1</value>
- <description>
- The minimum number of redundant NameNode storage volumes required.
- </description>
- </property>
- <property>
- <name>dfs.datanode.balance.bandwidthPerSec</name>
- <value>10m</value>
- <description>
- Specifies the maximum amount of bandwidth that each datanode
- can utilize for the balancing purpose in terms of
- the number of bytes per second. You can use the following
- suffix (case insensitive):
- k(kilo), m(mega), g(giga), t(tera), p(peta), e(exa) to specify the size
- (such as 128k, 512m, 1g, etc.).
- Or provide complete size in bytes (such as 134217728 for 128 MB).
- </description>
- </property>
- <property>
- <name>dfs.hosts</name>
- <value></value>
- <description>Names a file that contains a list of hosts that are
- permitted to connect to the namenode. The full pathname of the file
- must be specified. If the value is empty, all hosts are
- permitted.</description>
- </property>
- <property>
- <name>dfs.hosts.exclude</name>
- <value></value>
- <description>Names a file that contains a list of hosts that are
- not permitted to connect to the namenode. The full pathname of the
- file must be specified. If the value is empty, no hosts are
- excluded.</description>
- </property>
- <property>
- <name>dfs.namenode.max.objects</name>
- <value>0</value>
- <description>The maximum number of files, directories and blocks
- dfs supports. A value of zero indicates no limit to the number
- of objects that dfs supports.
- </description>
- </property>
- <property>
- <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
- <value>true</value>
- <description>
- If true (the default), then the namenode requires that a connecting
- datanode's address must be resolved to a hostname. If necessary, a reverse
- DNS lookup is performed. All attempts to register a datanode from an
- unresolvable address are rejected.
- It is recommended that this setting be left on to prevent accidental
- registration of datanodes listed by hostname in the excludes file during a
- DNS outage. Only set this to false in environments where there is no
- infrastructure to support reverse DNS lookup.
- </description>
- </property>
- <property>
- <name>dfs.namenode.decommission.interval</name>
- <value>30s</value>
- <description>Namenode periodicity in seconds to check if
- decommission or maintenance is complete. Support multiple time unit
- suffix(case insensitive), as described in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.namenode.decommission.blocks.per.interval</name>
- <value>500000</value>
- <description>The approximate number of blocks to process per decommission
- or maintenance interval, as defined in dfs.namenode.decommission.interval.
- </description>
- </property>
- <property>
- <name>dfs.namenode.decommission.max.concurrent.tracked.nodes</name>
- <value>100</value>
- <description>
- The maximum number of decommission-in-progress or
- entering-maintenance datanodes that will be tracked at one time by
- the namenode. Tracking these datanodes consumes additional NN memory
- proportional to the number of blocks on the datanode. Having a conservative
- limit reduces the potential impact of decommissioning or maintenance of
- a large number of nodes at once.
-
- A value of 0 means no limit will be enforced.
- </description>
- </property>
- <property>
- <name>dfs.namenode.redundancy.interval.seconds</name>
- <value>3s</value>
- <description>The periodicity in seconds with which the namenode computes
- low redundancy work for datanodes. Support multiple time unit suffix(case insensitive),
- as described in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.namenode.accesstime.precision</name>
- <value>3600000</value>
- <description>The access time for an HDFS file is precise up to this value.
- The default value is 1 hour. Setting a value of 0 disables
- access times for HDFS.
- </description>
- </property>
- <property>
- <name>dfs.datanode.plugins</name>
- <value></value>
- <description>Comma-separated list of datanode plug-ins to be activated.
- </description>
- </property>
- <property>
- <name>dfs.namenode.plugins</name>
- <value></value>
- <description>Comma-separated list of namenode plug-ins to be activated.
- </description>
- </property>
- <property>
- <name>dfs.namenode.block-placement-policy.default.prefer-local-node</name>
- <value>true</value>
- <description>Controls how the default block placement policy places
- the first replica of a block. When true, it will prefer the node where
- the client is running. When false, it will prefer a node in the same rack
- as the client. Setting to false avoids situations where entire copies of
- large files end up on a single node, thus creating hotspots.
- </description>
- </property>
- <property>
- <name>dfs.stream-buffer-size</name>
- <value>4096</value>
- <description>The size of buffer to stream files.
- The size of this buffer should probably be a multiple of hardware
- page size (4096 on Intel x86), and it determines how much data is
- buffered during read and write operations.</description>
- </property>
- <property>
- <name>dfs.bytes-per-checksum</name>
- <value>512</value>
- <description>The number of bytes per checksum. Must not be larger than
- dfs.stream-buffer-size</description>
- </property>
- <property>
- <name>dfs.client-write-packet-size</name>
- <value>65536</value>
- <description>Packet size for clients to write</description>
- </property>
- <property>
- <name>dfs.client.write.exclude.nodes.cache.expiry.interval.millis</name>
- <value>600000</value>
- <description>The maximum period to keep a DN in the excluded nodes list
- at a client. After this period, in milliseconds, the previously excluded node(s) will
- be removed automatically from the cache and will be considered good for block allocations
- again. Useful to lower or raise in situations where you keep a file open for very long
- periods (such as a Write-Ahead-Log (WAL) file) to make the writer tolerant to cluster maintenance
- restarts. Defaults to 10 minutes.</description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.dir</name>
- <value>file://${hadoop.tmp.dir}/dfs/namesecondary</value>
- <description>Determines where on the local filesystem the DFS secondary
- name node should store the temporary images to merge.
- If this is a comma-delimited list of directories then the image is
- replicated in all of the directories for redundancy.
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.edits.dir</name>
- <value>${dfs.namenode.checkpoint.dir}</value>
- <description>Determines where on the local filesystem the DFS secondary
- name node should store the temporary edits to merge.
- If this is a comma-delimited list of directories then the edits are
- replicated in all of the directories for redundancy.
- Default value is same as dfs.namenode.checkpoint.dir
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.period</name>
- <value>3600s</value>
- <description>
- The number of seconds between two periodic checkpoints.
- Support multiple time unit suffix(case insensitive), as described
- in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.txns</name>
- <value>1000000</value>
- <description>The Secondary NameNode or CheckpointNode will create a checkpoint
- of the namespace every 'dfs.namenode.checkpoint.txns' transactions, regardless
- of whether 'dfs.namenode.checkpoint.period' has expired.
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.check.period</name>
- <value>60s</value>
- <description>The SecondaryNameNode and CheckpointNode will poll the NameNode
- every 'dfs.namenode.checkpoint.check.period' seconds to query the number
- of uncheckpointed transactions. Support multiple time unit suffix(case insensitive),
- as described in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.max-retries</name>
- <value>3</value>
- <description>The SecondaryNameNode retries failed checkpointing. If the
- failure occurs while loading fsimage or replaying edits, the number of
- retries is limited by this variable.
- </description>
- </property>
- <property>
- <name>dfs.namenode.checkpoint.check.quiet-multiplier</name>
- <value>1.5</value>
- <description>
- Used to calculate the amount of time between retries when in the 'quiet' period
- for creating checkpoints (active namenode already has an up-to-date image from another
- checkpointer), so we wait a multiplier of the dfs.namenode.checkpoint.check.period before
- retrying the checkpoint because another node likely is already managing the checkpoints,
- allowing us to save bandwidth to transfer checkpoints that don't need to be used.
- </description>
- </property>
- <property>
- <name>dfs.namenode.num.checkpoints.retained</name>
- <value>2</value>
- <description>The number of image checkpoint files (fsimage_*) that will be retained by
- the NameNode and Secondary NameNode in their storage directories. All edit
- logs (stored on edits_* files) necessary to recover an up-to-date namespace from the oldest retained
- checkpoint will also be retained.
- </description>
- </property>
- <property>
- <name>dfs.namenode.num.extra.edits.retained</name>
- <value>1000000</value>
- <description>The number of extra transactions which should be retained
- beyond what is minimally necessary for a NN restart.
- It does not translate directly to a file's age, or the number of files kept,
- but to the number of transactions (here "edits" means transactions).
- One edit file may contain several transactions (edits).
- During checkpoint, NameNode will identify the total number of edits to retain as extra by
- checking the latest checkpoint transaction value, subtracted by the value of this property.
- Then, it scans edits files to identify the older ones that don't include the computed range of
- retained transactions that are to be kept around, and purges them subsequently.
- The retention can be useful for audit purposes or for an HA setup where a remote Standby Node may have
- been offline for some time and needs to have a longer backlog of retained
- edits in order to start again.
- Typically each edit is on the order of a few hundred bytes, so the default
- of 1 million edits should be on the order of hundreds of MBs or low GBs.
- NOTE: Fewer extra edits may be retained than value specified for this setting
- if doing so would mean that more segments would be retained than the number
- configured by dfs.namenode.max.extra.edits.segments.retained.
- </description>
- </property>
- <property>
- <name>dfs.namenode.max.extra.edits.segments.retained</name>
- <value>10000</value>
- <description>The maximum number of extra edit log segments which should be retained
- beyond what is minimally necessary for a NN restart. When used in conjunction with
- dfs.namenode.num.extra.edits.retained, this configuration property serves to cap
- the number of extra edits files to a reasonable value.
- </description>
- </property>
- <property>
- <name>dfs.namenode.delegation.key.update-interval</name>
- <value>86400000</value>
- <description>The update interval for master key for delegation tokens
- in the namenode in milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.namenode.delegation.token.max-lifetime</name>
- <value>604800000</value>
- <description>The maximum lifetime in milliseconds for which a delegation
- token is valid.
- </description>
- </property>
- <property>
- <name>dfs.namenode.delegation.token.renew-interval</name>
- <value>86400000</value>
- <description>The renewal interval for delegation token in milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.datanode.failed.volumes.tolerated</name>
- <value>0</value>
- <description>The number of volumes that are allowed to
- fail before a datanode stops offering service. By default
- any volume failure will cause a datanode to shutdown.
- </description>
- </property>
- <property>
- <name>dfs.image.compress</name>
- <value>false</value>
- <description>Should the dfs image be compressed?
- </description>
- </property>
- <property>
- <name>dfs.image.compression.codec</name>
- <value>org.apache.hadoop.io.compress.DefaultCodec</value>
- <description>If the dfs image is compressed, how should it be compressed?
- This has to be a codec defined in io.compression.codecs.
- </description>
- </property>
- <property>
- <name>dfs.image.transfer.timeout</name>
- <value>60000</value>
- <description>
- Socket timeout for image transfer in milliseconds. This timeout and the related
- dfs.image.transfer.bandwidthPerSec parameter should be configured such
- that normal image transfer can complete successfully.
- This timeout prevents client hangs when the sender fails during
- image transfer. This is socket timeout during image transfer.
- </description>
- </property>
- <property>
- <name>dfs.image.transfer.bandwidthPerSec</name>
- <value>0</value>
- <description>
- Maximum bandwidth used for regular image transfers (instead of
- bootstrapping the standby namenode), in bytes per second.
- This can help keep normal namenode operations responsive during
- checkpointing. The maximum bandwidth and timeout in
- dfs.image.transfer.timeout should be set such that normal image
- transfers can complete successfully.
- A default value of 0 indicates that throttling is disabled.
- The maximum bandwidth used for bootstrapping standby namenode is
- configured with dfs.image.transfer-bootstrap-standby.bandwidthPerSec.
- </description>
- </property>
- <property>
- <name>dfs.image.transfer-bootstrap-standby.bandwidthPerSec</name>
- <value>0</value>
- <description>
- Maximum bandwidth used for transferring image to bootstrap standby
- namenode, in bytes per second.
- A default value of 0 indicates that throttling is disabled. This default
- value should be used in most cases, to ensure timely HA operations.
- The maximum bandwidth used for regular image transfers is configured
- with dfs.image.transfer.bandwidthPerSec.
- </description>
- </property>
- <property>
- <name>dfs.image.transfer.chunksize</name>
- <value>65536</value>
- <description>
- Chunksize in bytes to upload the checkpoint.
- Chunked streaming is used to avoid internal buffering of contents
- of image file of huge size.
- </description>
- </property>
- <property>
- <name>dfs.edit.log.transfer.timeout</name>
- <value>30000</value>
- <description>
- Socket timeout for edit log transfer in milliseconds. This timeout
- should be configured such that normal edit log transfer for journal
- node syncing can complete successfully.
- </description>
- </property>
- <property>
- <name>dfs.edit.log.transfer.bandwidthPerSec</name>
- <value>0</value>
- <description>
- Maximum bandwidth used for transferring edit log between journal nodes
- for syncing, in bytes per second.
- A default value of 0 indicates that throttling is disabled.
- </description>
- </property>
- <property>
- <name>dfs.namenode.support.allow.format</name>
- <value>true</value>
- <description>Does HDFS namenode allow itself to be formatted?
- You may consider setting this to false for any production
- cluster, to avoid any possibility of formatting a running DFS.
- </description>
- </property>
- <property>
- <name>dfs.datanode.max.transfer.threads</name>
- <value>4096</value>
- <description>
- Specifies the maximum number of threads to use for transferring data
- in and out of the DN.
- </description>
- </property>
- <property>
- <name>dfs.datanode.scan.period.hours</name>
- <value>504</value>
- <description>
- If this is positive, the DataNode will not scan any
- individual block more than once in the specified scan period.
- If this is negative, the block scanner is disabled.
- If this is set to zero, then the default value of 504 hours
- or 3 weeks is used. Prior versions of HDFS incorrectly documented
- that setting this key to zero will disable the block scanner.
- </description>
- </property>
- <property>
- <name>dfs.block.scanner.volume.bytes.per.second</name>
- <value>1048576</value>
- <description>
- If this is 0, the DataNode's block scanner will be disabled. If this
- is positive, this is the number of bytes per second that the DataNode's
- block scanner will try to scan from each volume.
- </description>
- </property>
- <property>
- <name>dfs.datanode.readahead.bytes</name>
- <value>4194304</value>
- <description>
- While reading block files, if the Hadoop native libraries are available,
- the datanode can use the posix_fadvise system call to explicitly
- page data into the operating system buffer cache ahead of the current
- reader's position. This can improve performance especially when
- disks are highly contended.
- This configuration specifies the number of bytes ahead of the current
- read position which the datanode will attempt to read ahead. This
- feature may be disabled by configuring this property to 0.
- If the native libraries are not available, this configuration has no
- effect.
- </description>
- </property>
- <property>
- <name>dfs.datanode.drop.cache.behind.reads</name>
- <value>false</value>
- <description>
- In some workloads, the data read from HDFS is known to be significantly
- large enough that it is unlikely to be useful to cache it in the
- operating system buffer cache. In this case, the DataNode may be
- configured to automatically purge all data from the buffer cache
- after it is delivered to the client. This behavior is automatically
- disabled for workloads which read only short sections of a block
- (e.g. HBase random-IO workloads).
- This may improve performance for some workloads by freeing buffer
- cache space usage for more cacheable data.
- If the Hadoop native libraries are not available, this configuration
- has no effect.
- </description>
- </property>
- <property>
- <name>dfs.datanode.drop.cache.behind.writes</name>
- <value>false</value>
- <description>
- In some workloads, the data written to HDFS is known to be significantly
- large enough that it is unlikely to be useful to cache it in the
- operating system buffer cache. In this case, the DataNode may be
- configured to automatically purge all data from the buffer cache
- after it is written to disk.
- This may improve performance for some workloads by freeing buffer
- cache space usage for more cacheable data.
- If the Hadoop native libraries are not available, this configuration
- has no effect.
- </description>
- </property>
- <property>
- <name>dfs.datanode.sync.behind.writes</name>
- <value>false</value>
- <description>
- If this configuration is enabled, the datanode will instruct the
- operating system to enqueue all written data to the disk immediately
- after it is written. This differs from the usual OS policy which
- may wait for up to 30 seconds before triggering writeback.
- This may improve performance for some workloads by smoothing the
- IO profile for data written to disk.
- If the Hadoop native libraries are not available, this configuration
- has no effect.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.max.attempts</name>
- <value>15</value>
- <description>
- Expert only. The number of client failover attempts that should be
- made before the failover is considered failed.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.sleep.base.millis</name>
- <value>500</value>
- <description>
- Expert only. The time to wait, in milliseconds, between failover
- attempts increases exponentially as a function of the number of
- attempts made so far, with a random factor of +/- 50%. This option
- specifies the base value used in the failover calculation. The
- first failover will retry immediately. The 2nd failover attempt
- will delay at least dfs.client.failover.sleep.base.millis
- milliseconds. And so on.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.sleep.max.millis</name>
- <value>15000</value>
- <description>
- Expert only. The time to wait, in milliseconds, between failover
- attempts increases exponentially as a function of the number of
- attempts made so far, with a random factor of +/- 50%. This option
- specifies the maximum value to wait between failovers.
- Specifically, the time between two failover attempts will not
- exceed +/- 50% of dfs.client.failover.sleep.max.millis
- milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.connection.retries</name>
- <value>0</value>
- <description>
- Expert only. Indicates the number of retries a failover IPC client
- will make to establish a server connection.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.connection.retries.on.timeouts</name>
- <value>0</value>
- <description>
- Expert only. The number of retry attempts a failover IPC client
- will make on socket timeout when establishing a server connection.
- </description>
- </property>
- <property>
- <name>dfs.client.datanode-restart.timeout</name>
- <value>30s</value>
- <description>
- Expert only. The time to wait, in seconds, from reception of an
- datanode shutdown notification for quick restart, until declaring
- the datanode dead and invoking the normal recovery mechanisms.
- The notification is sent by a datanode when it is being shutdown
- using the shutdownDatanode admin command with the upgrade option.
- Supports multiple time unit suffixes (case insensitive), as described
- in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.nameservices</name>
- <value></value>
- <description>
- Comma-separated list of nameservices.
- </description>
- </property>
- <property>
- <name>dfs.nameservice.id</name>
- <value></value>
- <description>
- The ID of this nameservice. If the nameservice ID is not
- configured or more than one nameservice is configured for
- dfs.nameservices it is determined automatically by
- matching the local node's address with the configured address.
- </description>
- </property>
- <property>
- <name>dfs.internal.nameservices</name>
- <value></value>
- <description>
- Comma-separated list of nameservices that belong to this cluster.
- Datanode will report to all the nameservices in this list. By default
- this is set to the value of dfs.nameservices.
- </description>
- </property>
- <property>
- <name>dfs.ha.namenodes.EXAMPLENAMESERVICE</name>
- <value></value>
- <description>
- The prefix for a given nameservice, contains a comma-separated
- list of namenodes for a given nameservice (eg EXAMPLENAMESERVICE).
- Unique identifiers for each NameNode in the nameservice, delimited by
- commas. This will be used by DataNodes to determine all the NameNodes
- in the cluster. For example, if you used "mycluster" as the nameservice
- ID previously, and you wanted to use "nn1" and "nn2" as the individual
- IDs of the NameNodes, you would configure a property
- dfs.ha.namenodes.mycluster, and its value "nn1,nn2".
- </description>
- </property>
- <property>
- <name>dfs.ha.namenode.id</name>
- <value></value>
- <description>
- The ID of this namenode. If the namenode ID is not configured it
- is determined automatically by matching the local node's address
- with the configured address.
- </description>
- </property>
- <property>
- <name>dfs.ha.log-roll.period</name>
- <value>120s</value>
- <description>
- How often, in seconds, the StandbyNode should ask the active to
- roll edit logs. Since the StandbyNode only reads from finalized
- log segments, the StandbyNode will only be as up-to-date as how
- often the logs are rolled. Note that failover triggers a log roll
- so the StandbyNode will be up to date before it becomes active.
- Supports multiple time unit suffixes (case insensitive), as described
- in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.ha.tail-edits.period</name>
- <value>60s</value>
- <description>
- How often, in seconds, the StandbyNode should check for new
- finalized log segments in the shared edits log.
- Supports multiple time unit suffixes (case insensitive), as described
- in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.ha.tail-edits.namenode-retries</name>
- <value>3</value>
- <description>
- Number of retries to use when contacting the namenode when tailing the log.
- </description>
- </property>
- <property>
- <name>dfs.ha.tail-edits.rolledits.timeout</name>
- <value>60</value>
- <description>The timeout in seconds of calling rollEdits RPC on Active NN.
- </description>
- </property>
- <property>
- <name>dfs.ha.automatic-failover.enabled</name>
- <value>false</value>
- <description>
- Whether automatic failover is enabled. See the HDFS High
- Availability documentation for details on automatic HA
- configuration.
- </description>
- </property>
- <property>
- <name>dfs.client.use.datanode.hostname</name>
- <value>false</value>
- <description>Whether clients should use datanode hostnames when
- connecting to datanodes.
- </description>
- </property>
- <property>
- <name>dfs.datanode.use.datanode.hostname</name>
- <value>false</value>
- <description>Whether datanodes should use datanode hostnames when
- connecting to other datanodes for data transfer.
- </description>
- </property>
- <property>
- <name>dfs.client.local.interfaces</name>
- <value></value>
- <description>A comma separated list of network interface names to use
- for data transfer between the client and datanodes. When creating
- a connection to read from or write to a datanode, the client
- chooses one of the specified interfaces at random and binds its
- socket to the IP of that interface. Individual names may be
- specified as either an interface name (eg "eth0"), a subinterface
- name (eg "eth0:0"), or an IP address (which may be specified using
- CIDR notation to match a range of IPs).
- </description>
- </property>
- <property>
- <name>dfs.datanode.shared.file.descriptor.paths</name>
- <value>/dev/shm,/tmp</value>
- <description>
- A comma-separated list of paths to use when creating file descriptors that
- will be shared between the DataNode and the DFSClient. Typically we use
- /dev/shm, so that the file descriptors will not be written to disk.
- Systems that don't have /dev/shm will fall back to /tmp by default.
- </description>
- </property>
- <property>
- <name>dfs.short.circuit.shared.memory.watcher.interrupt.check.ms</name>
- <value>60000</value>
- <description>
- The length of time in milliseconds that the short-circuit shared memory
- watcher will go between checking for java interruptions sent from other
- threads. This is provided mainly for unit tests.
- </description>
- </property>
- <property>
- <name>dfs.namenode.kerberos.principal</name>
- <value></value>
- <description>
- The NameNode service principal. This is typically set to
- nn/_HOST@REALM.TLD. Each NameNode will substitute _HOST with its
- own fully qualified hostname at startup. The _HOST placeholder
- allows using the same configuration setting on both NameNodes
- in an HA setup.
- </description>
- </property>
- <property>
- <name>dfs.namenode.keytab.file</name>
- <value></value>
- <description>
- The keytab file used by each NameNode daemon to login as its
- service principal. The principal name is configured with
- dfs.namenode.kerberos.principal.
- </description>
- </property>
- <property>
- <name>dfs.datanode.kerberos.principal</name>
- <value></value>
- <description>
- The DataNode service principal. This is typically set to
- dn/_HOST@REALM.TLD. Each DataNode will substitute _HOST with its
- own fully qualified hostname at startup. The _HOST placeholder
- allows using the same configuration setting on all DataNodes.
- </description>
- </property>
- <property>
- <name>dfs.datanode.keytab.file</name>
- <value></value>
- <description>
- The keytab file used by each DataNode daemon to login as its
- service principal. The principal name is configured with
- dfs.datanode.kerberos.principal.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.kerberos.principal</name>
- <value></value>
- <description>
- The JournalNode service principal. This is typically set to
- jn/_HOST@REALM.TLD. Each JournalNode will substitute _HOST with its
- own fully qualified hostname at startup. The _HOST placeholder
- allows using the same configuration setting on all JournalNodes.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.keytab.file</name>
- <value></value>
- <description>
- The keytab file used by each JournalNode daemon to login as its
- service principal. The principal name is configured with
- dfs.journalnode.kerberos.principal.
- </description>
- </property>
- <property>
- <name>dfs.namenode.kerberos.internal.spnego.principal</name>
- <value>${dfs.web.authentication.kerberos.principal}</value>
- <description>
- The server principal used by the NameNode for web UI SPNEGO
- authentication when Kerberos security is enabled. This is
- typically set to HTTP/_HOST@REALM.TLD. The SPNEGO server principal
- begins with the prefix HTTP/ by convention.
- If the value is '*', the web server will attempt to login with
- every principal specified in the keytab file
- dfs.web.authentication.kerberos.keytab.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.kerberos.internal.spnego.principal</name>
- <value></value>
- <description>
- The server principal used by the JournalNode HTTP Server for
- SPNEGO authentication when Kerberos security is enabled. This is
- typically set to HTTP/_HOST@REALM.TLD. The SPNEGO server principal
- begins with the prefix HTTP/ by convention.
- If the value is '*', the web server will attempt to login with
- every principal specified in the keytab file
- dfs.web.authentication.kerberos.keytab.
- For most deployments this can be set to ${dfs.web.authentication.kerberos.principal}
- i.e. use the value of dfs.web.authentication.kerberos.principal.
- </description>
- </property>
- <property>
- <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
- <value>${dfs.web.authentication.kerberos.principal}</value>
- <description>
- The server principal used by the Secondary NameNode for web UI SPNEGO
- authentication when Kerberos security is enabled. Like all other
- Secondary NameNode settings, it is ignored in an HA setup.
- If the value is '*', the web server will attempt to login with
- every principal specified in the keytab file
- dfs.web.authentication.kerberos.keytab.
- </description>
- </property>
- <property>
- <name>dfs.web.authentication.kerberos.principal</name>
- <value></value>
- <description>
- The server principal used by the NameNode for WebHDFS SPNEGO
- authentication.
- Required when WebHDFS and security are enabled. In most secure clusters this
- setting is also used to specify the values for
- dfs.namenode.kerberos.internal.spnego.principal and
- dfs.journalnode.kerberos.internal.spnego.principal.
- </description>
- </property>
- <property>
- <name>dfs.web.authentication.kerberos.keytab</name>
- <value></value>
- <description>
- The keytab file for the principal corresponding to
- dfs.web.authentication.kerberos.principal.
- </description>
- </property>
- <property>
- <name>dfs.namenode.kerberos.principal.pattern</name>
- <value>*</value>
- <description>
- A client-side RegEx that can be configured to control
- allowed realms to authenticate with (useful in cross-realm env.)
- </description>
- </property>
- <property>
- <name>dfs.namenode.avoid.read.stale.datanode</name>
- <value>false</value>
- <description>
- Indicate whether or not to avoid reading from "stale" datanodes whose
- heartbeat messages have not been received by the namenode
- for more than a specified time interval. Stale datanodes will be
- moved to the end of the node list returned for reading. See
- dfs.namenode.avoid.write.stale.datanode for a similar setting for writes.
- </description>
- </property>
- <property>
- <name>dfs.namenode.avoid.write.stale.datanode</name>
- <value>false</value>
- <description>
- Indicate whether or not to avoid writing to "stale" datanodes whose
- heartbeat messages have not been received by the namenode
- for more than a specified time interval. Writes will avoid using
- stale datanodes unless more than a configured ratio
- (dfs.namenode.write.stale.datanode.ratio) of datanodes are marked as
- stale. See dfs.namenode.avoid.read.stale.datanode for a similar setting
- for reads.
- </description>
- </property>
- <property>
- <name>dfs.namenode.stale.datanode.interval</name>
- <value>30000</value>
- <description>
- Default time interval in milliseconds for marking a datanode as "stale",
- i.e., if the namenode has not received heartbeat msg from a datanode for
- more than this time interval, the datanode will be marked and treated
- as "stale" by default. The stale interval cannot be too small since
- otherwise this may cause too frequent change of stale states.
- We thus set a minimum stale interval value (the default value is 3 times
- of heartbeat interval) and guarantee that the stale interval cannot be less
- than the minimum value. A stale data node is avoided during lease/block
- recovery. It can be conditionally avoided for reads (see
- dfs.namenode.avoid.read.stale.datanode) and for writes (see
- dfs.namenode.avoid.write.stale.datanode).
- </description>
- </property>
- <property>
- <name>dfs.namenode.write.stale.datanode.ratio</name>
- <value>0.5f</value>
- <description>
- When the ratio of the number of stale datanodes to total datanodes marked
- is greater than this ratio, stop avoiding writing to stale nodes so
- as to prevent causing hotspots.
- </description>
- </property>
- <property>
- <name>dfs.namenode.invalidate.work.pct.per.iteration</name>
- <value>0.32f</value>
- <description>
- *Note*: Advanced property. Change with caution.
- This determines the percentage amount of block
- invalidations (deletes) to do over a single DN heartbeat
- deletion command. The final deletion count is determined by applying this
- percentage to the number of live nodes in the system.
- The resultant number is the number of blocks from the deletion list
- chosen for proper invalidation over a single heartbeat of a single DN.
- Value should be a positive, non-zero percentage in float notation (X.Yf),
- with 1.0f meaning 100%.
- </description>
- </property>
- <property>
- <name>dfs.namenode.replication.work.multiplier.per.iteration</name>
- <value>2</value>
- <description>
- *Note*: Advanced property. Change with caution.
- This determines the total amount of block transfers to begin in
- parallel at a DN, for replication, when such a command list is being
- sent over a DN heartbeat by the NN. The actual number is obtained by
- multiplying this multiplier with the total number of live nodes in the
- cluster. The result number is the number of blocks to begin transfers
- immediately for, per DN heartbeat. This number can be any positive,
- non-zero integer.
- </description>
- </property>
- <property>
- <name>nfs.server.port</name>
- <value>2049</value>
- <description>
- Specify the port number used by Hadoop NFS.
- </description>
- </property>
- <property>
- <name>nfs.mountd.port</name>
- <value>4242</value>
- <description>
- Specify the port number used by Hadoop mount daemon.
- </description>
- </property>
- <property>
- <name>nfs.dump.dir</name>
- <value>/tmp/.hdfs-nfs</value>
- <description>
- This directory is used to temporarily save out-of-order writes before
- writing to HDFS. For each file, the out-of-order writes are dumped after
- they are accumulated to exceed certain threshold (e.g., 1MB) in memory.
- One needs to make sure the directory has enough space.
- </description>
- </property>
- <property>
- <name>nfs.rtmax</name>
- <value>1048576</value>
- <description>This is the maximum size in bytes of a READ request
- supported by the NFS gateway. If you change this, make sure you
- also update the nfs mount's rsize (add rsize= # of bytes to the
- mount directive).
- </description>
- </property>
- <property>
- <name>nfs.wtmax</name>
- <value>1048576</value>
- <description>This is the maximum size in bytes of a WRITE request
- supported by the NFS gateway. If you change this, make sure you
- also update the nfs mount's wsize (add wsize= # of bytes to the
- mount directive).
- </description>
- </property>
- <property>
- <name>nfs.keytab.file</name>
- <value></value>
- <description>
- *Note*: Advanced property. Change with caution.
- This is the path to the keytab file for the hdfs-nfs gateway.
- This is required when the cluster is kerberized.
- </description>
- </property>
- <property>
- <name>nfs.kerberos.principal</name>
- <value></value>
- <description>
- *Note*: Advanced property. Change with caution.
- This is the name of the kerberos principal. This is required when
- the cluster is kerberized. It must be of this format:
- nfs-gateway-user/nfs-gateway-host@kerberos-realm
- </description>
- </property>
- <property>
- <name>nfs.allow.insecure.ports</name>
- <value>true</value>
- <description>
- When set to false, client connections originating from unprivileged ports
- (those above 1023) will be rejected. This is to ensure that clients
- connecting to this NFS Gateway must have had root privilege on the machine
- where they're connecting from.
- </description>
- </property>
- <property>
- <name>hadoop.fuse.connection.timeout</name>
- <value>300</value>
- <description>
- The minimum number of seconds that we'll cache libhdfs connection objects
- in fuse_dfs. Lower values will result in lower memory consumption; higher
- values may speed up access by avoiding the overhead of creating new
- connection objects.
- </description>
- </property>
- <property>
- <name>hadoop.fuse.timer.period</name>
- <value>5</value>
- <description>
- The number of seconds between cache expiry checks in fuse_dfs. Lower values
- will result in fuse_dfs noticing changes to Kerberos ticket caches more
- quickly.
- </description>
- </property>
- <property>
- <name>dfs.namenode.metrics.logger.period.seconds</name>
- <value>600</value>
- <description>
- This setting controls how frequently the NameNode logs its metrics. The
- logging configuration must also define one or more appenders for
- NameNodeMetricsLog for the metrics to be logged.
- NameNode metrics logging is disabled if this value is set to zero or
- less than zero.
- </description>
- </property>
- <property>
- <name>dfs.datanode.metrics.logger.period.seconds</name>
- <value>600</value>
- <description>
- This setting controls how frequently the DataNode logs its metrics. The
- logging configuration must also define one or more appenders for
- DataNodeMetricsLog for the metrics to be logged.
- DataNode metrics logging is disabled if this value is set to zero or
- less than zero.
- </description>
- </property>
- <property>
- <name>dfs.metrics.percentiles.intervals</name>
- <value></value>
- <description>
- Comma-delimited set of integers denoting the desired rollover intervals
- (in seconds) for percentile latency metrics on the Namenode and Datanode.
- By default, percentile latency metrics are disabled.
- </description>
- </property>
- <property>
- <name>dfs.datanode.peer.stats.enabled</name>
- <value>false</value>
- <description>
- A switch to turn on/off tracking DataNode peer statistics.
- </description>
- </property>
- <property>
- <name>dfs.datanode.outliers.report.interval</name>
- <value>30m</value>
- <description>
- This setting controls how frequently DataNodes will report their peer
- latencies to the NameNode via heartbeats. This setting supports
- multiple time unit suffixes as described in dfs.heartbeat.interval.
- If no suffix is specified then milliseconds is assumed.
- It is ignored if dfs.datanode.peer.stats.enabled is false.
- </description>
- </property>
- <property>
- <name>dfs.datanode.fileio.profiling.sampling.percentage</name>
- <value>0</value>
- <description>
- This setting controls the percentage of file I/O events which will be
- profiled for DataNode disk statistics. The default value of 0 disables
- disk statistics. Set to an integer value between 1 and 100 to enable disk
- statistics.
- </description>
- </property>
- <property>
- <name>hadoop.user.group.metrics.percentiles.intervals</name>
- <value></value>
- <description>
- A comma-separated list of the granularity in seconds for the metrics
- which describe the 50/75/90/95/99th percentile latency for group resolution
- in milliseconds.
- By default, percentile latency metrics are disabled.
- </description>
- </property>
- <property>
- <name>dfs.encrypt.data.transfer</name>
- <value>false</value>
- <description>
- Whether or not actual block data that is read/written from/to HDFS should
- be encrypted on the wire. This only needs to be set on the NN and DNs,
- clients will deduce this automatically. It is possible to override this setting
- per connection by specifying custom logic via dfs.trustedchannel.resolver.class.
- </description>
- </property>
- <property>
- <name>dfs.encrypt.data.transfer.algorithm</name>
- <value></value>
- <description>
- This value may be set to either "3des" or "rc4". If nothing is set, then
- the configured JCE default on the system is used (usually 3DES.) It is
- widely believed that 3DES is more cryptographically secure, but RC4 is
- substantially faster.
-
- Note that if AES is supported by both the client and server then this
- encryption algorithm will only be used to initially transfer keys for AES.
- (See dfs.encrypt.data.transfer.cipher.suites.)
- </description>
- </property>
- <property>
- <name>dfs.encrypt.data.transfer.cipher.suites</name>
- <value></value>
- <description>
- This value may be either undefined or AES/CTR/NoPadding. If defined, then
- dfs.encrypt.data.transfer uses the specified cipher suite for data
- encryption. If not defined, then only the algorithm specified in
- dfs.encrypt.data.transfer.algorithm is used. By default, the property is
- not defined.
- </description>
- </property>
- <property>
- <name>dfs.encrypt.data.transfer.cipher.key.bitlength</name>
- <value>128</value>
- <description>
- The key bitlength negotiated by dfsclient and datanode for encryption.
- This value may be set to either 128, 192 or 256.
- </description>
- </property>
- <property>
- <name>dfs.trustedchannel.resolver.class</name>
- <value></value>
- <description>
- TrustedChannelResolver is used to determine whether a channel
- is trusted for plain data transfer. The TrustedChannelResolver is
- invoked on both client and server side. If the resolver indicates
- that the channel is trusted, then the data transfer will not be
- encrypted even if dfs.encrypt.data.transfer is set to true. The
- default implementation returns false indicating that the channel
- is not trusted.
- </description>
- </property>
- <property>
- <name>dfs.data.transfer.protection</name>
- <value></value>
- <description>
- A comma-separated list of SASL protection values used for secured
- connections to the DataNode when reading or writing block data. Possible
- values are authentication, integrity and privacy. authentication means
- authentication only and no integrity or privacy; integrity implies
- authentication and integrity are enabled; and privacy implies all of
- authentication, integrity and privacy are enabled. If
- dfs.encrypt.data.transfer is set to true, then it supersedes the setting for
- dfs.data.transfer.protection and enforces that all connections must use a
- specialized encrypted SASL handshake. This property is ignored for
- connections to a DataNode listening on a privileged port. In this case, it
- is assumed that the use of a privileged port establishes sufficient trust.
- </description>
- </property>
- <property>
- <name>dfs.data.transfer.saslproperties.resolver.class</name>
- <value></value>
- <description>
- SaslPropertiesResolver used to resolve the QOP used for a connection to the
- DataNode when reading or writing block data. If not specified, the value of
- hadoop.security.saslproperties.resolver.class is used as the default value.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.rpc-address</name>
- <value>0.0.0.0:8485</value>
- <description>
- The JournalNode RPC server address and port.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.http-address</name>
- <value>0.0.0.0:8480</value>
- <description>
- The address and port the JournalNode HTTP server listens on.
- If the port is 0 then the server will start on a free port.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.https-address</name>
- <value>0.0.0.0:8481</value>
- <description>
- The address and port the JournalNode HTTPS server listens on.
- If the port is 0 then the server will start on a free port.
- </description>
- </property>
- <property>
- <name>dfs.namenode.audit.loggers</name>
- <value>default</value>
- <description>
- List of classes implementing audit loggers that will receive audit events.
- These should be implementations of org.apache.hadoop.hdfs.server.namenode.AuditLogger.
- The special value "default" can be used to reference the default audit
- logger, which uses the configured log system. Installing custom audit loggers
- may affect the performance and stability of the NameNode. Refer to the custom
- logger's documentation for more details.
- </description>
- </property>
- <property>
- <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name>
- <value>10737418240</value> <!-- 10 GB -->
- <description>
- Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to
- org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy.
- This setting controls how much DN volumes are allowed to differ in terms of
- bytes of free disk space before they are considered imbalanced. If the free
- space of all the volumes is within this range of each other, the volumes
- will be considered balanced and block assignments will be done on a pure
- round robin basis.
- </description>
- </property>
- <property>
- <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name>
- <value>0.75f</value>
- <description>
- Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to
- org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy.
- This setting controls what percentage of new block allocations will be sent
- to volumes with more available disk space than others. This setting should
- be in the range 0.0 - 1.0, though in practice 0.5 - 1.0, since there should
- be no reason to prefer that volumes with less available disk space receive
- more block allocations.
- </description>
- </property>
- <property>
- <name>dfs.namenode.edits.noeditlogchannelflush</name>
- <value>false</value>
- <description>
- Specifies whether to flush edit log file channel. When set, expensive
- FileChannel#force calls are skipped and synchronous disk writes are
- enabled instead by opening the edit log file with RandomAccessFile("rws")
- flags. This can significantly improve the performance of edit log writes
- on the Windows platform.
- Note that the behavior of the "rws" flags is platform and hardware specific
- and might not provide the same level of guarantees as FileChannel#force.
- For example, the write will skip the disk-cache on SAS and SCSI devices
- while it might not on SATA devices. This is an expert level setting,
- change with caution.
- </description>
- </property>
- <property>
- <name>dfs.client.cache.drop.behind.writes</name>
- <value></value>
- <description>
- Just like dfs.datanode.drop.cache.behind.writes, this setting causes the
- page cache to be dropped behind HDFS writes, potentially freeing up more
- memory for other uses. Unlike dfs.datanode.drop.cache.behind.writes, this
- is a client-side setting rather than a setting for the entire datanode.
- If present, this setting will override the DataNode default.
- If the native libraries are not available to the DataNode, this
- configuration has no effect.
- </description>
- </property>
- <property>
- <name>dfs.client.cache.drop.behind.reads</name>
- <value></value>
- <description>
- Just like dfs.datanode.drop.cache.behind.reads, this setting causes the
- page cache to be dropped behind HDFS reads, potentially freeing up more
- memory for other uses. Unlike dfs.datanode.drop.cache.behind.reads, this
- is a client-side setting rather than a setting for the entire datanode. If
- present, this setting will override the DataNode default.
- If the native libraries are not available to the DataNode, this
- configuration has no effect.
- </description>
- </property>
- <property>
- <name>dfs.client.cache.readahead</name>
- <value></value>
- <description>
- When using remote reads, this setting causes the datanode to
- read ahead in the block file using posix_fadvise, potentially decreasing
- I/O wait times. Unlike dfs.datanode.readahead.bytes, this is a client-side
- setting rather than a setting for the entire datanode. If present, this
- setting will override the DataNode default.
- When using local reads, this setting determines how much readahead we do in
- BlockReaderLocal.
- If the native libraries are not available to the DataNode, this
- configuration has no effect.
- </description>
- </property>
- <property>
- <name>dfs.namenode.enable.retrycache</name>
- <value>true</value>
- <description>
- This enables the retry cache on the namenode. Namenode tracks for
- non-idempotent requests the corresponding response. If a client retries the
- request, the response from the retry cache is sent. Such operations
- are tagged with annotation @AtMostOnce in namenode protocols. It is
- recommended that this flag be set to true. Setting it to false, will result
- in clients getting failure responses to retried request. This flag must
- be enabled in HA setup for transparent fail-overs.
- The entries in the cache have expiration time configurable
- using dfs.namenode.retrycache.expirytime.millis.
- </description>
- </property>
- <property>
- <name>dfs.namenode.retrycache.expirytime.millis</name>
- <value>600000</value>
- <description>
- The time for which retry cache entries are retained.
- </description>
- </property>
- <property>
- <name>dfs.namenode.retrycache.heap.percent</name>
- <value>0.03f</value>
- <description>
- This parameter configures the heap size allocated for retry cache
- (excluding the response cached). This corresponds to approximately
- 4096 entries for every 64MB of namenode process java heap size.
- Assuming retry cache entry expiration time (configured using
- dfs.namenode.retrycache.expirytime.millis) of 10 minutes, this
- enables retry cache to support 7 operations per second sustained
- for 10 minutes. As the heap size is increased, the operation rate
- linearly increases.
- </description>
- </property>
- <property>
- <name>dfs.client.mmap.enabled</name>
- <value>true</value>
- <description>
- If this is set to false, the client won't attempt to perform memory-mapped reads.
- </description>
- </property>
- <property>
- <name>dfs.client.mmap.cache.size</name>
- <value>256</value>
- <description>
- When zero-copy reads are used, the DFSClient keeps a cache of recently used
- memory mapped regions. This parameter controls the maximum number of
- entries that we will keep in that cache.
- The larger this number is, the more file descriptors we will potentially
- use for memory-mapped files. mmaped files also use virtual address space.
- You may need to increase your ulimit virtual address space limits before
- increasing the client mmap cache size.
- Note that you can still do zero-copy reads when this size is set to 0.
- </description>
- </property>
- <property>
- <name>dfs.client.mmap.cache.timeout.ms</name>
- <value>3600000</value>
- <description>
- The minimum length of time that we will keep an mmap entry in the cache
- between uses. If an entry is in the cache longer than this, and nobody
- uses it, it will be removed by a background thread.
- </description>
- </property>
- <property>
- <name>dfs.client.mmap.retry.timeout.ms</name>
- <value>300000</value>
- <description>
- The minimum amount of time that we will wait before retrying a failed mmap
- operation.
- </description>
- </property>
- <property>
- <name>dfs.client.short.circuit.replica.stale.threshold.ms</name>
- <value>1800000</value>
- <description>
- The maximum amount of time that we will consider a short-circuit replica to
- be valid, if there is no communication from the DataNode. After this time
- has elapsed, we will re-fetch the short-circuit replica even if it is in
- the cache.
- </description>
- </property>
- <property>
- <name>dfs.namenode.path.based.cache.block.map.allocation.percent</name>
- <value>0.25</value>
- <description>
- The percentage of the Java heap which we will allocate to the cached blocks
- map. The cached blocks map is a hash map which uses chained hashing.
- Smaller maps may be accessed more slowly if the number of cached blocks is
- large; larger maps will consume more memory.
- </description>
- </property>
- <property>
- <name>dfs.datanode.max.locked.memory</name>
- <value>0</value>
- <description>
- The amount of memory in bytes to use for caching of block replicas in
- memory on the datanode. The datanode's maximum locked memory soft ulimit
- (RLIMIT_MEMLOCK) must be set to at least this value, else the datanode
- will abort on startup.
- By default, this parameter is set to 0, which disables in-memory caching.
- If the native libraries are not available to the DataNode, this
- configuration has no effect.
- </description>
- </property>
- <property>
- <name>dfs.namenode.list.cache.directives.num.responses</name>
- <value>100</value>
- <description>
- This value controls the number of cache directives that the NameNode will
- send over the wire in response to a listDirectives RPC.
- </description>
- </property>
- <property>
- <name>dfs.namenode.list.cache.pools.num.responses</name>
- <value>100</value>
- <description>
- This value controls the number of cache pools that the NameNode will
- send over the wire in response to a listPools RPC.
- </description>
- </property>
- <property>
- <name>dfs.namenode.path.based.cache.refresh.interval.ms</name>
- <value>30000</value>
- <description>
- The amount of milliseconds between subsequent path cache rescans. Path
- cache rescans are when we calculate which blocks should be cached, and on
- what datanodes.
- By default, this parameter is set to 30 seconds.
- </description>
- </property>
- <property>
- <name>dfs.namenode.path.based.cache.retry.interval.ms</name>
- <value>30000</value>
- <description>
- When the NameNode needs to uncache something that is cached, or cache
- something that is not cached, it must direct the DataNodes to do so by
- sending a DNA_CACHE or DNA_UNCACHE command in response to a DataNode
- heartbeat. This parameter controls how frequently the NameNode will
- resend these commands.
- </description>
- </property>
- <property>
- <name>dfs.datanode.fsdatasetcache.max.threads.per.volume</name>
- <value>4</value>
- <description>
- The maximum number of threads per volume to use for caching new data
- on the datanode. These threads consume both I/O and CPU. This can affect
- normal datanode operations.
- </description>
- </property>
- <property>
- <name>dfs.cachereport.intervalMsec</name>
- <value>10000</value>
- <description>
- Determines cache reporting interval in milliseconds. After this amount of
- time, the DataNode sends a full report of its cache state to the NameNode.
- The NameNode uses the cache report to update its map of cached blocks to
- DataNode locations.
- This configuration has no effect if in-memory caching has been disabled by
- setting dfs.datanode.max.locked.memory to 0 (which is the default).
- If the native libraries are not available to the DataNode, this
- configuration has no effect.
- </description>
- </property>
- <property>
- <name>dfs.namenode.edit.log.autoroll.multiplier.threshold</name>
- <value>2.0</value>
- <description>
- Determines when an active namenode will roll its own edit log.
- The actual threshold (in number of edits) is determined by multiplying
- this value by dfs.namenode.checkpoint.txns.
- This prevents extremely large edit files from accumulating on the active
- namenode, which can cause timeouts during namenode startup and pose an
- administrative hassle. This behavior is intended as a failsafe for when
- the standby or secondary namenode fail to roll the edit log by the normal
- checkpoint threshold.
- </description>
- </property>
- <property>
- <name>dfs.namenode.edit.log.autoroll.check.interval.ms</name>
- <value>300000</value>
- <description>
- How often an active namenode will check if it needs to roll its edit log,
- in milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.user.provider.user.pattern</name>
- <value>^[A-Za-z_][A-Za-z0-9._-]*[$]?$</value>
- <description>
- Valid pattern for user and group names for webhdfs, it must be a valid java regex.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.acl.provider.permission.pattern</name>
- <value>^(default:)?(user|group|mask|other):[[A-Za-z_][A-Za-z0-9._-]]*:([rwx-]{3})?(,(default:)?(user|group|mask|other):[[A-Za-z_][A-Za-z0-9._-]]*:([rwx-]{3})?)*$</value>
- <description>
- Valid pattern for user and group names in webhdfs acl operations, it must be a valid java regex.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.socket.connect-timeout</name>
- <value>60s</value>
- <description>
- Socket timeout for connecting to WebHDFS servers. This prevents a
- WebHDFS client from hanging if the server hostname is
- misconfigured, or the server does not respond before the timeout
- expires. Value is followed by a unit specifier: ns, us, ms, s, m,
- h, d for nanoseconds, microseconds, milliseconds, seconds,
- minutes, hours, days respectively. Values should provide units,
- but milliseconds are assumed.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.socket.read-timeout</name>
- <value>60s</value>
- <description>
- Socket timeout for reading data from WebHDFS servers. This
- prevents a WebHDFS client from hanging if the server stops sending
- data. Value is followed by a unit specifier: ns, us, ms, s, m, h,
- d for nanoseconds, microseconds, milliseconds, seconds, minutes,
- hours, days respectively. Values should provide units,
- but milliseconds are assumed.
- </description>
- </property>
- <property>
- <name>dfs.client.context</name>
- <value>default</value>
- <description>
- The name of the DFSClient context that we should use. Clients that share
- a context share a socket cache and short-circuit cache, among other things.
- You should only change this if you don't want to share with another set of
- threads.
- </description>
- </property>
- <property>
- <name>dfs.client.read.shortcircuit</name>
- <value>false</value>
- <description>
- This configuration parameter turns on short-circuit local reads.
- </description>
- </property>
- <property>
- <name>dfs.client.socket.send.buffer.size</name>
- <value>0</value>
- <description>
- Socket send buffer size for a write pipeline in DFSClient side.
- This may affect TCP connection throughput.
- If it is set to a zero or negative value,
- no buffer size will be set explicitly,
- thus enabling TCP auto-tuning on some systems.
- The default value is 0.
- </description>
- </property>
- <property>
- <name>dfs.domain.socket.path</name>
- <value></value>
- <description>
- Optional. This is a path to a UNIX domain socket that will be used for
- communication between the DataNode and local HDFS clients.
- If the string "_PORT" is present in this path, it will be replaced by the
- TCP port of the DataNode.
- </description>
- </property>
- <property>
- <name>dfs.client.read.shortcircuit.skip.checksum</name>
- <value>false</value>
- <description>
- If this configuration parameter is set,
- short-circuit local reads will skip checksums.
- This is normally not recommended,
- but it may be useful for special setups.
- You might consider using this
- if you are doing your own checksumming outside of HDFS.
- </description>
- </property>
- <property>
- <name>dfs.client.read.shortcircuit.streams.cache.size</name>
- <value>256</value>
- <description>
- The DFSClient maintains a cache of recently opened file descriptors.
- This parameter controls the maximum number of file descriptors in the cache.
- Setting this higher will use more file descriptors,
- but potentially provide better performance on workloads
- involving lots of seeks.
- </description>
- </property>
- <property>
- <name>dfs.client.read.shortcircuit.streams.cache.expiry.ms</name>
- <value>300000</value>
- <description>
- This controls the minimum amount of time
- file descriptors need to sit in the client cache context
- before they can be closed for being inactive for too long.
- </description>
- </property>
- <property>
- <name>dfs.datanode.shared.file.descriptor.paths</name>
- <value>/dev/shm,/tmp</value>
- <description>
- Comma-separated paths to the directories in which
- shared memory segments are created.
- The client and the DataNode exchange information via
- this shared memory segment.
- The paths are tried in order until the creation of a shared memory
- segment succeeds.
- </property>
- <property>
- <name>dfs.namenode.audit.log.debug.cmdlist</name>
- <value></value>
- <description>
- A comma separated list of NameNode commands that are written to the HDFS
- namenode audit log only if the audit log level is debug.
- </description>
- </property>
- <property>
- <name>dfs.client.use.legacy.blockreader.local</name>
- <value>false</value>
- <description>
- Legacy short-circuit reader implementation based on HDFS-2246 is used
- if this configuration parameter is true.
- This is for the platforms other than Linux
- where the new implementation based on HDFS-347 is not available.
- </description>
- </property>
- <property>
- <name>dfs.block.local-path-access.user</name>
- <value></value>
- <description>
- Comma separated list of the users allowed to open block files
- on legacy short-circuit local read.
- </description>
- </property>
- <property>
- <name>dfs.client.domain.socket.data.traffic</name>
- <value>false</value>
- <description>
- This controls whether we will try to pass normal data traffic
- over UNIX domain socket rather than over TCP socket
- on node-local data transfer.
- This is currently experimental and turned off by default.
- </description>
- </property>
- <property>
- <name>dfs.namenode.reject-unresolved-dn-topology-mapping</name>
- <value>false</value>
- <description>
- If the value is set to true, then namenode will reject datanode
- registration if the topology mapping for a datanode is not resolved and
- NULL is returned (script defined by net.topology.script.file.name fails
- to execute). Otherwise, datanode will be registered and the default rack
- will be assigned as the topology path. Topology paths are important for
- data resiliency, since they define fault domains. Thus it may be unwanted
- behavior to allow datanode registration with the default rack if the
- resolving topology failed.
- </description>
- </property>
- <property>
- <name>dfs.namenode.xattrs.enabled</name>
- <value>true</value>
- <description>
- Whether support for extended attributes is enabled on the NameNode.
- </description>
- </property>
- <property>
- <name>dfs.namenode.fs-limits.max-xattrs-per-inode</name>
- <value>32</value>
- <description>
- Maximum number of extended attributes per inode.
- </description>
- </property>
- <property>
- <name>dfs.namenode.fs-limits.max-xattr-size</name>
- <value>16384</value>
- <description>
- The maximum combined size of the name and value of an extended attribute
- in bytes. It should be larger than 0, and less than or equal to maximum
- size hard limit which is 32768.
- </description>
- </property>
- <property>
- <name>dfs.client.slow.io.warning.threshold.ms</name>
- <value>30000</value>
- <description>The threshold in milliseconds at which we will log a slow
- io warning in a dfsclient. By default, this parameter is set to 30000
- milliseconds (30 seconds).
- </description>
- </property>
- <property>
- <name>dfs.datanode.slow.io.warning.threshold.ms</name>
- <value>300</value>
- <description>The threshold in milliseconds at which we will log a slow
- io warning in a datanode. By default, this parameter is set to 300
- milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.namenode.lease-recheck-interval-ms</name>
- <value>2000</value>
- <description>During the release of leases, a lock is held that makes any
- operations on the namenode stall. In order to not block them for
- too long, we stop releasing leases after this max lock limit.
- </description>
- </property>
- <property>
- <name>dfs.namenode.max-lock-hold-to-release-lease-ms</name>
- <value>25</value>
- <description>During the release of leases, a lock is held that makes any
- operations on the namenode stall. In order to not block them for
- too long, we stop releasing leases after this max lock limit.
- </description>
- </property>
- <property>
- <name>dfs.namenode.write-lock-reporting-threshold-ms</name>
- <value>5000</value>
- <description>When a write lock is held on the namenode for a long time,
- this will be logged as the lock is released. This sets how long the
- lock must be held for logging to occur.
- </description>
- </property>
- <property>
- <name>dfs.namenode.read-lock-reporting-threshold-ms</name>
- <value>5000</value>
- <description>When a read lock is held on the namenode for a long time,
- this will be logged as the lock is released. This sets how long the
- lock must be held for logging to occur.
- </description>
- </property>
- <property>
- <name>dfs.namenode.lock.detailed-metrics.enabled</name>
- <value>false</value>
- <description>If true, the namenode will keep track of how long various
- operations hold the Namesystem lock for and emit this as metrics. These
- metrics have names of the form FSN(Read|Write)LockNanosOperationName,
- where OperationName denotes the name of the operation that initiated the
- lock hold (this will be OTHER for certain uncategorized operations) and
- they export the hold time values in nanoseconds.
- </description>
- </property>
- <property>
- <name>dfs.namenode.fslock.fair</name>
- <value>true</value>
- <description>If this is true, the FS Namesystem lock will be used in Fair mode,
- which will help to prevent writer threads from being starved, but can provide
- lower lock throughput. See java.util.concurrent.locks.ReentrantReadWriteLock
- for more information on fair/non-fair locks.
- </description>
- </property>
- <property>
- <name>dfs.namenode.startup.delay.block.deletion.sec</name>
- <value>0</value>
- <description>The delay in seconds at which we will pause the blocks deletion
- after Namenode startup. By default it's disabled.
- In the case a directory has large number of directories and files are
- deleted, suggested delay is one hour to give the administrator enough time
- to notice large number of pending deletion blocks and take corrective
- action.
- </description>
- </property>
- <property>
- <name>dfs.datanode.block.id.layout.upgrade.threads</name>
- <value>12</value>
- <description>The number of threads to use when creating hard links from
- current to previous blocks during upgrade of a DataNode to block ID-based
- block layout (see HDFS-6482 for details on the layout).</description>
- </property>
- <property>
- <name>dfs.namenode.list.encryption.zones.num.responses</name>
- <value>100</value>
- <description>When listing encryption zones, the maximum number of zones
- that will be returned in a batch. Fetching the list incrementally in
- batches improves namenode performance.
- </description>
- </property>
- <property>
- <name>dfs.namenode.list.reencryption.status.num.responses</name>
- <value>100</value>
- <description>When listing re-encryption status, the maximum number of zones
- that will be returned in a batch. Fetching the list incrementally in
- batches improves namenode performance.
- </description>
- </property>
- <property>
- <name>dfs.namenode.list.openfiles.num.responses</name>
- <value>1000</value>
- <description>
- When listing open files, the maximum number of open files that will be
- returned in a single batch. Fetching the list incrementally in batches
- improves namenode performance.
- </description>
- </property>
- <property>
- <name>dfs.namenode.edekcacheloader.interval.ms</name>
- <value>1000</value>
- <description>When a KeyProvider is configured, the interval at which to warm
- up the edek cache when the NN starts up / becomes active. All edeks will be
- loaded from the KMS into the provider cache. The edek cache loader will keep
- trying to warm up the cache until it succeeds or the NN leaves the active
- state.
- </description>
- </property>
- <property>
- <name>dfs.namenode.edekcacheloader.initial.delay.ms</name>
- <value>3000</value>
- <description>When a KeyProvider is configured, the delay before the first
- attempt to warm up the edek cache when the NN starts up / becomes active.
- </description>
- </property>
- <property>
- <name>dfs.namenode.reencrypt.sleep.interval</name>
- <value>1m</value>
- <description>Interval the re-encrypt EDEK thread sleeps in the main loop. The
- interval accepts units. If none given, millisecond is assumed.
- </description>
- </property>
- <property>
- <name>dfs.namenode.reencrypt.batch.size</name>
- <value>1000</value>
- <description>How many EDEKs should the re-encrypt thread process in one batch.
- </description>
- </property>
- <property>
- <name>dfs.namenode.reencrypt.throttle.limit.handler.ratio</name>
- <value>1.0</value>
- <description>Throttling ratio for the re-encryption, indicating what fraction
- of time should the re-encrypt handler thread work under NN read lock.
- Larger than 1.0 values are interpreted as 1.0. Negative value or 0 are
- invalid values and will fail NN startup.
- </description>
- </property>
- <property>
- <name>dfs.namenode.reencrypt.throttle.limit.updater.ratio</name>
- <value>1.0</value>
- <description>Throttling ratio for the re-encryption, indicating what fraction
- of time should the re-encrypt updater thread work under NN write lock.
- Larger than 1.0 values are interpreted as 1.0. Negative value or 0 are
- invalid values and will fail NN startup.
- </description>
- </property>
- <property>
- <name>dfs.namenode.reencrypt.edek.threads</name>
- <value>10</value>
- <description>Maximum number of re-encrypt threads to contact the KMS
- and re-encrypt the edeks.
- </description>
- </property>
- <property>
- <name>dfs.namenode.inotify.max.events.per.rpc</name>
- <value>1000</value>
- <description>Maximum number of events that will be sent to an inotify client
- in a single RPC response. The default value attempts to amortize away
- the overhead for this RPC while avoiding huge memory requirements for the
- client and NameNode (1000 events should consume no more than 1 MB.)
- </description>
- </property>
- <property>
- <name>dfs.user.home.dir.prefix</name>
- <value>/user</value>
- <description>The directory to prepend to the user name to get the user's
- home directory.
- </description>
- </property>
- <property>
- <name>dfs.datanode.cache.revocation.timeout.ms</name>
- <value>900000</value>
- <description>When the DFSClient reads from a block file which the DataNode is
- caching, the DFSClient can skip verifying checksums. The DataNode will
- keep the block file in cache until the client is done. If the client takes
- an unusually long time, though, the DataNode may need to evict the block
- file from the cache anyway. This value controls how long the DataNode will
- wait for the client to release a replica that it is reading without
- checksums.
- </description>
- </property>
- <property>
- <name>dfs.datanode.cache.revocation.polling.ms</name>
- <value>500</value>
- <description>How often the DataNode should poll to see if the clients have
- stopped using a replica that the DataNode wants to uncache.
- </description>
- </property>
- <property>
- <name>dfs.storage.policy.enabled</name>
- <value>true</value>
- <description>
- Allow users to change the storage policy on files and directories.
- </description>
- </property>
- <property>
- <name>dfs.namenode.legacy-oiv-image.dir</name>
- <value></value>
- <description>Determines where to save the namespace in the old fsimage format
- during checkpointing by standby NameNode or SecondaryNameNode. Users can
- dump the contents of the old format fsimage by oiv_legacy command. If
- the value is not specified, old format fsimage will not be saved in
- checkpoint.
- </description>
- </property>
- <property>
- <name>dfs.namenode.top.enabled</name>
- <value>true</value>
- <description>Enable nntop: reporting top users on namenode
- </description>
- </property>
- <property>
- <name>dfs.namenode.top.window.num.buckets</name>
- <value>10</value>
- <description>Number of buckets in the rolling window implementation of nntop
- </description>
- </property>
- <property>
- <name>dfs.namenode.top.num.users</name>
- <value>10</value>
- <description>Number of top users returned by the top tool
- </description>
- </property>
- <property>
- <name>dfs.namenode.top.windows.minutes</name>
- <value>1,5,25</value>
- <description>comma separated list of nntop reporting periods in minutes
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.ugi.expire.after.access</name>
- <value>600000</value>
- <description>How long in milliseconds after the last access
- the cached UGI will expire. With 0, never expire.
- </description>
- </property>
- <property>
- <name>dfs.namenode.blocks.per.postponedblocks.rescan</name>
- <value>10000</value>
- <description>Number of blocks to rescan for each iteration of
- postponedMisreplicatedBlocks.
- </description>
- </property>
- <property>
- <name>dfs.datanode.block-pinning.enabled</name>
- <value>false</value>
- <description>Whether pin blocks on favored DataNode.</description>
- </property>
- <property>
- <name>dfs.client.block.write.locateFollowingBlock.initial.delay.ms</name>
- <value>400</value>
- <description>The initial delay (unit is ms) for locateFollowingBlock;
- the delay time will increase exponentially (doubling) for each retry.
- </description>
- </property>
- <property>
- <name>dfs.ha.zkfc.nn.http.timeout.ms</name>
- <value>20000</value>
- <description>
- The HTTP connection and read timeout value (unit is ms ) when DFS ZKFC
- tries to get local NN thread dump after local NN becomes
- SERVICE_NOT_RESPONDING or SERVICE_UNHEALTHY.
- If it is set to zero, DFS ZKFC won't get local NN thread dump.
- </description>
- </property>
- <property>
- <name>dfs.ha.tail-edits.in-progress</name>
- <value>false</value>
- <description>
- Whether to enable the standby namenode to tail in-progress edit logs.
- Clients might want to turn it on when they want Standby NN to have
- more up-to-date data.
- </description>
- </property>
- <property>
- <name>dfs.namenode.ec.system.default.policy</name>
- <value>RS-6-3-1024k</value>
- <description>The default erasure coding policy name will be used
- on the path if no policy name is passed.
- </description>
- </property>
- <property>
- <name>dfs.namenode.ec.policies.max.cellsize</name>
- <value>4194304</value>
- <description>The maximum cell size of erasure coding policy. Default is 4MB.
- </description>
- </property>
- <property>
- <name>dfs.datanode.ec.reconstruction.stripedread.timeout.millis</name>
- <value>5000</value>
- <description>Datanode striped read timeout in milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.datanode.ec.reconstruction.stripedread.buffer.size</name>
- <value>65536</value>
- <description>Datanode striped read buffer size.
- </description>
- </property>
- <property>
- <name>dfs.datanode.ec.reconstruction.threads</name>
- <value>8</value>
- <description>
- Number of threads used by the Datanode for background
- reconstruction work.
- </description>
- </property>
- <property>
- <name>dfs.namenode.quota.init-threads</name>
- <value>4</value>
- <description>
- The number of concurrent threads to be used in quota initialization. The
- speed of quota initialization also affects the namenode fail-over latency.
- If the size of name space is big, try increasing this.
- </description>
- </property>
- <property>
- <name>dfs.datanode.transfer.socket.send.buffer.size</name>
- <value>0</value>
- <description>
- Socket send buffer size for DataXceiver (mirroring packets to downstream
- in pipeline). This may affect TCP connection throughput.
- If it is set to a zero or negative value, no buffer size will be set
- explicitly, thus enabling TCP auto-tuning on some systems.
- The default value is 0.
- </description>
- </property>
- <property>
- <name>dfs.datanode.transfer.socket.recv.buffer.size</name>
- <value>0</value>
- <description>
- Socket receive buffer size for DataXceiver (receiving packets from client
- during block writing). This may affect TCP connection throughput.
- If it is set to zero or a negative value, no buffer size will be set
- explicitly, thus enabling TCP auto-tuning on some systems.
- The default value is 0.
- </description>
- </property>
- <property>
- <name>dfs.namenode.upgrade.domain.factor</name>
- <value>${dfs.replication}</value>
- <description>
- This is valid only when block placement policy is set to
- BlockPlacementPolicyWithUpgradeDomain. It defines the number of
- unique upgrade domains any block's replicas should have.
- When the number of replicas is less than or equal to this value, the policy
- ensures each replica has a unique upgrade domain. When the number of
- replicas is greater than this value, the policy ensures the number of
- unique domains is at least this value.
- </description>
- </property>
- <property>
- <name>dfs.ha.zkfc.port</name>
- <value>8019</value>
- <description>
- RPC port for Zookeeper Failover Controller.
- </description>
- </property>
- <property>
- <name>dfs.datanode.bp-ready.timeout</name>
- <value>20s</value>
- <description>
- The maximum wait time for datanode to be ready before failing the
- received request. Setting this to 0 fails requests right away if the
- datanode is not yet registered with the namenode. This wait time
- reduces initial request failures after datanode restart.
- Support multiple time unit suffix(case insensitive), as described
- in dfs.heartbeat.interval.
- </description>
- </property>
- <property>
- <name>dfs.datanode.cached-dfsused.check.interval.ms</name>
- <value>600000</value>
- <description>
- The interval check time of loading DU_CACHE_FILE in each volume.
- When the cluster is performing rolling upgrade operations, the dfsUsed
- cache file of each volume will usually expire, forcing the datanode to
- redo the du operations, which makes the datanode start slowly. Adjusting
- this property can keep the cache file available for the time you want.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.rest-csrf.enabled</name>
- <value>false</value>
- <description>
- If true, then enables WebHDFS protection against cross-site request forgery
- (CSRF). The WebHDFS client also uses this property to determine whether or
- not it needs to send the custom CSRF prevention header in its HTTP requests.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.rest-csrf.custom-header</name>
- <value>X-XSRF-HEADER</value>
- <description>
- The name of a custom header that HTTP requests must send when protection
- against cross-site request forgery (CSRF) is enabled for WebHDFS by setting
- dfs.webhdfs.rest-csrf.enabled to true. The WebHDFS client also uses this
- property to determine whether or not it needs to send the custom CSRF
- prevention header in its HTTP requests.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.rest-csrf.methods-to-ignore</name>
- <value>GET,OPTIONS,HEAD,TRACE</value>
- <description>
- A comma-separated list of HTTP methods that do not require HTTP requests to
- include a custom header when protection against cross-site request forgery
- (CSRF) is enabled for WebHDFS by setting dfs.webhdfs.rest-csrf.enabled to
- true. The WebHDFS client also uses this property to determine whether or
- not it needs to send the custom CSRF prevention header in its HTTP requests.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.rest-csrf.browser-useragents-regex</name>
- <value>^Mozilla.*,^Opera.*</value>
- <description>
- A comma-separated list of regular expressions used to match against an HTTP
- request's User-Agent header when protection against cross-site request
- forgery (CSRF) is enabled for WebHDFS by setting
- dfs.webhdfs.rest-csrf.enabled to true. If the incoming User-Agent matches
- any of these regular expressions, then the request is considered to be sent
- by a browser, and therefore CSRF prevention is enforced. If the request's
- User-Agent does not match any of these regular expressions, then the request
- is considered to be sent by something other than a browser, such as scripted
- automation. In this case, CSRF is not a potential attack vector, so
- the prevention is not enforced. This helps achieve backwards-compatibility
- with existing automation that has not been updated to send the CSRF
- prevention header.
- </description>
- </property>
- <property>
- <name>dfs.xframe.enabled</name>
- <value>true</value>
- <description>
- If true, then enables protection against clickjacking by returning
- X_FRAME_OPTIONS header value set to SAMEORIGIN.
- Clickjacking protection prevents an attacker from using transparent or
- opaque layers to trick a user into clicking on a button
- or link on another page.
- </description>
- </property>
- <property>
- <name>dfs.xframe.value</name>
- <value>SAMEORIGIN</value>
- <description>
- This configuration value allows users to specify the value for the
- X-FRAME-OPTIONS. The possible values for this field are
- DENY, SAMEORIGIN and ALLOW-FROM. Any other value will throw an
- exception when namenode and datanodes are starting up.
- </description>
- </property>
- <property>
- <name>dfs.balancer.keytab.enabled</name>
- <value>false</value>
- <description>
- Set to true to enable login using a keytab for Kerberized Hadoop.
- </description>
- </property>
- <property>
- <name>dfs.balancer.address</name>
- <value>0.0.0.0:0</value>
- <description>
- The hostname used for a keytab based Kerberos login. Keytab based login
- can be enabled with dfs.balancer.keytab.enabled.
- </description>
- </property>
- <property>
- <name>dfs.balancer.keytab.file</name>
- <value></value>
- <description>
- The keytab file used by the Balancer to login as its
- service principal. The principal name is configured with
- dfs.balancer.kerberos.principal. Keytab based login can be
- enabled with dfs.balancer.keytab.enabled.
- </description>
- </property>
- <property>
- <name>dfs.balancer.kerberos.principal</name>
- <value></value>
- <description>
- The Balancer principal. This is typically set to
- balancer/_HOST@REALM.TLD. The Balancer will substitute _HOST with its
- own fully qualified hostname at startup. The _HOST placeholder
- allows using the same configuration setting on different servers.
- Keytab based login can be enabled with dfs.balancer.keytab.enabled.
- </description>
- </property>
- <property>
- <name>dfs.http.client.retry.policy.enabled</name>
- <value>false</value>
- <description>
- If "true", enable the retry policy of WebHDFS client.
- If "false", retry policy is turned off.
- Enabling the retry policy can be quite useful while using WebHDFS to
- copy large files between clusters that could timeout, or
- copy files between HA clusters that could failover during the copy.
- </description>
- </property>
- <property>
- <name>dfs.http.client.retry.policy.spec</name>
- <value>10000,6,60000,10</value>
- <description>
- Specify a policy of multiple linear random retry for WebHDFS client,
- e.g. given pairs of number of retries and sleep time (n0, t0), (n1, t1),
- ..., the first n0 retries sleep t0 milliseconds on average,
- the following n1 retries sleep t1 milliseconds on average, and so on.
- </description>
- </property>
- <property>
- <name>dfs.http.client.failover.max.attempts</name>
- <value>15</value>
- <description>
- Specify the max number of failover attempts for WebHDFS client
- in case of network exception.
- </description>
- </property>
- <property>
- <name>dfs.http.client.retry.max.attempts</name>
- <value>10</value>
- <description>
- Specify the max number of retry attempts for WebHDFS client,
- if the difference between retried attempts and failovered attempts is
- larger than the max number of retry attempts, there will be no more
- retries.
- </description>
- </property>
- <property>
- <name>dfs.http.client.failover.sleep.base.millis</name>
- <value>500</value>
- <description>
- Specify the base amount of time in milliseconds upon which the
- exponentially increased sleep time between retries or failovers
- is calculated for WebHDFS client.
- </description>
- </property>
- <property>
- <name>dfs.http.client.failover.sleep.max.millis</name>
- <value>15000</value>
- <description>
- Specify the upper bound of sleep time in milliseconds between
- retries or failovers for WebHDFS client.
- </description>
- </property>
- <property>
- <name>dfs.namenode.hosts.provider.classname</name>
- <value>org.apache.hadoop.hdfs.server.blockmanagement.HostFileManager</value>
- <description>
- The class that provides access for host files.
- org.apache.hadoop.hdfs.server.blockmanagement.HostFileManager is used
- by default which loads files specified by dfs.hosts and dfs.hosts.exclude.
- If org.apache.hadoop.hdfs.server.blockmanagement.CombinedHostFileManager is
- used, it will load the JSON file defined in dfs.hosts.
- To change class name, nn restart is required. "dfsadmin -refreshNodes" only
- refreshes the configuration files used by the class.
- </description>
- </property>
- <property>
- <name>datanode.https.port</name>
- <value>50475</value>
- <description>
- HTTPS port for DataNode.
- </description>
- </property>
- <property>
- <name>dfs.balancer.dispatcherThreads</name>
- <value>200</value>
- <description>
- Size of the thread pool for the HDFS balancer block mover.
- dispatchExecutor
- </description>
- </property>
- <property>
- <name>dfs.balancer.movedWinWidth</name>
- <value>5400000</value>
- <description>
- Window of time in ms for the HDFS balancer tracking blocks and its
- locations.
- </description>
- </property>
- <property>
- <name>dfs.balancer.moverThreads</name>
- <value>1000</value>
- <description>
- Thread pool size for executing block moves.
- moverThreadAllocator
- </description>
- </property>
- <property>
- <name>dfs.balancer.max-size-to-move</name>
- <value>10737418240</value>
- <description>
- Maximum number of bytes that can be moved by the balancer in a single
- thread.
- </description>
- </property>
- <property>
- <name>dfs.balancer.getBlocks.min-block-size</name>
- <value>10485760</value>
- <description>
- Minimum block threshold size in bytes to ignore when fetching a source's
- block list.
- </description>
- </property>
- <property>
- <name>dfs.balancer.getBlocks.size</name>
- <value>2147483648</value>
- <description>
- Total size in bytes of Datanode blocks to get when fetching a source's
- block list.
- </description>
- </property>
- <property>
- <name>dfs.balancer.block-move.timeout</name>
- <value>0</value>
- <description>
- Maximum amount of time in milliseconds for a block to move. If this is set
- greater than 0, Balancer will stop waiting for a block move completion
- after this time. In typical clusters, a 3 to 5 minute timeout is reasonable.
- If timeout happens to a large proportion of block moves, this needs to be
- increased. It could also be that too much work is dispatched and many nodes
- are constantly exceeding the bandwidth limit as a result. In that case,
- other balancer parameters might need to be adjusted.
- It is disabled (0) by default.
- </description>
- </property>
- <property>
- <name>dfs.balancer.max-no-move-interval</name>
- <value>60000</value>
- <description>
- If this specified amount of time has elapsed and no block has been moved
- out of a source DataNode, no more effort will be made to move blocks out of
- this DataNode in the current Balancer iteration.
- </description>
- </property>
- <property>
- <name>dfs.block.invalidate.limit</name>
- <value>1000</value>
- <description>
- The maximum number of invalidate blocks sent by namenode to a datanode
- per heartbeat deletion command. This property works with
- "dfs.namenode.invalidate.work.pct.per.iteration" to throttle block
- deletions.
- </description>
- </property>
- <property>
- <name>dfs.block.misreplication.processing.limit</name>
- <value>10000</value>
- <description>
- Maximum number of blocks to process for initializing replication queues.
- </description>
- </property>
- <property>
- <name>dfs.block.placement.ec.classname</name>
- <value>org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRackFaultTolerant</value>
- <description>
- Placement policy class for striped files.
- Defaults to BlockPlacementPolicyRackFaultTolerant.class
- </description>
- </property>
- <property>
- <name>dfs.block.replicator.classname</name>
- <value>org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault</value>
- <description>
- Class representing block placement policy for non-striped files.
- There are four block placement policies currently being supported:
- BlockPlacementPolicyDefault, BlockPlacementPolicyWithNodeGroup,
- BlockPlacementPolicyRackFaultTolerant and BlockPlacementPolicyWithUpgradeDomain.
- BlockPlacementPolicyDefault chooses the desired number of targets
- for placing block replicas in a default way. BlockPlacementPolicyWithNodeGroup
- places block replicas on environment with node-group layer. BlockPlacementPolicyRackFaultTolerant
- places the replicas to more racks.
- BlockPlacementPolicyWithUpgradeDomain places block replicas that honors upgrade domain policy.
- The details of placing replicas are documented in the javadoc of the corresponding policy classes.
- The default policy is BlockPlacementPolicyDefault, and the corresponding class is
- org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.
- </description>
- </property>
- <property>
- <name>dfs.blockreport.incremental.intervalMsec</name>
- <value>0</value>
- <description>
- If set to a positive integer, the value in ms to wait between sending
- incremental block reports from the Datanode to the Namenode.
- </description>
- </property>
- <property>
- <name>dfs.checksum.type</name>
- <value>CRC32C</value>
- <description>
- Checksum type
- </description>
- </property>
- <property>
- <name>dfs.client.block.write.locateFollowingBlock.retries</name>
- <value>5</value>
- <description>
- Number of retries to use when finding the next block during HDFS writes.
- </description>
- </property>
- <property>
- <name>dfs.client.failover.proxy.provider</name>
- <value></value>
- <description>
- The prefix (plus a required nameservice ID) for the class name of the
- configured Failover proxy provider for the host. For more detailed
- information, please consult the "Configuration Details" section of
- the HDFS High Availability documentation.
- </description>
- </property>
- <property>
- <name>dfs.client.key.provider.cache.expiry</name>
- <value>864000000</value>
- <description>
- DFS client security key cache expiration in milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.client.max.block.acquire.failures</name>
- <value>3</value>
- <description>
- Maximum failures allowed when trying to get block information from a specific datanode.
- </description>
- </property>
- <property>
- <name>dfs.client.read.prefetch.size</name>
- <value></value>
- <description>
- The number of bytes that the DFSClient will fetch from the Namenode
- during a read operation. Defaults to 10 * ${dfs.blocksize}.
- </description>
- </property>
- <property>
- <name>dfs.client.read.short.circuit.replica.stale.threshold.ms</name>
- <value>1800000</value>
- <description>
- Threshold in milliseconds for read entries during short-circuit local reads.
- </description>
- </property>
- <property>
- <name>dfs.client.read.shortcircuit.buffer.size</name>
- <value>1048576</value>
- <description>
- Buffer size in bytes for short-circuit local reads.
- </description>
- </property>
- <property>
- <name>dfs.client.read.striped.threadpool.size</name>
- <value>18</value>
- <description>
- The maximum number of threads used for parallel reading
- in striped layout.
- </description>
- </property>
- <property>
- <name>dfs.client.replica.accessor.builder.classes</name>
- <value></value>
- <description>
- Comma-separated classes for building ReplicaAccessor. If the classes
- are specified, client will use external BlockReader that uses the
- ReplicaAccessor built by the builder.
- </description>
- </property>
- <property>
- <name>dfs.client.retry.interval-ms.get-last-block-length</name>
- <value>4000</value>
- <description>
- Retry interval in milliseconds to wait between retries in getting
- block lengths from the datanodes.
- </description>
- </property>
- <property>
- <name>dfs.client.retry.max.attempts</name>
- <value>10</value>
- <description>
- Max retry attempts for DFSClient talking to namenodes.
- </description>
- </property>
- <property>
- <name>dfs.client.retry.policy.enabled</name>
- <value>false</value>
- <description>
- If true, turns on DFSClient retry policy.
- </description>
- </property>
- <property>
- <name>dfs.client.retry.policy.spec</name>
- <value>10000,6,60000,10</value>
- <description>
- Set to pairs of timeouts and retries for DFSClient.
- </description>
- </property>
- <property>
- <name>dfs.client.retry.times.get-last-block-length</name>
- <value>3</value>
- <description>
- Number of retries for calls to fetchLocatedBlocksAndGetLastBlockLength().
- </description>
- </property>
- <property>
- <name>dfs.client.retry.window.base</name>
- <value>3000</value>
- <description>
- Base time window in ms for DFSClient retries. For each retry attempt,
- this value is extended linearly (e.g. 3000 ms for first attempt and
- first retry, 6000 ms for second retry, 9000 ms for third retry, etc.).
- </description>
- </property>
- <property>
- <name>dfs.client.socket-timeout</name>
- <value>60000</value>
- <description>
- Default timeout value in milliseconds for all sockets.
- </description>
- </property>
- <property>
- <name>dfs.client.socketcache.capacity</name>
- <value>16</value>
- <description>
- Socket cache capacity (in entries) for short-circuit reads.
- </description>
- </property>
- <property>
- <name>dfs.client.socketcache.expiryMsec</name>
- <value>3000</value>
- <description>
- Socket cache expiration for short-circuit reads in msec.
- </description>
- </property>
- <property>
- <name>dfs.client.test.drop.namenode.response.number</name>
- <value>0</value>
- <description>
- The number of Namenode responses dropped by DFSClient for each RPC call. Used
- for testing the NN retry cache.
- </description>
- </property>
- <property>
- <name>dfs.client.hedged.read.threadpool.size</name>
- <value>0</value>
- <description>
- Support 'hedged' reads in DFSClient. To enable this feature, set the parameter
- to a positive number. The threadpool size is how many threads to dedicate
- to the running of these 'hedged', concurrent reads in your client.
- </description>
- </property>
- <property>
- <name>dfs.client.hedged.read.threshold.millis</name>
- <value>500</value>
- <description>
- Configure 'hedged' reads in DFSClient. This is the number of milliseconds
- to wait before starting up a 'hedged' read.
- </description>
- </property>
- <property>
- <name>dfs.client.write.byte-array-manager.count-limit</name>
- <value>2048</value>
- <description>
- The maximum number of arrays allowed for each array length.
- </description>
- </property>
- <property>
- <name>dfs.client.write.byte-array-manager.count-reset-time-period-ms</name>
- <value>10000</value>
- <description>
- The time period in milliseconds that the allocation count for each array length is
- reset to zero if there is no increment.
- </description>
- </property>
- <property>
- <name>dfs.client.write.byte-array-manager.count-threshold</name>
- <value>128</value>
- <description>
- The count threshold for each array length so that a manager is created only after the
- allocation count exceeds the threshold. In other words, the particular array length
- is not managed until the allocation count exceeds the threshold.
- </description>
- </property>
- <property>
- <name>dfs.client.write.byte-array-manager.enabled</name>
- <value>false</value>
- <description>
- If true, enables byte array manager used by DFSOutputStream.
- </description>
- </property>
- <property>
- <name>dfs.client.write.max-packets-in-flight</name>
- <value>80</value>
- <description>
- The maximum number of DFSPackets allowed in flight.
- </description>
- </property>
- <property>
- <name>dfs.content-summary.limit</name>
- <value>5000</value>
- <description>
- The maximum content summary counts allowed in one locking period. 0 or a negative number
- means no limit (i.e. no yielding).
- </description>
- </property>
- <property>
- <name>dfs.content-summary.sleep-microsec</name>
- <value>500</value>
- <description>
- The length of time in microseconds to put the thread to sleep, between reacquiring the locks
- in content summary computation.
- </description>
- </property>
- <property>
- <name>dfs.data.transfer.client.tcpnodelay</name>
- <value>true</value>
- <description>
- If true, set TCP_NODELAY to sockets for transferring data from DFS client.
- </description>
- </property>
- <property>
- <name>dfs.data.transfer.server.tcpnodelay</name>
- <value>true</value>
- <description>
- If true, set TCP_NODELAY to sockets for transferring data between Datanodes.
- </description>
- </property>
- <property>
- <name>dfs.datanode.balance.max.concurrent.moves</name>
- <value>50</value>
- <description>
- Maximum number of threads for Datanode balancer pending moves. This
- value is reconfigurable via the "dfsadmin -reconfig" command.
- </description>
- </property>
- <property>
- <name>dfs.datanode.fsdataset.factory</name>
- <value></value>
- <description>
- The class name for the underlying storage that stores replicas for a
- Datanode. Defaults to
- org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory.
- </description>
- </property>
- <property>
- <name>dfs.datanode.fsdataset.volume.choosing.policy</name>
- <value></value>
- <description>
- The class name of the policy for choosing volumes in the list of
- directories. Defaults to
- org.apache.hadoop.hdfs.server.datanode.fsdataset.RoundRobinVolumeChoosingPolicy.
- If you would like to take into account available disk space, set the
- value to
- "org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy".
- </description>
- </property>
- <property>
- <name>dfs.datanode.hostname</name>
- <value></value>
- <description>
- Optional. The hostname for the Datanode containing this
- configuration file. Will be different for each machine.
- Defaults to current hostname.
- </description>
- </property>
- <property>
- <name>dfs.datanode.lazywriter.interval.sec</name>
- <value>60</value>
- <description>
- Interval in seconds for Datanodes for lazy persist writes.
- </description>
- </property>
- <property>
- <name>dfs.datanode.network.counts.cache.max.size</name>
- <value>2147483647</value>
- <description>
- The maximum number of entries the datanode per-host network error
- count cache may contain.
- </description>
- </property>
- <property>
- <name>dfs.datanode.oob.timeout-ms</name>
- <value>1500,0,0,0</value>
- <description>
- Timeout value when sending OOB response for each OOB type, which are
- OOB_RESTART, OOB_RESERVED1, OOB_RESERVED2, and OOB_RESERVED3,
- respectively. Currently, only OOB_RESTART is used.
- </description>
- </property>
- <property>
- <name>dfs.datanode.parallel.volumes.load.threads.num</name>
- <value></value>
- <description>
- Maximum number of threads to use for upgrading data directories.
- The default value is the number of storage directories in the
- DataNode.
- </description>
- </property>
- <property>
- <name>dfs.datanode.ram.disk.replica.tracker</name>
- <value></value>
- <description>
- Name of the class implementing the RamDiskReplicaTracker interface.
- Defaults to
- org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.RamDiskReplicaLruTracker.
- </description>
- </property>
- <property>
- <name>dfs.datanode.restart.replica.expiration</name>
- <value>50</value>
- <description>
- During shutdown for restart, the amount of time in seconds budgeted for
- datanode restart.
- </description>
- </property>
- <property>
- <name>dfs.datanode.socket.reuse.keepalive</name>
- <value>4000</value>
- <description>
- The window of time in ms before the DataXceiver closes a socket for a
- single request. If a second request occurs within that window, the
- socket can be reused.
- </description>
- </property>
- <property>
- <name>dfs.datanode.socket.write.timeout</name>
- <value>480000</value>
- <description>
- Timeout in ms for clients socket writes to DataNodes.
- </description>
- </property>
- <property>
- <name>dfs.datanode.sync.behind.writes.in.background</name>
- <value>false</value>
- <description>
- If set to true, then sync_file_range() system call will occur
- asynchronously. This property is only valid when the property
- dfs.datanode.sync.behind.writes is true.
- </description>
- </property>
- <property>
- <name>dfs.datanode.transferTo.allowed</name>
- <value>true</value>
- <description>
- If false, break block transfers on 32-bit machines greater than
- or equal to 2GB into smaller chunks.
- </description>
- </property>
- <property>
- <name>dfs.ha.fencing.methods</name>
- <value></value>
- <description>
- A list of scripts or Java classes which will be used to fence
- the Active NameNode during a failover. See the HDFS High
- Availability documentation for details on automatic HA
- configuration.
- </description>
- </property>
- <property>
- <name>dfs.ha.standby.checkpoints</name>
- <value>true</value>
- <description>
- If true, a NameNode in Standby state periodically takes a checkpoint
- of the namespace, saves it to its local storage and then uploads it to
- the remote NameNode.
- </description>
- </property>
- <property>
- <name>dfs.ha.zkfc.port</name>
- <value>8019</value>
- <description>
- The port number that the zookeeper failover controller RPC
- server binds to.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.edits.dir</name>
- <value>/tmp/hadoop/dfs/journalnode/</value>
- <description>
- The directory where the journal edit files are stored.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.enable.sync</name>
- <value>true</value>
- <description>
- If true, the journal nodes will sync with each other. The journal nodes
- will periodically gossip with other journal nodes to compare edit log
- manifests and if they detect any missing log segment, they will download
- it from the other journal nodes.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.sync.interval</name>
- <value>120000</value>
- <description>
- Time interval, in milliseconds, between two Journal Node syncs.
- This configuration takes effect only if the journalnode sync is enabled
- by setting the configuration parameter dfs.journalnode.enable.sync to true.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.kerberos.internal.spnego.principal</name>
- <value></value>
- <description>
- Kerberos SPNEGO principal name used by the journal node.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.kerberos.principal</name>
- <value></value>
- <description>
- Kerberos principal name for the journal node.
- </description>
- </property>
- <property>
- <name>dfs.journalnode.keytab.file</name>
- <value></value>
- <description>
- Kerberos keytab file for the journal node.
- </description>
- </property>
- <property>
- <name>dfs.ls.limit</name>
- <value>1000</value>
- <description>
- Limit the number of files printed by ls. If less than or equal to
- zero, at most DFS_LIST_LIMIT_DEFAULT (= 1000) will be printed.
- </description>
- </property>
- <property>
- <name>dfs.mover.movedWinWidth</name>
- <value>5400000</value>
- <description>
- The minimum time interval, in milliseconds, that a block can be
- moved to another location again.
- </description>
- </property>
- <property>
- <name>dfs.mover.moverThreads</name>
- <value>1000</value>
- <description>
- Configure the balancer's mover thread pool size.
- </description>
- </property>
- <property>
- <name>dfs.mover.retry.max.attempts</name>
- <value>10</value>
- <description>
- The maximum number of retries before the mover considers the
- move failed.
- </description>
- </property>
- <property>
- <name>dfs.mover.keytab.enabled</name>
- <value>false</value>
- <description>
- Set to true to enable login using a keytab for Kerberized Hadoop.
- </description>
- </property>
- <property>
- <name>dfs.mover.address</name>
- <value>0.0.0.0:0</value>
- <description>
- The hostname used for a keytab based Kerberos login. Keytab based login
- can be enabled with dfs.mover.keytab.enabled.
- </description>
- </property>
- <property>
- <name>dfs.mover.keytab.file</name>
- <value></value>
- <description>
- The keytab file used by the Mover to login as its
- service principal. The principal name is configured with
- dfs.mover.kerberos.principal. Keytab based login can be
- enabled with dfs.mover.keytab.enabled.
- </description>
- </property>
- <property>
- <name>dfs.mover.kerberos.principal</name>
- <value></value>
- <description>
- The Mover principal. This is typically set to
- mover/_HOST@REALM.TLD. The Mover will substitute _HOST with its
- own fully qualified hostname at startup. The _HOST placeholder
- allows using the same configuration setting on different servers.
- Keytab based login can be enabled with dfs.mover.keytab.enabled.
- </description>
- </property>
- <property>
- <name>dfs.mover.max-no-move-interval</name>
- <value>60000</value>
- <description>
- If this specified amount of time has elapsed and no block has been moved
- out of a source DataNode, no more effort will be made to move blocks out of
- this DataNode in the current Mover iteration.
- </description>
- </property>
- <property>
- <name>dfs.namenode.audit.log.async</name>
- <value>false</value>
- <description>
- If true, enables asynchronous audit log.
- </description>
- </property>
- <property>
- <name>dfs.namenode.audit.log.token.tracking.id</name>
- <value>false</value>
- <description>
- If true, adds a tracking ID for all audit log events.
- </description>
- </property>
- <property>
- <name>dfs.namenode.available-space-block-placement-policy.balanced-space-preference-fraction</name>
- <value>0.6</value>
- <description>
- Only used when the dfs.block.replicator.classname is set to
- org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceBlockPlacementPolicy.
- Special value between 0 and 1, noninclusive. Increases chance of
- placing blocks on Datanodes with less disk space used.
- </description>
- </property>
- <property>
- <name>dfs.namenode.backup.dnrpc-address</name>
- <value></value>
- <description>
- Service RPC address for the backup Namenode.
- </description>
- </property>
- <property>
- <name>dfs.namenode.delegation.token.always-use</name>
- <value>false</value>
- <description>
- For testing. Setting to true always allows the DT secret manager
- to be used, even if security is disabled.
- </description>
- </property>
- <property>
- <name>dfs.namenode.edits.asynclogging</name>
- <value>false</value>
- <description>
- If set to true, enables asynchronous edit logs in the Namenode. If set
- to false, the Namenode uses the traditional synchronous edit logs.
- </description>
- </property>
- <property>
- <name>dfs.namenode.edits.dir.minimum</name>
- <value>1</value>
- <description>
- dfs.namenode.edits.dir includes both required directories
- (specified by dfs.namenode.edits.dir.required) and optional directories.
- The number of usable optional directories must be greater than or equal
- to this property. If the number of usable optional directories falls
- below dfs.namenode.edits.dir.minimum, HDFS will issue an error.
- This property defaults to 1.
- </description>
- </property>
- <property>
- <name>dfs.namenode.edits.journal-plugin</name>
- <value></value>
- <description>
- When FSEditLog is creating JournalManagers from dfs.namenode.edits.dir,
- and it encounters a URI with a schema different to "file" it loads the
- name of the implementing class from
- "dfs.namenode.edits.journal-plugin.[schema]". This class must implement
- JournalManager and have a constructor which takes (Configuration, URI).
- </description>
- </property>
- <property>
- <name>dfs.namenode.file.close.num-committed-allowed</name>
- <value>0</value>
- <description>
- Normally a file can only be closed when all of its blocks are committed.
- When this value is set to a positive integer N, a file can be closed
- when N blocks are committed and the rest complete.
- </description>
- </property>
- <property>
- <name>dfs.namenode.inode.attributes.provider.class</name>
- <value></value>
- <description>
- Name of class to use for delegating HDFS authorization.
- </description>
- </property>
- <property>
- <name>dfs.namenode.inode.attributes.provider.bypass.users</name>
- <value></value>
- <description>
- A list of user principals (in secure cluster) or user names (in insecure
- cluster) for whom the external attributes provider will be bypassed for all
- operations. This means file attributes stored in HDFS instead of the
- external provider will be used for permission checking and be returned when
- requested.
- </description>
- </property>
- <property>
- <name>dfs.namenode.max-num-blocks-to-log</name>
- <value>1000</value>
- <description>
- Puts a limit on the number of blocks printed to the log by the Namenode
- after a block report.
- </description>
- </property>
- <property>
- <name>dfs.namenode.max.op.size</name>
- <value>52428800</value>
- <description>
- Maximum opcode size in bytes.
- </description>
- </property>
- <property>
- <name>dfs.namenode.missing.checkpoint.periods.before.shutdown</name>
- <value>3</value>
- <description>
- The number of checkpoint period windows (as defined by the property
- dfs.namenode.checkpoint.period) allowed by the Namenode to perform
- saving the namespace before shutdown.
- </description>
- </property>
- <property>
- <name>dfs.namenode.name.cache.threshold</name>
- <value>10</value>
- <description>
- Frequently accessed files that are accessed more times than this
- threshold are cached in the FSDirectory nameCache.
- </description>
- </property>
- <property>
- <name>dfs.namenode.replication.max-streams</name>
- <value>2</value>
- <description>
- Hard limit for the number of highest-priority replication streams.
- </description>
- </property>
- <property>
- <name>dfs.namenode.replication.max-streams-hard-limit</name>
- <value>4</value>
- <description>
- Hard limit for all replication streams.
- </description>
- </property>
- <property>
- <name>dfs.namenode.reconstruction.pending.timeout-sec</name>
- <value>300</value>
- <description>
- Timeout in seconds for block reconstruction. If this value is 0 or less,
- then it will default to 5 minutes.
- </description>
- </property>
- <property>
- <name>dfs.namenode.stale.datanode.minimum.interval</name>
- <value>3</value>
- <description>
- Minimum number of missed heartbeats intervals for a datanode to
- be marked stale by the Namenode. The actual interval is calculated as
- (dfs.namenode.stale.datanode.minimum.interval * dfs.heartbeat.interval)
- in seconds. If this value is greater than the property
- dfs.namenode.stale.datanode.interval, then the calculated value above
- is used.
- </description>
- </property>
- <property>
- <name>dfs.namenode.storageinfo.defragment.timeout.ms</name>
- <value>4</value>
- <description>
- Timeout value in ms for the StorageInfo compaction run.
- </description>
- </property>
- <property>
- <name>dfs.namenode.storageinfo.defragment.interval.ms</name>
- <value>600000</value>
- <description>
- The thread for checking the StorageInfo for defragmentation will
- run periodically. The time between runs is determined by this
- property.
- </description>
- </property>
- <property>
- <name>dfs.namenode.storageinfo.defragment.ratio</name>
- <value>0.75</value>
- <description>
- The defragmentation threshold for the StorageInfo.
- </description>
- </property>
- <property>
- <name>dfs.namenode.snapshot.capture.openfiles</name>
- <value>false</value>
- <description>
- If true, snapshots taken will have an immutable shared copy of
- the open files that have valid leases. Even after the open files
- grow or shrink in size, snapshot will always have the previous
- point-in-time version of the open files, just like all other
- closed files. Default is false.
- Note: The file length captured for open files in snapshot is
- what's recorded in the NameNode at the time of snapshot and it may
- be shorter than what the client has written till then. In order
- to capture the latest length, the client can call hflush/hsync
- with the flag SyncFlag.UPDATE_LENGTH on the open files handles.
- </description>
- </property>
- <property>
- <name>dfs.namenode.snapshot.skip.capture.accesstime-only-change</name>
- <value>false</value>
- <description>
- If accessTime of a file/directory changed but there is no other
- modification made to the file/directory, the changed accesstime will
- not be captured in next snapshot. However, if there is other modification
- made to the file/directory, the latest access time will be captured
- together with the modification in next snapshot.
- </description>
- </property>
- <property>
- <name>dfs.pipeline.ecn</name>
- <value>false</value>
- <description>
- If true, allows ECN (explicit congestion notification) from the
- Datanode.
- </description>
- </property>
- <property>
- <name>dfs.qjournal.accept-recovery.timeout.ms</name>
- <value>120000</value>
- <description>
- Quorum timeout in milliseconds during accept phase of
- recovery/synchronization for a specific segment.
- </description>
- </property>
- <property>
- <name>dfs.qjournal.finalize-segment.timeout.ms</name>
- <value>120000</value>
- <description>
- Quorum timeout in milliseconds during finalizing for a specific
- segment.
- </description>
- </property>
- <property>
- <name>dfs.qjournal.get-journal-state.timeout.ms</name>
- <value>120000</value>
- <description>
- Timeout in milliseconds when calling getJournalState() on the
- JournalNodes.
- </description>
- </property>
- <property>
- <name>dfs.qjournal.new-epoch.timeout.ms</name>
- <value>120000</value>
- <description>
- Timeout in milliseconds when getting an epoch number for write
- access to JournalNodes.
- </description>
- </property>
- <property>
- <name>dfs.qjournal.prepare-recovery.timeout.ms</name>
- <value>120000</value>
- <description>
- Quorum timeout in milliseconds during preparation phase of
- recovery/synchronization for a specific segment.
- </description>
- </property>
- <property>
- <name>dfs.qjournal.queued-edits.limit.mb</name>
- <value>10</value>
- <description>
- Queue size in MB for quorum journal edits.
- </description>
- </property>
- <property>
- <name>dfs.qjournal.select-input-streams.timeout.ms</name>
- <value>20000</value>
- <description>
- Timeout in milliseconds for accepting streams from JournalManagers.
- </description>
- </property>
- <property>
- <name>dfs.qjournal.start-segment.timeout.ms</name>
- <value>20000</value>
- <description>
- Quorum timeout in milliseconds for starting a log segment.
- </description>
- </property>
- <property>
- <name>dfs.qjournal.write-txns.timeout.ms</name>
- <value>20000</value>
- <description>
- Write timeout in milliseconds when writing to a quorum of remote
- journals.
- </description>
- </property>
- <property>
- <name>dfs.quota.by.storage.type.enabled</name>
- <value>true</value>
- <description>
- If true, enables quotas based on storage type.
- </description>
- </property>
- <property>
- <name>dfs.secondary.namenode.kerberos.principal</name>
- <value></value>
- <description>
- Kerberos principal name for the Secondary NameNode.
- </description>
- </property>
- <property>
- <name>dfs.secondary.namenode.keytab.file</name>
- <value></value>
- <description>
- Kerberos keytab file for the Secondary NameNode.
- </description>
- </property>
- <property>
- <name>dfs.web.authentication.filter</name>
- <value>org.apache.hadoop.hdfs.web.AuthFilter</value>
- <description>
- Authentication filter class used for WebHDFS.
- </description>
- </property>
- <property>
- <name>dfs.web.authentication.simple.anonymous.allowed</name>
- <value></value>
- <description>
- If true, allows anonymous users to access WebHDFS. Set to
- false to disable anonymous authentication.
- </description>
- </property>
- <property>
- <name>dfs.web.ugi</name>
- <value></value>
- <description>
- dfs.web.ugi is deprecated. Use hadoop.http.staticuser.user instead.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.netty.high.watermark</name>
- <value>65535</value>
- <description>
- High watermark configuration to Netty for Datanode WebHdfs.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.netty.low.watermark</name>
- <value>32768</value>
- <description>
- Low watermark configuration to Netty for Datanode WebHdfs.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.oauth2.access.token.provider</name>
- <value></value>
- <description>
- Access token provider class for WebHDFS using OAuth2.
- Defaults to org.apache.hadoop.hdfs.web.oauth2.ConfCredentialBasedAccessTokenProvider.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.oauth2.client.id</name>
- <value></value>
- <description>
- Client id used to obtain access token with either credential or
- refresh token.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.oauth2.enabled</name>
- <value>false</value>
- <description>
- If true, enables OAuth2 in WebHDFS.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.oauth2.refresh.url</name>
- <value></value>
- <description>
- URL against which to post for obtaining bearer token with
- either credential or refresh token.
- </description>
- </property>
- <property>
- <name>ssl.server.keystore.keypassword</name>
- <value></value>
- <description>
- Keystore key password for HTTPS SSL configuration
- </description>
- </property>
- <property>
- <name>ssl.server.keystore.location</name>
- <value></value>
- <description>
- Keystore location for HTTPS SSL configuration
- </description>
- </property>
- <property>
- <name>ssl.server.keystore.password</name>
- <value></value>
- <description>
- Keystore password for HTTPS SSL configuration
- </description>
- </property>
- <property>
- <name>ssl.server.truststore.location</name>
- <value></value>
- <description>
- Truststore location for HTTPS SSL configuration
- </description>
- </property>
- <property>
- <name>ssl.server.truststore.password</name>
- <value></value>
- <description>
- Truststore password for HTTPS SSL configuration
- </description>
- </property>
- <!--Disk balancer properties-->
- <property>
- <name>dfs.disk.balancer.max.disk.throughputInMBperSec</name>
- <value>10</value>
- <description>Maximum disk bandwidth used by diskbalancer
- during read from a source disk. The unit is MB/sec.
- </description>
- </property>
- <property>
- <name>dfs.disk.balancer.block.tolerance.percent</name>
- <value>10</value>
- <description>
- When a disk balancer copy operation is proceeding, the datanode is still
- active. So it might not be possible to move the exactly specified
- amount of data. So tolerance allows us to define a percentage which
- defines a good enough move.
- </description>
- </property>
- <property>
- <name>dfs.disk.balancer.max.disk.errors</name>
- <value>5</value>
- <description>
- During a block move from a source to destination disk, we might
- encounter various errors. This defines how many errors we can tolerate
- before we declare a move between 2 disks (or a step) has failed.
- </description>
- </property>
- <property>
- <name>dfs.disk.balancer.enabled</name>
- <value>false</value>
- <description>
- This enables the diskbalancer feature on a cluster. By default, disk
- balancer is disabled.
- </description>
- </property>
- <property>
- <name>dfs.disk.balancer.plan.threshold.percent</name>
- <value>10</value>
- <description>
- The percentage threshold value for volume Data Density in a plan.
- If the absolute value of a volume's Data Density exceeds this
- threshold on a node, the volumes corresponding to those disks
- should be balanced in the plan. The default value is 10.
- </description>
- </property>
- <property>
- <name>dfs.lock.suppress.warning.interval</name>
- <value>10s</value>
- <description>Instrumentation reporting long critical sections will suppress
- consecutive warnings within this interval.</description>
- </property>
- <property>
- <name>httpfs.buffer.size</name>
- <value>4096</value>
- <description>
- The buffer size to be used when creating or opening an httpfs filesystem IO stream.
- </description>
- </property>
- <property>
- <name>dfs.webhdfs.use.ipc.callq</name>
- <value>true</value>
- <description>Enables routing of webhdfs calls through rpc
- call queue</description>
- </property>
- <property>
- <name>dfs.datanode.disk.check.min.gap</name>
- <value>15m</value>
- <description>
- The minimum gap between two successive checks of the same DataNode
- volume. This setting supports multiple time unit suffixes as described
- in dfs.heartbeat.interval. If no suffix is specified then milliseconds
- is assumed.
- </description>
- </property>
- <property>
- <name>dfs.datanode.disk.check.timeout</name>
- <value>10m</value>
- <description>
- Maximum allowed time for a disk check to complete during DataNode
- startup. If the check does not complete within this time interval
- then the disk is declared as failed. This setting supports
- multiple time unit suffixes as described in dfs.heartbeat.interval.
- If no suffix is specified then milliseconds is assumed.
- </description>
- </property>
- <property>
- <name>dfs.use.dfs.network.topology</name>
- <value>true</value>
- <description>
- Enables DFSNetworkTopology to choose nodes for placing replicas.
- </description>
- </property>
- <property>
- <name>dfs.qjm.operations.timeout</name>
- <value>60s</value>
- <description>
- Common key to set timeout for related operations in
- QuorumJournalManager. This setting supports multiple time unit suffixes
- as described in dfs.heartbeat.interval.
- If no suffix is specified then milliseconds is assumed.
- </description>
- </property>
- <property>
- <name>dfs.reformat.disabled</name>
- <value>false</value>
- <description>
- Disable reformat of NameNode. If its value is set to "true"
- and metadata directories already exist then attempt to format NameNode
- will throw NameNodeFormatException.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.default.nameserviceId</name>
- <value></value>
- <description>
- Nameservice identifier of the default subcluster to monitor.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.rpc.enable</name>
- <value>true</value>
- <description>
- If true, the RPC service to handle client requests in the router is
- enabled.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.rpc-address</name>
- <value>0.0.0.0:8888</value>
- <description>
- RPC address that handles all clients requests.
- The value of this property will take the form of router-host1:rpc-port.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.rpc-bind-host</name>
- <value></value>
- <description>
- The actual address the RPC server will bind to. If this optional address is
- set, it overrides only the hostname portion of
- dfs.federation.router.rpc-address. This is useful for making the name node
- listen on all interfaces by setting it to 0.0.0.0.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.handler.count</name>
- <value>10</value>
- <description>
- The number of server threads for the router to handle RPC requests from
- clients.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.handler.queue.size</name>
- <value>100</value>
- <description>
- The size of the queue for the number of handlers to handle RPC client requests.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.reader.count</name>
- <value>1</value>
- <description>
- The number of readers for the router to handle RPC client requests.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.reader.queue.size</name>
- <value>100</value>
- <description>
- The size of the queue for the number of readers for the router to handle RPC client requests.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.connection.pool-size</name>
- <value>1</value>
- <description>
- Size of the pool of connections from the router to namenodes.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.connection.clean.ms</name>
- <value>10000</value>
- <description>
- Time interval, in milliseconds, to check if the connection pool should
- remove unused connections.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.connection.pool.clean.ms</name>
- <value>60000</value>
- <description>
- Time interval, in milliseconds, to check if the connection manager should
- remove unused connection pools.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.metrics.enable</name>
- <value>true</value>
- <description>
- If the metrics in the router are enabled.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.metrics.class</name>
- <value>org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCPerformanceMonitor</value>
- <description>
- Class to monitor the RPC system in the router. It must implement the
- RouterRpcMonitor interface.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.admin.enable</name>
- <value>true</value>
- <description>
- If true, the RPC admin service to handle client requests in the router is
- enabled.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.admin-address</name>
- <value>0.0.0.0:8111</value>
- <description>
- RPC address that handles the admin requests.
- The value of this property will take the form of router-host1:rpc-port.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.admin-bind-host</name>
- <value></value>
- <description>
- The actual address the RPC admin server will bind to. If this optional
- address is set, it overrides only the hostname portion of
- dfs.federation.router.admin-address. This is useful for making the name
- node listen on all interfaces by setting it to 0.0.0.0.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.admin.handler.count</name>
- <value>1</value>
- <description>
- The number of server threads for the router to handle RPC requests from
- admin.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.file.resolver.client.class</name>
- <value>org.apache.hadoop.hdfs.server.federation.MockResolver</value>
- <description>
- Class to resolve files to subclusters.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.namenode.resolver.client.class</name>
- <value>org.apache.hadoop.hdfs.server.federation.resolver.MembershipNamenodeResolver</value>
- <description>
- Class to resolve the namenode for a subcluster.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.store.enable</name>
- <value>true</value>
- <description>
- If true, the Router connects to the State Store.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.store.serializer</name>
- <value>org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreSerializerPBImpl</value>
- <description>
- Class to serialize State Store records.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.store.driver.class</name>
- <value>org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreFileImpl</value>
- <description>
- Class to implement the State Store. By default it uses the local disk.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.store.connection.test</name>
- <value>60000</value>
- <description>
- How often to check for the connection to the State Store in milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.cache.ttl</name>
- <value>60000</value>
- <description>
- How often to refresh the State Store caches in milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.store.membership.expiration</name>
- <value>300000</value>
- <description>
- Expiration time in milliseconds for a membership record.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.heartbeat.enable</name>
- <value>true</value>
- <description>
- If true, the Router heartbeats into the State Store.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.heartbeat.interval</name>
- <value>5000</value>
- <description>
- How often the Router should heartbeat into the State Store in milliseconds.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.monitor.namenode</name>
- <value></value>
- <description>
- The identifier of the namenodes to monitor and heartbeat.
- </description>
- </property>
- <property>
- <name>dfs.federation.router.monitor.localnamenode.enable</name>
- <value>true</value>
- <description>
- If true, the Router should monitor the namenode in the local machine.
- </description>
- </property>
- </configuration>
|