123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110 |
- <?xml version="1.0"?>
- <!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
- <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
- "http://forrest.apache.org/dtd/document-v20.dtd">
- <document>
- <header>
- <title>C API libhdfs</title>
- <meta name="http-equiv">Content-Type</meta>
- <meta name="content">text/html;</meta>
- <meta name="charset">utf-8</meta>
- </header>
- <body>
- <section>
- <title>Overview</title>
- <p>
- libhdfs is a JNI based C API for Hadoop's Distributed File System (HDFS).
- It provides C APIs to a subset of the HDFS APIs to manipulate HDFS files and
- the filesystem. libhdfs is part of the Hadoop distribution and comes
- pre-compiled in ${HADOOP_PREFIX}/libhdfs/libhdfs.so .
- </p>
- </section>
- <section>
- <title>The APIs</title>
- <p>
- The libhdfs APIs are a subset of the <a href="api/org/apache/hadoop/fs/FileSystem.html">Hadoop FileSystem APIs</a>.
- </p>
- <p>
- The header file for libhdfs describes each API in detail and is available in ${HADOOP_PREFIX}/src/c++/libhdfs/hdfs.h
- </p>
- </section>
- <section>
- <title>A Sample Program</title>
- <source>
- #include "hdfs.h"
- /*
-  * Sample libhdfs client: connects to the default filesystem, creates
-  * /tmp/testfile.txt, writes a NUL-terminated greeting to it, then flushes
-  * and closes the file and disconnects.
-  * Returns 0 on success; exits with status -1 as soon as any step fails.
-  */
- int main(int argc, char **argv) {
-     /* "default" with port 0 selects the filesystem named in the configuration. */
-     hdfsFS fs = hdfsConnect("default", 0);
-     if (!fs) {
-         fprintf(stderr, "Failed to connect to HDFS!\n");
-         exit(-1);
-     }
-     const char *writePath = "/tmp/testfile.txt";
-     /* Zeros for buffer size, replication and block size request the defaults. */
-     hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
-     if (!writeFile) {
-         fprintf(stderr, "Failed to open %s for writing!\n", writePath);
-         exit(-1);
-     }
-     const char *buffer = "Hello, World!";
-     /* +1 so the terminating NUL byte is written as well. */
-     tSize num_written_bytes = hdfsWrite(fs, writeFile, buffer, strlen(buffer)+1);
-     if (num_written_bytes == -1) {
-         fprintf(stderr, "Failed to write to %s\n", writePath);
-         exit(-1);
-     }
-     if (hdfsFlush(fs, writeFile)) {
-         fprintf(stderr, "Failed to 'flush' %s\n", writePath);
-         exit(-1);
-     }
-     /* A failed close can mean the data never reached the filesystem. */
-     if (hdfsCloseFile(fs, writeFile)) {
-         fprintf(stderr, "Failed to close %s\n", writePath);
-         exit(-1);
-     }
-     hdfsDisconnect(fs);
-     return 0;
- }
- </source>
- </section>
- <section>
- <title>How To Link With The Library</title>
- <p>
- See the Makefile for hdfs_test.c in the libhdfs source directory (${HADOOP_PREFIX}/src/c++/libhdfs/Makefile), or use a command like:<br />
- gcc above_sample.c -I${HADOOP_PREFIX}/src/c++/libhdfs -L${HADOOP_PREFIX}/libhdfs -lhdfs -o above_sample
- </p>
- </section>
- <section>
- <title>Common Problems</title>
- <p>
- The most common problem is that the CLASSPATH is not set properly when running a program that uses libhdfs.
- Make sure you set it to include all the Hadoop jars needed to run Hadoop itself. Currently, there is no way to
- programmatically generate the classpath, but a good bet is to include all the jar files in ${HADOOP_PREFIX}
- and ${HADOOP_PREFIX}/lib as well as the right configuration directory containing hdfs-site.xml.
- </p>
- </section>
- <section>
- <title>Thread Safe</title>
- <p>libhdfs is thread safe.</p>
- <ul>
- <li>Concurrency and Hadoop FS "handles"
- <br />The Hadoop FS implementation includes a FS handle cache which caches based on the URI of the
- namenode along with the user connecting. So, all calls to hdfsConnect will return the same handle but
- calls to hdfsConnectAsUser with different users will return different handles. But, since HDFS client
- handles are completely thread safe, this has no bearing on concurrency.
- </li>
- <li>Concurrency and libhdfs/JNI
- <br />The libhdfs calls to JNI should always be creating thread local storage, so (in theory), libhdfs
- should be as thread safe as the underlying calls to the Hadoop FS.
- </li>
- </ul>
- </section>
- </body>
- </document>
|