Basic Use of the HDFS API
Experiment Objectives and Requirements:
Purpose:
Experiment Environment:
Exercises
Task 1: HDFS API connection test
1. Testing the connection with the Test class
Key code:
package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * @author yanxukun
 * @date 2019-03-07 11:46:23
 */
public class Test {
    public static void main(String[] args) throws Exception {
        System.out.println("Hello World!");
        // Point the client at the HDFS NameNode.
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.10.111:9000");
        FileSystem fs = FileSystem.get(conf);
        // Creating a directory is enough to prove the connection works.
        fs.mkdirs(new Path("hdfs://192.168.10.111:9000/user/input/test2"));
    }
}
Run result:
Task 2: The setUp and tearDown methods of a unit test
Key code:
package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class HDFSApp {
    public static final String HDFS_PATH = "hdfs://192.168.10.111:9000";
    public static Configuration configuration = null;
    public static FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        // Runs before every test: open a connection to HDFS.
        System.out.println("HDFSApp.setUp()");
        configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_PATH);
        fileSystem = FileSystem.get(configuration);
    }

    @After
    public void tearDown() throws Exception {
        // Runs after every test: release the references.
        fileSystem = null;
        configuration = null;
        System.out.println("HDFSApp.tearDown()");
    }

    @Test
    public void rename() throws Exception {
        // Rename a.txt to b.txt; rename() returns true on success.
        Path oldPath = new Path(HDFS_PATH + "/user/test/a.txt");
        Path newPath = new Path(HDFS_PATH + "/user/test/b.txt");
        System.out.println(fileSystem.rename(oldPath, newPath));
    }

    @Test
    public void copyFromLocalFile() throws Exception {
        // Upload a local file into /user/test on HDFS.
        Path src = new Path("C:/luke/hello.txt");
        Path dist = new Path(HDFS_PATH + "/user/test/");
        fileSystem.copyFromLocalFile(src, dist);
    }

    @Test
    public void listFiles() throws Exception {
        // List the entries under /user/test and print their metadata.
        FileStatus[] listStatus = fileSystem.listStatus(new Path(HDFS_PATH + "/user/test"));
        for (FileStatus fileStatus : listStatus) {
            String isDir = fileStatus.isDirectory() ? "directory" : "file";
            String permission = fileStatus.getPermission().toString();
            short replication = fileStatus.getReplication();
            long len = fileStatus.getLen();
            String path = fileStatus.getPath().toString();
            System.out.println(isDir + "\t" + permission + "\t" + replication + "\t" + len + "\t" + path);
        }
    }
}
Run result:
Task 3: Basic operations on SequenceFile
1. Writing a SequenceFile
Key code:
package org.gy.myhadoop.mr;

import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWriter {
    private static Configuration configuration = new Configuration();
    private static String url = "hdfs://192.168.10.111:9000";
    private static String[] data = {"a,b,c,d,e,f,g", "e,f,g,h,i,j,k", "l,m,n,o,p,q,r,s", "t,u,v,w,x,y,z"};

    public static void main(String[] args) throws Exception {
        // Connect to HDFS; the relative output path resolves under the user's home directory.
        FileSystem fs = FileSystem.get(URI.create(url), configuration);
        Path outputPath = new Path("MySequenceFile.seq");
        IntWritable key = new IntWritable();
        Text value = new Text();
        // Writer with IntWritable keys and Text values.
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, configuration, outputPath,
                IntWritable.class, Text.class);
        // Append ten records with descending keys.
        for (int i = 0; i < 10; i++) {
            key.set(10 - i);
            value.set(data[i % data.length]);
            writer.append(key, value);
        }
        IOUtils.closeStream(writer);
        Date day = new Date();
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        System.out.println("Completed by Yan Xukun (Computer Science 161) at " + df.format(day));
    }
}
Run result:
2. Reading a SequenceFile
Key code:
package org.gy.myhadoop.mr;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class SequenceFileReader {
    private static Configuration configuration = new Configuration();
    private static String url = "hdfs://192.168.10.111:9000";

    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(URI.create(url), configuration);
        Path inputPath = new Path("MySequenceFile.seq");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inputPath, configuration);
        // Instantiate key/value objects of the types recorded in the file header.
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), configuration);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), configuration);
        // Iterate over all records, printing the stream position after each one.
        while (reader.next(key, value)) {
            System.out.println("key:" + key);
            System.out.println("value:" + value);
            System.out.println("position:" + reader.getPosition());
        }
        IOUtils.closeStream(reader);
    }
}
Run result:
Task 4: Basic operations on MapFile
1. Writing a MapFile
Key code:
package org.gy.myhadoop.mr;

import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class MapFileWriter {
    static Configuration configuration = new Configuration();
    private static String url = "hdfs://192.168.10.111:9000";

    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(URI.create(url), configuration);
        // A MapFile is a directory containing a sorted "data" file and an "index" file.
        Path outPath = new Path("MyMapFile.map");
        Text key = new Text();
        key.set("mymapkey");
        Text value = new Text();
        value.set("mymapvalue");
        MapFile.Writer writer = new MapFile.Writer(configuration, fs,
                outPath.toString(), Text.class, Text.class);
        writer.append(key, value);
        IOUtils.closeStream(writer);
        Date day = new Date();
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        System.out.println("Completed by Yan Xukun (Computer Science 161) at " + df.format(day));
    }
}
Run result:
2. Reading a MapFile
Key code:
package org.gy.myhadoop.mr;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.util.ReflectionUtils;

public class MapFileReader {
    static Configuration configuration = new Configuration();
    private static String url = "hdfs://192.168.10.111:9000";

    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(URI.create(url), configuration);
        Path inPath = new Path("MyMapFile.map");
        MapFile.Reader reader = new MapFile.Reader(fs, inPath.toString(), configuration);
        // Instantiate key/value objects of the types recorded in the MapFile.
        WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(reader.getKeyClass(), configuration);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), configuration);
        // Iterate over all entries in key order.
        while (reader.next(key, value)) {
            System.out.println(key);
            System.out.println(value);
        }
        IOUtils.closeStream(reader);
    }
}
Run result:
Problems Encountered and Solutions
Problem 1: The Java API reports Connection refused when accessing HDFS
Error: when using IDEA on Windows 10 to access the remote HDFS file system, the call fails with java.net.ConnectException: Connection refused. First check that the following settings are correct.
Troubleshooting checklist:
1. The firewall on the virtual machine must be turned off.
2. The virtual machine and the physical host must be able to ping each other.
3. etc/hadoop/core-site.xml under the Hadoop directory on the virtual machine must be configured correctly.
4. C:\Windows\System32\drivers\etc\hosts on the physical host must map the virtual machine's IP address to its hostname.
If all of the above are correct and the error still occurs, check whether the virtual machine's hostname is localhost (in my case the problem went away after changing localhost to hadoop); if it is, change it to something else. A quick reachability check is sketched below.
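Before going through the Hadoop settings one by one, it can save time to confirm that the NameNode RPC port is reachable at all from the Windows host. The following is only a minimal sketch, assuming the NameNode listens on 192.168.10.111:9000 as in the tasks above; a refused or timed-out connection points at the network, firewall, or hosts mapping rather than at the client code.

import java.net.InetSocketAddress;
import java.net.Socket;

public class NameNodeProbe {
    public static void main(String[] args) throws Exception {
        // Attempt a plain TCP connection to the NameNode RPC port with a 5-second timeout.
        try (Socket socket = new Socket()) {
            socket.connect(new InetSocketAddress("192.168.10.111", 9000), 5000);
            System.out.println("NameNode port 9000 is reachable");
        }
    }
}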
Problem 2:
Error: java.lang.IllegalArgumentException: Wrong FS: hdfs://master:9000/test.txt, expected: file:/// at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:648)
Cause: without a core-site.xml on the project classpath, the client defaults to the local file:/// filesystem, so an hdfs:// path is rejected.
Solution: create a core-site.xml in the project and copy into it the core-site.xml from the master virtual machine.
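An alternative to copying the cluster file is to point the Configuration at the NameNode directly in code, as the Task 1 example already does. A minimal sketch, assuming the NameNode address used throughout this report (the /test.txt path is only illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WrongFsFix {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Without a core-site.xml on the classpath, fs.defaultFS falls back to file:///,
        // so hdfs:// paths trigger the Wrong FS error; setting it explicitly avoids that.
        conf.set("fs.defaultFS", "hdfs://192.168.10.111:9000");
        FileSystem fs = FileSystem.get(conf);
        System.out.println(fs.exists(new Path("/test.txt")));
    }
}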
Problem 3:
Error: org.apache.hadoop.hdfs.protocol.DSQuotaExceededException:
The DiskSpace quota of /spaceQuota is exceeded: quota = 402653184 B = 384 MB but diskspace consumed = 402653220 B = 384.00 MB
Cause: a space quota has been set on the directory, so further writes are refused.
Solution: either of the following commands works:
$> hdfs dfsadmin -setSpaceQuota 384p /spaceQuota    // raise the space quota
$> hadoop dfsadmin -clrSpaceQuota /user/hadoop      // remove the space quota limit
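Before changing anything, you can check what quota is actually set and how much space has been consumed. A minimal sketch using the FileSystem API, assuming the /spaceQuota directory from the error above and the NameNode address used throughout this report:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QuotaCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.10.111:9000");
        FileSystem fs = FileSystem.get(conf);
        // ContentSummary reports the configured space quota and the bytes already consumed.
        ContentSummary summary = fs.getContentSummary(new Path("/spaceQuota"));
        System.out.println("space quota (bytes):    " + summary.getSpaceQuota());
        System.out.println("space consumed (bytes): " + summary.getSpaceConsumed());
    }
}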