Basic HDFS API Usage

野性酷女 2022-01-28 10:21

Experiment objectives and requirements

Objectives:

  1. Understand the HDFS file system;
  2. Understand the HDFS architecture and the responsibilities of its core components;
  3. Master HDFS data read and write operations;
  4. Master common HDFS operations (Shell and Java API);
  5. Learn about the HDFS-related new features in Hadoop 2.0.

Environment requirements:

  1. Java JDK 1.8;
  2. apache-maven-3.6.0;
  3. MyEclipse C10;
  4. A running Hadoop cluster.

Exercises

Task 1: HDFS API connection test

1. Test the connection with a Test class

Key code:

```java
package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * @author yanxukun
 * @date 2019-03-07 11:46:23
 */
public class Test {
    public static void main(String[] args) throws Exception {
        System.out.println("Hello World!");
        // Point the client at the NameNode of the cluster.
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.10.111:9000");
        FileSystem fs = FileSystem.get(conf);
        // Creating a directory is enough to verify that the connection works.
        fs.mkdirs(new Path("hdfs://192.168.10.111:9000/user/input/test2"));
        fs.close();
    }
}
```

Run result:

(screenshot of the program output)

Task 2: **setUp and tearDown in unit tests**

Key code:

```java
package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class HDFSApp {

    public static final String HDFS_PATH = "hdfs://192.168.10.111:9000";

    public static Configuration configuration = null;
    public static FileSystem fileSystem = null;

    // Runs before every test: build the FileSystem handle.
    @Before
    public void setUp() throws Exception {
        System.out.println("HDFSApp.setUp()");
        configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_PATH);
        fileSystem = FileSystem.get(configuration);
    }

    // Runs after every test: release the references.
    @After
    public void tearDown() throws Exception {
        fileSystem = null;
        configuration = null;
        System.out.println("HDFSApp.tearDown()");
    }

    // Rename a file on HDFS.
    @Test
    public void rename() throws Exception {
        Path oldPath = new Path(HDFS_PATH + "/user/test/a.txt");
        Path newPath = new Path(HDFS_PATH + "/user/test/b.txt");
        System.out.println(fileSystem.rename(oldPath, newPath));
    }

    // Upload a local file to HDFS.
    @Test
    public void copyFromLocalFile() throws Exception {
        Path src = new Path("C:/luke/hello.txt");
        Path dist = new Path(HDFS_PATH + "/user/test/");
        fileSystem.copyFromLocalFile(src, dist);
    }

    // List the entries under a directory and print their metadata.
    @Test
    public void listFiles() throws Exception {
        FileStatus[] listStatus = fileSystem.listStatus(new Path(HDFS_PATH + "/user/test"));
        for (FileStatus fileStatus : listStatus) {
            String isDir = fileStatus.isDirectory() ? "directory" : "file";
            String permission = fileStatus.getPermission().toString();
            short replication = fileStatus.getReplication();
            long len = fileStatus.getLen();
            String path = fileStatus.getPath().toString();
            System.out.println(isDir + "\t" + permission + "\t" + replication + "\t" + len + "\t" + path);
        }
    }
}
```

Run result:

(screenshot of the program output)

Task 3: Basic SequenceFile operations

1. Writing a SequenceFile

Key code:

```java
package org.gy.myhadoop.mr;

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWriter {

    private static Configuration configuration = new Configuration();
    // NameNode address; fs.defaultFS is expected to come from core-site.xml on the classpath.
    private static String url = "hdfs://192.168.10.111:9000";
    private static String[] data = {"a,b,c,d,e,f,g", "e,f,g,h,i,j,k", "l,m,n,o,p,q,r,s", "t,u,v,w,x,y,z"};

    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(configuration);
        Path outputPath = new Path("MySequenceFile.seq");
        IntWritable key = new IntWritable();
        Text value = new Text();
        // Create a writer with IntWritable keys and Text values.
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, configuration, outputPath,
                IntWritable.class, Text.class);
        // Append ten records; keys count down from 10, values cycle through the data array.
        for (int i = 0; i < 10; i++) {
            key.set(10 - i);
            value.set(data[i % data.length]);
            writer.append(key, value);
        }
        IOUtils.closeStream(writer);
        Date day = new Date();
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        System.out.println("Completed by Yan Xukun (CS class 161) at " + df.format(day));
    }
}
```

Run result:

(screenshot of the program output)

2. Reading a SequenceFile

Key code:

```java
package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class SequenceFileReader {

    private static Configuration configuration = new Configuration();
    // NameNode address; fs.defaultFS is expected to come from core-site.xml on the classpath.
    private static String url = "hdfs://192.168.10.111:9000";

    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(configuration);
        Path inputPath = new Path("MySequenceFile.seq");

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inputPath, configuration);
        // Instantiate key/value objects of the types recorded in the file header.
        Writable keyClass = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), configuration);
        Writable valueClass = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), configuration);

        // Read records until the end of the file, printing the reader position after each one.
        while (reader.next(keyClass, valueClass)) {
            System.out.println("key:" + keyClass);
            System.out.println("value:" + valueClass);
            System.out.println("position:" + reader.getPosition());
        }
        IOUtils.closeStream(reader);
    }
}
```

Run result:

(screenshots of the program output)

Task 4: Basic MapFile operations

1. Writing a MapFile

Key code:

```java
package org.gy.myhadoop.mr;

import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class MapFileWriter {

    static Configuration configuration = new Configuration();
    private static String url = "hdfs://192.168.10.111:9000";

    public static void main(String[] args) throws Exception {
        // Connect to HDFS using the NameNode URI directly.
        FileSystem fs = FileSystem.get(URI.create(url), configuration);
        Path outPath = new Path("MyMapFile.map");

        Text key = new Text();
        key.set("mymapkey");
        Text value = new Text();
        value.set("mymapvalue");

        // A MapFile is a sorted, indexed SequenceFile; keys and values are both Text here.
        MapFile.Writer writer = new MapFile.Writer(configuration, fs,
                outPath.toString(), Text.class, Text.class);
        writer.append(key, value);
        IOUtils.closeStream(writer);

        Date day = new Date();
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        System.out.println("Completed by Yan Xukun (CS class 161) at " + df.format(day));
    }
}
```

Run result:

(screenshot of the program output)

2. Reading a MapFile

Key code:

```java
package org.gy.myhadoop.mr;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.util.ReflectionUtils;

public class MapFileReader {

    static Configuration configuration = new Configuration();
    private static String url = "hdfs://192.168.10.111:9000";

    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(URI.create(url), configuration);
        Path inPath = new Path("MyMapFile.map");

        MapFile.Reader reader = new MapFile.Reader(fs, inPath.toString(), configuration);
        // Instantiate key/value objects of whatever types the file was written with.
        Writable keyclass = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), configuration);
        Writable valueclass = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), configuration);

        // Iterate over all entries in key order.
        while (reader.next((WritableComparable) keyclass, valueclass)) {
            System.out.println(keyclass);
            System.out.println(valueclass);
        }
        IOUtils.closeStream(reader);
    }
}
```

Run result:

(screenshot of the program output)

Problems encountered and solutions

Troubleshooting 1: Java API access to HDFS fails with Connection refused

Error: when operating on HDFS from IDEA on Windows 10, the client throws java.net.ConnectException: Connection refused. Check the following settings first.

Checklist:

1. The firewall on the virtual machine must be turned off.

2. The virtual machine and the physical host must be able to ping each other.

3. etc/hadoop/core-site.xml under the Hadoop installation directory on the virtual machine must be configured correctly.

4. C:\Windows\System32\drivers\etc\hosts on the physical host must map the virtual machine's IP address to its hostname.

If all of the above are correct and the error persists, check whether the virtual machine's hostname is localhost (in my case the problem went away after changing localhost to hadoop); if it is, change it to something else. A small connectivity probe is sketched below.
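
To separate plain network problems from Hadoop configuration problems, it can help to probe the NameNode RPC port directly. The following is a minimal sketch and not part of the original lab code; the host and port are assumptions matching the cluster used in this experiment (192.168.10.111:9000).

```java
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;

// Minimal connectivity probe: tries to open a TCP connection to the NameNode RPC port.
// Hypothetical debugging helper; host and port are assumptions for this experiment.
public class NameNodeProbe {
    public static void main(String[] args) {
        String host = "192.168.10.111";
        int port = 9000;
        try (Socket socket = new Socket()) {
            socket.connect(new InetSocketAddress(host, port), 3000); // 3 s timeout
            System.out.println("NameNode port is reachable: " + host + ":" + port);
        } catch (IOException e) {
            // Connection refused or a timeout here points at a network, firewall,
            // or hosts-file problem rather than an HDFS API problem.
            System.out.println("Cannot reach " + host + ":" + port + " - " + e.getMessage());
        }
    }
}
```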

Troubleshooting 2: Wrong FS (IllegalArgumentException)

Error: java.lang.IllegalArgumentException: Wrong FS: hdfs://master:9000/test.txt, expected: file:/// at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:648)

Cause: without a core-site.xml on the classpath, the client defaults to the local file system (file:///), so an hdfs:// path is rejected.

Solution: create a core-site.xml in the project and copy the contents of core-site.xml from the virtual machine master into it.
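
Alternatively, the target file system can be specified in code instead of through core-site.xml. This is a minimal sketch, assuming the NameNode address used elsewhere in this experiment; it is not taken from the original lab code.

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WrongFsFix {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Option 1: set the default file system explicitly, so plain paths resolve to HDFS.
        conf.set("fs.defaultFS", "hdfs://192.168.10.111:9000");
        FileSystem fs1 = FileSystem.get(conf);
        System.out.println(fs1.exists(new Path("/user/test")));

        // Option 2: pass the HDFS URI directly to FileSystem.get(), regardless of fs.defaultFS.
        FileSystem fs2 = FileSystem.get(URI.create("hdfs://192.168.10.111:9000"), conf);
        System.out.println(fs2.exists(new Path("/user/test")));
    }
}
```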

Troubleshooting 3: disk space quota exceeded

Error: org.apache.hadoop.hdfs.protocol.DSQuotaExceededException:

The DiskSpace quota of /spaceQuota is exceeded: quota = 402653184 B = 384 MB but diskspace consumed = 402653220 B = 384.00 MB

Cause: a space quota has been set on the directory, so the write cannot complete.

Solution (two options):

$> hdfs dfsadmin -setSpaceQuota 384p /spaceQuota    // raise the space quota

$> hdfs dfsadmin -clrSpaceQuota /spaceQuota         // remove the space quota limit
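
The current quota and consumption can also be checked from the Java API before retrying the write. A minimal sketch, assuming the /spaceQuota directory from the error above and the NameNode address used in this experiment; getSpaceQuota() returns -1 when no quota is set.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QuotaCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.10.111:9000");
        FileSystem fs = FileSystem.get(conf);

        // ContentSummary carries both the configured quotas and the current consumption.
        ContentSummary summary = fs.getContentSummary(new Path("/spaceQuota"));
        System.out.println("space quota (bytes):    " + summary.getSpaceQuota());   // -1 = no quota set
        System.out.println("space consumed (bytes): " + summary.getSpaceConsumed());
        System.out.println("name quota (entries):   " + summary.getQuota());
    }
}
```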
