Package org.apache.hadoop.mapreduce.filecache

Examples of org.apache.hadoop.mapreduce.filecache.TrackerDistributedCacheManager


  public void testReferenceCount() throws IOException, LoginException,
      URISyntaxException, InterruptedException {
    if (!canRun()) {
      return;
    }
    TrackerDistributedCacheManager manager =
      new FakeTrackerDistributedCacheManager(conf);
    String userName = getJobOwnerName();
    File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString());

    // Configures a job with a regular file
    Job job1 = Job.getInstance(conf);
    job1.setUser(userName);
    job1.addCacheFile(secondCacheFile.toUri());
    Configuration conf1 = job1.getConfiguration();
    ClientDistributedCacheManager.determineTimestamps(conf1);
    ClientDistributedCacheManager.determineCacheVisibilities(conf1);

    // Task localizing for first job
    TaskDistributedCacheManager handle = manager
        .newTaskDistributedCacheManager(conf1);
    handle.setup(localDirAllocator, workDir, TaskTracker
          .getPrivateDistributedCacheDir(userName),
          TaskTracker.getPublicDistributedCacheDir());
    handle.release();
    for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
      assertEquals(0, manager.getReferenceCount(c.uri, conf1, c.timestamp,
          c.owner, false));
    }
   
    Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
    createPrivateTempFile(thirdCacheFile);
   
    // Configures another job with three regular files.
    Job job2 = Job.getInstance(conf);
    job2.setUser(userName);
    // add a file that would get failed to localize
    job2.addCacheFile(firstCacheFile.toUri());
    // add a file that is already localized by different job
    job2.addCacheFile(secondCacheFile.toUri());
    // add a file that is never localized
    job2.addCacheFile(thirdCacheFile.toUri());
    Configuration conf2 = job2.getConfiguration();
    ClientDistributedCacheManager.determineTimestamps(conf2);
    ClientDistributedCacheManager.determineCacheVisibilities(conf2);

    // Task localizing for second job
    // localization for the "firstCacheFile" will fail.
    handle = manager.newTaskDistributedCacheManager(conf2);
    Throwable th = null;
    try {
      handle.setup(localDirAllocator, workDir, TaskTracker
          .getPrivateDistributedCacheDir(userName),
          TaskTracker.getPublicDistributedCacheDir());
    } catch (IOException e) {
      th = e;
      Log.info("Exception during setup", e);
    }
    assertNotNull(th);
    assertTrue(th.getMessage().contains("fake fail"));
    handle.release();
    th = null;
    for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
      try {
        assertEquals(0, manager.getReferenceCount(c.uri, conf2, c.timestamp,
            c.owner, false));
      } catch (IOException ie) {
        th = ie;
        Log.info("Exception getting reference count for " + c.uri, ie);
      }
View Full Code Here


   
  }
 
  private Path checkLocalizedPath(boolean visibility)
  throws IOException, LoginException, InterruptedException {
    TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(conf, taskController);
    String userName = getJobOwnerName();
    File workDir = new File(TEST_ROOT_DIR, "workdir");
    Path cacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile");
    if (visibility) {
      createPublicTempFile(cacheFile);
    } else {
      createPrivateTempFile(cacheFile);
    }
   
    Job job1 = Job.getInstance(conf);
    job1.setUser(userName);
    job1.addCacheFile(cacheFile.toUri());
    Configuration conf1 = job1.getConfiguration();
    ClientDistributedCacheManager.determineTimestamps(conf1);
    ClientDistributedCacheManager.determineCacheVisibilities(conf1);

    // Task localizing for job
    TaskDistributedCacheManager handle = manager
        .newTaskDistributedCacheManager(conf1);
    handle.setup(localDirAllocator, workDir, TaskTracker
          .getPrivateDistributedCacheDir(userName),
          TaskTracker.getPublicDistributedCacheDir());
    TaskDistributedCacheManager.CacheFile c = handle.getCacheFiles().get(0);
    String distCacheDir;
    if (visibility) {
      distCacheDir = TaskTracker.getPublicDistributedCacheDir();
    } else {
      distCacheDir = TaskTracker.getPrivateDistributedCacheDir(userName);
    }
    Path localizedPath =
      manager.getLocalCache(cacheFile.toUri(), conf1, distCacheDir,
          fs.getFileStatus(cacheFile), false,
          c.timestamp, new Path(TEST_ROOT_DIR), false,
          visibility);
    assertTrue("Cache file didn't get localized in the expected directory. " +
        "Expected localization to happen within " +
View Full Code Here

    conf2.setLong(TTConfig.TT_LOCAL_CACHE_SUBDIRS_LIMIT, 3);
    //The goal is to get down to 15.75K and 2 dirs
    conf2.setFloat(TTConfig.TT_LOCAL_CACHE_KEEP_AROUND_PCT, 0.75f);
    conf2.setLong(TTConfig.TT_DISTRIBUTED_CACHE_CHECK_PERIOD, CACHE_DELETE_PERIOD_MS);
    refreshConf(conf2);
    TrackerDistributedCacheManager manager =
        new TrackerDistributedCacheManager(conf2, taskController);
    manager.startCleanupThread();
    FileSystem localfs = FileSystem.getLocal(conf2);
    String userName = getJobOwnerName();
    conf2.set(MRJobConfig.USER_NAME, userName);

    //Here we are testing the LRU.  In this case we will add in 4 cache entries
    // 2 of them are 8k each and 2 of them are very small.  We want to verify
    // That they are deleted in LRU order.
    // So what we will do is add in the two large files first, 1 then 2, and
    // then one of the small ones 3.  We will then release them in opposite
    // order 3, 2, 1.
    //
    // Finally we will add in the last small file.  This last file should push
    // us over the 3 entry limit to trigger a cleanup.  So LRU order is 3, 2, 1
    // And we will only delete 2 entries so that should leave 1 un touched
    // but 3 and 2 deleted

    Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
    Path fourthCacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile");
    // Adding two more small files, so it triggers the number of sub directory
    // limit but does not trigger the file size limit.
    createTempFile(thirdCacheFile, 1);
    createTempFile(fourthCacheFile, 1);

    Path firstLocalCache = manager.getLocalCache(firstCacheFile.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(firstCacheFile), false,
        getFileStamp(firstCacheFile), new Path(TEST_ROOT_DIR), false, false);

    Path secondLocalCache = manager.getLocalCache(secondCacheFile.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(secondCacheFile), false,
        getFileStamp(secondCacheFile), new Path(TEST_ROOT_DIR), false, false);

    Path thirdLocalCache = manager.getLocalCache(thirdCacheFile.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(thirdCacheFile), false,
        getFileStamp(thirdCacheFile), new Path(TEST_ROOT_DIR), false, false);

    manager.releaseCache(thirdCacheFile.toUri(), conf2,
        getFileStamp(thirdCacheFile),
        TrackerDistributedCacheManager.getLocalizedCacheOwner(false), false);

    manager.releaseCache(secondCacheFile.toUri(), conf2,
        getFileStamp(secondCacheFile),
        TrackerDistributedCacheManager.getLocalizedCacheOwner(false), false);

    manager.releaseCache(firstCacheFile.toUri(), conf2,
        getFileStamp(firstCacheFile),
        TrackerDistributedCacheManager.getLocalizedCacheOwner(false), false);


    // Getting the fourth cache will make the number of sub directories becomes
    // 4 which is greater than 3. So the released cache will be deleted.
    manager.getLocalCache(fourthCacheFile.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(fourthCacheFile), false,
        getFileStamp(fourthCacheFile), new Path(TEST_ROOT_DIR), false, false);

    checkCacheDeletion(localfs, secondLocalCache, "DistributedCache failed " +
    "deleting second cache LRU order");

    checkCacheDeletion(localfs, thirdLocalCache,
    "DistributedCache failed deleting third" +
    " cache LRU order.");

    checkCacheNOTDeletion(localfs, firstLocalCache, "DistributedCache failed " +
    "Deleted first cache LRU order.");

    checkCacheNOTDeletion(localfs, fourthCacheFile, "DistributedCache failed " +
    "Deleted fourth cache LRU order.");
    // Clean up the files created in this test
    new File(thirdCacheFile.toString()).delete();
    new File(fourthCacheFile.toString()).delete();
    manager.stopCleanupThread();
  }
View Full Code Here

  public void testSameNameFileArchiveCache() throws IOException,
      URISyntaxException, InterruptedException {
    if (!canRun()) {
      return;
    }
    TrackerDistributedCacheManager manager =
        spy(new TrackerDistributedCacheManager(conf, taskController));
    URI rsrc = new URI("file://foo/bar/yak");
    Path cacheDir = new Path("file:///localcache");
    Path archivePath = new Path(cacheDir, "archive");
    Path filePath = new Path(cacheDir, "file");
    doReturn(archivePath).when(manager).localizeCache(eq(conf), eq(rsrc),
        anyLong(), Matchers.<CacheStatus> anyObject(), eq(true), anyBoolean());
    doReturn(filePath).when(manager).localizeCache(eq(conf), eq(rsrc),
        anyLong(), Matchers.<CacheStatus> anyObject(), eq(false), anyBoolean());
    // could fail, but check match instead
    doNothing().when(manager).checkCacheStatusValidity(
        Matchers.<Configuration> anyObject(), eq(rsrc), anyLong(),
        Matchers.<CacheStatus> anyObject(), Matchers.<FileStatus> anyObject(),
        anyBoolean());
    // localizeCache initializes mtime of cached rsrc; set to uninitialized val
    doReturn(-1L).when(manager).checkStampSinceJobStarted(
        Matchers.<Configuration> anyObject(),
        Matchers.<FileSystem> anyObject(), eq(rsrc), anyLong(),
        Matchers.<CacheStatus> anyObject(), Matchers.<FileStatus> anyObject());
    doReturn(-1L).when(manager).getTimestamp(
        Matchers.<Configuration> anyObject(), eq(rsrc));
    FileStatus rsrcStatus = mock(FileStatus.class);
    when(rsrcStatus.getLen()).thenReturn(4344L);

    Path localizedPathForFile =
        manager.getLocalCache(rsrc, conf, "sub", rsrcStatus, false, 20L,
            new Path("file:///tmp"), false, true);
    Path localizedPathForArchive =
        manager.getLocalCache(rsrc, conf, "sub", rsrcStatus, true, 20L,
            new Path("file:///tmp"), false, true);
    assertNotSame("File and Archive resolve to the same path: "
        + localizedPathForFile + ". Should differ.", localizedPathForFile,
        localizedPathForArchive);
  }
View Full Code Here

    conf2.set(MRConfig.LOCAL_DIR, ROOT_MAPRED_LOCAL_DIR.toString());
    conf2.setLong(TTConfig.TT_LOCAL_CACHE_SIZE, LOCAL_CACHE_LIMIT);
    conf2.setLong(TTConfig.TT_LOCAL_CACHE_SUBDIRS_LIMIT, LOCAL_CACHE_SUBDIR);
    conf2.setLong(TTConfig.TT_DISTRIBUTED_CACHE_CHECK_PERIOD, CACHE_DELETE_PERIOD_MS);
    refreshConf(conf2);
    TrackerDistributedCacheManager manager =
        new TrackerDistributedCacheManager(conf2, taskController);
    manager.startCleanupThread();
    FileSystem localfs = FileSystem.getLocal(conf2);
    String userName = getJobOwnerName();
    conf2.set(MRJobConfig.USER_NAME, userName);

    // We first test the size limit
    Path localCache = manager.getLocalCache(firstCacheFile.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(firstCacheFile), false,
        getFileStamp(firstCacheFile), new Path(TEST_ROOT_DIR), false, false);
    manager.releaseCache(firstCacheFile.toUri(), conf2,
        getFileStamp(firstCacheFile),
        TrackerDistributedCacheManager.getLocalizedCacheOwner(false), false);
    //in above code,localized a file of size 4K and then release the cache
    // which will cause the cache be deleted when the limit goes out.
    // The below code localize another cache which's designed to
    //sweep away the first cache.
    manager.getLocalCache(secondCacheFile.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(secondCacheFile), false,
        getFileStamp(secondCacheFile), new Path(TEST_ROOT_DIR), false, false);
    checkCacheDeletion(localfs, localCache, "DistributedCache failed " +
        "deleting old cache when the cache store is full.");
    // Now we test the number of sub directories limit
    // Create the temporary cache files to be used in the tests.
    Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
    Path fourthCacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile");
    // Adding two more small files, so it triggers the number of sub directory
    // limit but does not trigger the file size limit.
    createTempFile(thirdCacheFile, 1);
    createTempFile(fourthCacheFile, 1);
    Path thirdLocalCache = manager.getLocalCache(thirdCacheFile.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(thirdCacheFile), false,
        getFileStamp(thirdCacheFile), new Path(TEST_ROOT_DIR), false, false);
    // Release the third cache so that it can be deleted while sweeping
    manager.releaseCache(thirdCacheFile.toUri(), conf2,
        getFileStamp(thirdCacheFile),
        TrackerDistributedCacheManager.getLocalizedCacheOwner(false), false);
    // Getting the fourth cache will make the number of sub directories becomes
    // 3 which is greater than 2. So the released cache will be deleted.
    manager.getLocalCache(fourthCacheFile.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(fourthCacheFile), false,
        getFileStamp(fourthCacheFile), new Path(TEST_ROOT_DIR), false, false);
    checkCacheDeletion(localfs, thirdLocalCache,
        "DistributedCache failed deleting old" +
        " cache when the cache exceeds the number of sub directories limit.");
    // Clean up the files created in this test
    new File(thirdCacheFile.toString()).delete();
    new File(fourthCacheFile.toString()).delete();
    manager.stopCleanupThread();
  }
View Full Code Here

 
  public void testFileSystemOtherThanDefault() throws Exception {
    if (!canRun()) {
      return;
    }
    TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(conf, taskController);
    conf.set("fs.fakefile.impl", conf.get("fs.file.impl"));
    String userName = getJobOwnerName();
    conf.set(MRJobConfig.USER_NAME, userName);
    Path fileToCache = new Path("fakefile:///"
        + firstCacheFile.toUri().getPath());
    Path result = manager.getLocalCache(fileToCache.toUri(), conf,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(firstCacheFile), false,
        getFileStamp(firstCacheFile),
        new Path(TEST_ROOT_DIR), false, false);
    assertNotNull("DistributedCache cached file on non-default filesystem.",
View Full Code Here

    Configuration myConf = new Configuration(conf);
    myConf.set(FileSystem.FS_DEFAULT_NAME_KEY, "refresh:///");
    myConf.setClass("fs.refresh.impl", FakeFileSystem.class, FileSystem.class);
    String userName = getJobOwnerName();

    TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(myConf, taskController);
    // ****** Imitate JobClient code
    // Configures a task/job with both a regular file and a "classpath" file.
    Configuration subConf = new Configuration(myConf);
    subConf.set(MRJobConfig.USER_NAME, userName);
    DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf);
    ClientDistributedCacheManager.determineTimestamps(subConf);
    ClientDistributedCacheManager.determineCacheVisibilities(subConf);
    // ****** End of imitating JobClient code

    // ****** Imitate TaskRunner code.
    TaskDistributedCacheManager handle =
      manager.newTaskDistributedCacheManager(subConf);
    assertNull(null, DistributedCache.getLocalCacheFiles(subConf));
    File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString());
    handle.setup(localDirAllocator, workDir, TaskTracker
        .getPrivateDistributedCacheDir(userName),
        TaskTracker.getPublicDistributedCacheDir());
    // ****** End of imitating TaskRunner code

    Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(subConf);
    assertNotNull(null, localCacheFiles);
    assertEquals(1, localCacheFiles.length);
    Path cachedFirstFile = localCacheFiles[0];
    assertFileLengthEquals(firstCacheFile, cachedFirstFile);
    assertFalse("Paths should be different.",
        firstCacheFile.equals(cachedFirstFile));
    // release
    handle.release();
   
    // change the file timestamp
    FileSystem fs = FileSystem.get(myConf);
    ((FakeFileSystem)fs).advanceClock(1);

    // running a task of the same job
    Throwable th = null;
    try {
      handle.setup(localDirAllocator, workDir, TaskTracker
          .getPrivateDistributedCacheDir(userName), TaskTracker.getPublicDistributedCacheDir());
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull("Throwable is null", th);
    assertTrue("Exception message does not match",
        th.getMessage().contains("has changed on HDFS since job started"));
    // release
    handle.release();
   
    // running a task of the same job on another TaskTracker which has never
    // initialized the cache
    TrackerDistributedCacheManager manager2 =
      new TrackerDistributedCacheManager(myConf, taskController);
    TaskDistributedCacheManager handle2 =
      manager2.newTaskDistributedCacheManager(subConf);
    File workDir2 = new File(new Path(TEST_ROOT_DIR, "workdir2").toString());
    th = null;
    try {
      handle2.setup(localDirAllocator, workDir2, TaskTracker
          .getPrivateDistributedCacheDir(userName),
View Full Code Here

    if (!canRun()) {
      return;
    }
    String userName = getJobOwnerName();
    conf.set(MRJobConfig.USER_NAME, userName);
    TrackerDistributedCacheManager manager =
        new TrackerDistributedCacheManager(conf, taskController);
    FileSystem localfs = FileSystem.getLocal(conf);

    Path[] localCache = new Path[2];
    localCache[0] = manager.getLocalCache(firstCacheFile.toUri(), conf,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(firstCacheFile), false,
        getFileStamp(firstCacheFile), new Path(TEST_ROOT_DIR), false, false);
    FsPermission myPermission = new FsPermission((short)0600);
    Path myFile = new Path(localCache[0].getParent(), "myfile.txt");
    if (FileSystem.create(localfs, myFile, myPermission) == null) {
      throw new IOException("Could not create " + myFile);
    }
    try {
      localCache[1] = manager.getLocalCache(secondCacheFile.toUri(), conf,
          TaskTracker.getPrivateDistributedCacheDir(userName),
          fs.getFileStatus(secondCacheFile), false,
          getFileStamp(secondCacheFile), new Path(TEST_ROOT_DIR), false,
          false);
      FileStatus stat = localfs.getFileStatus(myFile);
View Full Code Here

    // setup and create jobcache directory with appropriate permissions
    taskController.setup();

    // Initialize DistributedCache
    this.distributedCacheManager =
        new TrackerDistributedCacheManager(this.fConf, taskController,
        asyncDiskService);
    this.distributedCacheManager.startCleanupThread();

    this.jobClient = (InterTrackerProtocol)
    UserGroupInformation.getLoginUser().doAs(
View Full Code Here

    // setup and create jobcache directory with appropriate permissions
    taskController.setup();

    // Initialize DistributedCache
    this.distributedCacheManager =
        new TrackerDistributedCacheManager(this.fConf, taskController,
        asyncDiskService);
    this.distributedCacheManager.startCleanupThread();

    this.jobClient = (InterTrackerProtocol)
    UserGroupInformation.getLoginUser().doAs(
View Full Code Here

TOP

Related Classes of org.apache.hadoop.mapreduce.filecache.TrackerDistributedCacheManager

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.