Examples of MetadataRepository


Examples of de.anomic.search.MetadataRepository

            final int count = post.getInt("lines", 100);
            Iterator<MetadataRepository.HostStat> statsiter;
            prop.put("statistics_lines", count);
            int cnt = 0;
            try {
                final MetadataRepository metadata = segment.urlMetadata();
                statsiter = metadata.statistics(count, metadata.urlSampleScores(metadata.domainSampleCollector()));
                boolean dark = true;
                MetadataRepository.HostStat hs;
                while (statsiter.hasNext() && cnt < count) {
                    hs = statsiter.next();
                    prop.put("statisticslines_domains_" + cnt + "_dark", (dark) ? "1" : "0");
View Full Code Here

Examples of de.anomic.search.MetadataRepository

        final Log log = new Log("URL-CLEANUP");
        try {
            log.logInfo("STARTING URL CLEANUP");

            // db containing all currently loaded urls
            final MetadataRepository currentUrlDB = new MetadataRepository(new File(new File(indexPrimaryRoot, networkName), "TEXT"), "text.urlmd", false, false);

            // db used to hold all needed urls
            final MetadataRepository minimizedUrlDB = new MetadataRepository(new File(new File(indexRoot2, networkName), "TEXT"), "text.urlmd", false, false);

            final int cacheMem = (int)(MemoryControl.maxMemory - MemoryControl.total());
            if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");

            final Segment wordIndex = new Segment(
                    log,
                    new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"),
                    10000,
                    (long) Integer.MAX_VALUE, false, false);
            final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.termIndex().references("AAAAAAAAAAAA".getBytes(), false, false);

            long urlCounter = 0, wordCounter = 0;
            long wordChunkStart = System.currentTimeMillis(), wordChunkEnd = 0;
            String wordChunkStartHash = "AAAAAAAAAAAA", wordChunkEndHash;

            while (indexContainerIterator.hasNext()) {
                ReferenceContainer<WordReference> wordIdxContainer = null;
                try {
                    wordCounter++;
                    wordIdxContainer = indexContainerIterator.next();

                    // the combined container will fit, read the container
                    final Iterator<WordReference> wordIdxEntries = wordIdxContainer.entries();
                    Reference iEntry;
                    while (wordIdxEntries.hasNext()) {
                        iEntry = wordIdxEntries.next();
                        final byte[] urlHash = iEntry.urlhash();
                        if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try {
                            final URIMetadataRow urlEntry = currentUrlDB.load(urlHash);
                            urlCounter++;
                            minimizedUrlDB.store(urlEntry);
                            if (urlCounter % 500 == 0) {
                                log.logInfo(urlCounter + " URLs found so far.");
                            }
                        } catch (final IOException e) {}
                    }

                    if (wordCounter%500 == 0) {
                        wordChunkEndHash = ASCII.String(wordIdxContainer.getTermHash());
                        wordChunkEnd = System.currentTimeMillis();
                        final long duration = wordChunkEnd - wordChunkStart;
                        log.logInfo(wordCounter + " words scanned " +
                                "[" + wordChunkStartHash + " .. " + wordChunkEndHash + "]\n" +
                                "Duration: "+ 500*1000/duration + " words/s" +
                                " | Free memory: " + MemoryControl.free() +
                                " | Total memory: " + MemoryControl.total());
                        wordChunkStart = wordChunkEnd;
                        wordChunkStartHash = wordChunkEndHash;
                    }

                    // we have read all elements, now we can close it
                    wordIdxContainer = null;

                } catch (final Exception e) {
                    log.logSevere("Exception", e);
                } finally {
                    if (wordIdxContainer != null) try { wordIdxContainer = null; } catch (final Exception e) {}
                }
            }
            log.logInfo("current LURL DB contains " + currentUrlDB.size() + " entries.");
            log.logInfo("mimimized LURL DB contains " + minimizedUrlDB.size() + " entries.");

            currentUrlDB.close();
            minimizedUrlDB.close();
            wordIndex.close();

            // TODO: rename the minimized UrlDB to the name of the previous UrlDB

            log.logInfo("FINISHED URL CLEANUP, WAIT FOR DUMP");
View Full Code Here

Examples of de.anomic.search.MetadataRepository

     */
    private static void urldbcleanup(final File dataHome, final File appHome, final String networkName) {
        final File root = dataHome;
        final File indexroot = new File(root, "DATA/INDEX");
        try {Log.configureLogging(dataHome, appHome, new File(dataHome, "DATA/LOG/yacy.logging"));} catch (final Exception e) {}
        final MetadataRepository currentUrlDB = new MetadataRepository(new File(new File(indexroot, networkName), "TEXT"), "text.urlmd", false, false);
        currentUrlDB.deadlinkCleaner();
        currentUrlDB.close();
    }
View Full Code Here

Examples of de.anomic.search.MetadataRepository

    }

    public static int diffurlcol(final String metadataPath, final String statisticFile, final String diffFile) throws IOException, RowSpaceExceededException {
        System.out.println("INDEX DIFF URL-COL startup");
        final HandleMap idx = new HandleMap(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 4, new File(statisticFile));
        final MetadataRepository mr = new MetadataRepository(new File(metadataPath), "text.urlmd", false, false);
        final HandleSet hs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 1000000);
        System.out.println("INDEX DIFF URL-COL loaded dump, starting diff");
        final long start = System.currentTimeMillis();
        long update = start - 7000;
        int count = 0;
        for (final byte[] refhash: mr) {
            if (idx.get(refhash) == -1) {
                // the key exists as urlhash in the URL database, but not in the collection as referenced urlhash
                hs.put(refhash);
            }
            count++;
            if (System.currentTimeMillis() - update > 10000) {
                System.out.println("INDEX DIFF URL-COL running, checked " + count + ", found " + hs.size() + " missing references so far, " + (((System.currentTimeMillis() - start) * (mr.size() - count) / count) / 60000) + " minutes remaining");
                update = System.currentTimeMillis();
            }
        }
        idx.close();
        mr.close();
        System.out.println("INDEX DIFF URL-COL finished diff, starting dump to " + diffFile);
        count = hs.dump(new File(diffFile));
        System.out.println("INDEX DIFF URL-COL finished dump, wrote " + count + " references that occur in the URL-DB, but not in the collection-dump");
        return count;
    }
View Full Code Here

Examples of de.anomic.search.MetadataRepository

    }

    public static void export(final String metadataPath, final int format, final String export, final String diffFile) throws IOException, RowSpaceExceededException {
        // format: 0=text, 1=html, 2=rss/xml
        System.out.println("URL EXPORT startup");
        final MetadataRepository mr = new MetadataRepository(new File(metadataPath), "text.urlmd", false, false);
        final HandleSet hs = (diffFile == null) ? null : new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, new File(diffFile));
        System.out.println("URL EXPORT loaded dump, starting export");
        final Export e = mr.export(new File(export), ".*", hs, format, false);
        try {
            e.join();
        } catch (final InterruptedException e1) {
            Log.logException(e1);
        }
        System.out.println("URL EXPORT finished export, wrote " + ((hs == null) ? mr.size() : hs.size()) + " entries");
    }
View Full Code Here

Examples of de.anomic.search.MetadataRepository

        System.out.println("URL EXPORT finished export, wrote " + ((hs == null) ? mr.size() : hs.size()) + " entries");
    }

    public static void delete(final String metadataPath, final String diffFile) throws IOException, RowSpaceExceededException {
        System.out.println("URL DELETE startup");
        final MetadataRepository mr = new MetadataRepository(new File(metadataPath), "text.urlmd", false, false);
        final int mrSize = mr.size();
        final HandleSet hs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, new File(diffFile));
        System.out.println("URL DELETE loaded dump, starting deletion of " + hs.size() + " entries from " + mrSize);
        for (final byte[] refhash: hs) {
            mr.remove(refhash);
        }
        System.out.println("URL DELETE finished deletions, " + mr.size() + " entries left in URL database");
    }
View Full Code Here

Examples of io.fabric8.agent.repository.MetadataRepository

        // TODO: handle default range policy on feature requirements
        // TODO: handle default range policy on feature dependencies requirements

        for (String uri : getPrefixedProperties(properties, "resources.")) {
            builder.addResourceRepository(new MetadataRepository(new HttpMetadataProvider(uri)));
        }

        updateStatus("resolving", null);
        Resource systemBundle = systemBundleContext.getBundle(0).adapt(BundleRevision.class);
        Collection<Resource> allResources = builder.resolve(systemBundle, resolveOptionalImports);
View Full Code Here

Examples of net.yacy.search.index.MetadataRepository

            final int count = post.getInt("lines", 100);
            Iterator<MetadataRepository.HostStat> statsiter;
            prop.put("statistics_lines", count);
            int cnt = 0;
            try {
                final MetadataRepository metadata = segment.urlMetadata();
                statsiter = metadata.statistics(count, metadata.urlSampleScores(metadata.domainSampleCollector()));
                boolean dark = true;
                MetadataRepository.HostStat hs;
                while (statsiter.hasNext() && cnt < count) {
                    hs = statsiter.next();
                    prop.put("statisticslines_domains_" + cnt + "_dark", (dark) ? "1" : "0");
View Full Code Here

Examples of org.apache.archiva.metadata.repository.MetadataRepository

        catch ( IOException e )
        {
            throw new ConsumerException( e.getMessage(), e );
        }

        MetadataRepository metadataRepository = repositorySession.getRepository();

        List<ArtifactMetadata> results;
        try
        {
            results = metadataRepository.getArtifactsByChecksum( repoId, checksumSha1 );
        }
        catch ( MetadataRepositoryException e )
        {
            throw new ConsumerException( e.getMessage(), e );
        }

        if ( CollectionUtils.isNotEmpty( results ) )
        {
            ArtifactMetadata originalArtifact;
            try
            {
                originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
            }
            catch ( Exception e )
            {
                log.warn( "Not reporting problem for invalid artifact in checksum check: " + e.getMessage() );
                return;
            }

            for ( ArtifactMetadata dupArtifact : results )
            {
                String id = path.substring( path.lastIndexOf( "/" ) + 1 );
                if ( dupArtifact.getId().equals( id )
                    && dupArtifact.getNamespace().equals( originalArtifact.getNamespace() )
                    && dupArtifact.getProject().equals( originalArtifact.getProject() )
                    && dupArtifact.getVersion().equals( originalArtifact.getVersion() ) )
                {
                    // Skip reference to itself.
                    if ( log.isDebugEnabled() )
                    {
                        log.debug( "Not counting duplicate for artifact " + dupArtifact + " for path " + path );
                    }
                    continue;
                }

                RepositoryProblemFacet problem = new RepositoryProblemFacet();
                problem.setRepositoryId( repoId );
                problem.setNamespace( originalArtifact.getNamespace() );
                problem.setProject( originalArtifact.getProject() );
                problem.setVersion( originalArtifact.getVersion() );
                problem.setId( id );
                // FIXME: need to get the right storage resolver for the repository the dupe artifact is in, it might be
                //       a different type
                // FIXME: we need the project version here, not the artifact version
                problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
                    dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
                    dupArtifact.getId() ) );
                problem.setProblem( "duplicate-artifact" );

                try
                {
                    metadataRepository.addMetadataFacet( repoId, problem );
                }
                catch ( MetadataRepositoryException e )
                {
                    throw new ConsumerException( e.getMessage(), e );
                }
View Full Code Here

Examples of org.apache.archiva.metadata.repository.MetadataRepository

            File metadataFile = getMetadata( targetPath.getAbsolutePath() );
            ArchivaRepositoryMetadata metadata = getMetadata( metadataFile );

            updateMetadata( metadata, metadataFile, lastUpdatedTimestamp, artifact );

            MetadataRepository metadataRepository = repositorySession.getRepository();

            Collection<ArtifactMetadata> artifacts =
                metadataRepository.getArtifacts( repositoryId, artifact.getGroupId(), artifact.getArtifactId(),
                                                 artifact.getVersion() );

            for ( ArtifactMetadata artifactMetadata : artifacts )
            {
                // TODO: mismatch between artifact (snapshot) version and project (base) version here
                if ( artifact.getVersion().equals( artifact.getVersion() ) )
                {
                    metadataRepository.removeArtifact( artifactMetadata.getRepositoryId(),
                                                       artifactMetadata.getNamespace(), artifactMetadata.getProject(),
                                                       artifact.getVersion(), artifactMetadata.getId() );

                    // TODO: move into the metadata repository proper - need to differentiate attachment of
                    //       repository metadata to an artifact
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.