Package com.crawljax.core.state

Examples of com.crawljax.core.state.StateVertex


   *             when the method is invoked more than once.
   */
  public void setup(StateVertex indexState) {
    if (!isSet.getAndSet(true)) {
      LOG.debug("Setting up the crawlsession");
      StateVertex added = stateFlowGraph.putIfAbsent(indexState, false);
      Preconditions.checkArgument(added == null, "Could not set the initial state");
      session = new CrawlSession(config, stateFlowGraph, indexState);
    } else {
      throw new IllegalStateException("Session is already set");
    }
View Full Code Here


  @Override
  public CrawlSession call() {
    setMaximumCrawlTimeIfNeeded();
    plugins.runPreCrawlingPlugins(config);
    CrawlTaskConsumer firstConsumer = consumerFactory.get();
    StateVertex firstState = firstConsumer.crawlIndex();
    crawlSessionProvider.setup(firstState);
    plugins.runOnNewStatePlugins(firstConsumer.getContext(), firstState);
    executeConsumers(firstConsumer);
    return crawlSessionProvider.get();
  }
View Full Code Here

  }

  private void follow(CrawlPath path, StateVertex targetState)
          throws StateUnreachableException,
          CrawljaxException {
    StateVertex curState = context.getSession().getInitialState();

    for (Eventable clickable : path) {

      checkCrawlConditions(targetState);

      LOG.debug("Backtracking by executing {} on element: {}", clickable.getEventType(),
              clickable);

      boolean switched = stateMachine.changeState(clickable.getTargetStateVertex());
      if (!switched) {
        throw new StateUnreachableException(targetState, "Could not switch states");
      }
      curState = clickable.getTargetStateVertex();
      crawlpath.add(clickable);
      handleInputElements(clickable);
      if (fireEvent(clickable)) {
        if (crawlerLeftDomain()) {
          throw new StateUnreachableException(targetState,
                  "Domain left while following path");
        }
        int depth = crawlDepth.incrementAndGet();
        LOG.info("Crawl depth is now {}", depth);
        plugins.runOnRevisitStatePlugins(context, curState);

      } else {
        throw new StateUnreachableException(targetState, "couldn't fire eventable "
                + clickable);
      }

      checkCrawlConditions(targetState);
    }

    if (!curState.equals(targetState)) {
      throw new StateUnreachableException(targetState,
              "The path didn't result in the desired state but in state "
                      + curState.getName());
    }
  }
View Full Code Here

  private void inspectNewState(Eventable event) {
    if (crawlerLeftDomain()) {
      LOG.debug("The browser left the domain. Going back one state...");
      goBackOneState();
    } else {
      StateVertex newState = stateMachine.newStateFor(browser);
      if (domChanged(event, newState)) {
        inspectNewDom(event, newState);
      } else {
        LOG.debug("Dom unchanged");
      }
View Full Code Here

      context.getSession().addCrawlPath(crawlpath.immutableCopy());
    }
  }

  private void parseCurrentPageForCandidateElements() {
    StateVertex currentState = stateMachine.getCurrentState();
    LOG.debug("Parsing DOM of state {} for candidate elements", currentState.getName());
    ImmutableList<CandidateElement> extract = candidateExtractor.extract(currentState);

    plugins.runPreStateCrawlingPlugins(context, extract, currentState);

    candidateActionCache.addActions(extract, currentState);
View Full Code Here

  private void goBackOneState() {
    LOG.debug("Going back one state");
    CrawlPath currentPath = crawlpath.immutableCopy();
    crawlpath = null;
    StateVertex current = stateMachine.getCurrentState();
    reset();
    follow(currentPath, current);
  }
View Full Code Here

   */
  public StateVertex crawlIndex() {
    LOG.debug("Setting up vertex of the index page");
    browser.goToUrl(url);
    plugins.runOnUrlLoadPlugins(context);
    StateVertex index =
            new StateVertex(StateVertex.INDEX_ID, url.toExternalForm(), "index",
                    browser.getDom(),
                    stateComparator.getStrippedDom(browser));
    Preconditions.checkArgument(index.getId() == StateVertex.INDEX_ID,
            "It seems some the index state is crawled more than once.");

    LOG.debug("Parsing the index for candidate elements");
    ImmutableList<CandidateElement> extract = candidateExtractor.extract(index);

View Full Code Here

  }

  private void pollAndHandleCrawlTasks() throws InterruptedException {
    try {
      LOG.debug("Awaiting task");
      StateVertex crawlTask = candidates.awaitNewTask();
      int activeConsumers = runningConsumers.incrementAndGet();
      LOG.debug("There are {} active consumers", activeConsumers);
      handleTask(crawlTask);
    } catch (InterruptedException e) {
      throw e;
View Full Code Here

    assertThat(config.getCrawlRules().followExternalLinks(), is(true));
    assertThat(extract, hasSize(3));
  }

  private List<CandidateElement> extractFromTestFile(CandidateElementExtractor extractor) throws URISyntaxException {
    StateVertex currentState = Mockito.mock(StateVertex.class);
    String file = "/candidateElementExtractorTest/domWithOneExternalAndTwoInternal.html";
    URL dom = Resources.getResource(getClass(), file);
    browser.goToUrl(dom.toURI());
    List<CandidateElement> extract = extractor.extract(currentState);
    return extract;
View Full Code Here

    };
  }

  @Test
  public void testStateWithDomSubstring() {
    StateVertex vertex = mock(StateVertex.class);
    when(vertex.getDom()).thenReturn("paap");
    assertThat(vertex, is(stateWithDomSubstring("aap")));
    assertThat(vertex, is(not(stateWithDomSubstring("bla"))));
  }
View Full Code Here

TOP

Related Classes of com.crawljax.core.state.StateVertex

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.