package parser; import java.net.MalformedURLException; import java.net.URL; import java.time.Duration; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; import org.openqa.selenium.By; import org.openqa.selenium.Platform; import org.openqa.selenium.StaleElementReferenceException; import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeOptions; import org.openqa.selenium.remote.DesiredCapabilities; import org.openqa.selenium.remote.RemoteWebDriver; import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.WebDriverWait; import com.google.common.base.Stopwatch; import com.google.common.base.Strings; import mysql.Mysql; import object.ResultDTO; public class OddsPortal extends ParserBase implements ParserJoinedFunctions { private static final String DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV = "//div[contains(@class,'tabs')]/div[not(@class)]/div[1]/div"; private static final String SOCCER_RESULTS_TABLE_NAME = "SoccerResults"; private static final String DATE_PATTERN = "yyyyMMdd"; private LocalDateTime baseDate; private int sportId; private int countryId; private int leagueId; final int sportsId = Mysql.getInstance().getSportId("soccer"); private int currentLeagueId; private int currentCountryId; private List resultsToInsert = new ArrayList<>(); private String currentSeason; public void getMatches(LocalDateTime date) { baseDate = date; final String dateFormatted = date.format(DateTimeFormatter.ofPattern(DATE_PATTERN)); System.out.println("Starting to get matches at date " + dateFormatted); Stopwatch stopwatch = Stopwatch.createStarted(); getMatchesByDateSelenium(dateFormatted); System.out.println( "Done with date " + dateFormatted + " took " + stopwatch.elapsed(TimeUnit.SECONDS) + " seconds"); } private int getCountryId(List links) { String country; country = links.get(1).getText().trim(); country = country.replace(" ", "-"); country = country.replace("\\.", ""); return Mysql.getInstance().getCountryId(country); } private String getLastSeason(int leagueId, int countryId) { String result = ""; if (!Strings.isNullOrEmpty(currentSeason) && currentLeagueId == leagueId && currentCountryId == countryId) { return currentSeason; } else { result = Mysql.getInstance().getLastSeason(leagueId, countryId); } if (Strings.isNullOrEmpty(result)) { result = String.valueOf(LocalDate.now().getYear()); } return result; } private int getLeagueId(List links) { String league; league = links.get(2).getText().trim(); league = league.replace(" ", "-"); league = league.replace("\\.", ""); return Mysql.getInstance().getLeagueId(sportId, countryId, league); } private void getMatchesByDateSelenium(String date) { final String soccerUrl = "https://oddsportal.com/matches/football/" + date; try { parseSoccerMatchesSelenium(soccerUrl); } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } private void parseSoccerMatchesSelenium(final String soccerUrl) throws MalformedURLException { // ChromeDriver driver = getSeleniumDriver(); // DesiredCapabilities capabilities = new DesiredCapabilities(); // capabilities.setCapability("browserName", "Chrome"); // capabilities.setPlatform(Platform.LINUX); ChromeOptions options = new ChromeOptions(); // Fixing 255 Error crashes options.addArguments("--no-sandbox"); options.addArguments("--disable-dev-shm-usage"); // Options to trick bot detection // Removing webdriver property options.addArguments("--disable-blink-features=AutomationControlled"); options.setExperimentalOption("excludeSwitches", Collections.singletonList("enable-automation")); options.setExperimentalOption("useAutomationExtension", null); // Changing the user agent / browser fingerprint options.addArguments("window-size=1920,1080"); options.addArguments( "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"); // Other options.addArguments("disable-infobars"); RemoteWebDriver driver = new RemoteWebDriver(new URL("http://nordh.xyz:4444/wd/hub"), options); sportId = Mysql.getInstance().getSportId("soccer"); driver.get(soccerUrl); WebDriverWait wait = getWaitDriver(driver, 90); wait.until(ExpectedConditions .numberOfElementsToBeMoreThan(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV), 1)); if (!checkIfElementExists(driver, "//p[text()='EU Odds']")) { WebElement oddsFormat = driver .findElement(By.xpath("//p[@class='self-center text-xs text-orange-main'][1]")); oddsFormat.click(); driver.findElement(By.xpath("//a[text()='EU Odds']")).click(); wait.withTimeout(Duration.ofSeconds(30)); } List divs = driver .findElements(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV)); boolean staleElementRetried = false; for (int i = 0; i < divs.size(); i++) { try { WebElement element = divs.get(i); scrollElementIntoView(driver, element); ResultDTO result = new ResultDTO(SOCCER_RESULTS_TABLE_NAME); boolean somethingWrong = false; List subDivs = element.findElements(By.xpath("./div")); if (subDivs.size() == 3) { WebElement competitionsDiv = subDivs.get(0); List links = competitionsDiv.findElements(By.xpath(".//a")); countryId = getCountryId(links); leagueId = getLeagueId(links); result.setLeagueId(leagueId); result.setCountryId(countryId); WebElement firstResultsDiv = subDivs.get(2); setGameDate(result, firstResultsDiv); setTeamsInfo(result, firstResultsDiv); setScoring(result, firstResultsDiv); setOdds(result, firstResultsDiv); } else if (subDivs.size() == 1) { result.setLeagueId(leagueId); result.setCountryId(countryId); WebElement firstResultsDiv = subDivs.get(0); setGameDate(result, firstResultsDiv); setTeamsInfo(result, firstResultsDiv); setScoring(result, firstResultsDiv); setOdds(result, firstResultsDiv); } else { somethingWrong = true; String message = "Subdiv size = " + subDivs.size(); Logger.getGlobal().log(Level.WARNING, message); } if (!somethingWrong) { final Mysql mysql = Mysql.getInstance(); result.setCountryId(countryId); result.setLeagueId(leagueId); result.setSeason(getLastSeason(leagueId, countryId)); resultsToInsert.add(result); if (resultsToInsert.size() > 100) { Logger.getGlobal().log(Level.INFO, "INSERTING 100 results"); mysql.addResults(resultsToInsert); resultsToInsert.clear(); } } divs = driver .findElements(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV)); } catch (StaleElementReferenceException e) { if (staleElementRetried) { System.out.println("Stale element have already been retried continueing"); staleElementRetried = false; i++; } System.out.println("Retrying stale element"); i--; staleElementRetried = true; } } String message = "INSERTING LAST " + resultsToInsert.size() + " results"; Logger.getGlobal().log(Level.INFO, message); Mysql.getInstance().addResults(resultsToInsert); resultsToInsert.clear(); driver.close(); driver.quit(); } private void setGameDate(ResultDTO result, WebElement firstResultsDiv) { final List time = new ArrayList<>(); // String datePath = "./div/a/div[1]/div/p"; String datePath = "./div/div/div/div/div/p"; if (checkIfElementExists(firstResultsDiv, datePath)) { String timeText = firstResultsDiv.findElement(By.xpath(datePath)).getText(); String[] timeSplit = timeText.split(":"); if (timeSplit.length == 2) { time.add(Integer.parseInt(timeSplit[0])); time.add(Integer.parseInt(timeSplit[1])); } else { time.add(0); time.add(0); } } else { time.add(0); time.add(0); } final LocalDateTime dt = baseDate.withHour(time.get(0)) .withMinute(time.get(1)).withSecond(0) .withNano(0); result.setGameDate(dt); } private void setOdds(ResultDTO result, WebElement firstResultsDiv) { List oddsDivs = firstResultsDiv.findElements(By.xpath("./div/div")); try { result.setOdds1(Float.parseFloat(oddsDivs.get(1).getText())); result.setOddsX(Float.parseFloat(oddsDivs.get(2).getText())); result.setOdds2(Float.parseFloat(oddsDivs.get(3).getText())); } catch (NumberFormatException e) { // Skip this one } catch (IndexOutOfBoundsException e) { System.out.println(result); } } private void setScoring(ResultDTO result, WebElement firstResultsDiv) { String resultPath = "./div/div/div[2]/div/div/div/div/div"; if (checkIfElementExists(firstResultsDiv, resultPath)) { List scoringDivs = firstResultsDiv.findElements(By.xpath(resultPath)); try { result.setHomeScore(Integer.parseInt(scoringDivs.get(0).getText())); result.setAwayScore(Integer.parseInt(scoringDivs.get(1).getText())); } catch (NumberFormatException e) { // There is no scoring yet continue processing } } } private void setTeamsInfo(ResultDTO result, WebElement firstResultsDiv) { String homeTeamName = firstResultsDiv.findElement(By.xpath(".//a[1]/div")).getText().trim(); String awayTeamName = firstResultsDiv.findElement(By.xpath(".//a[2]/div")).getText().trim(); result.setHomeTeam(homeTeamName); result.setAwayTeam(awayTeamName); result.setHomeTeamId( Mysql.getInstance().getOrInsertTeam(homeTeamName, countryId, leagueId, sportId)); result.setAwayTeamId( Mysql.getInstance().getOrInsertTeam(awayTeamName, countryId, leagueId, sportId)); } }