package parser; import java.io.IOException; import java.sql.SQLException; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.List; import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlAnchor; import com.gargoylesoftware.htmlunit.html.HtmlDivision; import com.gargoylesoftware.htmlunit.html.HtmlPage; import com.gargoylesoftware.htmlunit.html.HtmlSpan; import com.gargoylesoftware.htmlunit.html.HtmlTable; import com.gargoylesoftware.htmlunit.html.HtmlTableCell; import com.gargoylesoftware.htmlunit.html.HtmlTableRow; import com.google.common.base.Strings; import mysql.Mysql; public class OddsPortal implements ParserJoinedFunctions { private LocalDateTime baseDate; private int currentParsePage; private int sportId; private int countryId; private int leagueId; private LocalDateTime gameDate; // https://stackoverflow.com/questions/14439991/skip-particular-javascript-execution-in-html-unit Skip url public void getMatchesByDate(String date) { final String soccerUrl = "https://oddsportal.com/matches/soccer/" + date; // final String hockeyUrl = "https://oddsportal.com/matches/hockey/" + date; final WebClient webClient = new WebClient(); webClient.getOptions().setUseInsecureSSL(true); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setThrowExceptionOnScriptError(false); Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF); webClient.waitForBackgroundJavaScript(3000); parseSoccerMatches(soccerUrl, webClient); webClient.close(); } private void parseSoccerMatches(final String soccerUrl, final WebClient webClient) { try { System.out.println("Getting Webpage"); final HtmlPage soccerMatches = webClient.getPage(soccerUrl); final HtmlTable matchesTable = soccerMatches.getFirstByXPath("//table[contains(@class, table-main)]"); final List rows = matchesTable.getRows(); String countryName = ""; String leagueName = ""; int i = 1; final int size = rows.size(); for (final HtmlTableRow tr : rows) { System.out.println("Processing " + i++ + " of " + size); if (tr.getAttribute("class").equals("dark center")) { final List countryLeague = tr.getByXPath(".//a"); countryName = countryLeague.get(0).getTextContent().toLowerCase().trim(); leagueName = countryLeague.get(1).getTextContent().toLowerCase().trim(); leagueName = leagueName.replaceAll(" ", "-"); leagueName = leagueName.replaceAll("\\.", ""); countryName = countryName.replaceAll(" ", "-"); countryName = countryName.replaceAll("\\.", ""); } else { final List cells = tr.getCells(); final String[] time = cells.get(0).getTextContent().split(":"); final String[] teams = cells.get(1).getTextContent().split(" - "); float odds1 = 0F; float oddsX = 0F; float odds2 = 0F; int homeScore = -1; int awayScore = -1; boolean overtime = false; boolean abandon = false; try { for (final HtmlTableCell tc : cells) { if (tc.getAttribute("class").contains("live-score")) { abandon = true; break; } //Score if (tc.getAttribute("class").contains("table-score")) { final String[] scoreValue = tc.getTextContent().split(":"); homeScore = Integer.valueOf(scoreValue[0]); if (scoreValue[1].matches("\\D+")) { overtime = true; } awayScore = Integer.valueOf(scoreValue[1].replaceAll("\\D+", "")); } if (tc.getAttribute("class").contains("odds-nowrp")) { if (tc.getTextContent().matches("[+-][0-9][0-9][0-9]")) { if (odds1 == 0F) { odds1 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent())); } else if (oddsX == 0F ) { oddsX = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent())); } else if (odds2 == 0F ) { odds2 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent())); } } else if (tc.getTextContent().matches("[0-9].[0-9]+")) { if (odds1 == 0F) { odds1 = Float.valueOf(tc.getTextContent()); } else if (oddsX == 0F ) { oddsX = Float.valueOf(tc.getTextContent()); } else if (odds2 == 0F ) { odds2 = Float.valueOf(tc.getTextContent()); } } } } } catch (final NumberFormatException e) { System.out.println("Failed to get the match between " + teams[0].trim() + " and " + teams[1].trim() + " at " + baseDate.withHour(Integer.valueOf(time[0])).withMinute(Integer.valueOf(time[1])) + " odds1: " + odds1 + " oddsX: " + oddsX + " odds2: " + odds2 + " homeScore " + homeScore + " awayScore " + awayScore + " overtime: " + (overtime?"true":"false")); continue; } if (abandon) { continue; } final Mysql mysql = Mysql.getInstance(); final int leagueId = mysql.addLeague(leagueName, countryName, "soccer"); final int countryId = mysql.getCountryId(countryName); final int sportId = mysql.getSportId("soccer"); String season = mysql.getLastParsedYear(leagueName, countryId); if (Strings.isNullOrEmpty(season)) { season = String.valueOf(LocalDateTime.now().getYear()); } final LocalDateTime dt = baseDate.withHour(Integer.valueOf(time[0])).withMinute(Integer.valueOf(time[1])).withSecond(0).withNano(0); mysql.addResult("SoccerResults", dt, teams[0].trim(), teams[1].trim(), homeScore, awayScore, overtime, odds1, oddsX, odds2, countryId, season, leagueId, sportId); } } } catch (FailingHttpStatusCodeException | IOException e) { e.printStackTrace(); } catch (final SQLException e) { e.printStackTrace(); } } public void getHistoricMatches(String sport, String country, String league, String year) { final String url = "https://www.oddsportal.com/"; final String resultsPage = "/results"; final WebClient webClient = new WebClient(); webClient.getOptions().setUseInsecureSSL(true); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setThrowExceptionOnScriptError(false); Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF); league = league.replaceAll(" ", "-"); league = league.replaceAll("\\.", ""); country = country.replaceAll(" ", "-"); league = league.replaceAll("\\.", ""); final Mysql mysql = Mysql.getInstance(); currentParsePage = 1; final String urlYearPart; if (year.equals(String.valueOf(LocalDate.now().getYear()))) { urlYearPart = ""; } else { urlYearPart = "-" + year; } try { sportId = mysql.getSportId(sport); countryId = mysql.getCountryId(country); leagueId = mysql.getLeagueId(sportId, countryId, league); String season = ""; final HtmlPage leaguePage = webClient.getPage(url + "/" + sport + "/" + country + "/" + league + urlYearPart + resultsPage); final List yearFilter = leaguePage.getByXPath("//ul[contains(@class,'main-filter')]//a"); for (final HtmlAnchor a : yearFilter) { System.out.println("Year filter: " + a.getHrefAttribute()); final String active = ((HtmlSpan)a.getParentNode().getParentNode()).getAttribute("class"); if (active.contains("active") && !active.contains("inactive")) { season = a.getTextContent(); year = season.replace('/', '-'); } } HtmlDivision tournamentTableDiv = leaguePage.getHtmlElementById("tournamentTable"); HtmlTable tournamentTable = (HtmlTable) tournamentTableDiv.getFirstChild(); gameDate = LocalDateTime.now(); final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH); parseTournamentTable(sportId, countryId, leagueId, season, tournamentTable, gameDate, dateFormatter); final HtmlDivision paginationLinksDiv = (HtmlDivision) tournamentTableDiv.getLastChild(); final List pagiantionLinks = paginationLinksDiv.getByXPath(".//a[contains(@href, 'page') and not(.//span[contains(@class, 'arrow')])]"); for (final HtmlAnchor a : pagiantionLinks) { System.out.println("Continuing with Pagination: " + a.getHrefAttribute()); // When done with start page click pagiantion final int parsePage = Integer.valueOf(a.getTextContent()); if (parsePage > currentParsePage) { a.click(); webClient.waitForBackgroundJavaScript(1000); tournamentTableDiv = leaguePage.getHtmlElementById("tournamentTable"); tournamentTable = (HtmlTable) tournamentTableDiv.getFirstChild(); parseTournamentTable(sportId, countryId, leagueId, season, tournamentTable, gameDate, dateFormatter); currentParsePage = parsePage; } // process new tournament table content } } catch (FailingHttpStatusCodeException | IOException e) { e.printStackTrace(); } catch (final SQLException sqle) { sqle.printStackTrace(); } catch (final ClassCastException cce) { System.out.println("No pagination table"); // cce.printStackTrace(); } finally { Mysql.getInstance().setParsingForLeague(leagueId, sportId, countryId, gameDate, currentParsePage, year); } webClient.close(); System.out.println("DONE with " + country + " (" + countryId + ") league " + league + "(" + leagueId + ")"); } private void parseTournamentTable(int sportId, int countryId, int leagueId, String season, HtmlTable tournamentTable, LocalDateTime gameDate, DateTimeFormatter dateFormatter) throws SQLException { for (final HtmlTableRow tr : tournamentTable.getRows()) { if (tr.getAttribute("class").contains("deactivate")) { String homeTeam; String awayTeam; int homeScore = -1; int awayScore = -1; float odds1 = 0f; float oddsX = 0f; float odds2 = 0f; boolean overtime = false; final HtmlTableCell timeCell = tr.getCell(0); final HtmlTableCell participantsCell = tr.getCell(1); // Game Time final String[] timeValue = timeCell.getTextContent().split(":"); gameDate = gameDate.withHour(Integer.valueOf(timeValue[0])); gameDate = gameDate.withMinute(Integer.valueOf(timeValue[1])); // Teams final String[] participantsValue = participantsCell.getTextContent().split(" - "); homeTeam = participantsValue[0].trim(); awayTeam = participantsValue[1].trim(); final List cells = tr.getCells(); for (final HtmlTableCell tc : cells) { //Score if (tc.getAttribute("class").contains("table-score")) { final String[] scoreValue = tc.getTextContent().split(":"); if (scoreValue[0].matches("\\D+")) { continue; } homeScore = Integer.valueOf(scoreValue[0]); if (scoreValue[1].matches("\\D+")) { overtime = true; } awayScore = Integer.valueOf(scoreValue[1].replaceAll("\\D+", "")); } if (tc.getAttribute("class").contains("odds-nowrp")) { if (tc.getTextContent().matches("[+-][0-9][0-9][0-9]")) { if (odds1 == 0F) { odds1 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent())); } else if (oddsX == 0F ) { oddsX = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent())); } else if (odds2 == 0F ) { odds2 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent())); } } else if (tc.getTextContent().matches("[0-9].[0-9]+")) { if (odds1 == 0F) { odds1 = Float.valueOf(tc.getTextContent()); } else if (oddsX == 0F ) { oddsX = Float.valueOf(tc.getTextContent()); } else if (odds2 == 0F ) { odds2 = Float.valueOf(tc.getTextContent()); } } } } if (gameDate != null && homeTeam != null && awayTeam != null && odds1 != 0 && oddsX != 0 && odds2 != 0 && !Strings.isNullOrEmpty(season)) { // All set.. update sql result table System.out.println("Adding game between " + homeTeam + " and " + awayTeam + " with score " + homeScore + "-" + awayScore); Mysql.getInstance().addResult("SoccerResults", gameDate, homeTeam, awayTeam, homeScore, awayScore, overtime, odds1, oddsX, odds2, countryId, season, leagueId, sportId); } else { System.out.println(String.format("Not adding, missing somethind.. gameDate: %s, homeTeam %s, awayTeam %s, odds1 %s, oddsX %s, odds2 %s, " + "season %s", gameDate, homeTeam, awayTeam, odds1, oddsX, odds2, season)); } } else if (tr.getAttribute("class").contains("center nob-border")) { // Datum rader final List dateSpan = tr.getByXPath(".//span[contains(@class, 'datet')]"); final String dateString = dateSpan.get(0).getTextContent(); if (dateString.toLowerCase().contains("yesterday")) { gameDate = LocalDateTime.now().minusDays(1); } else if (dateString.toLowerCase().contains("today")) { gameDate = LocalDateTime.now(); } else { gameDate = LocalDate.parse(dateString, dateFormatter).atStartOfDay(); } } } } }