| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319 |
- package parser;
import java.io.IOException;
import java.sql.SQLException;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSpan;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableCell;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
import com.google.common.base.Strings;
import mysql.Mysql;
- public class OddsPortal implements ParserJoinedFunctions {
- private LocalDateTime baseDate;
- private int currentParsePage;
- private int sportId;
- private int countryId;
- private int leagueId;
- private LocalDateTime gameDate;
- // https://stackoverflow.com/questions/14439991/skip-particular-javascript-execution-in-html-unit Skip url
- public void getMatchesByDate(String date) {
- final String soccerUrl = "https://oddsportal.com/matches/soccer/" + date;
- // final String hockeyUrl = "https://oddsportal.com/matches/hockey/" + date;
- final WebClient webClient = new WebClient();
- webClient.getOptions().setUseInsecureSSL(true);
- webClient.getOptions().setCssEnabled(false);
- webClient.getOptions().setJavaScriptEnabled(true);
- webClient.getOptions().setThrowExceptionOnScriptError(false);
- Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF);
- webClient.waitForBackgroundJavaScript(3000);
- parseSoccerMatches(soccerUrl, webClient);
- webClient.close();
- }
- private void parseSoccerMatches(final String soccerUrl, final WebClient webClient) {
- try {
- System.out.println("Getting Webpage");
- final HtmlPage soccerMatches = webClient.getPage(soccerUrl);
- final HtmlTable matchesTable = soccerMatches.getFirstByXPath("//table[contains(@class, table-main)]");
- final List<HtmlTableRow> rows = matchesTable.getRows();
- String countryName = "";
- String leagueName = "";
- int i = 1;
- final int size = rows.size();
- for (final HtmlTableRow tr : rows) {
- System.out.println("Processing " + i++ + " of " + size);
- if (tr.getAttribute("class").equals("dark center")) {
- final List<HtmlAnchor> countryLeague = tr.getByXPath(".//a");
- countryName = countryLeague.get(0).getTextContent().toLowerCase().trim();
- leagueName = countryLeague.get(1).getTextContent().toLowerCase().trim();
- leagueName = leagueName.replaceAll(" ", "-");
- leagueName = leagueName.replaceAll("\\.", "");
- countryName = countryName.replaceAll(" ", "-");
- countryName = countryName.replaceAll("\\.", "");
- } else {
- final List<HtmlTableCell> cells = tr.getCells();
- final String[] time = cells.get(0).getTextContent().split(":");
- final String[] teams = cells.get(1).getTextContent().split(" - ");
- float odds1 = 0F;
- float oddsX = 0F;
- float odds2 = 0F;
- int homeScore = -1;
- int awayScore = -1;
- boolean overtime = false;
- boolean abandon = false;
- try {
- for (final HtmlTableCell tc : cells) {
- if (tc.getAttribute("class").contains("live-score")) {
- abandon = true;
- break;
- }
- //Score
- if (tc.getAttribute("class").contains("table-score")) {
- final String[] scoreValue = tc.getTextContent().split(":");
- homeScore = Integer.valueOf(scoreValue[0]);
- if (scoreValue[1].matches("\\D+")) {
- overtime = true;
- }
- awayScore = Integer.valueOf(scoreValue[1].replaceAll("\\D+", ""));
- }
- if (tc.getAttribute("class").contains("odds-nowrp")) {
- if (tc.getTextContent().matches("[+-][0-9][0-9][0-9]")) {
- if (odds1 == 0F) {
- odds1 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
- } else if (oddsX == 0F ) {
- oddsX = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
- } else if (odds2 == 0F ) {
- odds2 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
- }
- } else if (tc.getTextContent().matches("[0-9].[0-9]+")) {
- if (odds1 == 0F) {
- odds1 = Float.valueOf(tc.getTextContent());
- } else if (oddsX == 0F ) {
- oddsX = Float.valueOf(tc.getTextContent());
- } else if (odds2 == 0F ) {
- odds2 = Float.valueOf(tc.getTextContent());
- }
- }
- }
- }
- } catch (final NumberFormatException e) {
- System.out.println("Failed to get the match between " + teams[0].trim() + " and " + teams[1].trim() + " at " +
- baseDate.withHour(Integer.valueOf(time[0])).withMinute(Integer.valueOf(time[1])) +
- " odds1: " + odds1 + " oddsX: " + oddsX + " odds2: " + odds2 + " homeScore " + homeScore + " awayScore " + awayScore + " overtime: " + (overtime?"true":"false"));
- continue;
- }
- if (abandon) {
- continue;
- }
- final Mysql mysql = Mysql.getInstance();
- final int leagueId = mysql.addLeague(leagueName, countryName, "soccer");
- final int countryId = mysql.getCountryId(countryName);
- final int sportId = mysql.getSportId("soccer");
- String season = mysql.getLastParsedYear(leagueName, countryId);
- if (Strings.isNullOrEmpty(season)) {
- season = String.valueOf(LocalDateTime.now().getYear());
- }
- final LocalDateTime dt = baseDate.withHour(Integer.valueOf(time[0])).withMinute(Integer.valueOf(time[1])).withSecond(0).withNano(0);
- mysql.addResult("SoccerResults", dt, teams[0].trim(), teams[1].trim(), homeScore, awayScore, overtime, odds1, oddsX, odds2, countryId, season, leagueId, sportId);
- }
- }
- } catch (FailingHttpStatusCodeException | IOException e) {
- e.printStackTrace();
- } catch (final SQLException e) {
- e.printStackTrace();
- }
- }
- public void getHistoricMatches(String sport, String country, String league, String year) {
- final String url = "https://www.oddsportal.com/";
- final String resultsPage = "/results";
- final WebClient webClient = new WebClient();
- webClient.getOptions().setUseInsecureSSL(true);
- webClient.getOptions().setCssEnabled(false);
- webClient.getOptions().setJavaScriptEnabled(true);
- webClient.getOptions().setThrowExceptionOnScriptError(false);
- Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF);
- league = league.replaceAll(" ", "-");
- league = league.replaceAll("\\.", "");
- country = country.replaceAll(" ", "-");
- league = league.replaceAll("\\.", "");
- final Mysql mysql = Mysql.getInstance();
- currentParsePage = 1;
- final String urlYearPart;
- if (year.equals(String.valueOf(LocalDate.now().getYear()))) {
- urlYearPart = "";
- } else {
- urlYearPart = "-" + year;
- }
- try {
- sportId = mysql.getSportId(sport);
- countryId = mysql.getCountryId(country);
- leagueId = mysql.getLeagueId(sportId, countryId, league);
- String season = "";
- final HtmlPage leaguePage = webClient.getPage(url + "/" + sport + "/" + country + "/" + league + urlYearPart + resultsPage);
- final List<HtmlAnchor> yearFilter = leaguePage.getByXPath("//ul[contains(@class,'main-filter')]//a");
- for (final HtmlAnchor a : yearFilter) {
- System.out.println("Year filter: " + a.getHrefAttribute());
- final String active = ((HtmlSpan)a.getParentNode().getParentNode()).getAttribute("class");
- if (active.contains("active") && !active.contains("inactive")) {
- season = a.getTextContent();
- year = season.replace('/', '-');
- }
- }
- HtmlDivision tournamentTableDiv = leaguePage.getHtmlElementById("tournamentTable");
- HtmlTable tournamentTable = (HtmlTable) tournamentTableDiv.getFirstChild();
- gameDate = LocalDateTime.now();
- final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH);
- parseTournamentTable(sportId, countryId, leagueId, season, tournamentTable, gameDate, dateFormatter);
- final HtmlDivision paginationLinksDiv = (HtmlDivision) tournamentTableDiv.getLastChild();
- final List<HtmlAnchor> pagiantionLinks = paginationLinksDiv.getByXPath(".//a[contains(@href, 'page') and not(.//span[contains(@class, 'arrow')])]");
- for (final HtmlAnchor a : pagiantionLinks) {
- System.out.println("Continuing with Pagination: " + a.getHrefAttribute());
- // When done with start page click pagiantion
- final int parsePage = Integer.valueOf(a.getTextContent());
- if (parsePage > currentParsePage) {
- a.click();
- webClient.waitForBackgroundJavaScript(1000);
- tournamentTableDiv = leaguePage.getHtmlElementById("tournamentTable");
- tournamentTable = (HtmlTable) tournamentTableDiv.getFirstChild();
- parseTournamentTable(sportId, countryId, leagueId, season, tournamentTable, gameDate, dateFormatter);
- currentParsePage = parsePage;
- }
- // process new tournament table content
- }
- } catch (FailingHttpStatusCodeException | IOException e) {
- e.printStackTrace();
- } catch (final SQLException sqle) {
- sqle.printStackTrace();
- } catch (final ClassCastException cce) {
- System.out.println("No pagination table");
- // cce.printStackTrace();
- }
- finally {
- Mysql.getInstance().setParsingForLeague(leagueId, sportId, countryId, gameDate, currentParsePage, year);
- }
- webClient.close();
- System.out.println("DONE with " + country + " (" + countryId + ") league " + league + "(" + leagueId + ")");
- }
- private void parseTournamentTable(int sportId, int countryId, int leagueId, String season,
- HtmlTable tournamentTable, LocalDateTime gameDate, DateTimeFormatter dateFormatter) throws SQLException {
- for (final HtmlTableRow tr : tournamentTable.getRows()) {
- if (tr.getAttribute("class").contains("deactivate")) {
- String homeTeam;
- String awayTeam;
- int homeScore = -1;
- int awayScore = -1;
- float odds1 = 0f;
- float oddsX = 0f;
- float odds2 = 0f;
- boolean overtime = false;
- final HtmlTableCell timeCell = tr.getCell(0);
- final HtmlTableCell participantsCell = tr.getCell(1);
- // Game Time
- final String[] timeValue = timeCell.getTextContent().split(":");
- gameDate = gameDate.withHour(Integer.valueOf(timeValue[0]));
- gameDate = gameDate.withMinute(Integer.valueOf(timeValue[1]));
- // Teams
- final String[] participantsValue = participantsCell.getTextContent().split(" - ");
- homeTeam = participantsValue[0].trim();
- awayTeam = participantsValue[1].trim();
- final List<HtmlTableCell> cells = tr.getCells();
- for (final HtmlTableCell tc : cells) {
- //Score
- if (tc.getAttribute("class").contains("table-score")) {
- final String[] scoreValue = tc.getTextContent().split(":");
- if (scoreValue[0].matches("\\D+")) {
- continue;
- }
- homeScore = Integer.valueOf(scoreValue[0]);
- if (scoreValue[1].matches("\\D+")) {
- overtime = true;
- }
- awayScore = Integer.valueOf(scoreValue[1].replaceAll("\\D+", ""));
- }
- if (tc.getAttribute("class").contains("odds-nowrp")) {
- if (tc.getTextContent().matches("[+-][0-9][0-9][0-9]")) {
- if (odds1 == 0F) {
- odds1 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
- } else if (oddsX == 0F ) {
- oddsX = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
- } else if (odds2 == 0F ) {
- odds2 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
- }
- } else if (tc.getTextContent().matches("[0-9].[0-9]+")) {
- if (odds1 == 0F) {
- odds1 = Float.valueOf(tc.getTextContent());
- } else if (oddsX == 0F ) {
- oddsX = Float.valueOf(tc.getTextContent());
- } else if (odds2 == 0F ) {
- odds2 = Float.valueOf(tc.getTextContent());
- }
- }
- }
- }
- if (gameDate != null && homeTeam != null && awayTeam != null &&
- odds1 != 0 && oddsX != 0 && odds2 != 0 && !Strings.isNullOrEmpty(season)) { // All set.. update sql result table
- System.out.println("Adding game between " + homeTeam + " and " + awayTeam + " with score " + homeScore + "-" + awayScore);
- Mysql.getInstance().addResult("SoccerResults", gameDate, homeTeam, awayTeam, homeScore, awayScore, overtime, odds1, oddsX, odds2, countryId, season, leagueId, sportId);
- } else {
- System.out.println(String.format("Not adding, missing somethind.. gameDate: %s, homeTeam %s, awayTeam %s, odds1 %s, oddsX %s, odds2 %s, "
- + "season %s", gameDate, homeTeam, awayTeam, odds1, oddsX, odds2, season));
- }
- } else if (tr.getAttribute("class").contains("center nob-border")) { // Datum rader
- final List<HtmlSpan> dateSpan = tr.getByXPath(".//span[contains(@class, 'datet')]");
- final String dateString = dateSpan.get(0).getTextContent();
- if (dateString.toLowerCase().contains("yesterday")) {
- gameDate = LocalDateTime.now().minusDays(1);
- } else if (dateString.toLowerCase().contains("today")) {
- gameDate = LocalDateTime.now();
- } else {
- gameDate = LocalDate.parse(dateString, dateFormatter).atStartOfDay();
- }
- }
- }
- }
- }
|