/*
* lContentDownloader
*
* Downloads files from websites which use numbers as page iterators.
* e.g. http://www.notexistingexample.com/posts/12
* This program is mainly written for downloading images from blogs.
* The files to download are selected with a regular expression, which is matched against each
* "src=" token found in the page.
* The program exits after 50 failed pages in a row (file already exists, connection problem,
* nothing found on the page, etc.)
*
* Tested with:
* http://www.photoschau.de/?paged=* regex: uploads
* http://momentslikethis.de/page/* regex: uploads
* http://blog.flickr.net/en/page/* regex: staticflickr (need a better regex)
* http://www.inspirational-images.tumblr.com/page/* regex: tumblr_
* http://www.philmfotos.tumblr.com/page/* regex: tumblr_
*
* @author László Ádám
* january 6. 2014
* @version 0.1
*
*/

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line downloader that walks numbered pages of a website and saves every file
 * referenced by a {@code src=} token that matches a user-supplied regular expression.
 *
 * <p>All state is static; the class is driven through {@link #main(String[])}.
 */
class lContentDownloader {

  /** Number of failed pages in a row after which {@link #main(String[])} stops. */
  private static final int MAX_FAILS_IN_A_ROW = 50;

  /** Connect and read timeout, so a stalled server cannot hang the program forever. */
  private static final int TIMEOUT_MILLIS = 30000;

  /** Size of the copy buffer used while saving a file. */
  private static final int BUFFER_SIZE = 4096;

  /** Characters that cannot be part of a URL inside an HTML attribute; they end the URL. */
  private static final String URL_TERMINATORS = "\"'<>\\";

  /** Total number of bytes saved so far; a long so that more than 2 GiB does not overflow. */
  private static long total;

  /**
   * Downloads one file and stores it in the given directory under the last path segment
   * of its URL.
   *
   * @param fileurl absolute URL of the file; an empty string means the caller could not
   *     build an absolute URL and is rejected
   * @param path directory to save into (with or without a trailing separator)
   * @return {@code true} only if the file was completely downloaded and written;
   *     {@code false} if the URL or path is unusable, the target file already exists,
   *     or any error occurred while downloading
   */
  public static boolean downloadAndSave(String fileurl, String path) {
    if (fileurl == null || fileurl.equals("")) {
      System.out.println("Relative path, sorry...");
      return false;
    }
    if (path == null || path.equals("")) {
      System.out.println("\tNo target directory given.");
      return false;
    }

    // String.split drops trailing empty segments, so a URL made only of slashes yields nothing.
    String[] segments = fileurl.split("/");
    if (segments.length == 0) {
      System.out.println("\tCannot derive a file name from: " + fileurl);
      return false;
    }

    File target = new File(path, segments[segments.length - 1]);
    if (target.exists()) {
      System.out.println("\tThe file is already exists.");
      return false;
    }

    long size = 0;
    boolean complete = false;
    try {
      URLConnection urlConn = new URL(fileurl).openConnection();
      urlConn.setConnectTimeout(TIMEOUT_MILLIS);
      urlConn.setReadTimeout(TIMEOUT_MILLIS);

      // The input stream is opened first: if the request fails, no local file is created.
      try (InputStream is = urlConn.getInputStream();
          FileOutputStream fos = new FileOutputStream(target)) {
        byte[] buffer = new byte[BUFFER_SIZE];
        int len;
        // read() signals end of stream with -1.
        while ((len = is.read(buffer)) != -1) {
          fos.write(buffer, 0, len);
          size += len;
        }
      }
      complete = true;
    } catch (Exception e) {
      // Best effort: any failure (bad URL, I/O error, ...) is reported and counted as a miss.
      System.out.println(e);
    } finally {
      // Remove a partial file so that a later run can retry this download.
      if (!complete && target.exists()) {
        target.delete();
      }
    }

    if (!complete) {
      return false;
    }
    total += size;
    System.out.println("\tDownloaded: [~" + size/1024 + " KB]  Total: [~" + total/1024/1024 + " MB]");
    return true;
  }

  /**
   * Fetches one page, looks for whitespace-separated tokens that contain {@code src=} and
   * match the regular expression, and downloads the URL found in each of them.
   *
   * @param address URL of the page to scan
   * @param path directory to save the downloaded files into
   * @param regex regular expression searched for (with {@code find}) in each {@code src=} token
   * @return {@code true} if at least one matching token was found and every download
   *     succeeded; {@code false} if nothing matched, any download failed, or the page
   *     could not be read
   */
  public static boolean download(String address, String path, String regex) {
    boolean found = false;
    boolean error = false;
    try {
      System.out.println("Looking up: " + address);
      Pattern pattern = Pattern.compile(regex);
      String page = readPage(address);

      // Split on any whitespace so attributes separated by tabs or line breaks stay apart.
      for (String token : page.split("\\s+")) {
        if (!token.contains("src=") || !pattern.matcher(token).find()) {
          continue;
        }
        found = true;
        String fileurl = extractUrl(token);
        System.out.println("\tFound: "+ fileurl);
        if (!downloadAndSave(fileurl, path)) {
          error = true;
        }
      }
    } catch (Exception e) {
      // Best effort: an unreadable page or invalid regex counts as a failed page.
      System.out.println(e);
      return false;
    }

    if (!found) {
      System.out.println("\tNothing found here.");
      return false;
    }
    return !error;
  }

  /** Reads the whole page at the given address, keeping line breaks as token separators. */
  private static String readPage(String address) throws IOException {
    URLConnection conn = new URL(address).openConnection();
    conn.setConnectTimeout(TIMEOUT_MILLIS);
    conn.setReadTimeout(TIMEOUT_MILLIS);

    StringBuilder page = new StringBuilder();
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
      String line;
      while ((line = reader.readLine()) != null) {
        page.append(line).append('\n');
      }
    }
    return page.toString();
  }

  /** Returns the first absolute http(s) URL inside the token, or "" if there is none. */
  private static String extractUrl(String token) {
    int start = token.indexOf("http://");
    int secure = token.indexOf("https://");
    if (start < 0 || (secure >= 0 && secure < start)) {
      start = secure;
    }
    if (start < 0) {
      return "";
    }
    // The URL may start after a prefix (e.g. another attribute name) and ends at the
    // closing quote or tag character, wherever that is in the token.
    int end = start;
    while (end < token.length() && URL_TERMINATORS.indexOf(token.charAt(end)) < 0) {
      end++;
    }
    return token.substring(start, end);
  }

  /**
   * Entry point. Expects three arguments: the page address containing a {@code *}
   * placeholder, the directory to save into, and the regular expression. Pages are
   * requested with the placeholder replaced by 1, 2, 3, ... until
   * {@value #MAX_FAILS_IN_A_ROW} pages in a row have failed.
   *
   * @param args address, path and regex, in that order
   * @throws Exception declared for compatibility; failures of single pages are handled
   *     inside {@link #download(String, String, String)}
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 3) {
      System.out.println("Usage:\njava lContentDownloader address path regex");
      System.out.println("- address: URL, must contain '*' substitution character");
      System.out.println("- path: path to save files");
      System.out.println("- regex: regular expression to specificate the requested content's urls");
      System.exit(1);
    }

    if (!args[0].contains("*")) {
      System.out.println("URL must contain the '*' substitution character.");
      System.exit(1);
    }

    File f = new File(args[1]);
    if (!f.exists() || !f.isDirectory()) {
      System.out.println("The path does not exist or is not a directory.");
      System.exit(1);
    }

    // Reject a broken pattern up front instead of failing on every single page.
    try {
      Pattern.compile(args[2]);
    } catch (IllegalArgumentException e) {
      System.out.println("The regular expression is not valid: " + e.getMessage());
      System.exit(1);
    }

    total = 0;
    int fails = 0;
    for (int number = 1; fails < MAX_FAILS_IN_A_ROW; number++) {
      boolean succeed = download(args[0].replace("*", Integer.toString(number)), args[1], args[2]);
      if (succeed) {
        fails = 0;
      } else {
        fails++;
        System.out.println("\tFails: " + fails);
      }
    }
    System.out.println(MAX_FAILS_IN_A_ROW + " fails exceeded in a row. The program will quit.");
  }
}