Valhalla Image Crawler

An image crawler from Valhalla! It has a GUI for crawling and downloading images from Tumblr, Flickr, and more.
/*
 * Version 0.2.1 beta
 * Note: Includes code from http://halls-of-valhalla.org/beta/codes/lcontentdownloader-regex-multi-file-downloader,80
 */
import java.awt.Component;
import java.awt.Cursor;
import java.awt.Desktop;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.event.KeyEvent;
import java.awt.event.MouseAdapter;
import java.awt.event.MouseEvent;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import javax.swing.ButtonGroup;
import javax.swing.ImageIcon;
import javax.swing.JButton;
import javax.swing.JCheckBox;
import javax.swing.JComboBox;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JRadioButton;
import javax.swing.JScrollPane;
import javax.swing.JSeparator;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;

/**
 * A class for graphical interface.
 *
 * The window uses absolute positioning (null layout). The upper part holds
 * the static controls (target directory, host selection, start/stop), the
 * middle part holds host specific ("dynamic") controls that are rebuilt by
 * {@link #changeLayout(String)}, and the lower part is a read-only log.
 *
 * @author Laszlo Adam
 * @email [email protected]
 * @version 0.1
 */
class GUI implements Runnable, ActionListener {

    private Thread t;
    /* read by the Processor thread, written on the event dispatch thread */
    private volatile String layout;
    private Processor processor;
    /* polled by the main thread, cleared on the event dispatch thread */
    private volatile boolean working;

    /* static GUI components */
    private JFrame frame;
    private JTextArea infoTextArea;
    private JComboBox<String> hostsComboBox;
    private JLabel seemoreLabel;
    private JSeparator topSeparator;
    private JSeparator botSeparator;
    private JFileChooser chooser;
    private JButton chooserButton;
    private JTextField pathTextField;
    private JLabel dirLabel;
    private JLabel hostLabel;
    private JLabel infoLabel;
    private JButton startButton;
    private JButton stopButton;

    /* dynamic GUI components */
    private JTextField textfield1;
    private JTextField textfield2;
    private JButton button1;
    private JLabel label1;
    private JLabel label2;
    private JLabel label3;
    private JCheckBox checkbox1;
    private JCheckBox checkbox2;
    private JRadioButton radiobutton1;
    private JRadioButton radiobutton2;
    private JRadioButton radiobutton3;
    private JRadioButton radiobutton4;
    private JComboBox<String> combobox1;

    /**
     * Constructor. Calls the init() method.
     */
    public GUI() {
        this.init();
    }

    /**
     * Positions a component with absolute coordinates, adds it to the frame
     * and makes it visible.
     */
    private void place(Component component, int x, int y, int width, int height) {
        component.setBounds(x, y, width, height);
        this.frame.add(component);
        component.setVisible(true);
    }

    /**
     * Initializes the static GUI components.
     */
    private void init() {
        this.frame = new JFrame("Valhalla Image Crawler 0.2.1 beta");
        URL iconURL = getClass().getResource("favicon.png");
        if (iconURL != null) {
            ImageIcon icon = new ImageIcon(iconURL);
            this.frame.setIconImage(icon.getImage());
        }
        this.frame.setSize(600, 700);
        this.frame.setLayout(null);
        this.frame.setResizable(false);
        this.frame.setLocationRelativeTo(null);
        /* closing is handled by onExit() so the worker can be stopped first */
        this.frame.setDefaultCloseOperation(JFrame.DO_NOTHING_ON_CLOSE);
        this.frame.addWindowListener(new WindowAdapter() {
            @Override
            public void windowClosing(WindowEvent evt) {
                onExit();
            }
        });
        this.frame.setVisible(true);

        this.infoTextArea = new JTextArea();
        this.infoTextArea.setEditable(false);
        this.infoTextArea.setLineWrap(true);
        JScrollPane scroll = new JScrollPane(this.infoTextArea,
                JScrollPane.VERTICAL_SCROLLBAR_ALWAYS,
                JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
        this.place(scroll, 25, 370, 550, 280);

        this.hostsComboBox = new JComboBox<String>();
        this.hostsComboBox.addItem("Flickr");
        this.hostsComboBox.addItem("MOMENTS LIKE THIS");
        this.hostsComboBox.addItem(":photoschau");
        this.hostsComboBox.addItem("Tumblr");
        this.hostsComboBox.addItem("---custom---");
        /* registered after the items so filling the box fires no layout change */
        this.hostsComboBox.addActionListener(this);
        this.place(this.hostsComboBox, 375, 30, 200, 20);

        this.seemoreLabel = new JLabel(
                "<html>See more: <a href=\"http://halls-of-valhalla.org\">halls-of-valhalla.org</a></html>");
        this.seemoreLabel.setCursor(new Cursor(Cursor.HAND_CURSOR));
        this.seemoreLabel.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent e) {
                try {
                    Desktop.getDesktop().browse(
                            new URI("http://halls-of-valhalla.org"));
                } catch (URISyntaxException | IOException
                        | UnsupportedOperationException ex) {
                    /* Desktop is not available on every platform */
                    JOptionPane.showMessageDialog(null,
                            "Cannot open external browser.", "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        });
        this.place(this.seemoreLabel, 150, 652, 600, 20);
        this.seemoreLabel.setAlignmentX(300);

        this.topSeparator = new JSeparator();
        this.place(this.topSeparator, 25, 55, 550, 20);

        this.botSeparator = new JSeparator();
        this.place(this.botSeparator, 25, 345, 550, 20);

        /*
         * The String constructor of JFileChooser takes a start directory,
         * not a caption, so the caption is set explicitly.
         */
        this.chooser = new JFileChooser();
        this.chooser.setDialogTitle("Choose directory");
        this.chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
        this.chooser.setAcceptAllFileFilterUsed(false);
        this.chooser.setMultiSelectionEnabled(false);

        this.pathTextField = new JTextField();
        this.pathTextField.setEditable(false);
        this.place(this.pathTextField, 25, 30, 250, 20);

        this.chooserButton = new JButton("...");
        this.chooserButton.addActionListener(this);
        this.place(this.chooserButton, 280, 30, 50, 20);

        this.dirLabel = new JLabel("Saving directory");
        this.place(this.dirLabel, 25, 10, 200, 20);

        this.hostLabel = new JLabel("Host");
        this.place(this.hostLabel, 375, 10, 200, 20);

        this.infoLabel = new JLabel("Info");
        this.place(this.infoLabel, 25, 350, 200, 20);

        this.startButton = new JButton("Start");
        this.startButton.addActionListener(this);
        this.place(this.startButton, 25, 300, 100, 20);

        this.stopButton = new JButton("Stop");
        this.stopButton.setEnabled(false);
        this.stopButton.addActionListener(this);
        this.place(this.stopButton, 475, 300, 100, 20);

        this.layout = this.hostsComboBox.getSelectedItem().toString();
        this.changeLayout(this.layout);
    }

    /**
     * Start the thread with this.
     */
    public void start() {
        this.t = new Thread(this);
        this.t.start();
        this.working = true;
    }

    /**
     * Stops the thread.
     */
    public void stop() {
        this.t = null;
    }

    /**
     * Implements the abstract run() method. Intentionally empty.
     */
    @Override
    public void run() {
    }

    /**
     * @param processor
     *            The GUI will call this to start and handle the execution.
     */
    public void setProcessor(Processor processor) {
        this.processor = processor;
    }

    /**
     * @return Determines if the thread is running or not.
     */
    public boolean isWorking() {
        return this.working;
    }

    /**
     * Correctly stops the threads before the program quits.
     */
    private void onExit() {
        /* the window is visible before setProcessor() has been called */
        if (this.processor != null) {
            this.processor.stopSequence();
            this.processor.stop();
        }
        this.stop();
        this.working = false;
    }

    /**
     *
     * @return The type of the actual layout.
     */
    public String getlayout() {
        return this.layout;
    }

    /**
     * Returns the data which is needed to crawl Flickr images.
     *
     * @return A String contains the data. The delimiter is '|'. In Explore
     *         mode only the directory and the word "Explore" are returned.
     */
    public String getFlickrData() {
        if (this.radiobutton1.isSelected()) {
            return this.pathTextField.getText() + "/|Explore";
        }
        String where = this.radiobutton3.isSelected() ? "text" : "tags";
        return this.pathTextField.getText() + "/|Search|"
                + this.textfield1.getText() + "|" + where + "|"
                + this.checkbox1.isSelected();
    }

    /**
     * Returns the data which is needed to crawl Photoschau images.
     *
     * @return A String contains the data. The delimiter is '|'.
     */
    public String getPhotoschauData() {
        return this.pathTextField.getText() + "/|"
                + Boolean.toString(this.checkbox1.isSelected());
    }

    /**
     * Returns the data which is needed to crawl Tumblr images.
     *
     * @return A String contains the data. The delimiter is '|'.
     */
    public String getTumblrData() {
        String s = "";
        if (this.radiobutton1.isSelected()) {
            s = "P";
        }
        if (this.radiobutton2.isSelected()) {
            s = "G";
        }
        if (this.radiobutton3.isSelected()) {
            s = "B";
        }
        return this.textfield1.getText() + "|" + this.pathTextField.getText()
                + "/|" + this.checkbox1.isSelected() + "|" + s + "|"
                + this.checkbox2.isSelected() + "|"
                + this.combobox1.getSelectedItem().toString();
    }

    /**
     * This method is a signal to the gui, and indicates that the downloads has
     * been finished and a new sequence can be started. May be called from any
     * thread; the components are touched on the event dispatch thread.
     */
    public void processFinished() {
        if (SwingUtilities.isEventDispatchThread()) {
            this.unlockControls();
        } else {
            SwingUtilities.invokeLater(new Runnable() {
                @Override
                public void run() {
                    GUI.this.unlockControls();
                }
            });
        }
    }

    /**
     * Re-enables the controls that are locked while a download is running.
     */
    private void unlockControls() {
        this.startButton.setEnabled(true);
        this.hostsComboBox.setEnabled(true);
        this.stopButton.setEnabled(false);
        this.chooserButton.setEnabled(true);
    }

    /**
     * This method will change the layout.
     *
     * @param layout
     *            Determines which layout must be shown.
     */
    public void changeLayout(String layout) {
        this.layout = layout;
        this.hideDynamics();
        /* height: 75-300 */
        if (layout.equals("Tumblr")) {
            this.buildTumblrLayout();
        } else if (layout.equals("Flickr")) {
            this.buildFlickrLayout();
        } else if (layout.equals("---custom---")) {
            this.label1 = new JLabel(
                    ">.< soon in 1.0. Until that, check lContentManager on Valhalla!");
            this.place(this.label1, 25, 80, 500, 20);
        } else if (layout.equals(":photoschau")
                || layout.equals("MOMENTS LIKE THIS")) {
            this.checkbox1 = new JCheckBox("Stop if the file already exists");
            this.place(this.checkbox1, 25, 80, 300, 20);
        }
        this.frame.repaint();
    }

    /**
     * Creates the host specific controls of the Tumblr layout.
     */
    private void buildTumblrLayout() {
        this.label1 = new JLabel("www.");
        this.place(this.label1, 25, 80, 50, 20);

        this.textfield1 = new JTextField();
        this.place(this.textfield1, 75, 80, 200, 20);

        this.label2 = new JLabel(".tumblr.com");
        this.place(this.label2, 280, 80, 100, 20);

        this.checkbox1 = new JCheckBox("Only high resolution (ignores gifs).");
        this.place(this.checkbox1, 25, 120, 300, 20);

        this.checkbox2 = new JCheckBox("Stop if the file already exists.");
        this.place(this.checkbox2, 25, 140, 300, 20);

        this.radiobutton1 = new JRadioButton("Only pictures.");
        this.radiobutton1.setMnemonic(KeyEvent.VK_P);
        this.radiobutton1.setSelected(true);
        this.radiobutton2 = new JRadioButton("Only gifs.");
        this.radiobutton2.setMnemonic(KeyEvent.VK_G);
        this.radiobutton2.setSelected(false);
        this.radiobutton3 = new JRadioButton("Both.");
        this.radiobutton3.setMnemonic(KeyEvent.VK_B);
        this.radiobutton3.setSelected(false);
        ButtonGroup group = new ButtonGroup();
        group.add(this.radiobutton1);
        group.add(this.radiobutton2);
        group.add(this.radiobutton3);
        this.place(this.radiobutton1, 25, 180, 200, 20);
        this.place(this.radiobutton2, 25, 200, 200, 20);
        this.place(this.radiobutton3, 25, 220, 200, 20);

        this.label3 = new JLabel(
                "<html>Stop if the number of<br>pages wihout image<br>in a row reaches:</html>");
        this.place(this.label3, 375, 120, 300, 60);

        this.combobox1 = new JComboBox<String>();
        this.combobox1.addItem("1");
        this.combobox1.addItem("10");
        this.combobox1.addItem("50");
        this.combobox1.addItem("1000000");
        this.combobox1.addActionListener(this);
        this.place(this.combobox1, 375, 180, 200, 20);
    }

    /**
     * Creates the host specific controls of the Flickr layout. The search
     * controls start disabled because "Explore" is the default mode.
     */
    private void buildFlickrLayout() {
        this.radiobutton1 = new JRadioButton("Check 20 latest in Explore");
        this.radiobutton1.setSelected(true);
        this.radiobutton1.addActionListener(this);
        this.place(this.radiobutton1, 25, 80, 250, 20);

        this.radiobutton2 = new JRadioButton("Search");
        this.radiobutton2.setSelected(false);
        this.radiobutton2.addActionListener(this);
        this.place(this.radiobutton2, 25, 100, 250, 20);

        ButtonGroup group = new ButtonGroup();
        group.add(this.radiobutton1);
        group.add(this.radiobutton2);

        this.textfield1 = new JTextField();
        this.textfield1.setEditable(false);
        this.place(this.textfield1, 75, 120, 200, 20);

        this.radiobutton3 = new JRadioButton("text");
        this.radiobutton3.setSelected(false);
        this.radiobutton3.setEnabled(false);
        this.place(this.radiobutton3, 75, 140, 100, 20);

        this.radiobutton4 = new JRadioButton("tags");
        this.radiobutton4.setSelected(true);
        this.radiobutton4.setEnabled(false);
        this.place(this.radiobutton4, 225, 140, 100, 20);

        ButtonGroup group2 = new ButtonGroup();
        group2.add(this.radiobutton3);
        group2.add(this.radiobutton4);

        this.checkbox1 = new JCheckBox("Stop if the file already exists");
        /* same state the "Explore" radio button handler produces */
        this.checkbox1.setEnabled(false);
        this.place(this.checkbox1, 75, 160, 300, 20);
    }

    /**
     * Removes a dynamic component from the frame, if it exists.
     */
    private void discard(Component component) {
        if (component != null) {
            this.frame.remove(component);
        }
    }

    /**
     * Clears the actual layout. The references are dropped as well, so a
     * control of a previous layout can never be mistaken for a current one.
     */
    private void hideDynamics() {
        this.discard(this.textfield1);
        this.discard(this.textfield2);
        this.discard(this.button1);
        this.discard(this.label1);
        this.discard(this.label2);
        this.discard(this.label3);
        this.discard(this.checkbox1);
        this.discard(this.checkbox2);
        this.discard(this.radiobutton1);
        this.discard(this.radiobutton2);
        this.discard(this.radiobutton3);
        this.discard(this.radiobutton4);
        this.discard(this.combobox1);
        this.textfield1 = null;
        this.textfield2 = null;
        this.button1 = null;
        this.label1 = null;
        this.label2 = null;
        this.label3 = null;
        this.checkbox1 = null;
        this.checkbox2 = null;
        this.radiobutton1 = null;
        this.radiobutton2 = null;
        this.radiobutton3 = null;
        this.radiobutton4 = null;
        this.combobox1 = null;
    }

    /**
     * Appends a text to the info text area. May be called from any thread;
     * the text area is touched on the event dispatch thread.
     *
     * @param text
     *            String to append.
     */
    public void print(final String text) {
        if (SwingUtilities.isEventDispatchThread()) {
            this.appendInfo(text);
        } else {
            SwingUtilities.invokeLater(new Runnable() {
                @Override
                public void run() {
                    GUI.this.appendInfo(text);
                }
            });
        }
    }

    /**
     * Appends one line to the log and scrolls to the end.
     */
    private void appendInfo(String text) {
        this.infoTextArea.append(text + "\n");
        this.infoTextArea.setCaretPosition(this.infoTextArea.getDocument()
                .getLength());
    }

    /**
     * Implements the abstract actionPerformed() method. Dispatches on the
     * source of the event.
     */
    @Override
    public void actionPerformed(ActionEvent e) {
        Object source = e.getSource();
        if (source == this.chooserButton) {
            int ret = this.chooser.showDialog(this.frame, "Select");
            if (ret == JFileChooser.APPROVE_OPTION) {
                this.pathTextField.setText(this.chooser.getSelectedFile()
                        .getPath());
            }
        } else if (source == this.startButton) {
            if (this.pathTextField.getText().equals("")) {
                this.infoTextArea.setText("Choose a directory above!");
                return;
            }
            if (this.layout.equals("Tumblr")
                    && this.textfield1.getText().equals("")) {
                this.infoTextArea.setText("Complete the URL above!");
                return;
            }
            if (this.layout.equals("Flickr")
                    && this.radiobutton2.isSelected()
                    && this.textfield1.getText().equals("")) {
                this.infoTextArea.setText("Complete the search form!");
                return;
            }
            if (this.layout.equals("---custom---")) {
                this.infoTextArea.setText("");
                return;
            }
            this.infoTextArea.setText("");
            this.startButton.setEnabled(false);
            this.stopButton.setEnabled(true);
            this.hostsComboBox.setEnabled(false);
            this.chooserButton.setEnabled(false);
            this.processor.signal();
        } else if (source == this.hostsComboBox) {
            this.changeLayout(this.hostsComboBox.getSelectedItem().toString());
        } else if (source == this.stopButton) {
            this.processor.stopSequence();
            this.stopButton.setEnabled(false);
        } else if (source == this.radiobutton1) {
            if (this.layout.equals("Flickr")) {
                this.setFlickrSearchEnabled(false);
            }
        } else if (source == this.radiobutton2) {
            if (this.layout.equals("Flickr")) {
                this.setFlickrSearchEnabled(true);
            }
        }
    }

    /**
     * Enables or disables the controls that only apply to a Flickr search.
     */
    private void setFlickrSearchEnabled(boolean enabled) {
        this.textfield1.setEditable(enabled);
        this.radiobutton3.setEnabled(enabled);
        this.radiobutton4.setEnabled(enabled);
        this.checkbox1.setEnabled(enabled);
    }
}

/**
 * This class checks the given URL-s for requested images. Behaves differently
 * to different hosts.
 *
 * The worker thread idles until {@link #signal()} is called, then runs one
 * crawl for the layout currently selected in the GUI and reports back through
 * {@link GUI#print(String)} and {@link GUI#processFinished()}.
 *
 * @author Laszlo Adam
 * @version 0.2.1
 *
 */
class Processor implements Runnable {

    /*
     * A Flickr download of exactly this many bytes is treated as the
     * placeholder served for images whose privacy settings forbid download.
     */
    private static final int FLICKR_BLOCKED_IMAGE_SIZE = 3346;
    /* the Explore crawl ends once this many distinct images were seen */
    private static final int EXPLORE_IMAGE_LIMIT = 20;

    /* written by the GUI / main thread, read by the worker thread */
    private volatile Thread t;
    private GUI gui;
    private volatile boolean signal;
    private volatile boolean stopSignal;

    /**
     * Constructor.
     */
    public Processor() {
        this.signal = false;
        this.stopSignal = false;
    }

    /**
     * Start the thread with this.
     */
    public void start() {
        this.t = new Thread(this);
        this.t.start();
    }

    /**
     * Stops the thread.
     */
    public void stop() {
        this.t = null;
    }

    /**
     * Implements the abstract run() method. Downloads and checks the web pages
     * for fitting links.
     */
    @Override
    public void run() {
        Thread thisThread = Thread.currentThread();
        while (this.t == thisThread) {
            if (this.signal) {
                this.signal = false;
                try {
                    this.crawl(this.gui.getlayout());
                } catch (RuntimeException e) {
                    /*
                     * Malformed input must not kill the worker: the GUI
                     * would stay locked with no way to start a new run.
                     */
                    this.gui.print("Download stopped. Reason: Unexpected error ("
                            + e + ").");
                    this.gui.processFinished();
                }
            } else {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }
    }

    /**
     * Runs one crawl for the given layout. Unknown layouts do nothing.
     */
    private void crawl(String layout) {
        if (layout.equals("Tumblr")) {
            this.crawlTumblr();
        } else if (layout.equals("Flickr")) {
            String[] data = this.gui.getFlickrData().split("\\|");
            if (data[1].equals("Explore")) {
                this.crawlFlickrExplore(data[0]);
            } else if (data[1].equals("Search")) {
                this.crawlFlickrSearch(data);
            }
        } else if (layout.equals(":photoschau")) {
            this.crawlUploads("http://www.photoschau.de/?paged=", false);
        } else if (layout.equals("MOMENTS LIKE THIS")) {
            this.crawlUploads("http://momentslikethis.de/page/", true);
        }
    }

    /**
     * Prints the total size and the reason, then unlocks the GUI.
     */
    private void finish(long size, String reason) {
        this.gui.print("Total: [~" + size / 1024 + "KB] [~" + size / 1024
                / 1024 + "MB]");
        this.gui.print(reason);
        this.gui.processFinished();
    }

    /**
     * Returns the text between the first pair of double quotes of the token,
     * or null if the token has no quoted part.
     */
    private static String quotedPart(String token) {
        String[] parts = token.split("\"");
        return parts.length > 1 ? parts[1] : null;
    }

    /**
     * Returns the last piece of the text after splitting it with the given
     * regular expression, or an empty string if there is no piece at all.
     */
    private static String lastPart(String text, String regex) {
        String[] parts = text.split(regex);
        return parts.length == 0 ? "" : parts[parts.length - 1];
    }

    /**
     * Downloads one image and prints the outcome.
     *
     * @return The number of downloaded bytes, 0 if nothing usable was
     *         downloaded, -1 if the target file already existed.
     */
    private int downloadAndReport(String url, String target,
            boolean checkFlickrPlaceholder) {
        int bytes = Downloader.downloadFile(url, target);
        if (checkFlickrPlaceholder && bytes == FLICKR_BLOCKED_IMAGE_SIZE) {
            this.gui.print("\tThe privacy settings does not allow to download this image.");
            new File(target).delete();
            return 0;
        }
        if (bytes == 0) {
            this.gui.print("\tCannot download picture.");
            return 0;
        }
        if (bytes > 0) {
            this.gui.print("\tDownloaded: [~" + bytes / 1024 + " KB]");
        }
        return bytes;
    }

    /**
     * Tells whether the number after the last '_' of a Tumblr image URL is at
     * least 1280. A non numeric suffix counts as not high resolution.
     */
    private static boolean isHighResolution(String url) {
        String[] pieces = lastPart(url, "_").split("\\.");
        if (pieces.length == 0) {
            return false;
        }
        try {
            return Integer.parseInt(pieces[0]) >= 1280;
        } catch (NumberFormatException e) {
            /* rarely happens */
            return false;
        }
    }

    /**
     * Crawls the pages of a Tumblr blog one after another.
     */
    private void crawlTumblr() {
        String[] data = this.gui.getTumblrData().split("\\|");
        String addressPrefix = "http://www." + data[0] + ".tumblr.com/page/";
        /* the GUI already terminates the directory with a '/' */
        String dirname = data[1];
        boolean onlyhd = Boolean.parseBoolean(data[2]);
        String mode = data[3];
        boolean stopAtExist = Boolean.parseBoolean(data[4]);
        int maxEmptyPages = Integer.parseInt(data[5]);

        int number = 1;
        int emptyPagesInARow = 0;
        boolean pageContainsImage = false;
        boolean existFailure = false;
        long size = 0;
        while (true) {
            if (this.stopSignal) {
                this.finish(size, "Download stopped. Reason: User aborted.");
                return;
            }
            if (!pageContainsImage && number > 1) {
                emptyPagesInARow++;
            } else {
                pageContainsImage = false;
                /* the limit is about pages "in a row", so start over */
                emptyPagesInARow = 0;
            }
            if (emptyPagesInARow == maxEmptyPages) {
                this.finish(size,
                        "Download stopped. Reason: The set amount of pages without images is reached.");
                return;
            }
            if (existFailure) {
                this.finish(size,
                        "Download stopped. Reason: A file already exists.");
                return;
            }
            String actualAddress = addressPrefix + number;
            this.gui.print("Checking: " + actualAddress);
            String page = Downloader.downloadPage(actualAddress);
            if (page == null) {
                this.finish(size,
                        "Download stopped. Reason: Error while downloading page.");
                return;
            }
            for (String token : page.split(" ")) {
                if (this.stopSignal || existFailure) {
                    break;
                }
                if (!token.contains("src=") || !token.contains("tumblr_")
                        || token.contains("static")
                        || token.contains("_iframe")) {
                    continue;
                }
                /* there IS something. */
                String url = quotedPart(token);
                if (url == null) {
                    continue;
                }
                String filename = lastPart(url, "/");
                if (filename.isEmpty()) {
                    continue;
                }
                boolean gif = filename.contains(".gif");
                String stemp = filename.replace("_", "");
                if (gif && stemp.length() + 2 != filename.length()) {
                    /* must be something belongs to the website */
                    continue;
                }
                pageContainsImage = true;
                if (gif) {
                    if (!mode.equals("G") && !mode.equals("B")) {
                        continue;
                    }
                } else {
                    if (mode.equals("G")) {
                        continue;
                    }
                    if (onlyhd && !isHighResolution(url)) {
                        continue;
                    }
                }
                this.gui.print("\tFound: " + filename);
                int got = this.downloadAndReport(url, dirname + filename,
                        false);
                if (got > 0) {
                    size += got;
                } else if (got == -1 && stopAtExist) {
                    existFailure = true;
                }
            }
            number++;
        }
    }

    /**
     * Reloads the Flickr Explore page until enough distinct images were seen.
     *
     * @param path
     *            Target directory, terminated with a '/'.
     */
    private void crawlFlickrExplore(String path) {
        ArrayList<String> seen = new ArrayList<String>();
        String address = "http://m.flickr.com/explore?";
        long size = 0;
        while (true) {
            /* a single page can push the count past the limit */
            if (seen.size() >= EXPLORE_IMAGE_LIMIT) {
                this.finish(size, "Download stopped. Reason: 20 images checked.");
                return;
            }
            if (this.stopSignal) {
                this.finish(size, "Download stopped. Reason: User Aborted.");
                return;
            }
            String page = Downloader.downloadPage(address);
            this.gui.print("Checking: " + address);
            if (page == null) {
                this.finish(size,
                        "Download failed. . Reason: Error while downloading page.");
                return;
            }
            for (String token : page.split(" ")) {
                if (this.stopSignal) {
                    break;
                }
                if (!token.contains("staticflickr")) {
                    continue;
                }
                String url = quotedPart(token);
                if (url == null) {
                    continue;
                }
                /* ask for the big variant instead of the thumbnail */
                url = url.replace("_t.", "_b.");
                String filename = lastPart(url, "/");
                if (filename.isEmpty()) {
                    continue;
                }
                if (!seen.contains(filename)) {
                    this.gui.print("\tFound: " + filename);
                    seen.add(filename);
                }
                int got = this.downloadAndReport(url, path + filename, true);
                if (got > 0) {
                    size += got;
                }
            }
        }
    }

    /**
     * Crawls the result pages of a Flickr search.
     *
     * @param data
     *            The split result of {@link GUI#getFlickrData()}.
     */
    private void crawlFlickrSearch(String[] data) {
        /*
         * error image can appear, thanks for the privacy settings
         */
        String path = data[0];
        String addressPrefix = "http://m.flickr.com/search/?q=" + data[2]
                + "&m=" + data[3] + "&page=";
        boolean stopAtExist = Boolean.parseBoolean(data[4]);
        boolean existFailure = false;
        long size = 0;
        int number = 1;
        while (true) {
            if (this.stopSignal) {
                this.finish(size, "Download stopped. Reason: User Aborted.");
                return;
            }
            if (existFailure) {
                this.finish(size,
                        "Download stopped. Reason: A file already exists.");
                return;
            }
            String actualAddress = addressPrefix + number;
            this.gui.print("Checking: " + actualAddress);
            String page = Downloader.downloadPage(actualAddress);
            if (page == null) {
                this.finish(size,
                        "Download stopped. Reason: Error while downloading page.");
                return;
            }
            if (page.contains("We give up!")) {
                this.finish(size, "Download stopped. Reason: No more pages.");
                return;
            }
            for (String token : page.split(" ")) {
                if (this.stopSignal || existFailure) {
                    break;
                }
                if (!token.contains("staticflickr")) {
                    continue;
                }
                String url = quotedPart(token);
                if (url == null) {
                    continue;
                }
                url = url.replace("_t.", "_b.");
                String filename = lastPart(url, "/");
                if (filename.isEmpty()) {
                    continue;
                }
                this.gui.print("\tFound: " + filename);
                int got = this.downloadAndReport(url, path + filename, true);
                if (got > 0) {
                    size += got;
                } else if (got == -1 && stopAtExist) {
                    existFailure = true;
                }
            }
            number++;
        }
    }

    /**
     * Crawls a paged blog whose images live under an "uploads" path. Used for
     * both :photoschau and MOMENTS LIKE THIS.
     *
     * @param addressPrefix
     *            Page address without the page number.
     * @param skipCssAndLinks
     *            If true, tokens containing "url(" or "href" are ignored.
     */
    private void crawlUploads(String addressPrefix, boolean skipCssAndLinks) {
        String[] data = this.gui.getPhotoschauData().split("\\|");
        String path = data[0];
        boolean stopAtExist = Boolean.parseBoolean(data[1]);
        boolean existFailure = false;
        long size = 0;
        int number = 1;
        while (true) {
            if (this.stopSignal) {
                this.finish(size, "Download stopped. Reason: User Aborted.");
                return;
            }
            if (existFailure) {
                this.finish(size,
                        "Download stopped. Reason: A file already exists.");
                return;
            }
            String actualAddress = addressPrefix + number;
            this.gui.print("Checking: " + actualAddress);
            String page = Downloader.downloadPage(actualAddress);
            if (page == null) {
                this.finish(size,
                        "Download stopped. Reason: Error while downloading page.");
                return;
            }
            for (String token : page.split(" ")) {
                if (this.stopSignal || existFailure) {
                    break;
                }
                if (!token.contains("uploads")) {
                    continue;
                }
                if (skipCssAndLinks
                        && (token.contains("url(") || token.contains("href"))) {
                    continue;
                }
                String url = quotedPart(token);
                if (url == null) {
                    continue;
                }
                String filename = lastPart(url, "/");
                if (filename.isEmpty()) {
                    continue;
                }
                this.gui.print("\tFound: " + filename);
                int got = this.downloadAndReport(url, path + filename, false);
                if (got > 0) {
                    size += got;
                } else if (got == -1 && stopAtExist) {
                    existFailure = true;
                }
            }
            number++;
        }
    }

    /**
     * @param gui
     *            The methods will send text informations to this GUI.
     */
    public void setGui(GUI gui) {
        this.gui = gui;
    }

    /**
     * Tells to stop executing after the current image has been crawled.
     */
    public void stopSequence() {
        this.stopSignal = true;
    }

    /**
     * Indicates that there is new job to do.
     */
    public void signal() {
        this.stopSignal = false;
        this.signal = true;
    }
}

/**
 * Downloads a single file or web page. Contains static methods.
 *
 * @author Laszlo Adam
 * @email [email protected]
 * @version 0.2.1
 */
class Downloader {

    /* static methods only */
    private Downloader() {
    }

    /**
     * Downloads a single file. Nothing is left on disk when the download
     * fails or delivers no data.
     *
     * @param address
     *            URL of the file.
     * @param fileurl
     *            Specifies the file name.
     * @return The downloaded file size in bytes. 0, if the download failed or
     *         was empty. -1, if the file already existed.
     */
    public static int downloadFile(String address, String fileurl) {
        File target = null;
        int size = 0;
        boolean completed = false;
        try {
            target = new File(fileurl);
            if (target.exists()) {
                return -1;
            }
            URLConnection urlConn = new URL(address).openConnection();
            /* the connection is opened first, so a bad address creates no file */
            try (InputStream is = urlConn.getInputStream();
                    FileOutputStream fos = new FileOutputStream(target)) {
                byte[] buffer = new byte[4096];
                int len;
                while ((len = is.read(buffer)) != -1) {
                    size += len;
                    fos.write(buffer, 0, len);
                }
            }
            completed = true;
        } catch (Exception e) {
            System.out.println(e);
        }
        if (!completed || size == 0) {
            /* streams are closed here, so the delete also works on Windows */
            if (target != null) {
                target.delete();
            }
            return 0;
        }
        return size;
    }

    /**
     * Downloads a whole web page. The lines of the page are concatenated
     * without any separator.
     *
     * @param address
     *            URL of the page.
     * @return Returns with the page in String. Returns null, if the download
     *         failed or the page had no lines.
     */
    public static String downloadPage(String address) {
        StringBuilder page = new StringBuilder();
        boolean anyLine = false;
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new URL(address).openStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                anyLine = true;
                page.append(line);
            }
        } catch (Exception e) {
            System.out.println(e);
            return null;
        }
        /* callers use null as their "stop crawling" condition */
        return anyLine ? page.toString() : null;
    }
}

/**
 * Valhalla Image Crawler A web image crawler from Valhalla.
 *
 * http://halls-of-valhalla.org
 *
 * @author Laszlo Adam
 * @email [email protected]
 * @version 0.2
 */
public class ValhallaImageCrawler {

    /**
     * Main. Wires the GUI and the Processor together, then waits until the
     * GUI reports that it is no longer working and terminates the JVM.
     *
     * @param args
     *            Not used.
     */
    public static void main(String[] args) {
        Processor processor = new Processor();
        GUI gui = new GUI();
        processor.setGui(gui);
        gui.setProcessor(processor);
        processor.start();
        gui.start();
        try {
            while (gui.isWorking()) {
                Thread.sleep(1000);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return;
        }
        System.exit(0);
    }
}

This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
Download this code in plain text format here