Commit e74469ab1ecc7227e4b0959f7e16ef42449690a1

Authored by Josh Klontz
1 parent ca8fca95

br-crawl now crawlGallery

app/CMakeLists.txt
@@ -6,6 +6,5 @@ add_subdirectory(examples) @@ -6,6 +6,5 @@ add_subdirectory(examples)
6 6
7 # Build additional OpenBR utilities 7 # Build additional OpenBR utilities
8 if(NOT ${BR_EMBEDDED}) 8 if(NOT ${BR_EMBEDDED})
9 - add_subdirectory(br-crawl)  
10 add_subdirectory(br-gui) 9 add_subdirectory(br-gui)
11 endif() 10 endif()
app/br-crawl/CMakeLists.txt deleted
1 -add_executable(br-crawl br-crawl.cpp ${BR_RESOURCES})  
2 -target_link_libraries(br-crawl openbr ${BR_THIRDPARTY_LIBS})  
3 -qt5_use_modules(br-crawl ${QT_DEPENDENCIES})  
4 -install(TARGETS br-crawl RUNTIME DESTINATION bin)  
app/br-crawl/br-crawl.cpp deleted
1 -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *  
2 - * Copyright 2014 Noblis *  
3 - * *  
4 - * Licensed under the Apache License, Version 2.0 (the "License"); *  
5 - * you may not use this file except in compliance with the License. *  
6 - * You may obtain a copy of the License at *  
7 - * *  
8 - * http://www.apache.org/licenses/LICENSE-2.0 *  
9 - * *  
10 - * Unless required by applicable law or agreed to in writing, software *  
11 - * distributed under the License is distributed on an "AS IS" BASIS, *  
12 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *  
13 - * See the License for the specific language governing permissions and *  
14 - * limitations under the License. *  
15 - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */  
16 -  
17 -#include <QtCore>  
18 -  
19 -static void help()  
20 -{  
21 - printf("br-crawl [URL] [args]\n"  
22 - "=====================\n"  
23 - "* __stdin__ - URLs\n"  
24 - "* __stdout__ - Image URLs/JSON\n"  
25 - "\n"  
26 - "_br-crawl_ conducts a recursive descent search for images from a root URL.\n"  
27 - "Crawl will read root URLs from _stdin_ if none are provided.\n"  
28 - "Crawl writes every discovered image URL in a new line to _stdout_.\n"  
29 - "Arguments specifiying the duration of crawl are on a per-root-URL basis.\n"  
30 - "\n"  
31 - "Crawl identifies image URLs based on known image file extensions like `.png`.\n"  
32 - "Crawl is not expected to verify that URLs are images and may produce false positives.\n"  
33 - "\n"  
34 - "Optional Arguments\n"  
35 - "------------------\n"  
36 - "* -auto - Crawl chooses its own root URL (must be specified otherwise).\n"  
37 - "* -depth <int> - The levels to recursively search (unlimited otherwise).\n"  
38 - "* -depthFirst - Depth-first search (breadth-first otherwise).\n"  
39 - "* -help - Print usage information.\n"  
40 - "* -images <int> - The number of image URLs to obtain (unlimited otherwise).\n"  
41 - "* -json - Output JSON instead or URLs.\n"  
42 - "* -time <int> - The seconds to spend searching for images (unlimited otherwise).\n");  
43 -}  
44 -  
45 -static const char *root = NULL;  
46 -static bool autoRoot = false;  
47 -static int depth = INT_MAX;  
48 -static bool depthFirst = false;  
49 -static int images = INT_MAX;  
50 -static bool json = false;  
51 -static int timeLimit = INT_MAX;  
52 -  
53 -static QTime elapsed;  
54 -static int currentImages = 0;  
55 -  
56 -static void crawl(QFileInfo url, int currentDepth = 0)  
57 -{  
58 - if ((currentImages >= images) || (currentDepth >= depth) || (elapsed.elapsed()/1000 >= timeLimit))  
59 - return;  
60 -  
61 - if (url.filePath().startsWith("file://"))  
62 - url = QFileInfo(url.filePath().mid(7));  
63 -  
64 - if (url.isDir()) {  
65 - const QDir dir(url.absoluteFilePath());  
66 - const QFileInfoList files = dir.entryInfoList(QDir::Files);  
67 - const QFileInfoList subdirs = dir.entryInfoList(QDir::Dirs | QDir::NoDotAndDotDot);  
68 - foreach (const QFileInfo &first, depthFirst ? subdirs : files)  
69 - crawl(first, currentDepth + 1);  
70 - foreach (const QFileInfo &second, depthFirst ? files : subdirs)  
71 - crawl(second, currentDepth + 1);  
72 - } else if (url.isFile()) {  
73 - const QString suffix = url.suffix();  
74 - if ((suffix == "bmp") || (suffix == "jpg") || (suffix == "jpeg") || (suffix == "png") || (suffix == "tiff")) {  
75 - printf(json ? "{ \"URL\" : \"file://%s\" }\n" : "file://%s\n", qPrintable(url.canonicalFilePath()));  
76 - fflush(stdout);  
77 - currentImages++;  
78 - }  
79 - }  
80 -}  
81 -  
82 -int main(int argc, char *argv[])  
83 -{  
84 - for (int i=1; i<argc; i++) {  
85 - if (!strcmp(argv[i], "-auto" )) autoRoot = true;  
86 - else if (!strcmp(argv[i], "-depth" )) depth = atoi(argv[++i]);  
87 - else if (!strcmp(argv[i], "-depthFirst")) depthFirst = true;  
88 - else if (!strcmp(argv[i], "-help" )) { help(); exit(EXIT_SUCCESS); }  
89 - else if (!strcmp(argv[i], "-images" )) images = atoi(argv[++i]);  
90 - else if (!strcmp(argv[i], "-json" )) json = true;  
91 - else if (!strcmp(argv[i], "-time" )) timeLimit = atoi(argv[++i]);  
92 - else root = argv[i];  
93 - }  
94 -  
95 - elapsed.start();  
96 - if (root != NULL) {  
97 - crawl(QFileInfo(root));  
98 - } else {  
99 - if (autoRoot) {  
100 - foreach (const QString &path, QStandardPaths::standardLocations(QStandardPaths::HomeLocation))  
101 - crawl(path);  
102 - } else {  
103 - QFile file;  
104 - file.open(stdin, QFile::ReadOnly);  
105 - while (!file.atEnd()) {  
106 - const QString url = QString::fromLocal8Bit(file.readLine()).simplified();  
107 - if (!url.isEmpty())  
108 - crawl(url);  
109 - }  
110 - }  
111 - }  
112 -  
113 - return EXIT_SUCCESS;  
114 -}  
openbr/plugins/gallery.cpp
@@ -333,7 +333,7 @@ class urlGallery : public BinaryGallery @@ -333,7 +333,7 @@ class urlGallery : public BinaryGallery
333 333
334 void writeTemplate(const Template &t) 334 void writeTemplate(const Template &t)
335 { 335 {
336 - const QString url = t.file.get<QString>("URL", ""); 336 + const QString url = t.file.get<QString>("URL", t.file.name);
337 if (!url.isEmpty()) { 337 if (!url.isEmpty()) {
338 gallery.write(qPrintable(url)); 338 gallery.write(qPrintable(url));
339 gallery.write("\n"); 339 gallery.write("\n");
@@ -459,6 +459,93 @@ BR_REGISTER(Gallery, EmptyGallery) @@ -459,6 +459,93 @@ BR_REGISTER(Gallery, EmptyGallery)
459 459
460 /*! 460 /*!
461 * \ingroup galleries 461 * \ingroup galleries
  462 + * \brief Crawl a root location for image files.
  463 + * \author Josh Klontz \cite jklontz
  464 + */
  465 +class crawlGallery : public Gallery // Read-only gallery: collects the image files found under a root location as templates.
  466 +{
  467 + Q_OBJECT
  468 + Q_PROPERTY(bool autoRoot READ get_autoRoot WRITE set_autoRoot RESET reset_autoRoot STORED false)
  469 + Q_PROPERTY(int depth READ get_depth WRITE set_depth RESET reset_depth STORED false)
  470 + Q_PROPERTY(bool depthFirst READ get_depthFirst WRITE set_depthFirst RESET reset_depthFirst STORED false)
  471 + Q_PROPERTY(int images READ get_images WRITE set_images RESET reset_images STORED false)
  472 + Q_PROPERTY(bool json READ get_json WRITE set_json RESET reset_json STORED false)
  473 + Q_PROPERTY(int timeLimit READ get_timeLimit WRITE set_timeLimit RESET reset_timeLimit STORED false)
  474 + BR_PROPERTY(bool, autoRoot, false) // With an empty root: crawl the standard home location(s) instead of reading roots from stdin.
  475 + BR_PROPERTY(int, depth, INT_MAX) // crawl() returns once currentDepth reaches this value.
  476 + BR_PROPERTY(bool, depthFirst, false) // true: subdirectories are visited before files in each directory; false: files first.
  477 + BR_PROPERTY(int, images, INT_MAX) // crawl() returns once this many templates have been collected.
  478 + BR_PROPERTY(bool, json, false) // true: the location is stored under the "URL" metadata key; false: it is stored in the File name.
  479 + BR_PROPERTY(int, timeLimit, INT_MAX) // Budget in whole seconds, counted from the elapsed.start() call in init().
  480 + // Crawl state, filled in by init() and crawl().
  481 + QTime elapsed; // Started in init(); compared against timeLimit on every crawl() call.
  482 + TemplateList templates; // One entry per image found so far; handed out by readBlock().
  483 + // Recursively visits url and appends a template for each image file; currentDepth is the recursion level (0 for a root).
  484 + void crawl(QFileInfo url, int currentDepth = 0)
  485 + {
  486 + if ((templates.size() >= images) || (currentDepth >= depth) || (elapsed.elapsed()/1000 >= timeLimit)) // Any exhausted limit (count, depth, seconds) ends the visit.
  487 + return;
  488 + // Accept "file://" locations by dropping the 7-character scheme prefix.
  489 + if (url.filePath().startsWith("file://"))
  490 + url = QFileInfo(url.filePath().mid(7));
  491 + // Directories recurse; regular files are kept only when their suffix is in the list below.
  492 + if (url.isDir()) {
  493 + const QDir dir(url.absoluteFilePath());
  494 + const QFileInfoList files = dir.entryInfoList(QDir::Files);
  495 + const QFileInfoList subdirs = dir.entryInfoList(QDir::Dirs | QDir::NoDotAndDotDot);
  496 + foreach (const QFileInfo &first, depthFirst ? subdirs : files) // depthFirst decides which of the two lists is walked first.
  497 + crawl(first, currentDepth + 1);
  498 + foreach (const QFileInfo &second, depthFirst ? files : subdirs)
  499 + crawl(second, currentDepth + 1);
  500 + } else if (url.isFile()) {
  501 + const QString suffix = url.suffix(); // NOTE(review): compared case-sensitively below, so "JPG" or "tif" would not match - confirm this is intended.
  502 + if ((suffix == "bmp") || (suffix == "jpg") || (suffix == "jpeg") || (suffix == "png") || (suffix == "tiff")) {
  503 + File f;
  504 + if (json) f.set("URL", "file://"+url.canonicalFilePath()); // In json mode f.name is left at its default.
  505 + else f.name = "file://"+url.canonicalFilePath();
  506 + templates.append(f);
  507 + }
  508 + }
  509 + }
  510 + // Starts the timer, determines the root location(s) and crawls them, filling templates.
  511 + void init()
  512 + {
  513 + elapsed.start();
  514 + const QString root = file.name.mid(0, file.name.size()-6); // Drop the last 6 characters, assumed to be the ".crawl" suffix. NOTE(review): the suffix is not checked before stripping.
  515 + if (!root.isEmpty()) {
  516 + crawl(root);
  517 + } else {
  518 + if (autoRoot) { // No explicit root: fall back to the standard home location(s).
  519 + foreach (const QString &path, QStandardPaths::standardLocations(QStandardPaths::HomeLocation))
  520 + crawl(path);
  521 + } else { // Otherwise read root locations from standard input, one per line.
  522 + QFile file; // NOTE(review): this local hides the outer `file` that was read a few lines above.
  523 + file.open(stdin, QFile::ReadOnly); // NOTE(review): the result of open() is not checked.
  524 + while (!file.atEnd()) {
  525 + const QString url = QString::fromLocal8Bit(file.readLine()).simplified(); // simplified() trims the line, including its newline.
  526 + if (!url.isEmpty())
  527 + crawl(url);
  528 + }
  529 + }
  530 + }
  531 + }
  532 + // Returns everything gathered by init() in one block.
  533 + TemplateList readBlock(bool *done)
  534 + {
  535 + *done = true; // The whole list is returned at once, so *done is set on the first call.
  536 + return templates;
  537 + }
  538 + // Writing is not supported: any call aborts through qFatal().
  539 + void write(const Template &)
  540 + {
  541 + qFatal("Not supported");
  542 + }
  543 +};
  544 +
  545 +BR_REGISTER(Gallery, crawlGallery)
  546 +
  547 +/*!
  548 + * \ingroup galleries
462 * \brief Treats the gallery as a br::Format. 549 * \brief Treats the gallery as a br::Format.
463 * \author Josh Klontz \cite jklontz 550 * \author Josh Klontz \cite jklontz
464 */ 551 */