Commit e74469ab1ecc7227e4b0959f7e16ef42449690a1
1 parent: ca8fca95
br-crawl now crawlGallery
Showing 4 changed files with 88 additions and 120 deletions
app/CMakeLists.txt
app/br-crawl/CMakeLists.txt deleted
app/br-crawl/br-crawl.cpp deleted
| 1 | -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | |
| 2 | - * Copyright 2014 Noblis * | |
| 3 | - * * | |
| 4 | - * Licensed under the Apache License, Version 2.0 (the "License"); * | |
| 5 | - * you may not use this file except in compliance with the License. * | |
| 6 | - * You may obtain a copy of the License at * | |
| 7 | - * * | |
| 8 | - * http://www.apache.org/licenses/LICENSE-2.0 * | |
| 9 | - * * | |
| 10 | - * Unless required by applicable law or agreed to in writing, software * | |
| 11 | - * distributed under the License is distributed on an "AS IS" BASIS, * | |
| 12 | - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * | |
| 13 | - * See the License for the specific language governing permissions and * | |
| 14 | - * limitations under the License. * | |
| 15 | - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ | |
| 16 | - | |
| 17 | -#include <QtCore> | |
| 18 | - | |
| 19 | -static void help() | |
| 20 | -{ | |
| 21 | - printf("br-crawl [URL] [args]\n" | |
| 22 | - "=====================\n" | |
| 23 | - "* __stdin__ - URLs\n" | |
| 24 | - "* __stdout__ - Image URLs/JSON\n" | |
| 25 | - "\n" | |
| 26 | - "_br-crawl_ conducts a recursive descent search for images from a root URL.\n" | |
| 27 | - "Crawl will read root URLs from _stdin_ if none are provided.\n" | |
| 28 | - "Crawl writes every discovered image URL in a new line to _stdout_.\n" | |
| 29 | - "Arguments specifiying the duration of crawl are on a per-root-URL basis.\n" | |
| 30 | - "\n" | |
| 31 | - "Crawl identifies image URLs based on known image file extensions like `.png`.\n" | |
| 32 | - "Crawl is not expected to verify that URLs are images and may produce false positives.\n" | |
| 33 | - "\n" | |
| 34 | - "Optional Arguments\n" | |
| 35 | - "------------------\n" | |
| 36 | - "* -auto - Crawl chooses its own root URL (must be specified otherwise).\n" | |
| 37 | - "* -depth <int> - The levels to recursively search (unlimited otherwise).\n" | |
| 38 | - "* -depthFirst - Depth-first search (breadth-first otherwise).\n" | |
| 39 | - "* -help - Print usage information.\n" | |
| 40 | - "* -images <int> - The number of image URLs to obtain (unlimited otherwise).\n" | |
| 41 | - "* -json - Output JSON instead or URLs.\n" | |
| 42 | - "* -time <int> - The seconds to spend searching for images (unlimited otherwise).\n"); | |
| 43 | -} | |
| 44 | - | |
| 45 | -static const char *root = NULL; | |
| 46 | -static bool autoRoot = false; | |
| 47 | -static int depth = INT_MAX; | |
| 48 | -static bool depthFirst = false; | |
| 49 | -static int images = INT_MAX; | |
| 50 | -static bool json = false; | |
| 51 | -static int timeLimit = INT_MAX; | |
| 52 | - | |
| 53 | -static QTime elapsed; | |
| 54 | -static int currentImages = 0; | |
| 55 | - | |
| 56 | -static void crawl(QFileInfo url, int currentDepth = 0) | |
| 57 | -{ | |
| 58 | - if ((currentImages >= images) || (currentDepth >= depth) || (elapsed.elapsed()/1000 >= timeLimit)) | |
| 59 | - return; | |
| 60 | - | |
| 61 | - if (url.filePath().startsWith("file://")) | |
| 62 | - url = QFileInfo(url.filePath().mid(7)); | |
| 63 | - | |
| 64 | - if (url.isDir()) { | |
| 65 | - const QDir dir(url.absoluteFilePath()); | |
| 66 | - const QFileInfoList files = dir.entryInfoList(QDir::Files); | |
| 67 | - const QFileInfoList subdirs = dir.entryInfoList(QDir::Dirs | QDir::NoDotAndDotDot); | |
| 68 | - foreach (const QFileInfo &first, depthFirst ? subdirs : files) | |
| 69 | - crawl(first, currentDepth + 1); | |
| 70 | - foreach (const QFileInfo &second, depthFirst ? files : subdirs) | |
| 71 | - crawl(second, currentDepth + 1); | |
| 72 | - } else if (url.isFile()) { | |
| 73 | - const QString suffix = url.suffix(); | |
| 74 | - if ((suffix == "bmp") || (suffix == "jpg") || (suffix == "jpeg") || (suffix == "png") || (suffix == "tiff")) { | |
| 75 | - printf(json ? "{ \"URL\" : \"file://%s\" }\n" : "file://%s\n", qPrintable(url.canonicalFilePath())); | |
| 76 | - fflush(stdout); | |
| 77 | - currentImages++; | |
| 78 | - } | |
| 79 | - } | |
| 80 | -} | |
| 81 | - | |
| 82 | -int main(int argc, char *argv[]) | |
| 83 | -{ | |
| 84 | - for (int i=1; i<argc; i++) { | |
| 85 | - if (!strcmp(argv[i], "-auto" )) autoRoot = true; | |
| 86 | - else if (!strcmp(argv[i], "-depth" )) depth = atoi(argv[++i]); | |
| 87 | - else if (!strcmp(argv[i], "-depthFirst")) depthFirst = true; | |
| 88 | - else if (!strcmp(argv[i], "-help" )) { help(); exit(EXIT_SUCCESS); } | |
| 89 | - else if (!strcmp(argv[i], "-images" )) images = atoi(argv[++i]); | |
| 90 | - else if (!strcmp(argv[i], "-json" )) json = true; | |
| 91 | - else if (!strcmp(argv[i], "-time" )) timeLimit = atoi(argv[++i]); | |
| 92 | - else root = argv[i]; | |
| 93 | - } | |
| 94 | - | |
| 95 | - elapsed.start(); | |
| 96 | - if (root != NULL) { | |
| 97 | - crawl(QFileInfo(root)); | |
| 98 | - } else { | |
| 99 | - if (autoRoot) { | |
| 100 | - foreach (const QString &path, QStandardPaths::standardLocations(QStandardPaths::HomeLocation)) | |
| 101 | - crawl(path); | |
| 102 | - } else { | |
| 103 | - QFile file; | |
| 104 | - file.open(stdin, QFile::ReadOnly); | |
| 105 | - while (!file.atEnd()) { | |
| 106 | - const QString url = QString::fromLocal8Bit(file.readLine()).simplified(); | |
| 107 | - if (!url.isEmpty()) | |
| 108 | - crawl(url); | |
| 109 | - } | |
| 110 | - } | |
| 111 | - } | |
| 112 | - | |
| 113 | - return EXIT_SUCCESS; | |
| 114 | -} |
openbr/plugins/gallery.cpp
| ... | ... | @@ -333,7 +333,7 @@ class urlGallery : public BinaryGallery |
| 333 | 333 | |
| 334 | 334 | void writeTemplate(const Template &t) |
| 335 | 335 | { |
| 336 | - const QString url = t.file.get<QString>("URL", ""); | |
| 336 | + const QString url = t.file.get<QString>("URL", t.file.name); | |
| 337 | 337 | if (!url.isEmpty()) { |
| 338 | 338 | gallery.write(qPrintable(url)); |
| 339 | 339 | gallery.write("\n"); |
| ... | ... | @@ -459,6 +459,93 @@ BR_REGISTER(Gallery, EmptyGallery) |
| 459 | 459 | |
| 460 | 460 | /*! |
| 461 | 461 | * \ingroup galleries |
| 462 | + * \brief Crawl a root location for image files. | |
| 463 | + * \author Josh Klontz \cite jklontz | |
| 464 | + */ | |
| 465 | +class crawlGallery : public Gallery | |
| 466 | +{ | |
| 467 | + Q_OBJECT | |
| 468 | + Q_PROPERTY(bool autoRoot READ get_autoRoot WRITE set_autoRoot RESET reset_autoRoot STORED false) | |
| 469 | + Q_PROPERTY(int depth READ get_depth WRITE set_depth RESET reset_depth STORED false) | |
| 470 | + Q_PROPERTY(bool depthFirst READ get_depthFirst WRITE set_depthFirst RESET reset_depthFirst STORED false) | |
| 471 | + Q_PROPERTY(int images READ get_images WRITE set_images RESET reset_images STORED false) | |
| 472 | + Q_PROPERTY(bool json READ get_json WRITE set_json RESET reset_json STORED false) | |
| 473 | + Q_PROPERTY(int timeLimit READ get_timeLimit WRITE set_timeLimit RESET reset_timeLimit STORED false) | |
| 474 | + BR_PROPERTY(bool, autoRoot, false) | |
| 475 | + BR_PROPERTY(int, depth, INT_MAX) | |
| 476 | + BR_PROPERTY(bool, depthFirst, false) | |
| 477 | + BR_PROPERTY(int, images, INT_MAX) | |
| 478 | + BR_PROPERTY(bool, json, false) | |
| 479 | + BR_PROPERTY(int, timeLimit, INT_MAX) | |
| 480 | + | |
| 481 | + QTime elapsed; | |
| 482 | + TemplateList templates; | |
| 483 | + | |
| 484 | + void crawl(QFileInfo url, int currentDepth = 0) | |
| 485 | + { | |
| 486 | + if ((templates.size() >= images) || (currentDepth >= depth) || (elapsed.elapsed()/1000 >= timeLimit)) | |
| 487 | + return; | |
| 488 | + | |
| 489 | + if (url.filePath().startsWith("file://")) | |
| 490 | + url = QFileInfo(url.filePath().mid(7)); | |
| 491 | + | |
| 492 | + if (url.isDir()) { | |
| 493 | + const QDir dir(url.absoluteFilePath()); | |
| 494 | + const QFileInfoList files = dir.entryInfoList(QDir::Files); | |
| 495 | + const QFileInfoList subdirs = dir.entryInfoList(QDir::Dirs | QDir::NoDotAndDotDot); | |
| 496 | + foreach (const QFileInfo &first, depthFirst ? subdirs : files) | |
| 497 | + crawl(first, currentDepth + 1); | |
| 498 | + foreach (const QFileInfo &second, depthFirst ? files : subdirs) | |
| 499 | + crawl(second, currentDepth + 1); | |
| 500 | + } else if (url.isFile()) { | |
| 501 | + const QString suffix = url.suffix(); | |
| 502 | + if ((suffix == "bmp") || (suffix == "jpg") || (suffix == "jpeg") || (suffix == "png") || (suffix == "tiff")) { | |
| 503 | + File f; | |
| 504 | + if (json) f.set("URL", "file://"+url.canonicalFilePath()); | |
| 505 | + else f.name = "file://"+url.canonicalFilePath(); | |
| 506 | + templates.append(f); | |
| 507 | + } | |
| 508 | + } | |
| 509 | + } | |
| 510 | + | |
| 511 | + void init() | |
| 512 | + { | |
| 513 | + elapsed.start(); | |
| 514 | + const QString root = file.name.mid(0, file.name.size()-6); // Remove .crawl suffix"; | |
| 515 | + if (!root.isEmpty()) { | |
| 516 | + crawl(root); | |
| 517 | + } else { | |
| 518 | + if (autoRoot) { | |
| 519 | + foreach (const QString &path, QStandardPaths::standardLocations(QStandardPaths::HomeLocation)) | |
| 520 | + crawl(path); | |
| 521 | + } else { | |
| 522 | + QFile file; | |
| 523 | + file.open(stdin, QFile::ReadOnly); | |
| 524 | + while (!file.atEnd()) { | |
| 525 | + const QString url = QString::fromLocal8Bit(file.readLine()).simplified(); | |
| 526 | + if (!url.isEmpty()) | |
| 527 | + crawl(url); | |
| 528 | + } | |
| 529 | + } | |
| 530 | + } | |
| 531 | + } | |
| 532 | + | |
| 533 | + TemplateList readBlock(bool *done) | |
| 534 | + { | |
| 535 | + *done = true; | |
| 536 | + return templates; | |
| 537 | + } | |
| 538 | + | |
| 539 | + void write(const Template &) | |
| 540 | + { | |
| 541 | + qFatal("Not supported"); | |
| 542 | + } | |
| 543 | +}; | |
| 544 | + | |
| 545 | +BR_REGISTER(Gallery, crawlGallery) | |
| 546 | + | |
| 547 | +/*! | |
| 548 | + * \ingroup galleries | |
| 462 | 549 | * \brief Treats the gallery as a br::Format. |
| 463 | 550 | * \author Josh Klontz \cite jklontz |
| 464 | 551 | */ | ... | ... |