=== modified file 'ddeb_retriever.py'
--- ddeb_retriever.py	2014-08-07 09:03:46 +0000
+++ ddeb_retriever.py	2015-04-14 19:16:49 +0000
@@ -10,6 +10,9 @@
 import tarfile
 import argparse
 import fcntl
+import datetime
+
+import wadllib.iso_strptime
 
 import lpinfo
 import archive_tools
@@ -61,6 +64,75 @@
     return result
 
 
+distroarchseries_links = {}
+distroseries_links = {}
+
+def get_suite_for_publication(pub):
+    '''Return the suite for a binary_package_publishing_history object.
+
+    We memoise the series objects, as otherwise this is very slow for lots
+    of publications.
+    '''
+    # Don't be tempted to shorten this using setdefault: its default argument
+    # is always evaluated, so the expensive webservice fetches of
+    # pub.distro_arch_series and distroarchseries.distroseries would happen
+    # on every call rather than only on cache misses.
+    distroarchseries_link = pub.distro_arch_series_link
+    if distroarchseries_link not in distroarchseries_links:
+        distroarchseries_links[distroarchseries_link] = pub.distro_arch_series
+    distroarchseries = distroarchseries_links[distroarchseries_link]
+    distroseries_link = distroarchseries.distroseries_link
+    if distroseries_link not in distroseries_links:
+        distroseries_links[distroseries_link] = distroarchseries.distroseries
+    distroseries = distroseries_links[distroseries_link]
+    return lpinfo.make_suite(distroseries, pub.pocket)
+
+
+def install_from_librarian(pub, url, ddeb_archive_root):
+    '''Fetch a ddeb from the librarian and install it into ddeb_archive_root.
+
+    If the file already exists in the appropriate place in the pool, it will
+    not be downloaded again.
+
+    Return True if it was installed successfully or already present,
+    otherwise False.
+    '''
+    ddeb = urllib.unquote(os.path.basename(url))
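+    # ddeb filenames follow the usual Debian binary naming scheme; for
+    # example (hypothetical name) libfoo1-dbgsym_1.2-3ubuntu1_amd64.ddeb
+    # splits into ('libfoo1-dbgsym', '1.2-3ubuntu1', 'amd64').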
+    try:
+        (dbgsymname, version, arch) = ddeb.split('_')
+        assert arch.endswith('.ddeb')
+        (arch, _) = arch.split('.')
+        assert dbgsymname.endswith('-dbgsym')
+    except (ValueError, AssertionError):
+        logging.warning('Ignoring non-ddeb file %s in %s', ddeb, pub)
+        return False
+
+    source_pub = pub.build.getLatestSourcePublication()
+    if source_pub is None:
+        logging.warning('Ignoring %s which has no source publication', pub)
+        return False
+
+    source_name = source_pub.source_package_name
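+    # Lay files out as in the Debian pool: 'lib*' sources go under a
+    # four-character prefix, everything else under their first letter
+    # (hypothetically, 'libfoo' -> 'libf', 'hello' -> 'h').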
+    if source_name.startswith('lib'):
+        prefix = source_name[:4]
+    else:
+        prefix = source_name[0]
+
+    destdir = os.path.join(
+        ddeb_archive_root, 'pool', pub.component_name, prefix, source_name)
+    dest = os.path.join(destdir, ddeb)
+    if os.path.exists(dest):
+        logging.debug('%s already exists, skipping', dest)
+    else:
+        logging.debug('Downloading %s into %s', ddeb, os.path.dirname(dest))
+        try:
+            os.makedirs(destdir)
+        except OSError:
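+            # destdir may already exist; if it genuinely could not be
+            # created, urlretrieve below will fail instead.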
+            pass
+        urllib.urlretrieve(url, dest)
+        logging.debug('Downloaded %s into %s', ddeb, os.path.dirname(dest))
+
+    return True
+
+
 def get_ddeb_index(url):
     '''Parse the ddeb index file at the given URL
 
@@ -251,9 +323,39 @@
         logging.error('Cannot acquire lock, another ddeb-retriever instance is already running')
         return
 
+    UTC = wadllib.iso_strptime.TimeZone('+00:00')
+    lp_threshold_path = os.path.join(args.archive_root, '.lp-threshold')
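+    # .lp-threshold holds a single integer Unix timestamp: the date_created
+    # of the newest publication processed by the previous run.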
+    try:
+        with open(lp_threshold_path) as lp_threshold_file:
+            lp_threshold = datetime.datetime.fromtimestamp(
+                int(lp_threshold_file.readline()), tz=UTC)
+    except (IOError, ValueError):
+        # Missing or unparseable threshold file: start from scratch.
+        lp_threshold = None
+
     series_info = lpinfo.get_series(args.distribution)
     logging.debug('series info: %s', series_info)
 
+    if lp_threshold is not None:
+        # Allow a grace period to cope with publications arriving out of
+        # order during long transactions.
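+        # (A publication created early in a long publisher transaction only
+        # becomes visible once that transaction commits, so its date_created
+        # can be older than the threshold recorded by the previous run.)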
+        real_threshold = lp_threshold - datetime.timedelta(hours=1)
+        logging.info(
+            'Retrieving Launchpad publications since %s', real_threshold)
+    else:
+        real_threshold = None
+        logging.info('Retrieving all Launchpad publications')
+    binary_pubs = lpinfo.get_binary_publications(
+        args.distribution, created_since_date=real_threshold)
+    debug_pubs = []
+    latest_date_created = lp_threshold
+    for pub in binary_pubs:
+        if real_threshold is not None and pub.date_created < real_threshold:
+            break
+        if (latest_date_created is None or
+                pub.date_created > latest_date_created):
+            latest_date_created = pub.date_created
+        if pub.is_debug:
+            debug_pubs.append(pub)
+
     # Download ddebs into temporary dir; all in parallel, as buildd Apaches are
     # very slow
     for date in args.date:
@@ -275,6 +377,8 @@
     queue_dir = os.path.join(args.archive_root, 'queue')
     if os.path.isdir(queue_dir):
         updated_pockets.update(os.listdir(queue_dir))
+    for pub in debug_pubs:
+        updated_pockets.add(get_suite_for_publication(pub))
     logging.info('got downloaded/queued ddebs for pockets: %s', ' '.join(updated_pockets))
 
     # complete the pocket list
@@ -295,8 +399,39 @@
                                             pockets_filter=updated_pockets,
                                             strip_epoch=True)
 
+    logging.info('Building ddeb map; this can take a while')
+    ddeb_map = archive_tools.archive_map(series_info, [args.archive_root],
+                                         suffix='')
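+    # The check below assumes ddeb_map maps binary package name to per-suite
+    # dicts keyed by version; a hit means the ddeb is already present in the
+    # pool.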
+
+    # try to install ddebs from publishing history records
+    installed_pockets = set()
+    for pub in debug_pubs:
+        already_present = False
+        pub_installed = False
+        if pub.binary_package_name in ddeb_map:
+            existing = ddeb_map[pub.binary_package_name]
+            for series_map in existing.values():
+                if pub.binary_package_version in series_map:
+                    already_present = True
+                    break
+        if already_present:
+            # We already have this ddeb on disk, so just note that indexes
+            # for this pocket need to be rebuilt.
+            pub_installed = True
+        else:
+            for url in pub.binaryFileUrls():
+                if install_from_librarian(pub, url, args.archive_root):
+                    pub_installed = True
+        if pub_installed:
+            installed_pockets.add(get_suite_for_publication(pub))
+
+    if latest_date_created is not None:
+        epoch = datetime.datetime.fromtimestamp(0, tz=UTC)
+        new_threshold = (latest_date_created - epoch).total_seconds()
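+        # %d truncates to whole seconds; at worst the next run re-examines a
+        # publication it has already handled, and install_from_librarian
+        # skips files that are already on disk.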
+        with open(lp_threshold_path, 'w') as lp_threshold_file:
+            lp_threshold_file.write("%d\n" % new_threshold)
+
     # try to install downloaded ddebs
-    installed_pockets = set()
     for pocket in updated_pockets:
         d = os.path.join(download_dir, pocket)
         if not os.path.isdir(d):

=== modified file 'lpinfo.py'
--- lpinfo.py	2014-07-25 06:50:12 +0000
+++ lpinfo.py	2015-04-14 19:16:49 +0000
@@ -10,7 +10,9 @@
     global lp
     if lp is None:
         lp = Launchpad.login_anonymously(
-            'ddeb-retriever', os.environ.get('LAUNCHPAD_INSTANCE', 'production'))
+            'ddeb-retriever',
+            os.environ.get('LAUNCHPAD_INSTANCE', 'production'),
+            version='devel')
 
 
 def get_series(distribution):
@@ -45,3 +47,27 @@
         buildds.append(builder.name)
 
     return buildds
+
+
+def get_binary_publications(distribution, created_since_date=None):
+    '''Get recently-published binaries for the given distribution.
+
+    If created_since_date is given, only publications created since that
+    date are returned.
+    '''
+
+    _get_lp()
+    archive = lp.distributions[distribution].main_archive
+    # It's important to omit the status filter here, even if we later decide
+    # that we only care about the Published status (although at the moment
+    # it seems reasonable to scan everything and let the garbage-collection
+    # pass clean up superseded ddebs later).  This is because the collection
+    # may change as we're iterating over it.  Without any filtering, this is
+    # OK because entries can never be removed from the collection: the worst
+    # case is that we encounter the same publication twice.  With filtering
+    # on mutable properties, it would be possible to lose entries between
+    # two successive batches.
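+    # order_by_date returns the newest publications first, which is what
+    # lets callers stop iterating once they reach an already-seen threshold.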
+    return archive.getPublishedBinaries(
+        created_since_date=created_since_date, order_by_date=True)
+
+
+def make_suite(distroseries, pocket):
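+    '''Return the archive suite name for a distroseries and pocket.
+
+    For example (hypothetically), a series named "trusty" with the
+    "Proposed" pocket yields "trusty-proposed", while the "Release" pocket
+    yields plain "trusty".
+    '''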
+    if pocket == 'Release':
+        return distroseries.name
+    else:
+        return '%s-%s' % (distroseries.name, pocket.lower())

