[oe-commits] org.oe.dev ipkg 0.99.163: Apply patches to optimize internal algorithms.

pfalcon commit openembedded-commits at lists.openembedded.org
Fri May 11 12:07:42 UTC 2007


ipkg 0.99.163: Apply patches to optimize internal algorithms.
* Leading to 10x+ speedup on typical OE feeds (thousands of packages)
* Still needs more testing, though developers' testing didn't show any issues.
* Read patch comments if in doubt.
* Closes #2244.

Author: pfalcon at openembedded.org
Branch: org.openembedded.dev
Revision: 5ae61fd4def2a2a2fb4ff032dcc351c33f111256
ViewMTN: http://monotone.openembedded.org/revision.psp?id=5ae61fd4def2a2a2fb4ff032dcc351c33f111256
Files:
1
packages/ipkg/files/1-pkg-parse--Optimize-inefficient-parsing.patch
packages/ipkg/files/2-pkg-vec--Optimize-gross-inefficiency.patch
packages/ipkg/ipkg_0.99.163.bb
Diffs:

#
# mt diff -r098b7308900ba9b5e6e5ff83f5e4a2f5d3b8da04 -r5ae61fd4def2a2a2fb4ff032dcc351c33f111256
#
# 
# 
# add_file "packages/ipkg/files/1-pkg-parse--Optimize-inefficient-parsing.patch"
#  content [1fb6417a60fc963ce4acd45dc86010a7ae947b17]
# 
# add_file "packages/ipkg/files/2-pkg-vec--Optimize-gross-inefficiency.patch"
#  content [988efdd744386f7bb517bdc0266b258fec8fcab4]
# 
# patch "packages/ipkg/ipkg_0.99.163.bb"
#  from [cd7517e70eb13ced53531ae54bda1c37ccfe42bb]
#    to [44259469fdf46bd0af832f5c1311a633806d832a]
# 
============================================================
--- packages/ipkg/files/1-pkg-parse--Optimize-inefficient-parsing.patch	1fb6417a60fc963ce4acd45dc86010a7ae947b17
+++ packages/ipkg/files/1-pkg-parse--Optimize-inefficient-parsing.patch	1fb6417a60fc963ce4acd45dc86010a7ae947b17
@@ -0,0 +1,227 @@
+# HG changeset patch
+# User pfalcon at localhost
+# Date 1178334771 0
+# Node ID e4c99830ba0d55813e1774bd6d41039ca640d6a6
+# Parent  d324eba24e79a0a86df7978f0511e4632a2732c7
+pkg_parse: Optimize inefficient parsing.
+
+Instead of expensively probing all fields in row, dispatch based on the
+first letter of the field. Tests show ~12 times reduction in number of calls
+to low-level parsing functions.
+
+diff -r d324eba24e79 -r e4c99830ba0d pkg_parse.c
+--- a/pkg_parse.c	Fri May 04 23:22:33 2007 +0000
++++ b/pkg_parse.c	Sat May 05 03:12:51 2007 +0000
+@@ -241,87 +241,116 @@ int pkg_parse_raw(pkg_t *pkg, char ***ra
+ 
+     for (lines = *raw; *lines; lines++) {
+ 	/*	fprintf(stderr, "PARSING %s\n", *lines);*/
+-	if(isGenericFieldType("Package:", *lines)) 
+-	    pkg->name = parseGenericFieldType("Package", *lines);
+-	else if(isGenericFieldType("Architecture:", *lines))
+-	    pkg->architecture = parseGenericFieldType("Architecture", *lines);
+-	else if(isGenericFieldType("Filename:", *lines))
+-	    pkg->filename = parseGenericFieldType("Filename", *lines);
+-	else if(isGenericFieldType("Section:", *lines))
+-	    pkg->section = parseGenericFieldType("Section", *lines);
+-	else if(isGenericFieldType("MD5sum:", *lines))
+-	    pkg->md5sum = parseGenericFieldType("MD5sum", *lines);
+-	/* The old ipkg wrote out status files with the wrong case for MD5sum,
+-	   let's parse it either way */
+-	else if(isGenericFieldType("MD5Sum:", *lines))
+-	    pkg->md5sum = parseGenericFieldType("MD5Sum", *lines);
+-	else if(isGenericFieldType("Size:", *lines))
+-	    pkg->size = parseGenericFieldType("Size", *lines);
+-	else if(isGenericFieldType("Source:", *lines))
+-	    pkg->source = parseGenericFieldType("Source", *lines);
+-	else if(isGenericFieldType("Installed-Size:", *lines))
+-	    pkg->installed_size = parseGenericFieldType("Installed-Size", *lines);
+-	else if(isGenericFieldType("Installed-Time:", *lines)) {
+-	     char *time_str = parseGenericFieldType("Installed-Time", *lines);
+-	     pkg->installed_time = strtoul(time_str, NULL, 0);
+-	} else if(isGenericFieldType("Priority:", *lines))
+-	    pkg->priority = parseGenericFieldType("Priority", *lines);
+-	else if(isGenericFieldType("Essential:", *lines)) {
+-	    char *essential_value;
+-	    essential_value = parseGenericFieldType("Essential", *lines);
+-	    if (strcmp(essential_value, "yes") == 0) {
+-		pkg->essential = 1;
+-	    }
+-	    free(essential_value);
+-	}
+-	else if(isGenericFieldType("Status", *lines))
+-	    parseStatus(pkg, *lines);
+-	else if(isGenericFieldType("Version", *lines))
+-	    parseVersion(pkg, *lines);
+-	else if(isGenericFieldType("Maintainer", *lines))
+-	    pkg->maintainer = parseGenericFieldType("Maintainer", *lines);
+-	else if(isGenericFieldType("Conffiles", *lines)){
+-	    parseConffiles(pkg, *lines);
+-	    reading_conffiles = 1;
+-	}
+-	else if(isGenericFieldType("Description", *lines)) {
+-	    pkg->description = parseGenericFieldType("Description", *lines);
+-	    reading_conffiles = 0;
+-	    reading_description = 1;
+-	}
+-
+-	else if(isGenericFieldType("Provides", *lines)){
++	switch (**lines) {
++	case 'P':
++	    if(isGenericFieldType("Package:", *lines)) 
++		pkg->name = parseGenericFieldType("Package", *lines);
++	    else if(isGenericFieldType("Priority:", *lines))
++		pkg->priority = parseGenericFieldType("Priority", *lines);
++	    else if(isGenericFieldType("Provides", *lines)){
+ /* Here we add the internal_use to align the off by one problem between provides_str and provides */
+-            provide = (char * ) malloc(strlen(*lines)+ 35 ); /* Preparing the space for the new ipkg_internal_use_only */
+-            if ( alterProvidesLine(*lines,provide) ){
+-               return EINVAL;
+-            }
+-	    pkg->provides_str = parseDependsString( provide, &pkg->provides_count);
++        	provide = (char * ) malloc(strlen(*lines)+ 35 ); /* Preparing the space for the new ipkg_internal_use_only */
++        	if ( alterProvidesLine(*lines,provide) ){
++        	    return EINVAL;
++        	}
++		pkg->provides_str = parseDependsString( provide, &pkg->provides_count);
+ /* Let's try to hack a bit here.
+    The idea is that if a package has no Provides, we would add one generic, to permit the check of dependencies
+    in alot of other places. We will remove it before writing down the status database */
+-            pkg_false_provides=0;
+-            free(provide);
+-        } 
+-
+-	else if(isGenericFieldType("Depends", *lines))
+-	    pkg->depends_str = parseDependsString(*lines, &pkg->depends_count);
+-	else if(isGenericFieldType("Pre-Depends", *lines))
+-	    pkg->pre_depends_str = parseDependsString(*lines, &pkg->pre_depends_count);
+-	else if(isGenericFieldType("Recommends", *lines))
+-	    pkg->recommends_str = parseDependsString(*lines, &pkg->recommends_count);
+-	else if(isGenericFieldType("Suggests", *lines))
+-	    pkg->suggests_str = parseDependsString(*lines, &pkg->suggests_count);
+-	/* Abhaya: support for conflicts */
+-	else if(isGenericFieldType("Conflicts", *lines))
+-	    pkg->conflicts_str = parseDependsString(*lines, &pkg->conflicts_count);
+-	else if(isGenericFieldType("Replaces", *lines))
+-	    pkg->replaces_str = parseDependsString(*lines, &pkg->replaces_count);
+-	else if(line_is_blank(*lines)) {
+-	    lines++;
+-	    break;
+-	}
+-	else if(**lines == ' '){
++        	pkg_false_provides=0;
++        	free(provide);
++    	    } 
++	    else if(isGenericFieldType("Pre-Depends", *lines))
++		pkg->pre_depends_str = parseDependsString(*lines, &pkg->pre_depends_count);
++	    break;
++
++	case 'A':
++	    if(isGenericFieldType("Architecture:", *lines))
++		pkg->architecture = parseGenericFieldType("Architecture", *lines);
++	    break;
++
++	case 'F':
++	    if(isGenericFieldType("Filename:", *lines))
++		pkg->filename = parseGenericFieldType("Filename", *lines);
++	    break;
++
++	case 'S':
++	    if(isGenericFieldType("Section:", *lines))
++		pkg->section = parseGenericFieldType("Section", *lines);
++	    else if(isGenericFieldType("Size:", *lines))
++		pkg->size = parseGenericFieldType("Size", *lines);
++	    else if(isGenericFieldType("Source:", *lines))
++		pkg->source = parseGenericFieldType("Source", *lines);
++	    else if(isGenericFieldType("Status", *lines))
++		parseStatus(pkg, *lines);
++	    else if(isGenericFieldType("Suggests", *lines))
++		pkg->suggests_str = parseDependsString(*lines, &pkg->suggests_count);
++	    break;
++
++	case 'M':
++	    if(isGenericFieldType("MD5sum:", *lines))
++		pkg->md5sum = parseGenericFieldType("MD5sum", *lines);
++	    /* The old ipkg wrote out status files with the wrong case for MD5sum,
++		let's parse it either way */
++	    else if(isGenericFieldType("MD5Sum:", *lines))
++		pkg->md5sum = parseGenericFieldType("MD5Sum", *lines);
++	    else if(isGenericFieldType("Maintainer", *lines))
++		pkg->maintainer = parseGenericFieldType("Maintainer", *lines);
++	    break;
++
++	case 'I':
++	    if(isGenericFieldType("Installed-Size:", *lines))
++		pkg->installed_size = parseGenericFieldType("Installed-Size", *lines);
++	    else if(isGenericFieldType("Installed-Time:", *lines)) {
++		char *time_str = parseGenericFieldType("Installed-Time", *lines);
++		pkg->installed_time = strtoul(time_str, NULL, 0);
++	    }	    
++	    break;
++
++	case 'E':
++	    if(isGenericFieldType("Essential:", *lines)) {
++		char *essential_value;
++		essential_value = parseGenericFieldType("Essential", *lines);
++		if (strcmp(essential_value, "yes") == 0) {
++		    pkg->essential = 1;
++		}
++		free(essential_value);
++	    }
++	    break;
++
++	case 'V':
++	    if(isGenericFieldType("Version", *lines))
++		parseVersion(pkg, *lines);
++	    break;
++
++	case 'C':
++	    if(isGenericFieldType("Conffiles", *lines)){
++		parseConffiles(pkg, *lines);
++		reading_conffiles = 1;
++	    }
++	    else if(isGenericFieldType("Conflicts", *lines))
++		pkg->conflicts_str = parseDependsString(*lines, &pkg->conflicts_count);
++	    break;
++
++	case 'D':
++	    if(isGenericFieldType("Description", *lines)) {
++		pkg->description = parseGenericFieldType("Description", *lines);
++		reading_conffiles = 0;
++		reading_description = 1;
++	    }
++	    else if(isGenericFieldType("Depends", *lines))
++		pkg->depends_str = parseDependsString(*lines, &pkg->depends_count);
++	    break;
++
++	case 'R':
++	    if(isGenericFieldType("Recommends", *lines))
++	        pkg->recommends_str = parseDependsString(*lines, &pkg->recommends_count);
++	    else if(isGenericFieldType("Replaces", *lines))
++		pkg->replaces_str = parseDependsString(*lines, &pkg->replaces_count);
++	    
++	    break;
++
++	case ' ':
+ 	    if(reading_description) {
+ 		/* we already know it's not blank, so the rest of description */      
+ 		pkg->description = realloc(pkg->description,
+@@ -332,8 +361,18 @@ int pkg_parse_raw(pkg_t *pkg, char ***ra
+ 	    }
+ 	    else if(reading_conffiles)
+ 		parseConffiles(pkg, *lines);
++		
++	    break;
++
++	default:
++	    if(line_is_blank(*lines)) {
++		lines++;
++		goto out;
++	    }
+ 	}
+     }
++out:;
++    
+     *raw = lines;
+ /* If the ipk has not a Provides line, we insert our false line */ 
+     if ( pkg_false_provides==1)
============================================================
--- packages/ipkg/files/2-pkg-vec--Optimize-gross-inefficiency.patch	988efdd744386f7bb517bdc0266b258fec8fcab4
+++ packages/ipkg/files/2-pkg-vec--Optimize-gross-inefficiency.patch	988efdd744386f7bb517bdc0266b258fec8fcab4
@@ -0,0 +1,65 @@
+# HG changeset patch
+# User pfalcon at localhost
+# Date 1178812057 0
+# Node ID eff4450fdea2f8210a4fc927bd35ae2562d2eced
+# Parent  e4c99830ba0d55813e1774bd6d41039ca640d6a6
+pkg_vec: Optimize gross inefficiency.
+
+This module tries to implement *unique* vector (without duplicating
+objects), and does this by iterating thru all already existing elements.
+Thus, complexity of adding N elements was O(N^2). However, there're no grave
+reasons to do uniqueness at all:
+1. First of all, if feeds are correct, there won't be duplicates.
+2. Then, even if there will be, there won't be serious problems like
+segfaults.
+3. Finally, for quite a few operations vectors is constructed from a
+hashtable, thus uniqueness is guaranteed (which reduces possible cases of
+non-uniqueness to values of Depends: and friends).
+
+All an all, remove dup check, and make ipkg work order of magnitude faster
+on a feed with few thousands of packages.
+
+diff -r e4c99830ba0d -r eff4450fdea2 pkg_vec.c
+--- a/pkg_vec.c	Sat May 05 03:12:51 2007 +0000
++++ b/pkg_vec.c	Thu May 10 15:47:37 2007 +0000
+@@ -104,6 +104,7 @@ void pkg_vec_insert(pkg_vec_t *vec, cons
+      int i;
+      int found = 0;
+ 
++#if 0
+      /* look for a duplicate pkg by name, version, and architecture */
+      for (i = 0; i < vec->len; i++)
+ 	  if ((strcmp(pkg->name, vec->pkgs[i]->name) == 0)
+@@ -112,6 +113,7 @@ void pkg_vec_insert(pkg_vec_t *vec, cons
+ 	       found = 1;
+ 	       break;
+ 	  }
++#endif
+ 
+      /* we didn't find one, add it */
+      if(!found){   
+@@ -191,6 +193,7 @@ void abstract_pkg_vec_insert(abstract_pk
+ {
+     int i;
+ 
++#if 0
+     /* look for a duplicate pkg by name */
+     for(i = 0; i < vec->len; i++)
+ 	if (strcmp(pkg->name, vec->pkgs[i]->name) == 0)
+@@ -198,12 +201,15 @@ void abstract_pkg_vec_insert(abstract_pk
+ 
+     /* we didn't find one, add it */
+     if(i == vec->len){   
++#endif
+ 	vec->pkgs = 
+ 	  (abstract_pkg_t **)
+ 	    realloc(vec->pkgs, (vec->len + 1) * sizeof(abstract_pkg_t *));
+ 	vec->pkgs[vec->len] = pkg;
+ 	vec->len++;
+-    }
++#if 0
++    }
++#endif
+ }
+ 
+ abstract_pkg_t * abstract_pkg_vec_get(abstract_pkg_vec_t *vec, int i)
============================================================
--- packages/ipkg/ipkg_0.99.163.bb	cd7517e70eb13ced53531ae54bda1c37ccfe42bb
+++ packages/ipkg/ipkg_0.99.163.bb	44259469fdf46bd0af832f5c1311a633806d832a
@@ -1,11 +1,14 @@ include ipkg.inc
 include ipkg.inc
-PR = "r3"
+PR = "r4"
 
 S = "${WORKDIR}/ipkg-${PV}"
 
 SRC_URI = "http://www.handhelds.org/pub/packages/ipkg/ipkg-${PV}.tar.gz \
 	file://terse.patch;patch=1 \
-	file://is-processing.patch;patch=1"
+	file://is-processing.patch;patch=1 \
+	file://1-pkg-parse--Optimize-inefficient-parsing.patch;patch=1 \
+	file://2-pkg-vec--Optimize-gross-inefficiency.patch;patch=1 \
+	"
 
 do_stage() {
 	oe_libinstall -so libipkg ${STAGING_LIBDIR}






More information about the Openembedded-commits mailing list