diskcache.c 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561
  1. /*
  2. Copyright (c) 2003-2010 by Juliusz Chroboczek
  3. Permission is hereby granted, free of charge, to any person obtaining a copy
  4. of this software and associated documentation files (the "Software"), to deal
  5. in the Software without restriction, including without limitation the rights
  6. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. copies of the Software, and to permit persons to whom the Software is
  8. furnished to do so, subject to the following conditions:
  9. The above copyright notice and this permission notice shall be included in
  10. all copies or substantial portions of the Software.
  11. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  12. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  13. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  14. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  15. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  16. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  17. THE SOFTWARE.
  18. */
  19. #include "polipo.h"
  20. #ifndef NO_DISK_CACHE
  21. #include "md5import.h"
  22. int maxDiskEntries = 32;
  23. /* Because the functions in this file can be called during object
  24. expiry, we cannot use get_chunk. */
  25. AtomPtr diskCacheRoot;
  26. AtomPtr localDocumentRoot;
  27. DiskCacheEntryPtr diskEntries = NULL, diskEntriesLast = NULL;
  28. int numDiskEntries = 0;
  29. int diskCacheDirectoryPermissions = 0700;
  30. int diskCacheFilePermissions = 0600;
  31. int diskCacheWriteoutOnClose = (64 * 1024);
  32. int maxDiskCacheEntrySize = -1;
  33. int diskCacheUnlinkTime = 32 * 24 * 60 * 60;
  34. int diskCacheTruncateTime = 4 * 24 * 60 * 60 + 12 * 60 * 60;
  35. int diskCacheTruncateSize = 1024 * 1024;
  36. int preciseExpiry = 0;
  37. static DiskCacheEntryRec negativeEntry = {
  38. NULL, NULL,
  39. -1, -1, -1, -1, 0, 0, NULL, NULL
  40. };
  41. #ifndef LOCAL_ROOT
  42. #define LOCAL_ROOT "/usr/share/polipo/www/"
  43. #endif
  44. #ifndef DISK_CACHE_ROOT
  45. #define DISK_CACHE_ROOT "/var/cache/polipo/"
  46. #endif
  47. static int maxDiskEntriesSetter(ConfigVariablePtr, void*);
  48. static int atomSetterFlush(ConfigVariablePtr, void*);
  49. static int reallyWriteoutToDisk(ObjectPtr object, int upto, int max);
  50. void
  51. preinitDiskcache()
  52. {
  53. diskCacheRoot = internAtom(DISK_CACHE_ROOT);
  54. localDocumentRoot = internAtom(LOCAL_ROOT);
  55. CONFIG_VARIABLE_SETTABLE(diskCacheDirectoryPermissions, CONFIG_OCTAL,
  56. configIntSetter,
  57. "Access rights for new directories.");
  58. CONFIG_VARIABLE_SETTABLE(diskCacheFilePermissions, CONFIG_OCTAL,
  59. configIntSetter,
  60. "Access rights for new cache files.");
  61. CONFIG_VARIABLE_SETTABLE(diskCacheWriteoutOnClose, CONFIG_INT,
  62. configIntSetter,
  63. "Number of bytes to write out eagerly.");
  64. CONFIG_VARIABLE_SETTABLE(diskCacheRoot, CONFIG_ATOM, atomSetterFlush,
  65. "Root of the disk cache.");
  66. CONFIG_VARIABLE_SETTABLE(localDocumentRoot, CONFIG_ATOM, atomSetterFlush,
  67. "Root of the local tree.");
  68. CONFIG_VARIABLE_SETTABLE(maxDiskEntries, CONFIG_INT, maxDiskEntriesSetter,
  69. "File descriptors used by the on-disk cache.");
  70. CONFIG_VARIABLE(diskCacheUnlinkTime, CONFIG_TIME,
  71. "Time after which on-disk objects are removed.");
  72. CONFIG_VARIABLE(diskCacheTruncateTime, CONFIG_TIME,
  73. "Time after which on-disk objects are truncated.");
  74. CONFIG_VARIABLE(diskCacheTruncateSize, CONFIG_INT,
  75. "Size to which on-disk objects are truncated.");
  76. CONFIG_VARIABLE(preciseExpiry, CONFIG_BOOLEAN,
  77. "Whether to consider all files for purging.");
  78. CONFIG_VARIABLE_SETTABLE(maxDiskCacheEntrySize, CONFIG_INT,
  79. configIntSetter,
  80. "Maximum size of objects cached on disk.");
  81. }
  82. static int
  83. maxDiskEntriesSetter(ConfigVariablePtr var, void *value)
  84. {
  85. int i;
  86. assert(var->type == CONFIG_INT && var->value.i == &maxDiskEntries);
  87. i = *(int*)value;
  88. if(i < 0 || i > 1000000)
  89. return -3;
  90. maxDiskEntries = i;
  91. while(numDiskEntries > maxDiskEntries)
  92. destroyDiskEntry(diskEntriesLast->object, 0);
  93. return 1;
  94. }
  95. static int
  96. atomSetterFlush(ConfigVariablePtr var, void *value)
  97. {
  98. discardObjects(1, 0);
  99. return configAtomSetter(var, value);
  100. }
  101. static int
  102. checkRoot(AtomPtr root)
  103. {
  104. struct stat ss;
  105. int rc;
  106. if(!root || root->length == 0)
  107. return 0;
  108. #ifdef WIN32 /* Require "x:/" or "x:\\" */
  109. rc = isalpha(root->string[0]) && (root->string[1] == ':') &&
  110. ((root->string[2] == '/') || (root->string[2] == '\\'));
  111. if(!rc) {
  112. return -2;
  113. }
  114. #else
  115. if(root->string[0] != '/') {
  116. return -2;
  117. }
  118. #endif
  119. rc = stat(root->string, &ss);
  120. if(rc < 0)
  121. return -1;
  122. else if(!S_ISDIR(ss.st_mode)) {
  123. errno = ENOTDIR;
  124. return -1;
  125. }
  126. return 1;
  127. }
  128. static AtomPtr
  129. maybeAddSlash(AtomPtr atom)
  130. {
  131. AtomPtr newAtom = NULL;
  132. if(!atom) return NULL;
  133. if(atom->length > 0 && atom->string[atom->length - 1] != '/') {
  134. newAtom = atomCat(atom, "/");
  135. releaseAtom(atom);
  136. return newAtom;
  137. }
  138. return atom;
  139. }
  140. void
  141. initDiskcache()
  142. {
  143. int rc;
  144. diskCacheRoot = expandTilde(maybeAddSlash(diskCacheRoot));
  145. rc = checkRoot(diskCacheRoot);
  146. if(rc <= 0) {
  147. switch(rc) {
  148. case 0: break;
  149. case -1: do_log_error(L_WARN, errno, "Disabling disk cache"); break;
  150. case -2:
  151. do_log(L_WARN, "Disabling disk cache: path %s is not absolute.\n",
  152. diskCacheRoot->string);
  153. break;
  154. default: abort();
  155. }
  156. releaseAtom(diskCacheRoot);
  157. diskCacheRoot = NULL;
  158. }
  159. localDocumentRoot = expandTilde(maybeAddSlash(localDocumentRoot));
  160. rc = checkRoot(localDocumentRoot);
  161. if(rc <= 0) {
  162. switch(rc) {
  163. case 0: break;
  164. case -1: do_log_error(L_WARN, errno, "Disabling local tree"); break;
  165. case -2:
  166. do_log(L_WARN, "Disabling local tree: path is not absolute.\n");
  167. break;
  168. default: abort();
  169. }
  170. releaseAtom(localDocumentRoot);
  171. localDocumentRoot = NULL;
  172. }
  173. }
  174. #ifdef DEBUG_DISK_CACHE
  175. #define CHECK_ENTRY(entry) check_entry((entry))
  176. static void
  177. check_entry(DiskCacheEntryPtr entry)
  178. {
  179. if(entry && entry->fd < 0)
  180. assert(entry == &negativeEntry);
  181. if(entry && entry->fd >= 0) {
  182. assert((!entry->previous) == (entry == diskEntries));
  183. assert((!entry->next) == (entry == diskEntriesLast));
  184. if(entry->size >= 0)
  185. assert(entry->size + entry->body_offset >= entry->offset);
  186. assert(entry->body_offset >= 0);
  187. if(entry->offset >= 0) {
  188. off_t offset;
  189. offset = lseek(entry->fd, 0, SEEK_CUR);
  190. assert(offset == entry->offset);
  191. }
  192. if(entry->size >= 0) {
  193. int rc;
  194. struct stat ss;
  195. rc = fstat(entry->fd, &ss);
  196. assert(rc >= 0);
  197. assert(ss.st_size == entry->size + entry->body_offset);
  198. }
  199. }
  200. }
  201. #else
  202. #define CHECK_ENTRY(entry) do {} while(0)
  203. #endif
  204. int
  205. diskEntrySize(ObjectPtr object)
  206. {
  207. struct stat buf;
  208. int rc;
  209. DiskCacheEntryPtr entry = object->disk_entry;
  210. if(!entry || entry == &negativeEntry)
  211. return -1;
  212. if(entry->size >= 0)
  213. return entry->size;
  214. rc = fstat(entry->fd, &buf);
  215. if(rc < 0) {
  216. do_log_error(L_ERROR, errno, "Couldn't stat");
  217. return -1;
  218. }
  219. if(buf.st_size <= entry->body_offset)
  220. entry->size = 0;
  221. else
  222. entry->size = buf.st_size - entry->body_offset;
  223. CHECK_ENTRY(entry);
  224. if(object->length >= 0 && entry->size == object->length)
  225. object->flags |= OBJECT_DISK_ENTRY_COMPLETE;
  226. return entry->size;
  227. }
  228. static int
  229. entrySeek(DiskCacheEntryPtr entry, off_t offset)
  230. {
  231. off_t rc;
  232. CHECK_ENTRY(entry);
  233. assert(entry != &negativeEntry);
  234. if(entry->offset == offset)
  235. return 1;
  236. if(offset > entry->body_offset) {
  237. /* Avoid extending the file by mistake */
  238. if(entry->size < 0)
  239. diskEntrySize(entry->object);
  240. if(entry->size < 0)
  241. return -1;
  242. if(entry->size + entry->body_offset < offset)
  243. return -1;
  244. }
  245. rc = lseek(entry->fd, offset, SEEK_SET);
  246. if(rc < 0) {
  247. do_log_error(L_ERROR, errno, "Couldn't seek");
  248. entry->offset = -1;
  249. return -1;
  250. }
  251. entry->offset = offset;
  252. return 1;
  253. }
  254. /* Given a local URL, constructs the filename where it can be found. */
  255. int
  256. localFilename(char *buf, int n, char *key, int len)
  257. {
  258. int i, j;
  259. if(len <= 0 || key[0] != '/') return -1;
  260. if(urlIsSpecial(key, len)) return -1;
  261. if(checkRoot(localDocumentRoot) <= 0)
  262. return -1;
  263. if(n <= localDocumentRoot->length)
  264. return -1;
  265. i = 0;
  266. if(key[i] != '/')
  267. return -1;
  268. memcpy(buf, localDocumentRoot->string, localDocumentRoot->length);
  269. j = localDocumentRoot->length;
  270. if(buf[j - 1] == '/')
  271. j--;
  272. while(i < len) {
  273. if(j >= n - 1)
  274. return -1;
  275. if(key[i] == '/' && i < len - 2)
  276. if(key[i + 1] == '.' &&
  277. (key[i + 2] == '.' || key[i + 2] == '/'))
  278. return -1;
  279. buf[j++] = key[i++];
  280. }
  281. if(buf[j - 1] == '/') {
  282. if(j >= n - 11)
  283. return -1;
  284. memcpy(buf + j, "index.html", 10);
  285. j += 10;
  286. }
  287. buf[j] = '\0';
  288. return j;
  289. }
  290. static void
  291. md5(unsigned char *restrict key, int len, unsigned char *restrict dst)
  292. {
  293. static MD5_CTX ctx;
  294. MD5Init(&ctx);
  295. MD5Update(&ctx, key, len);
  296. MD5Final(&ctx);
  297. memcpy(dst, ctx.digest, 16);
  298. }
  299. /* Check whether a character can be stored in a filename. This is
  300. needed since we want to support deficient file systems. */
  301. static int
  302. fssafe(char c)
  303. {
  304. if(c <= 31 || c >= 127)
  305. return 0;
  306. if((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
  307. (c >= '0' && c <= '9') || c == '.' || c == '-' || c == '_')
  308. return 1;
  309. return 0;
  310. }
  311. /* Given a URL, returns the directory name within which all files
  312. starting with this URL can be found. */
  313. static int
  314. urlDirname(char *buf, int n, const char *url, int len)
  315. {
  316. int i, j;
  317. if(len < 8)
  318. return -1;
  319. if(lwrcmp(url, "http://", 7) != 0)
  320. return -1;
  321. if(checkRoot(diskCacheRoot) <= 0)
  322. return -1;
  323. if(n <= diskCacheRoot->length)
  324. return -1;
  325. memcpy(buf, diskCacheRoot->string, diskCacheRoot->length);
  326. j = diskCacheRoot->length;
  327. if(buf[j - 1] != '/')
  328. buf[j++] = '/';
  329. for(i = 7; i < len; i++) {
  330. if(i >= len || url[i] == '/')
  331. break;
  332. if(url[i] == '.' && i != len - 1 && url[i + 1] == '.')
  333. return -1;
  334. if(url[i] == '%' || !fssafe(url[i])) {
  335. if(j + 3 >= n) return -1;
  336. buf[j++] = '%';
  337. buf[j++] = i2h((url[i] & 0xF0) >> 4);
  338. buf[j++] = i2h(url[i] & 0x0F);
  339. } else {
  340. buf[j++] = url[i]; if(j >= n) return -1;
  341. }
  342. }
  343. buf[j++] = '/'; if(j >= n) return -1;
  344. buf[j] = '\0';
  345. return j;
  346. }
  347. /* Given a URL, returns the filename where the cached data can be
  348. found. */
  349. static int
  350. urlFilename(char *restrict buf, int n, const char *url, int len)
  351. {
  352. int j;
  353. unsigned char md5buf[18];
  354. j = urlDirname(buf, n, url, len);
  355. if(j < 0 || j + 24 >= n)
  356. return -1;
  357. md5((unsigned char*)url, len, md5buf);
  358. b64cpy(buf + j, (char*)md5buf, 16, 1);
  359. buf[j + 24] = '\0';
  360. return j + 24;
  361. }
  362. static char *
  363. dirnameUrl(char *url, int n, char *name, int len)
  364. {
  365. int i, j, k, c1, c2;
  366. k = diskCacheRoot->length;
  367. if(len < k)
  368. return NULL;
  369. if(memcmp(name, diskCacheRoot->string, k) != 0)
  370. return NULL;
  371. if(n < 8)
  372. return NULL;
  373. memcpy(url, "http://", 7);
  374. if(name[len - 1] == '/')
  375. len --;
  376. j = 7;
  377. for(i = k; i < len; i++) {
  378. if(name[i] == '%') {
  379. if(i >= len - 2)
  380. return NULL;
  381. c1 = h2i(name[i + 1]);
  382. c2 = h2i(name[i + 2]);
  383. if(c1 < 0 || c2 < 0)
  384. return NULL;
  385. url[j++] = c1 * 16 + c2; if(j >= n) goto fail;
  386. i += 2; /* skip extra digits */
  387. } else if(i < len - 1 &&
  388. name[i] == '.' && name[i + 1] == '/') {
  389. return NULL;
  390. } else if(i == len - 1 && name[i] == '.') {
  391. return NULL;
  392. } else {
  393. url[j++] = name[i]; if(j >= n) goto fail;
  394. }
  395. }
  396. url[j++] = '/'; if(j >= n) goto fail;
  397. url[j] = '\0';
  398. return url;
  399. fail:
  400. return NULL;
  401. }
  402. /* Create a file and all intermediate directories. */
  403. static int
  404. createFile(const char *name, int path_start)
  405. {
  406. int fd;
  407. char buf[1024];
  408. int n;
  409. int rc;
  410. if(name[path_start] == '/')
  411. path_start++;
  412. if(path_start < 2 || name[path_start - 1] != '/' ) {
  413. do_log(L_ERROR, "Incorrect name %s (%d).\n", name, path_start);
  414. return -1;
  415. }
  416. fd = open(name, O_RDWR | O_CREAT | O_EXCL | O_BINARY,
  417. diskCacheFilePermissions);
  418. if(fd >= 0)
  419. return fd;
  420. if(errno != ENOENT) {
  421. do_log_error(L_ERROR, errno, "Couldn't create disk file %s", name);
  422. return -1;
  423. }
  424. n = path_start;
  425. while(name[n] != '\0' && n < 1024) {
  426. while(name[n] != '/' && name[n] != '\0' && n < 512)
  427. n++;
  428. if(name[n] != '/' || n >= 1024)
  429. break;
  430. memcpy(buf, name, n + 1);
  431. buf[n + 1] = '\0';
  432. rc = mkdir(buf, diskCacheDirectoryPermissions);
  433. if(rc < 0 && errno != EEXIST) {
  434. do_log_error(L_ERROR, errno, "Couldn't create directory %s", buf);
  435. return -1;
  436. }
  437. n++;
  438. }
  439. fd = open(name, O_RDWR | O_CREAT | O_EXCL | O_BINARY,
  440. diskCacheFilePermissions);
  441. if(fd < 0) {
  442. do_log_error(L_ERROR, errno, "Couldn't create file %s", name);
  443. return -1;
  444. }
  445. return fd;
  446. }
  447. static int
  448. chooseBodyOffset(int n, ObjectPtr object)
  449. {
  450. int length = MAX(object->size, object->length);
  451. int body_offset;
  452. if(object->length >= 0 && object->length + n < 4096 - 4)
  453. return -1; /* no gap for small objects */
  454. if(n <= 128)
  455. body_offset = 256;
  456. else if(n <= 192)
  457. body_offset = 384;
  458. else if(n <= 256)
  459. body_offset = 512;
  460. else if(n <= 384)
  461. body_offset = 768;
  462. else if(n <= 512)
  463. body_offset = 1024;
  464. else if(n <= 1024)
  465. body_offset = 2048;
  466. else if(n < 2048)
  467. body_offset = 4096;
  468. else
  469. body_offset = ((n + 32 + 4095) / 4096 + 1) * 4096;
  470. /* Tweak the gap so that we don't use up a full disk block for
  471. a small tail */
  472. if(object->length >= 0 && object->length < 64 * 1024) {
  473. int last = (body_offset + object->length) % 4096;
  474. int gap = body_offset - n - 32;
  475. if(last < gap / 2)
  476. body_offset -= last;
  477. }
  478. /* Rewriting large objects is expensive -- don't use small gaps.
  479. This has the additional benefit of block-aligning large bodies. */
  480. if(length >= 64 * 1024) {
  481. int min_gap, min_offset;
  482. if(length >= 512 * 1024)
  483. min_gap = 4096;
  484. else if(length >= 256 * 1024)
  485. min_gap = 2048;
  486. else
  487. min_gap = 1024;
  488. min_offset = ((n + 32 + min_gap - 1) / min_gap + 1) * min_gap;
  489. body_offset = MAX(body_offset, min_offset);
  490. }
  491. return body_offset;
  492. }
  493. /* Assumes the file descriptor is at offset 0. Returns -1 on failure,
  494. otherwise the offset at which the file descriptor is left. */
  495. /* If chunk is not null, it should be the first chunk of the object,
  496. and will be written out in the same operation if possible. */
  497. static int
  498. writeHeaders(int fd, int *body_offset_return,
  499. ObjectPtr object, char *chunk, int chunk_len)
  500. {
  501. int n, rc, error = -1;
  502. int body_offset = *body_offset_return;
  503. char *buf = NULL;
  504. int buf_is_chunk = 0;
  505. int bufsize = 0;
  506. if(object->flags & OBJECT_LOCAL)
  507. return -1;
  508. if(body_offset > CHUNK_SIZE)
  509. goto overflow;
  510. /* get_chunk might trigger object expiry */
  511. bufsize = CHUNK_SIZE;
  512. buf_is_chunk = 1;
  513. buf = maybe_get_chunk();
  514. if(!buf) {
  515. bufsize = 2048;
  516. buf_is_chunk = 0;
  517. buf = malloc(2048);
  518. if(buf == NULL) {
  519. do_log(L_ERROR, "Couldn't allocate buffer.\n");
  520. return -1;
  521. }
  522. }
  523. format_again:
  524. n = snnprintf(buf, 0, bufsize, "HTTP/1.1 %3d %s",
  525. object->code, object->message->string);
  526. n = httpWriteObjectHeaders(buf, n, bufsize, object, 0, -1);
  527. if(n < 0)
  528. goto overflow;
  529. n = snnprintf(buf, n, bufsize, "\r\nX-Polipo-Location: ");
  530. n = snnprint_n(buf, n, bufsize, object->key, object->key_size);
  531. if(object->age >= 0 && object->age != object->date) {
  532. n = snnprintf(buf, n, bufsize, "\r\nX-Polipo-Date: ");
  533. n = format_time(buf, n, bufsize, object->age);
  534. }
  535. if(object->atime >= 0) {
  536. n = snnprintf(buf, n, bufsize, "\r\nX-Polipo-Access: ");
  537. n = format_time(buf, n, bufsize, object->atime);
  538. }
  539. if(n < 0)
  540. goto overflow;
  541. if(body_offset < 0)
  542. body_offset = chooseBodyOffset(n, object);
  543. if(body_offset > bufsize)
  544. goto overflow;
  545. if(body_offset > 0 && body_offset != n + 4)
  546. n = snnprintf(buf, n, bufsize, "\r\nX-Polipo-Body-Offset: %d",
  547. body_offset);
  548. n = snnprintf(buf, n, bufsize, "\r\n\r\n");
  549. if(n < 0)
  550. goto overflow;
  551. if(body_offset < 0)
  552. body_offset = n;
  553. if(n > body_offset) {
  554. error = -2;
  555. goto fail;
  556. }
  557. if(n < body_offset)
  558. memset(buf + n, 0, body_offset - n);
  559. again:
  560. #ifdef HAVE_READV_WRITEV
  561. if(chunk_len > 0) {
  562. struct iovec iov[2];
  563. iov[0].iov_base = buf;
  564. iov[0].iov_len = body_offset;
  565. iov[1].iov_base = chunk;
  566. iov[1].iov_len = chunk_len;
  567. rc = writev(fd, iov, 2);
  568. } else
  569. #endif
  570. rc = write(fd, buf, body_offset);
  571. if(rc < 0 && errno == EINTR)
  572. goto again;
  573. if(rc < body_offset)
  574. goto fail;
  575. if(object->length >= 0 &&
  576. rc - body_offset >= object->length)
  577. object->flags |= OBJECT_DISK_ENTRY_COMPLETE;
  578. *body_offset_return = body_offset;
  579. if(buf_is_chunk)
  580. dispose_chunk(buf);
  581. else
  582. free(buf);
  583. return rc;
  584. overflow:
  585. if(bufsize < bigBufferSize) {
  586. char *oldbuf = buf;
  587. buf = malloc(bigBufferSize);
  588. if(!buf) {
  589. do_log(L_ERROR, "Couldn't allocate big buffer.\n");
  590. goto fail;
  591. }
  592. bufsize = bigBufferSize;
  593. if(oldbuf) {
  594. if(buf_is_chunk)
  595. dispose_chunk(oldbuf);
  596. else
  597. free(oldbuf);
  598. }
  599. buf_is_chunk = 0;
  600. goto format_again;
  601. }
  602. /* fall through */
  603. fail:
  604. if(buf_is_chunk)
  605. dispose_chunk(buf);
  606. else
  607. free(buf);
  608. return error;
  609. }
  610. typedef struct _MimeEntry {
  611. char *extension;
  612. char *mime;
  613. } MimeEntryRec;
  614. static const MimeEntryRec mimeEntries[] = {
  615. { "html", "text/html" },
  616. { "htm", "text/html" },
  617. { "text", "text/plain" },
  618. { "txt", "text/plain" },
  619. { "png", "image/png" },
  620. { "gif", "image/gif" },
  621. { "jpeg", "image/jpeg" },
  622. { "jpg", "image/jpeg" },
  623. { "ico", "image/x-icon" },
  624. { "pdf", "application/pdf" },
  625. { "ps", "application/postscript" },
  626. { "tar", "application/x-tar" },
  627. { "pac", "application/x-ns-proxy-autoconfig" },
  628. { "css", "text/css" },
  629. { "js", "application/x-javascript" },
  630. { "xml", "text/xml" },
  631. { "swf", "application/x-shockwave-flash" },
  632. };
  633. static char*
  634. localObjectMimeType(ObjectPtr object, char **encoding_return)
  635. {
  636. char *name = object->key;
  637. int nlen = object->key_size;
  638. int i;
  639. assert(nlen >= 1);
  640. if(name[nlen - 1] == '/') {
  641. *encoding_return = NULL;
  642. return "text/html";
  643. }
  644. if(nlen < 3) {
  645. *encoding_return = NULL;
  646. return "application/octet-stream";
  647. }
  648. if(memcmp(name + nlen - 3, ".gz", 3) == 0) {
  649. *encoding_return = "x-gzip";
  650. nlen -= 3;
  651. } else if(memcmp(name + nlen - 2, ".Z", 2) == 0) {
  652. *encoding_return = "x-compress";
  653. nlen -= 2;
  654. } else {
  655. *encoding_return = NULL;
  656. }
  657. for(i = 0; i < sizeof(mimeEntries) / sizeof(mimeEntries[0]); i++) {
  658. int len = strlen(mimeEntries[i].extension);
  659. if(nlen > len &&
  660. name[nlen - len - 1] == '.' &&
  661. memcmp(name + nlen - len, mimeEntries[i].extension, len) == 0)
  662. return mimeEntries[i].mime;
  663. }
  664. return "application/octet-stream";
  665. }
  666. /* Same interface as validateEntry -- see below */
  667. int
  668. validateLocalEntry(ObjectPtr object, int fd,
  669. int *body_offset_return, off_t *offset_return)
  670. {
  671. struct stat ss;
  672. char buf[512];
  673. int n, rc;
  674. char *encoding;
  675. rc = fstat(fd, &ss);
  676. if(rc < 0) {
  677. do_log_error(L_ERROR, errno, "Couldn't stat");
  678. return -1;
  679. }
  680. if(S_ISREG(ss.st_mode)) {
  681. if(!(ss.st_mode & S_IROTH) ||
  682. (object->length >= 0 && object->length != ss.st_size) ||
  683. (object->last_modified >= 0 &&
  684. object->last_modified != ss.st_mtime))
  685. return -1;
  686. } else {
  687. notifyObject(object);
  688. return -1;
  689. }
  690. n = snnprintf(buf, 0, 512, "%lx-%lx-%lx",
  691. (unsigned long)ss.st_ino,
  692. (unsigned long)ss.st_size,
  693. (unsigned long)ss.st_mtime);
  694. if(n >= 512)
  695. n = -1;
  696. if(n > 0 && object->etag) {
  697. if(strlen(object->etag) != n ||
  698. memcmp(object->etag, buf, n) != 0)
  699. return -1;
  700. }
  701. if(!(object->flags & OBJECT_INITIAL)) {
  702. if(!object->last_modified && !object->etag)
  703. return -1;
  704. }
  705. if(object->flags & OBJECT_INITIAL) {
  706. object->length = ss.st_size;
  707. object->last_modified = ss.st_mtime;
  708. object->date = current_time.tv_sec;
  709. object->age = current_time.tv_sec;
  710. object->code = 200;
  711. if(n > 0)
  712. object->etag = strdup(buf); /* okay if fails */
  713. object->message = internAtom("Okay");
  714. n = snnprintf(buf, 0, 512,
  715. "\r\nServer: Polipo"
  716. "\r\nContent-Type: %s",
  717. localObjectMimeType(object, &encoding));
  718. if(encoding != NULL)
  719. n = snnprintf(buf, n, 512,
  720. "\r\nContent-Encoding: %s", encoding);
  721. if(n < 0)
  722. return -1;
  723. object->headers = internAtomN(buf, n);
  724. if(object->headers == NULL)
  725. return -1;
  726. object->flags &= ~OBJECT_INITIAL;
  727. }
  728. if(body_offset_return)
  729. *body_offset_return = 0;
  730. if(offset_return)
  731. *offset_return = 0;
  732. return 0;
  733. }
  734. /* Assumes fd is at offset 0.
  735. Returns -1 if not valid, 1 if metadata should be written out, 0
  736. otherwise. */
  737. int
  738. validateEntry(ObjectPtr object, int fd,
  739. int *body_offset_return, off_t *offset_return)
  740. {
  741. char *buf;
  742. int buf_is_chunk, bufsize;
  743. int rc, n;
  744. int dummy;
  745. int code;
  746. AtomPtr headers;
  747. time_t date, last_modified, expires, polipo_age, polipo_access;
  748. int length;
  749. off_t offset = -1;
  750. int body_offset;
  751. char *etag;
  752. AtomPtr via;
  753. CacheControlRec cache_control;
  754. char *location;
  755. AtomPtr message;
  756. int dirty = 0;
  757. if(object->flags & OBJECT_LOCAL)
  758. return validateLocalEntry(object, fd,
  759. body_offset_return, offset_return);
  760. if(!(object->flags & OBJECT_PUBLIC) && (object->flags & OBJECT_INITIAL))
  761. return 0;
  762. /* get_chunk might trigger object expiry */
  763. bufsize = CHUNK_SIZE;
  764. buf_is_chunk = 1;
  765. buf = maybe_get_chunk();
  766. if(!buf) {
  767. bufsize = 2048;
  768. buf_is_chunk = 0;
  769. buf = malloc(2048);
  770. if(buf == NULL) {
  771. do_log(L_ERROR, "Couldn't allocate buffer.\n");
  772. return -1;
  773. }
  774. }
  775. again:
  776. rc = read(fd, buf, bufsize);
  777. if(rc < 0) {
  778. if(errno == EINTR)
  779. goto again;
  780. do_log_error(L_ERROR, errno, "Couldn't read disk entry");
  781. goto fail;
  782. }
  783. offset = rc;
  784. parse_again:
  785. n = findEndOfHeaders(buf, 0, rc, &dummy);
  786. if(n < 0) {
  787. char *oldbuf = buf;
  788. if(bufsize < bigBufferSize) {
  789. buf = malloc(bigBufferSize);
  790. if(!buf) {
  791. do_log(L_ERROR, "Couldn't allocate big buffer.\n");
  792. goto fail;
  793. }
  794. bufsize = bigBufferSize;
  795. memcpy(buf, oldbuf, offset);
  796. if(buf_is_chunk)
  797. dispose_chunk(oldbuf);
  798. else
  799. free(oldbuf);
  800. buf_is_chunk = 0;
  801. again2:
  802. rc = read(fd, buf + offset, bufsize - offset);
  803. if(rc < 0) {
  804. if(errno == EINTR)
  805. goto again2;
  806. do_log_error(L_ERROR, errno, "Couldn't read disk entry");
  807. goto fail;
  808. }
  809. offset += rc;
  810. goto parse_again;
  811. }
  812. do_log(L_ERROR, "Couldn't parse disk entry.\n");
  813. goto fail;
  814. }
  815. rc = httpParseServerFirstLine(buf, &code, &dummy, &message);
  816. if(rc < 0) {
  817. do_log(L_ERROR, "Couldn't parse disk entry.\n");
  818. goto fail;
  819. }
  820. if(object->code != 0 && object->code != code) {
  821. releaseAtom(message);
  822. goto fail;
  823. }
  824. rc = httpParseHeaders(0, NULL, buf, rc, NULL,
  825. &headers, &length, &cache_control, NULL, NULL,
  826. &date, &last_modified, &expires, &polipo_age,
  827. &polipo_access, &body_offset,
  828. NULL, &etag, NULL,
  829. NULL, NULL, &location, &via, NULL);
  830. if(rc < 0) {
  831. releaseAtom(message);
  832. goto fail;
  833. }
  834. if(body_offset < 0)
  835. body_offset = n;
  836. if(!location || strlen(location) != object->key_size ||
  837. memcmp(location, object->key, object->key_size) != 0) {
  838. do_log(L_ERROR, "Inconsistent cache file for %s.\n", scrub(location));
  839. goto invalid;
  840. }
  841. if(polipo_age < 0)
  842. polipo_age = date;
  843. if(polipo_age < 0) {
  844. do_log(L_ERROR, "Undated disk entry for %s.\n", scrub(location));
  845. goto invalid;
  846. }
  847. if(!(object->flags & OBJECT_INITIAL)) {
  848. if((last_modified >= 0) != (object->last_modified >= 0))
  849. goto invalid;
  850. if((object->cache_control & CACHE_MISMATCH) ||
  851. (cache_control.flags & CACHE_MISMATCH))
  852. goto invalid;
  853. if(last_modified >= 0 && object->last_modified >= 0 &&
  854. last_modified != object->last_modified)
  855. goto invalid;
  856. if(length >= 0 && object->length >= 0)
  857. if(length != object->length)
  858. goto invalid;
  859. if(!!etag != !!object->etag)
  860. goto invalid;
  861. if(etag && object->etag && strcmp(etag, object->etag) != 0)
  862. goto invalid;
  863. /* If we don't have a usable ETag, and either CACHE_VARY or we
  864. don't have a last-modified date, we validate disk entries by
  865. using their date. */
  866. if(!(etag && object->etag) &&
  867. (!(last_modified >= 0 && object->last_modified >= 0) ||
  868. ((cache_control.flags & CACHE_VARY) ||
  869. (object->cache_control & CACHE_VARY)))) {
  870. if(date >= 0 && date != object->date)
  871. goto invalid;
  872. if(polipo_age >= 0 && polipo_age != object->age)
  873. goto invalid;
  874. }
  875. if((object->cache_control & CACHE_VARY) && dontTrustVaryETag >= 1) {
  876. /* Check content-type to work around mod_gzip bugs */
  877. if(!httpHeaderMatch(atomContentType, object->headers, headers) ||
  878. !httpHeaderMatch(atomContentEncoding, object->headers, headers))
  879. goto invalid;
  880. }
  881. }
  882. if(location)
  883. free(location);
  884. if(headers) {
  885. if(!object->headers)
  886. object->headers = headers;
  887. else
  888. releaseAtom(headers);
  889. }
  890. if(object->code == 0) {
  891. object->code = code;
  892. object->message = retainAtom(message);
  893. }
  894. if(object->date <= date)
  895. object->date = date;
  896. else
  897. dirty = 1;
  898. if(object->last_modified < 0)
  899. object->last_modified = last_modified;
  900. if(object->expires < 0)
  901. object->expires = expires;
  902. else if(object->expires > expires)
  903. dirty = 1;
  904. if(object->age < 0)
  905. object->age = polipo_age;
  906. else if(object->age > polipo_age)
  907. dirty = 1;
  908. if(object->atime <= polipo_access)
  909. object->atime = polipo_access;
  910. else
  911. dirty = 1;
  912. object->cache_control |= cache_control.flags;
  913. object->max_age = cache_control.max_age;
  914. object->s_maxage = cache_control.s_maxage;
  915. if(object->age < 0) object->age = object->date;
  916. if(object->age < 0) object->age = 0; /* a long time ago */
  917. if(object->length < 0) object->length = length;
  918. if(!object->etag)
  919. object->etag = etag;
  920. else {
  921. if(etag)
  922. free(etag);
  923. }
  924. releaseAtom(message);
  925. if(object->flags & OBJECT_INITIAL) object->via = via;
  926. object->flags &= ~OBJECT_INITIAL;
  927. if(offset > body_offset) {
  928. /* We need to make sure we don't invoke object expiry recursively */
  929. objectSetChunks(object, 1);
  930. if(object->numchunks >= 1) {
  931. if(object->chunks[0].data == NULL)
  932. object->chunks[0].data = maybe_get_chunk();
  933. if(object->chunks[0].data)
  934. objectAddData(object, buf + body_offset,
  935. 0, MIN(offset - body_offset, CHUNK_SIZE));
  936. }
  937. }
  938. httpTweakCachability(object);
  939. if(buf_is_chunk)
  940. dispose_chunk(buf);
  941. else
  942. free(buf);
  943. if(body_offset_return) *body_offset_return = body_offset;
  944. if(offset_return) *offset_return = offset;
  945. return dirty;
  946. invalid:
  947. releaseAtom(message);
  948. if(etag) free(etag);
  949. if(location) free(location);
  950. if(via) releaseAtom(via);
  951. /* fall through */
  952. fail:
  953. if(buf_is_chunk)
  954. dispose_chunk(buf);
  955. else
  956. free(buf);
  957. return -1;
  958. }
  959. void
  960. dirtyDiskEntry(ObjectPtr object)
  961. {
  962. DiskCacheEntryPtr entry = object->disk_entry;
  963. if(entry && entry != &negativeEntry) entry->metadataDirty = 1;
  964. }
  965. int
  966. revalidateDiskEntry(ObjectPtr object)
  967. {
  968. DiskCacheEntryPtr entry = object->disk_entry;
  969. int rc;
  970. int body_offset;
  971. if(!entry || entry == &negativeEntry)
  972. return 1;
  973. CHECK_ENTRY(entry);
  974. rc = entrySeek(entry, 0);
  975. if(rc < 0) return 0;
  976. rc = validateEntry(object, entry->fd, &body_offset, &entry->offset);
  977. if(rc < 0) {
  978. destroyDiskEntry(object, 0);
  979. return 0;
  980. }
  981. if(body_offset != entry->body_offset) {
  982. do_log(L_WARN, "Inconsistent body offset (%d != %d).\n",
  983. body_offset, entry->body_offset);
  984. destroyDiskEntry(object, 0);
  985. return 0;
  986. }
  987. entry->metadataDirty |= !!rc;
  988. CHECK_ENTRY(entry);
  989. return 1;
  990. }
  991. static DiskCacheEntryPtr
  992. makeDiskEntry(ObjectPtr object, int create)
  993. {
  994. DiskCacheEntryPtr entry = NULL;
  995. char buf[1024];
  996. int fd = -1;
  997. int negative = 0, size = -1, name_len = -1;
  998. char *name = NULL;
  999. off_t offset = -1;
  1000. int body_offset = -1;
  1001. int rc;
  1002. int local = (object->flags & OBJECT_LOCAL) != 0;
  1003. int dirty = 0;
  1004. if(local && create)
  1005. return NULL;
  1006. if(!local && !(object->flags & OBJECT_PUBLIC))
  1007. return NULL;
  1008. if(maxDiskCacheEntrySize >= 0) {
  1009. if(object->length > 0) {
  1010. if(object->length > maxDiskCacheEntrySize)
  1011. return NULL;
  1012. } else {
  1013. if(object->size > maxDiskCacheEntrySize)
  1014. return NULL;
  1015. }
  1016. }
  1017. if(object->disk_entry) {
  1018. entry = object->disk_entry;
  1019. CHECK_ENTRY(entry);
  1020. if(entry != &negativeEntry) {
  1021. /* We'll keep the entry -- put it at the front. */
  1022. if(entry != diskEntries && entry != &negativeEntry) {
  1023. entry->previous->next = entry->next;
  1024. if(entry->next)
  1025. entry->next->previous = entry->previous;
  1026. else
  1027. diskEntriesLast = entry->previous;
  1028. entry->next = diskEntries;
  1029. diskEntries->previous = entry;
  1030. entry->previous = NULL;
  1031. diskEntries = entry;
  1032. }
  1033. return entry;
  1034. } else {
  1035. if(entry == &negativeEntry) {
  1036. negative = 1;
  1037. if(!create) return NULL;
  1038. object->disk_entry = NULL;
  1039. }
  1040. entry = NULL;
  1041. destroyDiskEntry(object, 0);
  1042. }
  1043. }
  1044. if(numDiskEntries > maxDiskEntries)
  1045. destroyDiskEntry(diskEntriesLast->object, 0);
  1046. if(!local) {
  1047. if(diskCacheRoot == NULL || diskCacheRoot->length <= 0)
  1048. return NULL;
  1049. name_len = urlFilename(buf, 1024, object->key, object->key_size);
  1050. if(name_len < 0) return NULL;
  1051. if(!negative)
  1052. fd = open(buf, O_RDWR | O_BINARY);
  1053. if(fd >= 0) {
  1054. rc = validateEntry(object, fd, &body_offset, &offset);
  1055. if(rc >= 0) {
  1056. dirty = rc;
  1057. } else {
  1058. close(fd);
  1059. fd = -1;
  1060. rc = unlink(buf);
  1061. if(rc < 0 && errno != ENOENT) {
  1062. do_log_error(L_WARN, errno,
  1063. "Couldn't unlink stale disk entry %s",
  1064. scrub(buf));
  1065. /* But continue -- it's okay to have stale entries. */
  1066. }
  1067. }
  1068. }
  1069. if(fd < 0 && create && name_len > 0 &&
  1070. !(object->flags & OBJECT_INITIAL)) {
  1071. fd = createFile(buf, diskCacheRoot->length);
  1072. if(fd < 0)
  1073. return NULL;
  1074. if(fd >= 0) {
  1075. char *data = NULL;
  1076. int dsize = 0;
  1077. if(object->numchunks > 0) {
  1078. data = object->chunks[0].data;
  1079. dsize = object->chunks[0].size;
  1080. }
  1081. rc = writeHeaders(fd, &body_offset, object, data, dsize);
  1082. if(rc < 0) {
  1083. do_log_error(L_ERROR, errno, "Couldn't write headers");
  1084. rc = unlink(buf);
  1085. if(rc < 0 && errno != ENOENT)
  1086. do_log_error(L_ERROR, errno,
  1087. "Couldn't unlink truncated entry %s",
  1088. scrub(buf));
  1089. close(fd);
  1090. return NULL;
  1091. }
  1092. assert(rc >= body_offset);
  1093. size = rc - body_offset;
  1094. offset = rc;
  1095. dirty = 0;
  1096. }
  1097. }
  1098. } else {
  1099. /* local */
  1100. if(localDocumentRoot == NULL || localDocumentRoot->length == 0)
  1101. return NULL;
  1102. name_len =
  1103. localFilename(buf, 1024, object->key, object->key_size);
  1104. if(name_len < 0)
  1105. return NULL;
  1106. fd = open(buf, O_RDONLY | O_BINARY);
  1107. if(fd >= 0) {
  1108. if(validateEntry(object, fd, &body_offset, NULL) < 0) {
  1109. close(fd);
  1110. fd = -1;
  1111. }
  1112. }
  1113. offset = 0;
  1114. }
  1115. if(fd < 0) {
  1116. object->disk_entry = &negativeEntry;
  1117. return NULL;
  1118. }
  1119. assert(body_offset >= 0);
  1120. name = strdup_n(buf, name_len);
  1121. if(name == NULL) {
  1122. do_log(L_ERROR, "Couldn't allocate name.\n");
  1123. close(fd);
  1124. fd = -1;
  1125. return NULL;
  1126. }
  1127. entry = malloc(sizeof(DiskCacheEntryRec));
  1128. if(entry == NULL) {
  1129. do_log(L_ERROR, "Couldn't allocate entry.\n");
  1130. free(name);
  1131. close(fd);
  1132. return NULL;
  1133. }
  1134. entry->filename = name;
  1135. entry->object = object;
  1136. entry->fd = fd;
  1137. entry->body_offset = body_offset;
  1138. entry->local = local;
  1139. entry->offset = offset;
  1140. entry->size = size;
  1141. entry->metadataDirty = dirty;
  1142. entry->next = diskEntries;
  1143. if(diskEntries)
  1144. diskEntries->previous = entry;
  1145. diskEntries = entry;
  1146. if(diskEntriesLast == NULL)
  1147. diskEntriesLast = entry;
  1148. entry->previous = NULL;
  1149. numDiskEntries++;
  1150. object->disk_entry = entry;
  1151. CHECK_ENTRY(entry);
  1152. return entry;
  1153. }
  1154. /* Rewrite a disk cache entry, used when the body offset needs to change. */
  1155. static int
  1156. rewriteEntry(ObjectPtr object)
  1157. {
  1158. int old_body_offset = object->disk_entry->body_offset;
  1159. int fd, rc, n;
  1160. DiskCacheEntryPtr entry;
  1161. char* buf;
  1162. int buf_is_chunk, bufsize;
  1163. int offset;
  1164. fd = dup(object->disk_entry->fd);
  1165. if(fd < 0) {
  1166. do_log_error(L_ERROR, errno, "Couldn't duplicate file descriptor");
  1167. return -1;
  1168. }
  1169. rc = destroyDiskEntry(object, 1);
  1170. if(rc < 0) {
  1171. close(fd);
  1172. return -1;
  1173. }
  1174. entry = makeDiskEntry(object, 1);
  1175. if(!entry) {
  1176. close(fd);
  1177. return -1;
  1178. }
  1179. offset = diskEntrySize(object);
  1180. if(offset < 0) {
  1181. close(fd);
  1182. return -1;
  1183. }
  1184. bufsize = CHUNK_SIZE;
  1185. buf_is_chunk = 1;
  1186. buf = maybe_get_chunk();
  1187. if(!buf) {
  1188. bufsize = 2048;
  1189. buf_is_chunk = 0;
  1190. buf = malloc(2048);
  1191. if(buf == NULL) {
  1192. do_log(L_ERROR, "Couldn't allocate buffer.\n");
  1193. close(fd);
  1194. return -1;
  1195. }
  1196. }
  1197. rc = lseek(fd, old_body_offset + offset, SEEK_SET);
  1198. if(rc < 0)
  1199. goto done;
  1200. while(1) {
  1201. CHECK_ENTRY(entry);
  1202. n = read(fd, buf, bufsize);
  1203. if(n < 0 && errno == EINTR)
  1204. continue;
  1205. if(n <= 0)
  1206. goto done;
  1207. rc = entrySeek(entry, entry->body_offset + offset);
  1208. if(rc < 0)
  1209. goto done;
  1210. write_again:
  1211. rc = write(entry->fd, buf, n);
  1212. if(rc >= 0) {
  1213. entry->offset += rc;
  1214. entry->size += rc;
  1215. } else if(errno == EINTR) {
  1216. goto write_again;
  1217. }
  1218. if(rc < n)
  1219. goto done;
  1220. }
  1221. done:
  1222. CHECK_ENTRY(entry);
  1223. if(object->length >= 0 && entry->size == object->length)
  1224. object->flags |= OBJECT_DISK_ENTRY_COMPLETE;
  1225. close(fd);
  1226. if(buf_is_chunk)
  1227. dispose_chunk(buf);
  1228. else
  1229. free(buf);
  1230. return 1;
  1231. }
  1232. int
  1233. destroyDiskEntry(ObjectPtr object, int d)
  1234. {
  1235. DiskCacheEntryPtr entry = object->disk_entry;
  1236. int rc, urc = 1;
  1237. assert(!entry || !entry->local || !d);
  1238. if(d && !entry)
  1239. entry = makeDiskEntry(object, 0);
  1240. CHECK_ENTRY(entry);
  1241. if(!entry || entry == &negativeEntry) {
  1242. return 1;
  1243. }
  1244. assert(entry->object == object);
  1245. if(maxDiskCacheEntrySize >= 0 && object->size > maxDiskCacheEntrySize) {
  1246. /* See writeoutToDisk */
  1247. d = 1;
  1248. }
  1249. if(d) {
  1250. entry->object->flags &= ~OBJECT_DISK_ENTRY_COMPLETE;
  1251. if(entry->filename) {
  1252. urc = unlink(entry->filename);
  1253. if(urc < 0)
  1254. do_log_error(L_WARN, errno,
  1255. "Couldn't unlink %s", scrub(entry->filename));
  1256. }
  1257. } else {
  1258. if(entry && entry->metadataDirty)
  1259. writeoutMetadata(object);
  1260. makeDiskEntry(object, 0);
  1261. /* rewriteDiskEntry may change the disk entry */
  1262. entry = object->disk_entry;
  1263. if(entry == NULL || entry == &negativeEntry)
  1264. return 0;
  1265. if(diskCacheWriteoutOnClose > 0) {
  1266. reallyWriteoutToDisk(object, -1, diskCacheWriteoutOnClose);
  1267. entry = object->disk_entry;
  1268. if(entry == NULL || entry == &negativeEntry)
  1269. return 0;
  1270. }
  1271. }
  1272. again:
  1273. rc = close(entry->fd);
  1274. if(rc < 0 && errno == EINTR)
  1275. goto again;
  1276. entry->fd = -1;
  1277. if(entry->filename)
  1278. free(entry->filename);
  1279. entry->filename = NULL;
  1280. if(entry->previous)
  1281. entry->previous->next = entry->next;
  1282. else
  1283. diskEntries = entry->next;
  1284. if(entry->next)
  1285. entry->next->previous = entry->previous;
  1286. else
  1287. diskEntriesLast = entry->previous;
  1288. numDiskEntries--;
  1289. assert(numDiskEntries >= 0);
  1290. free(entry);
  1291. object->disk_entry = NULL;
  1292. if(urc < 0)
  1293. return -1;
  1294. else
  1295. return 1;
  1296. }
  1297. ObjectPtr
  1298. objectGetFromDisk(ObjectPtr object)
  1299. {
  1300. DiskCacheEntryPtr entry = makeDiskEntry(object, 0);
  1301. if(!entry) return NULL;
  1302. return object;
  1303. }
  1304. int
  1305. objectFillFromDisk(ObjectPtr object, int offset, int chunks)
  1306. {
  1307. DiskCacheEntryPtr entry;
  1308. int rc, result;
  1309. int i, j, k;
  1310. int complete;
  1311. if(object->type != OBJECT_HTTP)
  1312. return 0;
  1313. if(object->flags & OBJECT_LINEAR)
  1314. return 0;
  1315. if(object->length >= 0) {
  1316. chunks = MIN(chunks,
  1317. (object->length - offset + CHUNK_SIZE - 1) / CHUNK_SIZE);
  1318. }
  1319. rc = objectSetChunks(object, offset / CHUNK_SIZE + chunks);
  1320. if(rc < 0)
  1321. return 0;
  1322. complete = 1;
  1323. if(object->flags & OBJECT_INITIAL) {
  1324. complete = 0;
  1325. } else if((object->length < 0 || object->size < object->length) &&
  1326. object->size < (offset / CHUNK_SIZE + chunks) * CHUNK_SIZE) {
  1327. complete = 0;
  1328. } else {
  1329. for(k = 0; k < chunks; k++) {
  1330. int s;
  1331. i = offset / CHUNK_SIZE + k;
  1332. s = MIN(CHUNK_SIZE, object->size - i * CHUNK_SIZE);
  1333. if(object->chunks[i].size < s) {
  1334. complete = 0;
  1335. break;
  1336. }
  1337. }
  1338. }
  1339. if(complete)
  1340. return 1;
  1341. /* This has the side-effect of revalidating the entry, which is
  1342. what makes HEAD requests work. */
  1343. entry = makeDiskEntry(object, 0);
  1344. if(!entry)
  1345. return 0;
  1346. for(k = 0; k < chunks; k++) {
  1347. i = offset / CHUNK_SIZE + k;
  1348. if(!object->chunks[i].data)
  1349. object->chunks[i].data = get_chunk();
  1350. if(!object->chunks[i].data) {
  1351. chunks = k;
  1352. break;
  1353. }
  1354. lockChunk(object, i);
  1355. }
  1356. result = 0;
  1357. for(k = 0; k < chunks; k++) {
  1358. int o;
  1359. i = offset / CHUNK_SIZE + k;
  1360. j = object->chunks[i].size;
  1361. o = i * CHUNK_SIZE + j;
  1362. if(object->chunks[i].size == CHUNK_SIZE)
  1363. continue;
  1364. if(entry->size >= 0 && entry->size <= o)
  1365. break;
  1366. if(entry->offset != entry->body_offset + o) {
  1367. rc = entrySeek(entry, entry->body_offset + o);
  1368. if(rc < 0) {
  1369. result = 0;
  1370. break;
  1371. }
  1372. }
  1373. CHECK_ENTRY(entry);
  1374. again:
  1375. rc = read(entry->fd, object->chunks[i].data + j, CHUNK_SIZE - j);
  1376. if(rc < 0) {
  1377. if(errno == EINTR)
  1378. goto again;
  1379. entry->offset = -1;
  1380. do_log_error(L_ERROR, errno, "Couldn't read");
  1381. break;
  1382. }
  1383. entry->offset += rc;
  1384. object->chunks[i].size += rc;
  1385. if(object->size < o + rc)
  1386. object->size = o + rc;
  1387. if(entry->object->length >= 0 && entry->size < 0 &&
  1388. entry->offset - entry->body_offset == entry->object->length)
  1389. entry->size = entry->object->length;
  1390. if(rc < CHUNK_SIZE - j) {
  1391. /* Paranoia: the read may have been interrupted half-way. */
  1392. if(entry->size < 0) {
  1393. if(rc == 0 ||
  1394. (entry->object->length >= 0 &&
  1395. entry->object->length ==
  1396. entry->offset - entry->body_offset))
  1397. entry->size = entry->offset - entry->body_offset;
  1398. break;
  1399. } else if(entry->size != entry->offset - entry->body_offset) {
  1400. if(rc == 0 ||
  1401. entry->size < entry->offset - entry->body_offset) {
  1402. do_log(L_WARN,
  1403. "Disk entry size changed behind our back: "
  1404. "%ld -> %ld (%d).\n",
  1405. (long)entry->size,
  1406. (long)entry->offset - entry->body_offset,
  1407. object->size);
  1408. entry->size = -1;
  1409. }
  1410. }
  1411. break;
  1412. }
  1413. CHECK_ENTRY(entry);
  1414. result = 1;
  1415. }
  1416. CHECK_ENTRY(object->disk_entry);
  1417. for(k = 0; k < chunks; k++) {
  1418. i = offset / CHUNK_SIZE + k;
  1419. unlockChunk(object, i);
  1420. }
  1421. if(result > 0) {
  1422. notifyObject(object);
  1423. return 1;
  1424. } else {
  1425. return 0;
  1426. }
  1427. }
  1428. int
  1429. writeoutToDisk(ObjectPtr object, int upto, int max)
  1430. {
  1431. if(maxDiskCacheEntrySize >= 0 && object->size > maxDiskCacheEntrySize) {
  1432. /* An object was created with an unknown length, and then grew
  1433. beyond maxDiskCacheEntrySize. Destroy the disk entry. */
  1434. destroyDiskEntry(object, 1);
  1435. return 0;
  1436. }
  1437. return reallyWriteoutToDisk(object, upto, max);
  1438. }
  1439. static int
  1440. reallyWriteoutToDisk(ObjectPtr object, int upto, int max)
  1441. {
  1442. DiskCacheEntryPtr entry;
  1443. int rc;
  1444. int i, j;
  1445. int offset;
  1446. int bytes = 0;
  1447. if(upto < 0)
  1448. upto = object->size;
  1449. if((object->cache_control & CACHE_NO_STORE) ||
  1450. (object->flags & OBJECT_LOCAL))
  1451. return 0;
  1452. if((object->flags & OBJECT_DISK_ENTRY_COMPLETE) && !object->disk_entry)
  1453. return 0;
  1454. entry = makeDiskEntry(object, 1);
  1455. if(!entry) return 0;
  1456. assert(!entry->local);
  1457. if(object->flags & OBJECT_DISK_ENTRY_COMPLETE)
  1458. goto done;
  1459. diskEntrySize(object);
  1460. if(entry->size < 0)
  1461. return 0;
  1462. if(object->length >= 0 && entry->size >= object->length) {
  1463. object->flags |= OBJECT_DISK_ENTRY_COMPLETE;
  1464. goto done;
  1465. }
  1466. if(entry->size >= upto)
  1467. goto done;
  1468. offset = entry->size;
  1469. /* Avoid a seek in case we start writing at the beginning */
  1470. if(offset == 0 && entry->metadataDirty) {
  1471. writeoutMetadata(object);
  1472. /* rewriteDiskEntry may change the entry */
  1473. entry = makeDiskEntry(object, 0);
  1474. if(entry == NULL)
  1475. return 0;
  1476. }
  1477. rc = entrySeek(entry, offset + entry->body_offset);
  1478. if(rc < 0) return 0;
  1479. do {
  1480. if(max >= 0 && bytes >= max)
  1481. break;
  1482. CHECK_ENTRY(entry);
  1483. assert(entry->offset == offset + entry->body_offset);
  1484. i = offset / CHUNK_SIZE;
  1485. j = offset % CHUNK_SIZE;
  1486. if(i >= object->numchunks)
  1487. break;
  1488. if(object->chunks[i].size <= j)
  1489. break;
  1490. again:
  1491. rc = write(entry->fd, object->chunks[i].data + j,
  1492. object->chunks[i].size - j);
  1493. if(rc < 0) {
  1494. if(errno == EINTR)
  1495. goto again;
  1496. do_log_error(L_ERROR, errno, "Couldn't write disk entry");
  1497. break;
  1498. }
  1499. entry->offset += rc;
  1500. offset += rc;
  1501. bytes += rc;
  1502. if(entry->size < offset)
  1503. entry->size = offset;
  1504. } while(j + rc >= CHUNK_SIZE);
  1505. done:
  1506. CHECK_ENTRY(entry);
  1507. if(entry->metadataDirty)
  1508. writeoutMetadata(object);
  1509. return bytes;
  1510. }
  1511. int
  1512. writeoutMetadata(ObjectPtr object)
  1513. {
  1514. DiskCacheEntryPtr entry;
  1515. int rc;
  1516. if((object->cache_control & CACHE_NO_STORE) ||
  1517. (object->flags & OBJECT_LOCAL))
  1518. return 0;
  1519. entry = makeDiskEntry(object, 0);
  1520. if(entry == NULL || entry == &negativeEntry)
  1521. goto fail;
  1522. assert(!entry->local);
  1523. rc = entrySeek(entry, 0);
  1524. if(rc < 0) goto fail;
  1525. rc = writeHeaders(entry->fd, &entry->body_offset, object, NULL, 0);
  1526. if(rc == -2) {
  1527. rc = rewriteEntry(object);
  1528. if(rc < 0) return 0;
  1529. return 1;
  1530. }
  1531. if(rc < 0) goto fail;
  1532. entry->offset = rc;
  1533. entry->metadataDirty = 0;
  1534. return 1;
  1535. fail:
  1536. /* We need this in order to avoid trying to write this entry out
  1537. multiple times. */
  1538. if(entry && entry != &negativeEntry)
  1539. entry->metadataDirty = 0;
  1540. return 0;
  1541. }
  1542. static void
  1543. mergeDobjects(DiskObjectPtr dst, DiskObjectPtr src)
  1544. {
  1545. if(dst->filename == NULL) {
  1546. dst->filename = src->filename;
  1547. dst->body_offset = src->body_offset;
  1548. } else
  1549. free(src->filename);
  1550. free(src->location);
  1551. if(dst->length < 0)
  1552. dst->length = src->length;
  1553. if(dst->size < 0)
  1554. dst->size = src->size;
  1555. if(dst->age < 0)
  1556. dst->age = src->age;
  1557. if(dst->date < 0)
  1558. dst->date = src->date;
  1559. if(dst->last_modified < 0)
  1560. dst->last_modified = src->last_modified;
  1561. free(src);
  1562. }
  1563. DiskObjectPtr
  1564. readDiskObject(char *filename, struct stat *sb)
  1565. {
  1566. int fd, rc, n, dummy, code;
  1567. int length, size;
  1568. time_t date, last_modified, age, atime, expires;
  1569. char *location = NULL, *fn = NULL;
  1570. DiskObjectPtr dobject;
  1571. char *buf;
  1572. int buf_is_chunk, bufsize;
  1573. int body_offset;
  1574. struct stat ss;
  1575. fd = -1;
  1576. if(sb == NULL) {
  1577. rc = stat(filename, &ss);
  1578. if(rc < 0) {
  1579. do_log_error(L_WARN, errno, "Couldn't stat %s", scrub(filename));
  1580. return NULL;
  1581. }
  1582. sb = &ss;
  1583. }
  1584. buf_is_chunk = 1;
  1585. bufsize = CHUNK_SIZE;
  1586. buf = get_chunk();
  1587. if(buf == NULL) {
  1588. do_log(L_ERROR, "Couldn't allocate buffer.\n");
  1589. return NULL;
  1590. }
  1591. if(S_ISREG(sb->st_mode)) {
  1592. fd = open(filename, O_RDONLY | O_BINARY);
  1593. if(fd < 0)
  1594. goto fail;
  1595. again:
  1596. rc = read(fd, buf, bufsize);
  1597. if(rc < 0)
  1598. goto fail;
  1599. n = findEndOfHeaders(buf, 0, rc, &dummy);
  1600. if(n < 0) {
  1601. long lrc;
  1602. if(buf_is_chunk) {
  1603. dispose_chunk(buf);
  1604. buf_is_chunk = 0;
  1605. bufsize = bigBufferSize;
  1606. buf = malloc(bigBufferSize);
  1607. if(buf == NULL)
  1608. goto fail2;
  1609. lrc = lseek(fd, 0, SEEK_SET);
  1610. if(lrc < 0)
  1611. goto fail;
  1612. goto again;
  1613. }
  1614. goto fail;
  1615. }
  1616. rc = httpParseServerFirstLine(buf, &code, &dummy, NULL);
  1617. if(rc < 0)
  1618. goto fail;
  1619. rc = httpParseHeaders(0, NULL, buf, rc, NULL,
  1620. NULL, &length, NULL, NULL, NULL,
  1621. &date, &last_modified, &expires, &age,
  1622. &atime, &body_offset, NULL,
  1623. NULL, NULL, NULL, NULL, &location, NULL, NULL);
  1624. if(rc < 0 || location == NULL)
  1625. goto fail;
  1626. if(body_offset < 0)
  1627. body_offset = n;
  1628. size = sb->st_size - body_offset;
  1629. if(size < 0)
  1630. size = 0;
  1631. } else if(S_ISDIR(sb->st_mode)) {
  1632. char *n;
  1633. n = dirnameUrl(buf, 512, (char*)filename, strlen(filename));
  1634. if(n == NULL)
  1635. goto fail;
  1636. location = strdup(n);
  1637. if(location == NULL)
  1638. goto fail;
  1639. length = -1;
  1640. size = -1;
  1641. body_offset = -1;
  1642. age = -1;
  1643. atime = -1;
  1644. date = -1;
  1645. last_modified = -1;
  1646. expires = -1;
  1647. } else {
  1648. goto fail;
  1649. }
  1650. dobject = malloc(sizeof(DiskObjectRec));
  1651. if(!dobject)
  1652. goto fail;
  1653. fn = strdup(filename);
  1654. if(!fn)
  1655. goto fail;
  1656. if(buf_is_chunk)
  1657. dispose_chunk(buf);
  1658. else
  1659. free(buf);
  1660. dobject->location = location;
  1661. dobject->filename = fn;
  1662. dobject->length = length;
  1663. dobject->body_offset = body_offset;
  1664. dobject->size = size;
  1665. dobject->age = age;
  1666. dobject->access = atime;
  1667. dobject->date = date;
  1668. dobject->last_modified = last_modified;
  1669. dobject->expires = expires;
  1670. if(fd >= 0) close(fd);
  1671. return dobject;
  1672. fail:
  1673. if(buf_is_chunk)
  1674. dispose_chunk(buf);
  1675. else
  1676. free(buf);
  1677. fail2:
  1678. if(fd >= 0) close(fd);
  1679. if(location) free(location);
  1680. return NULL;
  1681. }
  1682. DiskObjectPtr
  1683. processObject(DiskObjectPtr dobjects, char *filename, struct stat *sb)
  1684. {
  1685. DiskObjectPtr dobject = NULL;
  1686. int c = 0;
  1687. dobject = readDiskObject((char*)filename, sb);
  1688. if(dobject == NULL)
  1689. return dobjects;
  1690. if(!dobjects ||
  1691. (c = strcmp(dobject->location, dobjects->location)) <= 0) {
  1692. if(dobjects && c == 0) {
  1693. mergeDobjects(dobjects, dobject);
  1694. } else {
  1695. dobject->next = dobjects;
  1696. dobjects = dobject;
  1697. }
  1698. } else {
  1699. DiskObjectPtr other = dobjects;
  1700. while(other->next) {
  1701. c = strcmp(dobject->location, other->next->location);
  1702. if(c < 0)
  1703. break;
  1704. other = other->next;
  1705. }
  1706. if(strcmp(dobject->location, other->location) == 0) {
  1707. mergeDobjects(other, dobject);
  1708. } else {
  1709. dobject->next = other->next;
  1710. other->next = dobject;
  1711. }
  1712. }
  1713. return dobjects;
  1714. }
  1715. /* Determine whether p is below root */
  1716. static int
  1717. filter(DiskObjectPtr p, const char *root, int n, int recursive)
  1718. {
  1719. char *cp;
  1720. int m = strlen(p->location);
  1721. if(m < n)
  1722. return 0;
  1723. if(memcmp(root, p->location, n) != 0)
  1724. return 0;
  1725. if(recursive)
  1726. return 1;
  1727. if(m == 0 || p->location[m - 1] == '/')
  1728. return 1;
  1729. cp = strchr(p->location + n, '/');
  1730. if(cp && cp - p->location != m - 1)
  1731. return 0;
  1732. return 1;
  1733. }
  1734. /* Filter out all disk objects that are not under root */
  1735. DiskObjectPtr
  1736. filterDiskObjects(DiskObjectPtr from, const char *root, int recursive)
  1737. {
  1738. int n = strlen(root);
  1739. DiskObjectPtr p, q;
  1740. while(from && !filter(from, root, n, recursive)) {
  1741. p = from;
  1742. from = p->next;
  1743. free(p->location);
  1744. free(p);
  1745. }
  1746. p = from;
  1747. while(p && p->next) {
  1748. if(!filter(p->next, root, n, recursive)) {
  1749. q = p->next;
  1750. p->next = q->next;
  1751. free(q->location);
  1752. free(q);
  1753. } else {
  1754. p = p->next;
  1755. }
  1756. }
  1757. return from;
  1758. }
  1759. DiskObjectPtr
  1760. insertRoot(DiskObjectPtr from, const char *root)
  1761. {
  1762. DiskObjectPtr p;
  1763. p = from;
  1764. while(p) {
  1765. if(strcmp(root, p->location) == 0)
  1766. return from;
  1767. p = p->next;
  1768. }
  1769. p = malloc(sizeof(DiskObjectRec));
  1770. if(!p) return from;
  1771. p->location = strdup(root);
  1772. if(p->location == NULL) {
  1773. free(p);
  1774. return from;
  1775. }
  1776. p->filename = NULL;
  1777. p->length = -1;
  1778. p->size = -1;
  1779. p->age = -1;
  1780. p->access = -1;
  1781. p->last_modified = -1;
  1782. p->expires = -1;
  1783. p->next = from;
  1784. return p;
  1785. }
  1786. /* Insert all missing directories in a sorted list of dobjects */
  1787. DiskObjectPtr
  1788. insertDirs(DiskObjectPtr from)
  1789. {
  1790. DiskObjectPtr p, q, new;
  1791. int n, m;
  1792. char *cp;
  1793. p = NULL; q = from;
  1794. while(q) {
  1795. n = strlen(q->location);
  1796. if(n > 0 && q->location[n - 1] != '/') {
  1797. cp = strrchr(q->location, '/');
  1798. m = cp - q->location + 1;
  1799. if(cp && (!p || strlen(p->location) < m ||
  1800. memcmp(p->location, q->location, m) != 0)) {
  1801. new = malloc(sizeof(DiskObjectRec));
  1802. if(!new) break;
  1803. new->location = strdup_n(q->location, m);
  1804. if(new->location == NULL) {
  1805. free(new);
  1806. break;
  1807. }
  1808. new->filename = NULL;
  1809. new->length = -1;
  1810. new->size = -1;
  1811. new->age = -1;
  1812. new->access = -1;
  1813. new->last_modified = -1;
  1814. new->expires = -1;
  1815. new->next = q;
  1816. if(p)
  1817. p->next = new;
  1818. else
  1819. from = new;
  1820. }
  1821. }
  1822. p = q;
  1823. q = q->next;
  1824. }
  1825. return from;
  1826. }
  1827. void
  1828. indexDiskObjects(FILE *out, const char *root, int recursive)
  1829. {
  1830. int n, i, isdir;
  1831. DIR *dir;
  1832. struct dirent *dirent;
  1833. char buf[1024];
  1834. char *fts_argv[2];
  1835. FTS *fts;
  1836. FTSENT *fe;
  1837. DiskObjectPtr dobjects = NULL;
  1838. char *of = root[0] == '\0' ? "" : " of ";
  1839. fprintf(out, "<!DOCTYPE HTML PUBLIC "
  1840. "\"-//W3C//DTD HTML 4.01 Transitional//EN\" "
  1841. "\"http://www.w3.org/TR/html4/loose.dtd\">\n"
  1842. "<html><head>\n"
  1843. "<title>%s%s%s</title>\n"
  1844. "</head><body>\n"
  1845. "<h1>%s%s%s</h1>\n",
  1846. recursive ? "Recursive index" : "Index", of, root,
  1847. recursive ? "Recursive index" : "Index", of, root);
  1848. if(diskCacheRoot == NULL || diskCacheRoot->length <= 0) {
  1849. fprintf(out, "<p>No <tt>diskCacheRoot</tt>.</p>\n");
  1850. goto trailer;
  1851. }
  1852. if(diskCacheRoot->length >= 1024) {
  1853. fprintf(out,
  1854. "<p>The value of <tt>diskCacheRoot</tt> is "
  1855. "too long (%d).</p>\n",
  1856. diskCacheRoot->length);
  1857. goto trailer;
  1858. }
  1859. if(strlen(root) < 8) {
  1860. memcpy(buf, diskCacheRoot->string, diskCacheRoot->length);
  1861. buf[diskCacheRoot->length] = '\0';
  1862. n = diskCacheRoot->length;
  1863. } else {
  1864. n = urlDirname(buf, 1024, root, strlen(root));
  1865. }
  1866. if(n > 0) {
  1867. if(recursive) {
  1868. dir = NULL;
  1869. fts_argv[0] = buf;
  1870. fts_argv[1] = NULL;
  1871. fts = fts_open(fts_argv, FTS_LOGICAL, NULL);
  1872. if(fts) {
  1873. while(1) {
  1874. fe = fts_read(fts);
  1875. if(!fe) break;
  1876. if(fe->fts_info != FTS_DP)
  1877. dobjects =
  1878. processObject(dobjects,
  1879. fe->fts_path,
  1880. fe->fts_info == FTS_NS ||
  1881. fe->fts_info == FTS_NSOK ?
  1882. fe->fts_statp : NULL);
  1883. }
  1884. fts_close(fts);
  1885. }
  1886. } else {
  1887. dir = opendir(buf);
  1888. if(dir) {
  1889. while(1) {
  1890. dirent = readdir(dir);
  1891. if(!dirent) break;
  1892. if(n + strlen(dirent->d_name) < 1024) {
  1893. strcpy(buf + n, dirent->d_name);
  1894. } else {
  1895. continue;
  1896. }
  1897. dobjects = processObject(dobjects, buf, NULL);
  1898. }
  1899. closedir(dir);
  1900. } else {
  1901. fprintf(out, "<p>Couldn't open directory: %s (%d).</p>\n",
  1902. strerror(errno), errno);
  1903. goto trailer;
  1904. }
  1905. }
  1906. }
  1907. if(dobjects) {
  1908. int entryno;
  1909. dobjects = insertRoot(dobjects, root);
  1910. dobjects = insertDirs(dobjects);
  1911. dobjects = filterDiskObjects(dobjects, root, recursive);
  1912. buf[0] = '\0';
  1913. alternatingHttpStyle(out, "diskcachelist");
  1914. fprintf(out, "<table id=diskcachelist>\n");
  1915. fprintf(out, "<tbody>\n");
  1916. entryno = 0;
  1917. while(dobjects) {
  1918. DiskObjectPtr dobject = dobjects;
  1919. i = strlen(dobject->location);
  1920. isdir = (i == 0 || dobject->location[i - 1] == '/');
  1921. if(entryno % 2)
  1922. fprintf(out, "<tr class=odd>");
  1923. else
  1924. fprintf(out, "<tr class=even>");
  1925. if(dobject->size >= 0) {
  1926. fprintf(out, "<td><a href=\"%s\"><tt>",
  1927. dobject->location);
  1928. htmlPrint(out,
  1929. dobject->location, strlen(dobject->location));
  1930. fprintf(out, "</tt></a></td> ");
  1931. if(dobject->length >= 0) {
  1932. if(dobject->size == dobject->length)
  1933. fprintf(out, "<td>%d</td> ", dobject->length);
  1934. else
  1935. fprintf(out, "<td>%d/%d</td> ",
  1936. dobject->size, dobject->length);
  1937. } else {
  1938. /* Avoid a trigraph. */
  1939. fprintf(out, "<td>%d/<em>??" "?</em></td> ", dobject->size);
  1940. }
  1941. if(dobject->last_modified >= 0) {
  1942. struct tm *tm = gmtime(&dobject->last_modified);
  1943. if(tm == NULL)
  1944. n = -1;
  1945. else
  1946. n = strftime(buf, 1024, "%d.%m.%Y", tm);
  1947. } else
  1948. n = -1;
  1949. if(n > 0) {
  1950. buf[n] = '\0';
  1951. fprintf(out, "<td>%s</td> ", buf);
  1952. } else {
  1953. fprintf(out, "<td></td>");
  1954. }
  1955. if(dobject->date >= 0) {
  1956. struct tm *tm = gmtime(&dobject->date);
  1957. if(tm == NULL)
  1958. n = -1;
  1959. else
  1960. n = strftime(buf, 1024, "%d.%m.%Y", tm);
  1961. } else
  1962. n = -1;
  1963. if(n > 0) {
  1964. buf[n] = '\0';
  1965. fprintf(out, "<td>%s</td>", buf);
  1966. } else {
  1967. fprintf(out, "<td></td>");
  1968. }
  1969. } else {
  1970. fprintf(out, "<td><tt>");
  1971. htmlPrint(out, dobject->location,
  1972. strlen(dobject->location));
  1973. fprintf(out, "</tt></td><td></td><td></td><td></td>");
  1974. }
  1975. if(isdir) {
  1976. fprintf(out, "<td><a href=\"/polipo/index?%s\">plain</a></td>"
  1977. "<td><a href=\"/polipo/recursive-index?%s\">"
  1978. "recursive</a></td>",
  1979. dobject->location, dobject->location);
  1980. }
  1981. fprintf(out, "</tr>\n");
  1982. entryno++;
  1983. dobjects = dobject->next;
  1984. free(dobject->location);
  1985. free(dobject->filename);
  1986. free(dobject);
  1987. }
  1988. fprintf(out, "</tbody>\n");
  1989. fprintf(out, "</table>\n");
  1990. }
  1991. trailer:
  1992. fprintf(out, "<p><a href=\"/polipo/\">back</a></p>\n");
  1993. fprintf(out, "</body></html>\n");
  1994. return;
  1995. }
  /* Count the leading zero bytes of buf[0..n).

     The buffer is scanned one unsigned long at a time; as soon as a
     non-zero word is found the offset of that word is returned, so for
     the word-aligned part the result is rounded down to a multiple of
     sizeof(unsigned long).  Any bytes left after the last whole word
     are checked one by one.  Returns n if the whole buffer is zero and
     0 if n is not positive.

     Words are loaded with memcpy, so buf needs no particular alignment
     and no pointer of a different type is dereferenced. */
  static int
  checkForZeroes(char *buf, int n)
  {
      const int word = (int)sizeof(unsigned long);
      int pos = 0;

      if(n <= 0)
          return 0;

      /* Whole words. */
      while(pos + word <= n) {
          unsigned long w;
          memcpy(&w, buf + pos, sizeof(w));
          if(w != 0UL)
              return pos;
          pos += word;
      }

      /* Remaining bytes, if n is not a multiple of the word size. */
      while(pos < n && buf[pos] == 0)
          pos++;

      return pos;
  }
  2012. static int
  2013. copyFile(int from, char *filename, int n)
  2014. {
  2015. char *buf;
  2016. int to, offset, nread, nzeroes, rc;
  2017. buf = malloc(CHUNK_SIZE);
  2018. if(buf == NULL)
  2019. return -1;
  2020. to = open(filename, O_RDWR | O_CREAT | O_EXCL | O_BINARY,
  2021. diskCacheFilePermissions);
  2022. if(to < 0) {
  2023. free(buf);
  2024. return -1;
  2025. }
  2026. offset = 0;
  2027. while(offset < n) {
  2028. nread = read(from, buf, MIN(CHUNK_SIZE, n - offset));
  2029. if(nread <= 0)
  2030. break;
  2031. nzeroes = checkForZeroes(buf, nread & -8);
  2032. if(nzeroes > 0) {
  2033. /* I like holes */
  2034. rc = lseek(to, nzeroes, SEEK_CUR);
  2035. if(rc != offset + nzeroes) {
  2036. if(rc < 0)
  2037. do_log_error(L_ERROR, errno, "Couldn't extend file");
  2038. else
  2039. do_log(L_ERROR,
  2040. "Couldn't extend file: "
  2041. "unexpected offset %d != %d + %d.\n",
  2042. rc, offset, nread);
  2043. break;
  2044. }
  2045. }
  2046. if(nread > nzeroes) {
  2047. rc = write(to, buf + nzeroes, nread - nzeroes);
  2048. if(rc != nread - nzeroes) {
  2049. if(rc < 0)
  2050. do_log_error(L_ERROR, errno, "Couldn't write");
  2051. else
  2052. do_log(L_ERROR, "Short write.\n");
  2053. break;
  2054. }
  2055. }
  2056. offset += nread;
  2057. }
  2058. free(buf);
  2059. close(to);
  2060. if(offset <= 0)
  2061. unlink(filename); /* something went wrong straight away */
  2062. return 1;
  2063. }
  2064. static long int
  2065. expireFile(char *filename, struct stat *sb,
  2066. int *considered, int *unlinked, int *truncated)
  2067. {
  2068. DiskObjectPtr dobject = NULL;
  2069. time_t t;
  2070. int fd, rc;
  2071. long int ret = sb->st_size;
  2072. if(!preciseExpiry) {
  2073. t = sb->st_mtime;
  2074. if(t > current_time.tv_sec + 1) {
  2075. do_log(L_WARN, "File %s has access time in the future.\n",
  2076. filename);
  2077. t = current_time.tv_sec;
  2078. }
  2079. if(t > current_time.tv_sec - diskCacheUnlinkTime &&
  2080. (sb->st_size < diskCacheTruncateSize ||
  2081. t > current_time.tv_sec - diskCacheTruncateTime))
  2082. return ret;
  2083. }
  2084. (*considered)++;
  2085. dobject = readDiskObject(filename, sb);
  2086. if(!dobject) {
  2087. do_log(L_ERROR, "Incorrect disk entry %s -- removing.\n",
  2088. scrub(filename));
  2089. rc = unlink(filename);
  2090. if(rc < 0) {
  2091. do_log_error(L_ERROR, errno,
  2092. "Couldn't unlink %s", scrub(filename));
  2093. return ret;
  2094. } else {
  2095. (*unlinked)++;
  2096. return 0;
  2097. }
  2098. }
  2099. t = dobject->access;
  2100. if(t < 0) t = dobject->age;
  2101. if(t < 0) t = dobject->date;
  2102. if(t > current_time.tv_sec)
  2103. do_log(L_WARN,
  2104. "Disk entry %s (%s) has access time in the future.\n",
  2105. scrub(dobject->location), scrub(dobject->filename));
  2106. if(t < current_time.tv_sec - diskCacheUnlinkTime) {
  2107. rc = unlink(dobject->filename);
  2108. if(rc < 0) {
  2109. do_log_error(L_ERROR, errno, "Couldn't unlink %s",
  2110. scrub(filename));
  2111. } else {
  2112. (*unlinked)++;
  2113. ret = 0;
  2114. }
  2115. } else if(dobject->size >
  2116. diskCacheTruncateSize + 4 * dobject->body_offset &&
  2117. t < current_time.tv_sec - diskCacheTruncateTime) {
  2118. /* We need to copy rather than simply truncate in place: the
  2119. latter would confuse a running polipo. */
  2120. fd = open(dobject->filename, O_RDONLY | O_BINARY, 0);
  2121. rc = unlink(dobject->filename);
  2122. if(rc < 0) {
  2123. do_log_error(L_ERROR, errno, "Couldn't unlink %s",
  2124. scrub(filename));
  2125. close(fd);
  2126. fd = -1;
  2127. } else {
  2128. (*unlinked)++;
  2129. copyFile(fd, dobject->filename,
  2130. dobject->body_offset + diskCacheTruncateSize);
  2131. close(fd);
  2132. (*unlinked)--;
  2133. (*truncated)++;
  2134. ret = sb->st_size - dobject->body_offset + diskCacheTruncateSize;
  2135. }
  2136. }
  2137. free(dobject->location);
  2138. free(dobject->filename);
  2139. free(dobject);
  2140. return ret;
  2141. }
  2142. void
  2143. expireDiskObjects()
  2144. {
  2145. int rc;
  2146. char *fts_argv[2];
  2147. FTS *fts;
  2148. FTSENT *fe;
  2149. int files = 0, considered = 0, unlinked = 0, truncated = 0;
  2150. int dirs = 0, rmdirs = 0;
  2151. long left = 0, total = 0;
  2152. if(diskCacheRoot == NULL ||
  2153. diskCacheRoot->length <= 0 || diskCacheRoot->string[0] != '/')
  2154. return;
  2155. fts_argv[0] = diskCacheRoot->string;
  2156. fts_argv[1] = NULL;
  2157. fts = fts_open(fts_argv, FTS_LOGICAL, NULL);
  2158. if(fts == NULL) {
  2159. do_log_error(L_ERROR, errno, "Couldn't fts_open disk cache");
  2160. } else {
  2161. while(1) {
  2162. gettimeofday(&current_time, NULL);
  2163. fe = fts_read(fts);
  2164. if(!fe) break;
  2165. if(fe->fts_info == FTS_D)
  2166. continue;
  2167. if(fe->fts_info == FTS_DP || fe->fts_info == FTS_DC ||
  2168. fe->fts_info == FTS_DNR) {
  2169. if(fe->fts_accpath[0] == '/' &&
  2170. strlen(fe->fts_accpath) <= diskCacheRoot->length)
  2171. continue;
  2172. dirs++;
  2173. rc = rmdir(fe->fts_accpath);
  2174. if(rc >= 0)
  2175. rmdirs++;
  2176. else if(errno != ENOTEMPTY && errno != EEXIST)
  2177. do_log_error(L_ERROR, errno,
  2178. "Couldn't remove directory %s",
  2179. scrub(fe->fts_accpath));
  2180. continue;
  2181. } else if(fe->fts_info == FTS_NS) {
  2182. do_log_error(L_ERROR, fe->fts_errno, "Couldn't stat file %s",
  2183. scrub(fe->fts_accpath));
  2184. continue;
  2185. } else if(fe->fts_info == FTS_ERR) {
  2186. do_log_error(L_ERROR, fe->fts_errno,
  2187. "Couldn't fts_read disk cache");
  2188. break;
  2189. }
  2190. if(!S_ISREG(fe->fts_statp->st_mode)) {
  2191. do_log(L_ERROR, "Unexpected file %s type 0%o.\n",
  2192. fe->fts_accpath, (unsigned int)fe->fts_statp->st_mode);
  2193. continue;
  2194. }
  2195. files++;
  2196. left += expireFile(fe->fts_accpath, fe->fts_statp,
  2197. &considered, &unlinked, &truncated);
  2198. total += fe->fts_statp->st_size;
  2199. }
  2200. fts_close(fts);
  2201. }
  2202. printf("Disk cache purged.\n");
  2203. printf("%d files, %d considered, %d removed, %d truncated "
  2204. "(%ldkB -> %ldkB).\n",
  2205. files, considered, unlinked, truncated, total/1024, left/1024);
  2206. printf("%d directories, %d removed.\n", dirs, rmdirs);
  2207. return;
  2208. }
  2209. #else
  /* #else variant, built when the disk cache implementation above is
     compiled out: does nothing. */
  void
  preinitDiskcache()
  {
  }
  /* #else variant, built when the disk cache implementation above is
     compiled out: does nothing. */
  void
  initDiskcache()
  {
  }
  2220. int
  2221. writeoutToDisk(ObjectPtr object, int upto, int max)
  2222. {
  2223. return 0;
  2224. }
  2225. int
  2226. destroyDiskEntry(ObjectPtr object, int d)
  2227. {
  2228. return 0;
  2229. }
  2230. ObjectPtr
  2231. objectGetFromDisk(ObjectPtr object)
  2232. {
  2233. return NULL;
  2234. }
  2235. int
  2236. objectFillFromDisk(ObjectPtr object, int offset, int chunks)
  2237. {
  2238. return 0;
  2239. }
  2240. int
  2241. revalidateDiskEntry(ObjectPtr object)
  2242. {
  2243. return 0;
  2244. }
  2245. void
  2246. dirtyDiskEntry(ObjectPtr object)
  2247. {
  2248. return;
  2249. }
  2250. void
  2251. expireDiskObjects()
  2252. {
  2253. do_log(L_ERROR, "Disk cache not supported in this version.\n");
  2254. }
  2255. int
  2256. diskEntrySize(ObjectPtr object)
  2257. {
  2258. return -1;
  2259. }
  2260. #endif