145ee55d0SChristian Halstrick /* 2aa052ea0SThomas Wolf * Copyright (C) 2016, 2021 Christian Halstrick <christian.halstrick@sap.com> and others 345ee55d0SChristian Halstrick * 45c5f7c6bSMatthias Sohn * This program and the accompanying materials are made available under the 55c5f7c6bSMatthias Sohn * terms of the Eclipse Distribution License v. 1.0 which is available at 65c5f7c6bSMatthias Sohn * https://www.eclipse.org/org/documents/edl-v10.php. 745ee55d0SChristian Halstrick * 85c5f7c6bSMatthias Sohn * SPDX-License-Identifier: BSD-3-Clause 945ee55d0SChristian Halstrick */ 1045ee55d0SChristian Halstrick package org.eclipse.jgit.lfs; 1145ee55d0SChristian Halstrick 1230c6c754SDavid Pursehouse import static java.nio.charset.StandardCharsets.UTF_8; 13171f84a0SDavid Pursehouse 14*616a8889SThomas Wolf import java.io.BufferedInputStream; 15bcb5a431SChristian Halstrick import java.io.BufferedReader; 16*616a8889SThomas Wolf import java.io.ByteArrayInputStream; 17bcb5a431SChristian Halstrick import java.io.IOException; 18bcb5a431SChristian Halstrick import java.io.InputStream; 19bcb5a431SChristian Halstrick import java.io.InputStreamReader; 2045ee55d0SChristian Halstrick import java.io.OutputStream; 2145ee55d0SChristian Halstrick import java.io.PrintStream; 221572964eSChristian Halstrick import java.io.UnsupportedEncodingException; 23a4feeb01SMatthias Sohn import java.util.Locale; 24aa052ea0SThomas Wolf import java.util.Objects; 2545ee55d0SChristian Halstrick 26bcb5a431SChristian Halstrick import org.eclipse.jgit.annotations.Nullable; 274b7747ccSMatthias Sohn import org.eclipse.jgit.lfs.lib.AnyLongObjectId; 2845ee55d0SChristian Halstrick import org.eclipse.jgit.lfs.lib.Constants; 2945ee55d0SChristian Halstrick import org.eclipse.jgit.lfs.lib.LongObjectId; 30*616a8889SThomas Wolf import org.eclipse.jgit.util.IO; 3145ee55d0SChristian Halstrick 3245ee55d0SChristian Halstrick /** 3345ee55d0SChristian Halstrick * Represents an LFS pointer file 3445ee55d0SChristian Halstrick * 3545ee55d0SChristian Halstrick * @since 4.6 3645ee55d0SChristian Halstrick */ 37c0bb9928SMarkus Duft public class LfsPointer implements Comparable<LfsPointer> { 3845ee55d0SChristian Halstrick /** 3945ee55d0SChristian Halstrick * The version of the LfsPointer file format 4045ee55d0SChristian Halstrick */ 4145ee55d0SChristian Halstrick public static final String VERSION = "https://git-lfs.github.com/spec/v1"; //$NON-NLS-1$ 4245ee55d0SChristian Halstrick 4345ee55d0SChristian Halstrick /** 440e187f14SDariusz Luksza * The version of the LfsPointer file format using legacy URL 450e187f14SDariusz Luksza * @since 4.7 460e187f14SDariusz Luksza */ 470e187f14SDariusz Luksza public static final String VERSION_LEGACY = "https://hawser.github.com/spec/v1"; //$NON-NLS-1$ 480e187f14SDariusz Luksza 490e187f14SDariusz Luksza /** 50c0bb9928SMarkus Duft * Don't inspect files that are larger than this threshold to avoid 51c0bb9928SMarkus Duft * excessive reading. No pointer file should be larger than this. 52c0bb9928SMarkus Duft * @since 4.11 53c0bb9928SMarkus Duft */ 54c0bb9928SMarkus Duft public static final int SIZE_THRESHOLD = 200; 55c0bb9928SMarkus Duft 56c0bb9928SMarkus Duft /** 5745ee55d0SChristian Halstrick * The name of the hash function as used in the pointer files. This will 5845ee55d0SChristian Halstrick * evaluate to "sha256" 5945ee55d0SChristian Halstrick */ 6045ee55d0SChristian Halstrick public static final String HASH_FUNCTION_NAME = Constants.LONG_HASH_FUNCTION 61a4feeb01SMatthias Sohn .toLowerCase(Locale.ROOT).replace("-", ""); //$NON-NLS-1$ //$NON-NLS-2$ 6245ee55d0SChristian Halstrick 63*616a8889SThomas Wolf /** 64*616a8889SThomas Wolf * {@link #SIZE_THRESHOLD} is too low; with lfs extensions a LFS pointer can 65*616a8889SThomas Wolf * be larger. But 8kB should be more than enough. 66*616a8889SThomas Wolf */ 67*616a8889SThomas Wolf static final int FULL_SIZE_THRESHOLD = 8 * 1024; 68*616a8889SThomas Wolf 69aa052ea0SThomas Wolf private final AnyLongObjectId oid; 7045ee55d0SChristian Halstrick 71aa052ea0SThomas Wolf private final long size; 7245ee55d0SChristian Halstrick 7345ee55d0SChristian Halstrick /** 74e0332bfbSMatthias Sohn * <p>Constructor for LfsPointer.</p> 75e0332bfbSMatthias Sohn * 7645ee55d0SChristian Halstrick * @param oid 7745ee55d0SChristian Halstrick * the id of the content 7845ee55d0SChristian Halstrick * @param size 7945ee55d0SChristian Halstrick * the size of the content 8045ee55d0SChristian Halstrick */ LfsPointer(AnyLongObjectId oid, long size)814b7747ccSMatthias Sohn public LfsPointer(AnyLongObjectId oid, long size) { 8245ee55d0SChristian Halstrick this.oid = oid; 8345ee55d0SChristian Halstrick this.size = size; 8445ee55d0SChristian Halstrick } 8545ee55d0SChristian Halstrick 8645ee55d0SChristian Halstrick /** 87e0332bfbSMatthias Sohn * <p>Getter for the field <code>oid</code>.</p> 88e0332bfbSMatthias Sohn * 8945ee55d0SChristian Halstrick * @return the id of the content 9045ee55d0SChristian Halstrick */ getOid()914b7747ccSMatthias Sohn public AnyLongObjectId getOid() { 9245ee55d0SChristian Halstrick return oid; 9345ee55d0SChristian Halstrick } 9445ee55d0SChristian Halstrick 9545ee55d0SChristian Halstrick /** 96e0332bfbSMatthias Sohn * <p>Getter for the field <code>size</code>.</p> 97e0332bfbSMatthias Sohn * 9845ee55d0SChristian Halstrick * @return the size of the content 9945ee55d0SChristian Halstrick */ getSize()10045ee55d0SChristian Halstrick public long getSize() { 10145ee55d0SChristian Halstrick return size; 10245ee55d0SChristian Halstrick } 10345ee55d0SChristian Halstrick 10445ee55d0SChristian Halstrick /** 10545ee55d0SChristian Halstrick * Encode this object into the LFS format defined by {@link #VERSION} 10645ee55d0SChristian Halstrick * 10745ee55d0SChristian Halstrick * @param out 108e0332bfbSMatthias Sohn * the {@link java.io.OutputStream} into which the encoded data should be 10945ee55d0SChristian Halstrick * written 11045ee55d0SChristian Halstrick */ encode(OutputStream out)11145ee55d0SChristian Halstrick public void encode(OutputStream out) { 1121572964eSChristian Halstrick try (PrintStream ps = new PrintStream(out, false, 11330c6c754SDavid Pursehouse UTF_8.name())) { 11445ee55d0SChristian Halstrick ps.print("version "); //$NON-NLS-1$ 1151572964eSChristian Halstrick ps.print(VERSION + "\n"); //$NON-NLS-1$ 11645ee55d0SChristian Halstrick ps.print("oid " + HASH_FUNCTION_NAME + ":"); //$NON-NLS-1$ //$NON-NLS-2$ 1171572964eSChristian Halstrick ps.print(oid.name() + "\n"); //$NON-NLS-1$ 11845ee55d0SChristian Halstrick ps.print("size "); //$NON-NLS-1$ 1191572964eSChristian Halstrick ps.print(size + "\n"); //$NON-NLS-1$ 1201572964eSChristian Halstrick } catch (UnsupportedEncodingException e) { 1211572964eSChristian Halstrick // should not happen, we are using a standard charset 12245ee55d0SChristian Halstrick } 12345ee55d0SChristian Halstrick } 12445ee55d0SChristian Halstrick 125bcb5a431SChristian Halstrick /** 126bcb5a431SChristian Halstrick * Try to parse the data provided by an InputStream to the format defined by 127*616a8889SThomas Wolf * {@link #VERSION}. If the given stream supports mark and reset as 128*616a8889SThomas Wolf * indicated by {@link InputStream#markSupported()}, its input position will 129*616a8889SThomas Wolf * be reset if the stream content is not actually a LFS pointer (i.e., when 130*616a8889SThomas Wolf * {@code null} is returned). If the stream content is an invalid LFS 131*616a8889SThomas Wolf * pointer or the given stream does not support mark/reset, the input 132*616a8889SThomas Wolf * position may not be reset. 133bcb5a431SChristian Halstrick * 134bcb5a431SChristian Halstrick * @param in 135e0332bfbSMatthias Sohn * the {@link java.io.InputStream} from where to read the data 136*616a8889SThomas Wolf * @return an {@link org.eclipse.jgit.lfs.LfsPointer} or {@code null} if the 137*616a8889SThomas Wolf * stream was not parseable as LfsPointer 138e0332bfbSMatthias Sohn * @throws java.io.IOException 139bcb5a431SChristian Halstrick */ 140bcb5a431SChristian Halstrick @Nullable parseLfsPointer(InputStream in)141bcb5a431SChristian Halstrick public static LfsPointer parseLfsPointer(InputStream in) 142bcb5a431SChristian Halstrick throws IOException { 143*616a8889SThomas Wolf if (in.markSupported()) { 144*616a8889SThomas Wolf return parse(in); 145*616a8889SThomas Wolf } 146*616a8889SThomas Wolf // Fallback; note that while parse() resets its input stream, that won't 147*616a8889SThomas Wolf // reset "in". 148*616a8889SThomas Wolf return parse(new BufferedInputStream(in)); 149*616a8889SThomas Wolf } 150*616a8889SThomas Wolf 151*616a8889SThomas Wolf @Nullable parse(InputStream in)152*616a8889SThomas Wolf private static LfsPointer parse(InputStream in) 153*616a8889SThomas Wolf throws IOException { 154*616a8889SThomas Wolf if (!in.markSupported()) { 155*616a8889SThomas Wolf // No translation; internal error 156*616a8889SThomas Wolf throw new IllegalArgumentException( 157*616a8889SThomas Wolf "LFS pointer parsing needs InputStream.markSupported() == true"); //$NON-NLS-1$ 158*616a8889SThomas Wolf } 159*616a8889SThomas Wolf // Try reading only a short block first. 160*616a8889SThomas Wolf in.mark(SIZE_THRESHOLD); 161*616a8889SThomas Wolf byte[] preamble = new byte[SIZE_THRESHOLD]; 162*616a8889SThomas Wolf int length = IO.readFully(in, preamble, 0); 163*616a8889SThomas Wolf if (length < preamble.length || in.read() < 0) { 164*616a8889SThomas Wolf // We have the whole file. Try to parse a pointer from it. 165*616a8889SThomas Wolf try (BufferedReader r = new BufferedReader(new InputStreamReader( 166*616a8889SThomas Wolf new ByteArrayInputStream(preamble, 0, length), UTF_8))) { 167*616a8889SThomas Wolf LfsPointer ptr = parse(r); 168*616a8889SThomas Wolf if (ptr == null) { 169*616a8889SThomas Wolf in.reset(); 170*616a8889SThomas Wolf } 171*616a8889SThomas Wolf return ptr; 172*616a8889SThomas Wolf } 173*616a8889SThomas Wolf } 174*616a8889SThomas Wolf // Longer than SIZE_THRESHOLD: expect "version" to be the first line. 175*616a8889SThomas Wolf boolean hasVersion = checkVersion(preamble); 176*616a8889SThomas Wolf in.reset(); 177*616a8889SThomas Wolf if (!hasVersion) { 178*616a8889SThomas Wolf return null; 179*616a8889SThomas Wolf } 180*616a8889SThomas Wolf in.mark(FULL_SIZE_THRESHOLD); 181*616a8889SThomas Wolf byte[] fullPointer = new byte[FULL_SIZE_THRESHOLD]; 182*616a8889SThomas Wolf length = IO.readFully(in, fullPointer, 0); 183*616a8889SThomas Wolf if (length == fullPointer.length && in.read() >= 0) { 184*616a8889SThomas Wolf in.reset(); 185*616a8889SThomas Wolf return null; // Too long. 186*616a8889SThomas Wolf } 187*616a8889SThomas Wolf try (BufferedReader r = new BufferedReader(new InputStreamReader( 188*616a8889SThomas Wolf new ByteArrayInputStream(fullPointer, 0, length), UTF_8))) { 189*616a8889SThomas Wolf LfsPointer ptr = parse(r); 190*616a8889SThomas Wolf if (ptr == null) { 191*616a8889SThomas Wolf in.reset(); 192*616a8889SThomas Wolf } 193*616a8889SThomas Wolf return ptr; 194*616a8889SThomas Wolf } 195*616a8889SThomas Wolf } 196*616a8889SThomas Wolf parse(BufferedReader r)197*616a8889SThomas Wolf private static LfsPointer parse(BufferedReader r) throws IOException { 198bcb5a431SChristian Halstrick boolean versionLine = false; 199bcb5a431SChristian Halstrick LongObjectId id = null; 200bcb5a431SChristian Halstrick long sz = -1; 201aa052ea0SThomas Wolf // This parsing is a bit too general if we go by the spec at 202aa052ea0SThomas Wolf // https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md 203*616a8889SThomas Wolf // Comment lines are not mentioned in the spec, the "version" line 204*616a8889SThomas Wolf // MUST be the first, and keys are ordered alphabetically. 205*616a8889SThomas Wolf for (String s = r.readLine(); s != null; s = r.readLine()) { 206bcb5a431SChristian Halstrick if (s.startsWith("#") || s.length() == 0) { //$NON-NLS-1$ 207bcb5a431SChristian Halstrick continue; 208aa052ea0SThomas Wolf } else if (s.startsWith("version")) { //$NON-NLS-1$ 209*616a8889SThomas Wolf if (versionLine || !checkVersionLine(s)) { 210aa052ea0SThomas Wolf return null; // Not a LFS pointer 211aa052ea0SThomas Wolf } 212*616a8889SThomas Wolf versionLine = true; 213aa052ea0SThomas Wolf } else { 214aa052ea0SThomas Wolf try { 215aa052ea0SThomas Wolf if (s.startsWith("oid sha256:")) { //$NON-NLS-1$ 216aa052ea0SThomas Wolf if (id != null) { 217aa052ea0SThomas Wolf return null; // Not a LFS pointer 218aa052ea0SThomas Wolf } 219*616a8889SThomas Wolf id = LongObjectId.fromString(s.substring(11).trim()); 220aa052ea0SThomas Wolf } else if (s.startsWith("size")) { //$NON-NLS-1$ 221*616a8889SThomas Wolf if (sz > 0 || s.length() < 5 || s.charAt(4) != ' ') { 222aa052ea0SThomas Wolf return null; // Not a LFS pointer 223aa052ea0SThomas Wolf } 224bcb5a431SChristian Halstrick sz = Long.parseLong(s.substring(5).trim()); 225bcb5a431SChristian Halstrick } 226aa052ea0SThomas Wolf } catch (RuntimeException e) { 227aa052ea0SThomas Wolf // We could not parse the line. If we have a version 228aa052ea0SThomas Wolf // already, this is a corrupt LFS pointer. Otherwise it 229aa052ea0SThomas Wolf // is just not an LFS pointer. 230aa052ea0SThomas Wolf if (versionLine) { 231aa052ea0SThomas Wolf throw e; 232aa052ea0SThomas Wolf } 233aa052ea0SThomas Wolf return null; 234aa052ea0SThomas Wolf } 235aa052ea0SThomas Wolf } 236bcb5a431SChristian Halstrick if (versionLine && id != null && sz > -1) { 237bcb5a431SChristian Halstrick return new LfsPointer(id, sz); 238bcb5a431SChristian Halstrick } 239bcb5a431SChristian Halstrick } 240bcb5a431SChristian Halstrick return null; 241bcb5a431SChristian Halstrick } 242bcb5a431SChristian Halstrick checkVersion(byte[] data)243*616a8889SThomas Wolf private static boolean checkVersion(byte[] data) { 244*616a8889SThomas Wolf // According to the spec at 245*616a8889SThomas Wolf // https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md 246*616a8889SThomas Wolf // it MUST always be the first line. 247*616a8889SThomas Wolf try (BufferedReader r = new BufferedReader( 248*616a8889SThomas Wolf new InputStreamReader(new ByteArrayInputStream(data), UTF_8))) { 249*616a8889SThomas Wolf String s = r.readLine(); 250*616a8889SThomas Wolf if (s != null && s.startsWith("version")) { //$NON-NLS-1$ 251*616a8889SThomas Wolf return checkVersionLine(s); 252*616a8889SThomas Wolf } 253*616a8889SThomas Wolf } catch (IOException e) { 254*616a8889SThomas Wolf // Doesn't occur, we're reading from a byte array! 255*616a8889SThomas Wolf } 256*616a8889SThomas Wolf return false; 257*616a8889SThomas Wolf } 258*616a8889SThomas Wolf checkVersionLine(String s)259*616a8889SThomas Wolf private static boolean checkVersionLine(String s) { 260*616a8889SThomas Wolf if (s.length() < 8 || s.charAt(7) != ' ') { 261*616a8889SThomas Wolf return false; // Not a valid LFS pointer version line 262*616a8889SThomas Wolf } 263*616a8889SThomas Wolf String rest = s.substring(8).trim(); 264*616a8889SThomas Wolf return VERSION.equals(rest) || VERSION_LEGACY.equals(rest); 265*616a8889SThomas Wolf } 266*616a8889SThomas Wolf 267e0332bfbSMatthias Sohn /** {@inheritDoc} */ 26845ee55d0SChristian Halstrick @Override toString()26945ee55d0SChristian Halstrick public String toString() { 2704b7747ccSMatthias Sohn return "LfsPointer: oid=" + oid.name() + ", size=" //$NON-NLS-1$ //$NON-NLS-2$ 27145ee55d0SChristian Halstrick + size; 27245ee55d0SChristian Halstrick } 273c0bb9928SMarkus Duft 274c0bb9928SMarkus Duft /** 275c0bb9928SMarkus Duft * @since 4.11 276c0bb9928SMarkus Duft */ 277c0bb9928SMarkus Duft @Override compareTo(LfsPointer o)278c0bb9928SMarkus Duft public int compareTo(LfsPointer o) { 279c0bb9928SMarkus Duft int x = getOid().compareTo(o.getOid()); 280c0bb9928SMarkus Duft if (x != 0) { 281c0bb9928SMarkus Duft return x; 282c0bb9928SMarkus Duft } 283c0bb9928SMarkus Duft 2843e2ec7e5SDavid Ostrovsky return Long.compare(getSize(), o.getSize()); 285c0bb9928SMarkus Duft } 286aa052ea0SThomas Wolf 287aa052ea0SThomas Wolf @Override hashCode()288aa052ea0SThomas Wolf public int hashCode() { 289aa052ea0SThomas Wolf return Objects.hash(getOid()) * 31 + Long.hashCode(getSize()); 29045ee55d0SChristian Halstrick } 291bcb5a431SChristian Halstrick 292aa052ea0SThomas Wolf @Override equals(Object obj)293aa052ea0SThomas Wolf public boolean equals(Object obj) { 294aa052ea0SThomas Wolf if (this == obj) { 295aa052ea0SThomas Wolf return true; 296aa052ea0SThomas Wolf } 297aa052ea0SThomas Wolf if (obj == null || getClass() != obj.getClass()) { 298aa052ea0SThomas Wolf return false; 299aa052ea0SThomas Wolf } 300aa052ea0SThomas Wolf LfsPointer other = (LfsPointer) obj; 301aa052ea0SThomas Wolf return Objects.equals(getOid(), other.getOid()) 302aa052ea0SThomas Wolf && getSize() == other.getSize(); 303aa052ea0SThomas Wolf } 304aa052ea0SThomas Wolf } 305