Code sample

C# method for cropping text without breaking words

I often need to crop a longer text down to a shorter version. That in itself is easy. However, it's often nice to make it less obvious that the text has been shortened by ensuring that it consists of whole words. Below is a method for doing so with .NET/C#.

It's not the most readable method, but compared to, for instance, splitting the string by whitespace and building it up again using a StringBuilder, it's much, much faster.

//using System;
//using System.Collections.Generic;

/// <summary>
/// Extension methods for <see cref="string"/>: a null/empty check and a 
/// word-aware crop.
/// </summary>
public static class StringExtensions
{
  /// <summary>
  /// Extension-method form of <see cref="string.IsNullOrEmpty(string)"/>.
  /// </summary>
  /// <param name="value">The string to test; may be null.</param>
  /// <returns>True if <paramref name="value"/> is null or has zero 
  /// length, otherwise false.</returns>
  public static bool IsNullOrEmpty(this string value)
  {
    return string.IsNullOrEmpty(value);
  }

  // Used by CropWholeWords when the caller does not supply its own set.
  private static readonly HashSet<char> DefaultNonWordCharacters 
    = new HashSet<char> { ',', '.', ':', ';' };

  /// <summary>
  /// Returns a substring from the start of <paramref name="value"/> no 
  /// longer than <paramref name="length"/>.
  /// Returning only whole words is favored over returning a string that 
  /// is exactly <paramref name="length"/> long. 
  /// </summary>
  /// <param name="value">The original string from which the substring 
  /// will be returned.</param>
  /// <param name="length">The maximum length of the substring.</param>
  /// <param name="nonWordCharacters">Characters that, while not whitespace, 
  /// are not considered part of words and therefore can be removed from a 
  /// word in the end of the returned value. 
  /// Defaults to ",", ".", ":" and ";" if null.</param>
  /// <returns>
  /// <paramref name="value"/> itself when it already fits within 
  /// <paramref name="length"/>; otherwise the longest prefix that ends on 
  /// a word boundary. If the first word alone is longer than 
  /// <paramref name="length"/>, the first <paramref name="length"/> 
  /// characters are returned instead of an empty string.
  /// </returns>
  /// <exception cref="System.ArgumentException">
  /// Thrown when <paramref name="length"/> is negative
  /// </exception>
  /// <exception cref="System.ArgumentNullException">
  /// Thrown when <paramref name="value"/> is null
  /// </exception>
  public static string CropWholeWords(
    this string value, 
    int length, 
    HashSet<char> nonWordCharacters = null)
  {
    if (value == null)
    {
      throw new ArgumentNullException("value");
    }

    if (length < 0)
    {
      throw new ArgumentException("Negative values not allowed.", "length");
    }

    if (nonWordCharacters == null)
    {
      nonWordCharacters = DefaultNonWordCharacters;
    }

    if (length >= value.Length)
    {
      return value;
    }

    // From here on length < value.Length, so value[end] is always a 
    // valid index while we walk backwards looking for a word boundary.
    int end = length;

    while (end > 0)
    {
      char current = value[end];

      if (current.IsWhitespace())
      {
        break;
      }

      //Removing a character that isn't whitespace but not part 
      //of the word either (ie ".") given that the character is 
      //followed by whitespace or the end of the string makes it
      //possible to include the word, so we do that.
      bool endsWordHere = end + 1 == value.Length 
                          || value[end + 1].IsWhitespace();
      if (endsWordHere && nonWordCharacters.Contains(current))
      {
        break;
      }

      end--;
    }

    if (end == 0)
    {
      //If the first word is longer than the length we favor 
      //returning it as cropped over returning nothing at all.
      end = length;
    }

    return value.Substring(0, end);
  }

  /// <summary>
  /// Tells whether <paramref name="character"/> separates words. 
  /// Delegates to <see cref="char.IsWhiteSpace(char)"/> so that spaces, 
  /// tabs, line breaks and other Unicode whitespace are all recognized.
  /// </summary>
  private static bool IsWhitespace(this char character)
  {
    return char.IsWhiteSpace(character);
  }
}
Joel Abrahamsson

Joel Abrahamsson

I'm a passionate web developer and systems architect living in Stockholm, Sweden. I work as CTO for a large media site and enjoy developing with all technologies, especially .NET, Node.js, and ElasticSearch. Read more

More about C#